269 lines
10 KiB
Python
269 lines
10 KiB
Python
from langchain_openai import ChatOpenAI
|
|
from langchain_core.prompts.prompt import PromptTemplate
|
|
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
|
|
import os
|
|
import requests
|
|
from googleapiclient.discovery import build
|
|
from googleapiclient.errors import HttpError
|
|
import requests
|
|
from loguru import logger
|
|
from dotenv import load_dotenv
|
|
# Load variables from a local .env file into the process environment before
# any of the keys below are read.
load_dotenv()

# Re-export the OpenAI key only when it is actually set: assigning None into
# os.environ raises TypeError, which would mask the real "missing key" problem
# behind an unrelated-looking error at import time.
_openai_api_key = os.getenv("OPENAI_API_KEY")
if _openai_api_key is not None:
    os.environ["OPENAI_API_KEY"] = _openai_api_key

YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
# NOTE: the environment variable read here is PERPLEXITY_AI_API (no _KEY suffix).
PERPLEXITYAI_API_KEY = os.getenv('PERPLEXITY_AI_API')

# Shared chat model used by the prompt chains defined in this module.
llm = ChatOpenAI(model="gpt-4o")
|
|
|
|
|
|
|
|
def get_chat_completion(prompt, api_key=PERPLEXITYAI_API_KEY, timeout=60):
    """Send ``prompt`` to the Perplexity chat-completions API and return the reply text.

    Args:
        prompt: Text sent as the single ``user`` message.
        api_key: Bearer token for the Perplexity API. Defaults to the
            module-level ``PERPLEXITYAI_API_KEY`` (bound when the function is
            defined).
        timeout: Seconds passed to ``requests.post`` so a stalled connection
            cannot block the caller forever.

    Returns:
        The ``content`` of the first choice's message on success. Failures that
        come from the response are reported as plain strings rather than
        exceptions: ``"Request failed with status code: <code>"`` for a non-200
        status, and ``"Unexpected response format."`` when the body is not JSON
        or lacks the expected ``choices[0].message.content`` path.

    Raises:
        requests.RequestException: Transport-level failures (connection error,
            timeout) are not caught here.
    """
    url = "https://api.perplexity.ai/chat/completions"

    payload = {
        "model": "llama-3.1-sonar-small-128k-online",
        "messages": [
            {
                "role": "system",
                "content": "Be precise and concise."
            },
            {
                "role": "user",
                "content": prompt
            }
        ],
        "temperature": 0.2,
        "top_p": 0.9,
        "return_citations": True,
        "search_domain_filter": ["perplexity.ai"],
        "return_images": False,
        "return_related_questions": False,
        "search_recency_filter": "month",
        "top_k": 0,
        "stream": False,
        "presence_penalty": 0,
        "frequency_penalty": 1
    }

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }

    response = requests.post(url, json=payload, headers=headers, timeout=timeout)

    if response.status_code != 200:
        return f"Request failed with status code: {response.status_code}"

    try:
        # Decoding happens inside the try block so that a 200 response with a
        # non-JSON body (ValueError) or an unexpected shape (KeyError,
        # IndexError, TypeError) maps to the same sentinel string.
        return response.json()['choices'][0]['message']['content']
    except (KeyError, IndexError, TypeError, ValueError):
        return "Unexpected response format."
|
|
|
|
|
|
# Prompt text for influencer_data. Kept verbatim (including its wording) because
# it is sent to the model at runtime.
# NOTE(review): the header tokens follow a Llama-3-style chat layout while the
# module-level `llm` is a ChatOpenAI model - confirm this is intentional.
_INFLUENCER_PROMPT_TEMPLATE = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a Influencer Data Extractor AI Agent tasked with extracting information from a search result\n
Backstory: \n
A business brand is looking for some influencer in a specific location or area. He used a tool called Perplexity AI to get this data. \n
This is an amazing too, and yet it can struggle a bit (ai isn't perfect you know). \n
The information returned by this ai can be incomplete, not properly structured and all. \n
This brand needs youe help. \n
This is how you will help the brand: \n
1. You will be given two things, the ai search result and the prompt used to query the ai. \n
2. Your major interest here is formatting and making the structure right. \n
3. You will process the ai result, create a JSON structure of the key information needed by the user and add the corresponding values to it. \n
4. The user prompt is your guide on of how the JSON should be formatted. \n
5. If there's are missing data or something in the ai response, send it back as NA. \n
6. At the end of your processing you want to return a structured response and also make sure it in the best order as expected by the user. \n

Return a structured JSON or dictionary as output. \n
Ensure that the data is properly arranged and in a good format. \n
Please do this carefully and excellently.

<|eot_id|><|start_header_id|>user<|end_header_id|>
AI_SEARCH_RESULT: {search_result} \n
PROMPT: {prompt}

<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""


def influencer_data(search_result: str, prompt:str) -> dict:
    """Ask the shared ``llm`` to restructure a raw search answer as JSON.

    Args:
        search_result: Free-text answer to be restructured.
        prompt: The query that produced ``search_result``; the template tells
            the model to use it as the guide for the JSON layout.

    Returns:
        Whatever ``JsonOutputParser`` yields for the model's reply.
    """
    logger.info("Formatting Influencer Data")

    extraction_prompt = PromptTemplate(
        template=_INFLUENCER_PROMPT_TEMPLATE,
        input_variables=["search_result", "prompt"],
    )

    # prompt -> model -> JSON parser, executed once with both inputs.
    chain = extraction_prompt | llm | JsonOutputParser()
    return chain.invoke({"search_result": search_result, "prompt": prompt})
|
|
|
|
|
|
def combined_influencer_data(prompt: str) -> dict:
    """Fetch influencer data for ``prompt`` and return it in structured form.

    Two stages: ``get_chat_completion`` queries Perplexity, then
    ``influencer_data`` reformats the answer. The first stage reports failures
    as plain strings, and those strings are passed on to the formatter
    unchanged.

    Args:
        prompt: The user's query, used both for the search and as the guide
            for the output structure.

    Returns:
        The value produced by ``influencer_data``.
    """
    # Stage 1: raw answer from the search model.
    logger.info("Using Perplexity Ai to get the influencer data")
    raw_answer = get_chat_completion(prompt)

    # Stage 2: hand the raw answer plus the original query to the formatter.
    logger.info("Formatting the data with OpenAI")
    return influencer_data(raw_answer, prompt)
|
|
|
|
|
|
# Closed set of category labels. It is the default value of the
# `product_categories` argument of product_categorizer and is interpolated
# into that function's prompt.
product_categories = [
    "Beauty & Skincare",
    "Fashion & Lifestyle",
    "Health & Fitness",
    "Travel & Adventure",
    "Food & Beverage",
    "Technology & Gadgets",
    "Gaming & Esports",
    "Parenting & Family",
    "Finance & Business",
    "Wellness & Mental Health",
    "Automotive & Motorsports",
    "Entertainment & Pop Culture",
    "Photography & Visual Arts",
    "Education & Learning",
    "Environmental & Sustainability",
]
|
|
|
|
|
|
# Prompt text for product_categorizer. Kept verbatim (including its wording)
# because it is sent to the model at runtime.
_PRODUCT_CATEGORIZER_TEMPLATE = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a Brand AI Agent tasked with categorizing products into categories\n
There are some categories given by the brand, these are the categories the products are expected to be categorized into.\n
You will be given a list of products and asked to categorize them.\n
You are meant to categorize all the given product into one category. \n
You take the following steps:
1. Looking into all the list of products. \n
2. Understand where they belong to. \n
3. Look into the kist of categories. \n
4. Select the category that this products falls under. \n

You should return the selected category as output. \n
Please do this carefully and correctly.

<|eot_id|><|start_header_id|>user<|end_header_id|>
PRODUCT_LISTS: {product_lists} \n
PRODUCT_CATEGORY: {product_categories}

<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""


def product_categorizer(product_lists: list, product_categories=product_categories) -> str:
    """Ask the shared ``llm`` which single category a set of products belongs to.

    Args:
        product_lists: Products to classify; interpolated into the prompt as-is.
        product_categories: Candidate labels; defaults to the module-level
            ``product_categories`` list.

    Returns:
        The model's reply as a string (via ``StrOutputParser``). The reply is
        not checked against ``product_categories``.
    """
    logger.info("Categorizing products")

    categorizer_prompt = PromptTemplate(
        template=_PRODUCT_CATEGORIZER_TEMPLATE,
        input_variables=["product_lists", "product_categories"],
    )

    # prompt -> model -> plain-string parser.
    chain = categorizer_prompt | llm | StrOutputParser()
    inputs = {
        "product_lists": product_lists,
        "product_categories": product_categories,
    }
    return chain.invoke(inputs)
|
|
|
|
def get_youtube_channel_info(channel_name: str):
    """Look up a YouTube channel by name and return its title, description and counters.

    The first hit of a ``search`` request for ``channel_name`` is taken as the
    channel, then its ``snippet`` and ``statistics`` parts are fetched.

    Args:
        channel_name: Free-text channel name used as the search query.

    Returns:
        On success, a dict with the keys ``"Title"``, ``"Description"``,
        ``"Subscribers"``, ``"Total Views"`` and ``"Total Videos"``. A counter
        that is absent from the API response is reported as ``None``.
        On failure, a string: ``"Channel not found."`` when either request
        returns no items, or ``"Error: <status>"`` for a non-200 response.

    Raises:
        requests.RequestException: Transport-level failures (connection error,
            timeout) are not caught here.
    """
    # Use the key loaded from the environment at module import. The previous
    # code sent the literal text 'YOUTUBE_API_KEY' as the key.
    api_key = YOUTUBE_API_KEY
    timeout_seconds = 10  # keep a stalled connection from blocking the caller

    # Step 1: resolve the channel name to a channel id.
    search_response = requests.get(
        'https://www.googleapis.com/youtube/v3/search',
        params={
            'part': 'id',
            'q': channel_name,
            'type': 'channel',
            'key': api_key,
        },
        timeout=timeout_seconds,
    )
    if search_response.status_code != 200:
        return f"Error: {search_response.status_code}"

    search_items = search_response.json().get('items')
    if not search_items:
        return "Channel not found."
    channel_id = search_items[0]['id']['channelId']

    # Step 2: fetch descriptive data and counters for that id.
    channel_response = requests.get(
        'https://www.googleapis.com/youtube/v3/channels',
        params={
            'part': 'snippet,statistics',
            'id': channel_id,
            'key': api_key,
        },
        timeout=timeout_seconds,
    )
    if channel_response.status_code != 200:
        return f"Error: {channel_response.status_code}"

    channel_items = channel_response.json().get('items')
    if not channel_items:
        return "Channel not found."

    channel = channel_items[0]
    snippet = channel['snippet']
    # NOTE(review): counters are read with .get() because a channel may not
    # expose every statistic (e.g. a hidden subscriber count) - confirm against
    # the API reference.
    statistics = channel.get('statistics', {})

    return {
        "Title": snippet['title'],
        "Description": snippet['description'],
        "Subscribers": statistics.get('subscriberCount'),
        "Total Views": statistics.get('viewCount'),
        "Total Videos": statistics.get('videoCount'),
    }
|
|
|
|
def get_channel_videos(channel_name, max_results=5):
    """Return watch URLs for videos from a channel's uploads playlist.

    The channel is resolved by searching for ``channel_name`` and taking the
    first hit. Its uploads playlist id is read from ``contentDetails``, and up
    to ``max_results`` playlist items are turned into URLs.

    Args:
        channel_name: Free-text channel name used as the search query.
        max_results: Passed as ``maxResults`` to the playlist-items request.

    Returns:
        A list of ``https://www.youtube.com/watch?v=<id>`` strings, in the
        order the API returned the playlist items. An empty list is returned
        when no channel is found, when the channel lookup yields no items, or
        when the API raises ``HttpError``.
    """
    try:
        # Use the key loaded from the environment at module import. The
        # previous code passed the literal text 'YOUTUBE_API_KEY' as the key.
        youtube = build('youtube', 'v3', developerKey=YOUTUBE_API_KEY)

        # Resolve the channel name to a channel id (first hit only).
        search_response = youtube.search().list(
            q=channel_name,
            type='channel',
            part='id',
            maxResults=1,
        ).execute()

        items = search_response.get('items')
        if not items:
            logger.warning("No channel found with that name.")
            return []
        channel_id = items[0]['id']['channelId']

        # Find the playlist that holds the channel's uploads.
        channels_response = youtube.channels().list(
            part='contentDetails',
            id=channel_id,
        ).execute()

        channel_items = channels_response.get('items')
        if not channel_items:
            # Guard the [0] lookup below; without it an empty result raised
            # instead of following the function's "return []" convention.
            logger.warning("No channel details returned for id {}.", channel_id)
            return []
        uploads_playlist_id = (
            channel_items[0]['contentDetails']['relatedPlaylists']['uploads']
        )

        playlist_items_response = youtube.playlistItems().list(
            part='snippet',
            playlistId=uploads_playlist_id,
            maxResults=max_results,
        ).execute()

        return [
            f"https://www.youtube.com/watch?v={item['snippet']['resourceId']['videoId']}"
            for item in playlist_items_response.get('items', [])
        ]

    except HttpError as e:
        logger.error("An HTTP error occurred: {}", e)
        return []
|
|
|
|
def get_channel_details(channel_name):
    """Combine a channel's summary info with URLs of its uploaded videos.

    Args:
        channel_name: Free-text channel name passed to both lookups.

    Returns:
        The dict from ``get_youtube_channel_info`` with an added
        ``'recent_videos'`` key holding the list from ``get_channel_videos``.
        When the info lookup fails it returns an error string instead of a
        dict; that string is returned unchanged and no video lookup is made.
    """
    channel_info = get_youtube_channel_info(channel_name)

    # The info lookup signals failure with a plain string. Assigning a key on
    # it would raise TypeError, so the message goes back to the caller as-is.
    if not isinstance(channel_info, dict):
        return channel_info

    channel_info['recent_videos'] = get_channel_videos(channel_name)
    return channel_info