"""Influencer-discovery helpers.

Combines three external services:

* Perplexity AI chat completions, used as a web-search backend,
* an OpenAI chat model (through LangChain), used to structure / classify text,
* the YouTube Data API v3, used to look up channel metadata and uploads.
"""

import os
from typing import Union

import requests
from dotenv import load_dotenv
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
from langchain_core.prompts.prompt import PromptTemplate
from langchain_openai import ChatOpenAI
from loguru import logger

load_dotenv()

# Assigning None into os.environ raises TypeError, so only re-export the key
# when it is actually configured.
_openai_api_key = os.getenv("OPENAI_API_KEY")
if _openai_api_key is not None:
    os.environ["OPENAI_API_KEY"] = _openai_api_key

YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
PERPLEXITYAI_API_KEY = os.getenv('PERPLEXITY_AI_API')

llm = ChatOpenAI(model="gpt-4o")

# Without a timeout `requests` waits indefinitely on an unresponsive server.
_HTTP_TIMEOUT_SECONDS = 30
_PERPLEXITY_TIMEOUT_SECONDS = 60

_PERPLEXITY_URL = "https://api.perplexity.ai/chat/completions"
_YOUTUBE_SEARCH_URL = 'https://www.googleapis.com/youtube/v3/search'
_YOUTUBE_CHANNELS_URL = 'https://www.googleapis.com/youtube/v3/channels'

# Prompt wording is kept verbatim (typos included) so the text sent to the
# model is unchanged; only the source layout differs.
_INFLUENCER_TEMPLATE = (
    "<|begin_of_text|><|start_header_id|>system<|end_header_id|> "
    "You are a Influencer Data Extractor AI Agent tasked with extracting "
    "information from a search result\n "
    "Backstory: \n "
    "A business brand is looking for some influencer in a specific location "
    "or area. He used a tool called Perplexity AI to get this data. \n "
    "This is an amazing too, and yet it can struggle a bit "
    "(ai isn't perfect you know). \n "
    "The information returned by this ai can be incomplete, not properly "
    "structured and all. \n "
    "This brand needs youe help. \n "
    "This is how you will help the brand: \n "
    "1. You will be given two things, the ai search result and the prompt "
    "used to query the ai. \n "
    "2. Your major interest here is formatting and making the structure "
    "right. \n "
    "3. You will process the ai result, create a JSON structure of the key "
    "information needed by the user and add the corresponding values to "
    "it. \n "
    "4. The user prompt is your guide on of how the JSON should be "
    "formatted. \n "
    "5. If there's are missing data or something in the ai response, send "
    "it back as NA. \n "
    "6. At the end of your processing you want to return a structured "
    "response and also make sure it in the best order as expected by the "
    "user. \n "
    "Return a structured JSON or dictionary as output. \n "
    "Ensure that the data is properly arranged and in a good format. \n "
    "Please do this carefully and excellently. \n"
    "<|eot_id|><|start_header_id|>user<|end_header_id|> "
    "AI_SEARCH_RESULT: {search_result} \n "
    "PROMPT: {prompt} "
    "<|eot_id|><|start_header_id|>assistant<|end_header_id|>"
)

_CATEGORIZER_TEMPLATE = (
    "<|begin_of_text|><|start_header_id|>system<|end_header_id|> "
    "You are a Brand AI Agent tasked with categorizing products into "
    "categories\n "
    "There are some categories given by the brand, these are the categories "
    "the products are expected to be categorized into.\n "
    "You will be given a list of products and asked to categorize them.\n "
    "You are meant to categorize all the given product into one category. \n "
    "You take the following steps: "
    "1. Looking into all the list of products. \n "
    "2. Understand where they belong to. \n "
    "3. Look into the kist of categories. \n "
    "4. Select the category that this products falls under. \n"
    "\n You should return the selected category as output. \n "
    "Please do this carefully and correctly. "
    "<|eot_id|><|start_header_id|>user<|end_header_id|> "
    "PRODUCT_LISTS: {product_lists} \n "
    "PRODUCT_CATEGORY: {product_categories} "
    "<|eot_id|><|start_header_id|>assistant<|end_header_id|>"
)


def get_chat_completion(prompt, api_key=PERPLEXITYAI_API_KEY,
                        timeout=_PERPLEXITY_TIMEOUT_SECONDS):
    """Send *prompt* to the Perplexity chat-completions endpoint.

    Args:
        prompt: User message to send.
        api_key: Bearer token; defaults to the ``PERPLEXITY_AI_API`` env value
            read at import time.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The first choice's message content on success. On any failure a
        human-readable error string is returned instead of raising
        (non-200 status, transport error, or an unexpected body).
    """
    payload = {
        "model": "llama-3.1-sonar-small-128k-online",
        "messages": [
            {"role": "system", "content": "Be precise and concise."},
            {"role": "user", "content": prompt},
        ],
        "temperature": 0.2,
        "top_p": 0.9,
        "return_citations": True,
        # NOTE(review): this appears to limit the search to perplexity.ai
        # itself - confirm that is intended for influencer look-ups.
        "search_domain_filter": ["perplexity.ai"],
        "return_images": False,
        "return_related_questions": False,
        "search_recency_filter": "month",
        "top_k": 0,
        "stream": False,
        "presence_penalty": 0,
        "frequency_penalty": 1,
    }
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    try:
        response = requests.post(
            _PERPLEXITY_URL, json=payload, headers=headers, timeout=timeout
        )
    except requests.RequestException as exc:
        # Keep the function's "errors are returned as strings" convention.
        return f"Request failed: {exc}"

    if response.status_code != 200:
        return f"Request failed with status code: {response.status_code}"

    try:
        return response.json()['choices'][0]['message']['content']
    except (KeyError, IndexError, TypeError, ValueError):
        # ValueError covers a 200 response whose body is not valid JSON.
        return "Unexpected response format."


def influencer_data(search_result: str, prompt: str) -> dict:
    """Ask the OpenAI model to restructure a raw search result as JSON.

    Args:
        search_result: Text returned by the search step.
        prompt: The query that produced *search_result*; passed to the model
            as guidance for the output structure.

    Returns:
        Whatever ``JsonOutputParser`` parses from the model's reply.
    """
    logger.info("Formatting Influencer Data")
    initiator_prompt = PromptTemplate(
        template=_INFLUENCER_TEMPLATE,
        input_variables=["search_result", "prompt"],
    )
    initiator_router = initiator_prompt | llm | JsonOutputParser()
    return initiator_router.invoke(
        {"search_result": search_result, "prompt": prompt}
    )


def combined_influencer_data(prompt: str) -> dict:
    """Search with Perplexity, then structure the result with OpenAI.

    Note that ``get_chat_completion`` reports failures as plain strings, and
    that string is forwarded to the formatting step unchanged.
    """
    logger.info("Using Perplexity Ai to get the influencer data")
    search_result = get_chat_completion(prompt)

    logger.info("Formatting the data with OpenAI")
    return influencer_data(search_result, prompt)


product_categories = [
    "Beauty & Skincare",
    "Fashion & Lifestyle",
    "Health & Fitness",
    "Travel & Adventure",
    "Food & Beverage",
    "Technology & Gadgets",
    "Gaming & Esports",
    "Parenting & Family",
    "Finance & Business",
    "Wellness & Mental Health",
    "Automotive & Motorsports",
    "Entertainment & Pop Culture",
    "Photography & Visual Arts",
    "Education & Learning",
    "Environmental & Sustainability",
]


def product_categorizer(product_lists: list, product_categories=product_categories) -> str:
    """Ask the OpenAI model to pick one category for a list of products.

    Args:
        product_lists: Products to classify together.
        product_categories: Candidate categories; defaults to the module-level
            list (which this function does not modify).

    Returns:
        The model's reply as a string.
    """
    logger.info("Categorizing products")
    initiator_prompt = PromptTemplate(
        template=_CATEGORIZER_TEMPLATE,
        input_variables=["product_lists", "product_categories"],
    )
    initiator_router = initiator_prompt | llm | StrOutputParser()
    return initiator_router.invoke(
        {"product_lists": product_lists, "product_categories": product_categories}
    )


def _youtube_first_item(url: str, params: dict):
    """GET a YouTube Data API list endpoint and return its first item.

    Returns:
        ``(item, None)`` on success, or ``(None, error_string)`` when the
        request fails, the status is not 200, or ``items`` is missing/empty.
    """
    try:
        response = requests.get(url, params=params, timeout=_HTTP_TIMEOUT_SECONDS)
    except requests.RequestException as exc:
        return None, f"Error: {exc}"

    if response.status_code != 200:
        return None, f"Error: {response.status_code}"

    try:
        items = response.json().get('items')
    except ValueError:
        return None, "Error: invalid JSON response"

    if not items:
        return None, "Channel not found."
    return items[0], None


def get_youtube_channel_info(channel_name: str) -> Union[dict, str]:
    """Look up a channel by name and return its title, description and stats.

    Args:
        channel_name: Free-text query passed to the YouTube search endpoint;
            the first matching channel is used.

    Returns:
        A dict with keys ``Title``, ``Description``, ``Subscribers``,
        ``Total Views`` and ``Total Videos`` on success; otherwise an error
        string (``"Channel not found."`` or ``"Error: ..."``). Fields the API
        does not return are reported as ``"NA"``.
    """
    match, error = _youtube_first_item(
        _YOUTUBE_SEARCH_URL,
        {
            'part': 'id',
            'q': channel_name,
            'type': 'channel',
            'key': YOUTUBE_API_KEY,
        },
    )
    if error is not None:
        return error

    channel_id = match.get('id', {}).get('channelId')
    if not channel_id:
        return "Channel not found."

    channel, error = _youtube_first_item(
        _YOUTUBE_CHANNELS_URL,
        {
            'part': 'snippet,statistics',
            'id': channel_id,
            'key': YOUTUBE_API_KEY,
        },
    )
    if error is not None:
        return error

    snippet = channel.get('snippet', {})
    # Individual statistics may be absent (e.g. a hidden subscriber count),
    # so fall back to "NA" rather than raising KeyError.
    statistics = channel.get('statistics', {})
    return {
        "Title": snippet.get('title', "NA"),
        "Description": snippet.get('description', "NA"),
        "Subscribers": statistics.get('subscriberCount', "NA"),
        "Total Views": statistics.get('viewCount', "NA"),
        "Total Videos": statistics.get('videoCount', "NA"),
    }


def get_channel_videos(channel_name, max_results=5):
    """Return watch URLs for a channel's most recent uploads.

    Args:
        channel_name: Free-text query; the first matching channel is used.
        max_results: Maximum number of playlist items to request.

    Returns:
        A list of ``https://www.youtube.com/watch?v=...`` URLs, or an empty
        list when no channel/uploads playlist is found or the API returns an
        ``HttpError``.
    """
    try:
        youtube = build('youtube', 'v3', developerKey=YOUTUBE_API_KEY)

        search_response = youtube.search().list(
            q=channel_name,
            type='channel',
            part='id',
            maxResults=1,
        ).execute()

        items = search_response.get('items')
        if not items:
            logger.warning("No channel found with that name.")
            return []
        channel_id = items[0]['id']['channelId']

        channels_response = youtube.channels().list(
            part='contentDetails',
            id=channel_id,
        ).execute()

        channel_items = channels_response.get('items')
        if not channel_items:
            logger.warning("No channel found with that name.")
            return []
        uploads_playlist_id = (
            channel_items[0]['contentDetails']['relatedPlaylists']['uploads']
        )

        playlist_items_response = youtube.playlistItems().list(
            part='snippet',
            playlistId=uploads_playlist_id,
            maxResults=max_results,
        ).execute()

        return [
            f"https://www.youtube.com/watch?v={item['snippet']['resourceId']['videoId']}"
            for item in playlist_items_response.get('items', [])
        ]

    except HttpError as e:
        logger.error(f"An HTTP error occurred: {e}")
        return []


def get_channel_details(channel_name: str) -> dict:
    """Return channel metadata plus recent video URLs for *channel_name*.

    Returns:
        The dict from ``get_youtube_channel_info`` with an added
        ``recent_videos`` key (a list of URLs, or the string ``"None"`` when
        no videos were found). If the channel lookup fails, a placeholder
        dict with ``"Not Found"`` / ``"NA"`` values is returned instead.
    """
    channel_info = get_youtube_channel_info(channel_name)

    # A non-dict result is an error string; skip the video lookup entirely
    # since it would repeat the same failing channel search.
    if not isinstance(channel_info, dict):
        return {
            "Title": "Not Found",
            "Description": "Channel not found.",
            "Subscribers": "NA",
            "Total Views": "NA",
            "Total Videos": "NA",
            "recent_videos": "None",
        }

    # get_channel_videos signals "nothing found" with an empty list, never
    # with a message string, so test for emptiness.
    recent_videos = get_channel_videos(channel_name)
    channel_info['recent_videos'] = recent_videos if recent_videos else "None"
    return channel_info


# Example:
# print(get_channel_details("UCb1j-wdogEnOXXS5B5EhnGg"))