"""Look up popular influencers per product category.

Pipeline: ask the Perplexity chat-completions API for influencer names in a
category, then have an OpenAI chat model (via LangChain) turn the free-text
answer into JSON of the form ``{"names": [...]}``. ``get_all_names`` fans the
per-category lookups out over a thread pool.
"""

import concurrent.futures
import json
import os

import requests
from dotenv import load_dotenv
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
from langchain_core.prompts.prompt import PromptTemplate
from langchain_openai import ChatOpenAI
from loguru import logger
from openai import OpenAI
from tavily import TavilyClient

load_dotenv()

# os.environ only accepts str values, so skip the assignment when the variable
# is unset instead of failing with an opaque TypeError at import time.
_openai_api_key = os.getenv("OPENAI_API_KEY")
if _openai_api_key is not None:
    os.environ["OPENAI_API_KEY"] = _openai_api_key

llm = ChatOpenAI(model="gpt-4o")

API_KEY = os.getenv('PERPLEXITY_AI_API')

_PERPLEXITY_URL = "https://api.perplexity.ai/chat/completions"

# Upper bound (seconds) for one Perplexity request. Without a timeout,
# requests waits indefinitely and a stalled connection would block a worker
# thread of get_all_names forever.
_DEFAULT_TIMEOUT_SECONDS = 120

# Prompt text for extract_names, written as adjacent literals so that the
# exact whitespace sent to the model is visible.
_EXTRACT_NAMES_TEMPLATE = (
    "<|begin_of_text|><|start_header_id|>system<|end_header_id|> "
    "You are a Influencer Data Extractor AI Agent tasked with extracting "
    "information from a search result\n "
    "You are provided with three informations: \n "
    "1. \n"
    "Search result of influencers names "
    "Your job is to extract all the influencers names\n "
    "Make sure to extract all the names there \n "
    "You are to return this as a JSON output. "
    "The key should be 'names' in the JSON \n "
    "Do not add to the search result, just return the JSON data of all the "
    "influencer names as expected.\n "
    "<|eot_id|><|start_header_id|>user<|end_header_id|> "
    "INFLUENCER_NAMEs: {influencer_names}\n "
    "<|eot_id|><|start_header_id|>assistant<|end_header_id|>"
)


class _PerplexityError(Exception):
    """Internal: the Perplexity call completed but produced no usable content."""


def _fetch_perplexity_content(prompt, api_key=API_KEY, timeout=_DEFAULT_TIMEOUT_SECONDS):
    """Send ``prompt`` to Perplexity and return the first choice's message text.

    Raises:
        _PerplexityError: On a non-200 status or a body without the expected
            ``choices[0].message.content`` structure. The exception message is
            the same text ``perplexity_data`` returns for that failure.
        requests.exceptions.RequestException: On connection errors/timeouts.
    """
    payload = {
        "model": "llama-3.1-sonar-huge-128k-online",
        "messages": [
            {"role": "system", "content": "Be precise and concise."},
            {"role": "user", "content": prompt},
        ],
        "temperature": 0.2,
        "top_p": 0.9,
        "return_citations": True,
        # NOTE(review): this restricts the search to perplexity.ai itself,
        # which looks like a leftover from an API example rather than an
        # intentional filter for influencer lookups -- confirm before changing.
        "search_domain_filter": ["perplexity.ai"],
        "return_images": False,
        "return_related_questions": False,
        "search_recency_filter": "month",
        "top_k": 0,
        "stream": False,
        "presence_penalty": 0,
        "frequency_penalty": 1,
    }
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    response = requests.post(
        _PERPLEXITY_URL, json=payload, headers=headers, timeout=timeout
    )

    if response.status_code != 200:
        raise _PerplexityError(
            f"Request failed with status code: {response.status_code}"
        )

    try:
        return response.json()['choices'][0]['message']['content']
    except (KeyError, IndexError, TypeError, ValueError) as err:
        # ValueError: the body is not valid JSON; TypeError: the JSON is not
        # shaped like a mapping/list where one is expected.
        raise _PerplexityError("Unexpected response format.") from err


def perplexity_data(prompt, api_key=API_KEY, timeout=_DEFAULT_TIMEOUT_SECONDS):
    """Query the Perplexity chat-completions API and return the answer text.

    Args:
        prompt: User message sent to the model.
        api_key: Perplexity API key; defaults to the ``PERPLEXITY_AI_API``
            environment variable read at import time.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The message content of the first choice on success. On failure a
        human-readable string is returned instead of raising:
        ``"Request failed with status code: <code>"`` for a non-200 response,
        or ``"Unexpected response format."`` when the body cannot be parsed.

    Raises:
        requests.exceptions.RequestException: On connection errors/timeouts.
    """
    try:
        return _fetch_perplexity_content(prompt, api_key=api_key, timeout=timeout)
    except _PerplexityError as err:
        return str(err)


def extract_names(influencer_names: str) -> dict:
    """Extract influencer names from free-text search output using the LLM.

    Args:
        influencer_names: Raw text containing influencer names.

    Returns:
        The JSON object parsed from the model's reply; the prompt asks for a
        single key ``'names'``.
    """
    logger.info("Formatting Influencer Data")

    initiator_prompt = PromptTemplate(
        template=_EXTRACT_NAMES_TEMPLATE,
        input_variables=["influencer_names"],
    )
    initiator_router = initiator_prompt | llm | JsonOutputParser()
    return initiator_router.invoke({"influencer_names": influencer_names})


def get_category_influencers(product_category: str):
    """Return the popular influencers for one product category.

    Args:
        product_category: Category name interpolated into the search prompt.

    Returns:
        The dict produced by ``extract_names``. If the Perplexity search
        fails (bad status or unparseable body), ``{"names": []}`` is returned
        and the failure is logged, rather than handing the error message to
        the LLM as though it were search results.

    Raises:
        requests.exceptions.RequestException: On connection errors/timeouts.
    """
    logger.info("Search for influencers names")
    names_prompt = f"Give me a list of the names of the all the top and popular {product_category} influencers in USA. People that are known across the internet. I only need their names. Give me all that's available as much as possible."

    try:
        search_names = _fetch_perplexity_content(names_prompt)
    except _PerplexityError as err:
        logger.error(f"Perplexity search for {product_category} failed: {err}")
        return {"names": []}

    logger.info("Extracting influencers names")
    return extract_names(search_names)


# This is the function to get influencers for all product categories using multithreading concurrency
def get_all_names(product_categories: list):
    """Look up influencers for every category concurrently.

    Args:
        product_categories: Category names (hashable; they become dict keys).

    Returns:
        A dict mapping each category to the result of
        ``get_category_influencers``. A category whose lookup raised is
        logged and left out of the result.
    """
    all_influencers = {}

    # Results are keyed by category, so a repeated category would only cost
    # an extra API/LLM round trip; drop duplicates while keeping order.
    unique_categories = list(dict.fromkeys(product_categories))

    # Using ThreadPoolExecutor for multithreading concurrency
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Create a dictionary where futures map to their category name
        future_to_category = {
            executor.submit(get_category_influencers, category): category
            for category in unique_categories
        }

        for future in concurrent.futures.as_completed(future_to_category):
            category = future_to_category[future]
            try:
                all_influencers[category] = future.result()
            except Exception as exc:
                logger.error(f"{category} generated an exception: {exc}")

    return all_influencers