new data added

2024-10-28 23:20:31 +01:00
parent 17b3f3dcc1
commit 707b896826
11 changed files with 5325 additions and 153 deletions
@@ -0,0 +1,116 @@
+from openai import OpenAI
+import os
+import requests
+from dotenv import load_dotenv
+from langchain_openai import ChatOpenAI
+from tavily import TavilyClient
+from langchain_core.prompts.prompt import PromptTemplate
+from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
+import concurrent.futures
+import json
+from loguru import logger
+load_dotenv()
+
+os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
+llm = ChatOpenAI(model="gpt-4o")
+
+API_KEY = os.getenv('PERPLEXITY_AI_API')
+
+def perplexity_data(prompt, api_key=API_KEY):
+    url = "https://api.perplexity.ai/chat/completions"
+
+    payload = {
+        "model": "llama-3.1-sonar-huge-128k-online",
+        "messages": [
+            {
+                "role": "system",
+                "content": "Be precise and concise."
+            },
+            {
+                "role": "user",
+                "content": prompt
+            }
+        ],
+        "temperature": 0.2,
+        "top_p": 0.9,
+        "return_citations": True,
+        "search_domain_filter": ["perplexity.ai"],
+        "return_images": False,
+        "return_related_questions": False,
+        "search_recency_filter": "month",
+        "top_k": 0,
+        "stream": False,
+        "presence_penalty": 0,
+        "frequency_penalty": 1
+    }
+    
+    headers = {
+        "Authorization": f"Bearer {api_key}",
+        "Content-Type": "application/json"
+    }
+    
+    response = requests.post(url, json=payload, headers=headers)
+    
+    # Check if the request was successful
+    if response.status_code == 200:
+        response_data = response.json()
+        try:
+            # Extract the message content
+            message_content = response_data['choices'][0]['message']['content']
+            return message_content
+        except (KeyError, IndexError):
+            return "Unexpected response format."
+    else:
+        return f"Request failed with status code: {response.status_code}"
+    
+def extract_names(influencer_names:str) -> dict:
+    logger.info(f"Formatting Influencer Data")
+    initiator_prompt = PromptTemplate(
+        template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
+        You are a Influencer Data Extractor AI Agent tasked with extracting information from a search result\n
+        You are provided with three informations: \n
+            1. Search result of influencers names
+        
+        Your job is to extract all the influencers names\n
+        Make sure to extract all the names there \n
+        You are to return this as a JSON output. The key should be 'names' in the JSON \n
+        Do not add to the search result, just return the JSON data of all the influencer names as expected.\n
+
+    <|eot_id|><|start_header_id|>user<|end_header_id|>
+    INFLUENCER_NAMEs: {influencer_names}\n
+    
+    <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
+            input_variables=["influencer_names"],
+        )
+
+    initiator_router = initiator_prompt | llm | JsonOutputParser()
+    output = initiator_router.invoke({"influencer_names":influencer_names})
+    return output
+
+def get_category_influencers(product_category: str):
+    logger.info("Search for influencers names")
+    names_prompt = f"Give me a list of the names of the all the top and popular {product_category} influencers in USA. People that are known across the internet. I only need their names. Give me all that's available as much as possible."
+    search_names = perplexity_data(names_prompt)
+    logger.info("Extracting influencers names")
+    influencers_names = extract_names(search_names)
+    return influencers_names
+    
+    
+# This is the function to get influencers for all product categories using multithreading concurrency
+def get_all_names(product_categories: list):
+    all_influencers = {}
+
+    # Using ThreadPoolExecutor for multithreading concurrency
+    with concurrent.futures.ThreadPoolExecutor() as executor:
+        # Create a dictionary where futures map to their category name
+        future_to_category = {executor.submit(get_category_influencers, category): category for category in product_categories}
+
+        for future in concurrent.futures.as_completed(future_to_category):
+            category = future_to_category[future]
+            try:
+                influencers = future.result()  # Get result of the completed future
+                all_influencers[category] = influencers
+            except Exception as exc:
+                logger.error(f"{category} generated an exception: {exc}")
+    
+    return all_influencers