562 lines
27 KiB
Plaintext
562 lines
27 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"True"
|
|
]
|
|
},
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"from openai import OpenAI\n",
|
|
"import os\n",
|
|
"import requests\n",
|
|
"from dotenv import load_dotenv\n",
|
|
"from langchain_openai import ChatOpenAI\n",
|
|
"from tavily import TavilyClient\n",
|
|
"from langchain_core.prompts.prompt import PromptTemplate\n",
|
|
"from langchain_core.output_parsers import StrOutputParser, JsonOutputParser\n",
|
|
"import concurrent.futures\n",
|
|
"import json\n",
|
|
"from loguru import logger\n",
|
|
"load_dotenv()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# load_dotenv() in the first cell has already populated os.environ from .env, so\n",
"# the key only needs validating here. Re-assigning os.getenv()'s result would be a\n",
"# no-op when the key exists and an opaque TypeError (None value) when it is missing.\n",
"if not os.getenv(\"OPENAI_API_KEY\"):\n",
"    raise RuntimeError(\"OPENAI_API_KEY is not set; define it in the environment or in .env\")\n",
"llm = ChatOpenAI(model=\"gpt-4o\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Perplexity API token, read from the environment (None when the variable is unset)\n",
"API_KEY = os.environ.get('PERPLEXITY_AI_API')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def perplexity_data(prompt, api_key=API_KEY, timeout=180):\n",
"    \"\"\"Send `prompt` to the Perplexity chat-completions endpoint and return the reply text.\n",
"\n",
"    Args:\n",
"        prompt: Text sent as the user message.\n",
"        api_key: Bearer token for the API; defaults to the module-level API_KEY.\n",
"        timeout: Seconds to wait for the HTTP response. Without a timeout a\n",
"            stalled connection would block the calling thread indefinitely.\n",
"\n",
"    Returns:\n",
"        The first choice's message content on success; otherwise a short\n",
"        error string describing the failed status code or the malformed body.\n",
"    \"\"\"\n",
"    url = \"https://api.perplexity.ai/chat/completions\"\n",
"\n",
"    payload = {\n",
"        \"model\": \"llama-3.1-sonar-huge-128k-online\",\n",
"        \"messages\": [\n",
"            {\"role\": \"system\", \"content\": \"Be precise and concise.\"},\n",
"            {\"role\": \"user\", \"content\": prompt},\n",
"        ],\n",
"        \"temperature\": 0.2,\n",
"        \"top_p\": 0.9,\n",
"        \"return_citations\": True,\n",
"        # NOTE(review): this looks like it limits the search to perplexity.ai only - confirm it is intended\n",
"        \"search_domain_filter\": [\"perplexity.ai\"],\n",
"        \"return_images\": False,\n",
"        \"return_related_questions\": False,\n",
"        \"search_recency_filter\": \"month\",\n",
"        \"top_k\": 0,\n",
"        \"stream\": False,\n",
"        \"presence_penalty\": 0,\n",
"        \"frequency_penalty\": 1\n",
"    }\n",
"\n",
"    headers = {\n",
"        \"Authorization\": f\"Bearer {api_key}\",\n",
"        \"Content-Type\": \"application/json\"\n",
"    }\n",
"\n",
"    response = requests.post(url, json=payload, headers=headers, timeout=timeout)\n",
"\n",
"    # Anything other than 200 is reported back to the caller as a string\n",
"    if response.status_code != 200:\n",
"        return f\"Request failed with status code: {response.status_code}\"\n",
"\n",
"    try:\n",
"        # Decoding happens inside the try so that a non-JSON 200 body (ValueError)\n",
"        # or a null/oddly typed field (TypeError) is reported like any other\n",
"        # unexpected shape instead of raising out of the function.\n",
"        return response.json()['choices'][0]['message']['content']\n",
"    except (KeyError, IndexError, TypeError, ValueError):\n",
"        return \"Unexpected response format.\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Influencer niches (product categories) used in this notebook\n",
"product_categories = [\n",
"    \"Beauty & Skincare\", \"Fashion & Lifestyle\", \"Health & Fitness\",\n",
"    \"Travel & Adventure\", \"Food & Beverage\", \"Technology & Gadgets\",\n",
"    \"Gaming & Esports\", \"Parenting & Family\", \"Finance & Business\",\n",
"    \"Wellness & Mental Health\", \"Automotive & Motorsports\", \"Entertainment & Pop Culture\",\n",
"    \"Photography & Visual Arts\", \"Education & Learning\", \"Environmental & Sustainability\",\n",
"]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Names of the 50 U.S. states, in alphabetical order\n",
"us_states = [\n",
"    \"Alabama\", \"Alaska\", \"Arizona\", \"Arkansas\", \"California\",\n",
"    \"Colorado\", \"Connecticut\", \"Delaware\", \"Florida\", \"Georgia\",\n",
"    \"Hawaii\", \"Idaho\", \"Illinois\", \"Indiana\", \"Iowa\",\n",
"    \"Kansas\", \"Kentucky\", \"Louisiana\", \"Maine\", \"Maryland\",\n",
"    \"Massachusetts\", \"Michigan\", \"Minnesota\", \"Mississippi\", \"Missouri\",\n",
"    \"Montana\", \"Nebraska\", \"Nevada\", \"New Hampshire\", \"New Jersey\",\n",
"    \"New Mexico\", \"New York\", \"North Carolina\", \"North Dakota\", \"Ohio\",\n",
"    \"Oklahoma\", \"Oregon\", \"Pennsylvania\", \"Rhode Island\", \"South Carolina\",\n",
"    \"South Dakota\", \"Tennessee\", \"Texas\", \"Utah\", \"Vermont\",\n",
"    \"Virginia\", \"Washington\", \"West Virginia\", \"Wisconsin\", \"Wyoming\",\n",
"]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 21,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"prompt = \"\"\"\n",
|
|
" Give me a list of the names of the all the top and popular beauty and skincare influencers in USA. People that are known across the internet. I only need their names. Give me all that's available as much as possible. \n",
|
|
"\"\"\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 22,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\n",
|
|
" Give me a list of the names of the all the top and popular beauty and skincare influencers in USA. People that are known across the internet. I only need their names. Give me all that's available as much as possible. \n",
|
|
"\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"print(prompt)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 23,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Here is a comprehensive list of popular beauty and skincare influencers in the USA, based on the provided sources:\n",
|
|
"\n",
|
|
"1. **Caroline Hirons**\n",
|
|
"2. **Hyram Yarbro**\n",
|
|
"3. **Shalini Kutti**\n",
|
|
"4. **Amy Serrano**\n",
|
|
"5. **Cecilia**\n",
|
|
"6. **Ozy**\n",
|
|
"7. **Divina Wong**\n",
|
|
"8. **Shreya Jain**\n",
|
|
"9. **Debasree Banerjee**\n",
|
|
"10. **Shalini Srivastava**\n",
|
|
"11. **Ankita Chaturvedi**\n",
|
|
"12. **Adrianna Kalisz**\n",
|
|
"13. **Bethany Mota**\n",
|
|
"14. **Zoe Elizabeth Sugg (Zoella)**\n",
|
|
"15. **Rachel Levin**\n",
|
|
"16. **Nikkie De Jager**\n",
|
|
"17. **Eva Gutowski**\n",
|
|
"18. **Marzia Bisognin**\n",
|
|
"19. **Jeffree Star**\n",
|
|
"20. **Carli Bybel**\n",
|
|
"21. **Mindy McKnight**\n",
|
|
"22. **Promise Phan**\n",
|
|
"23. **Jessica Lyn**\n",
|
|
"24. **Cordelia (Phyrra)**\n",
|
|
"25. **Jennifer Mathews**\n",
|
|
"26. **Sunny Subramanian**\n",
|
|
"27. **Vicky Farrell**\n",
|
|
"28. **Mariel**\n",
|
|
"29. **Danielle**\n",
|
|
"30. **Kasey S**\n",
|
|
"\n",
|
|
"Note: This list includes both macro and micro influencers, as well as those who specialize in various aspects of beauty and skincare.\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"result = perplexity_data(prompt)\n",
|
|
"print(result)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 17,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Based on the provided search result and general knowledge, here are the available details for Chriselle Lim:\n",
|
|
"\n",
|
|
"1. **Name**: Chriselle Lim\n",
|
|
"2. **Contact Info**: Not provided in the search result.\n",
|
|
"4. **Facebook Username**: Not specified in the search result.\n",
|
|
"5. **Instagram Username**: TheChriselleFactor (though not directly mentioned in the provided link, it is commonly known and can be inferred from her website name, TheChriselleFactor.com)\n",
|
|
"6. **TikTok Username**: Not specified in the search result.\n",
|
|
"7. **YouTube Username**: Not specified in the search result, but it is also TheChriselleFactor based on general knowledge.\n",
|
|
"8. **Facebook Followers**: Not specified in the search result.\n",
|
|
"9. **Instagram Followers**: Not specified in the search result.\n",
|
|
"10. **TikTok Followers**: Not specified in the search result.\n",
|
|
"11. **YouTube Subscribers**: Not specified in the search result.\n",
|
|
"\n",
|
|
"For more detailed and up-to-date information, visiting Chriselle Lim's official social media profiles or her website would be necessary.\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Dedicated names keep this exploratory lookup from overwriting `prompt` and\n",
"# `result`, which still hold the influencer-name search that the later\n",
"# extract_names(result) cell consumes on a top-to-bottom run.\n",
"details_prompt = \"\"\"\n",
" Give me a the following social details of Chriselle Lim, a beauty and skincare influencer: \\n\n",
" 1. Name \\n\n",
" 2. Contact info \\n\n",
" 4. Facebook username\\n\n",
" 5. Instagram username \\n\n",
" 6. Tiktok username \\n\n",
" 7. Youtube username \\n\n",
" 8. Facebook followers \\n\n",
" 9. Instagram followers \\n\n",
" 10 Tiktok followers \\n\n",
" 11. Youtube subscribers \\n \n",
"\"\"\"\n",
"details_result = perplexity_data(details_prompt)\n",
"print(details_result)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def extract_names(influencer_names:str) -> dict:\n",
"    \"\"\"Extract the influencer names contained in a free-form search result.\n",
"\n",
"    Args:\n",
"        influencer_names: Raw search-result text that lists influencers.\n",
"\n",
"    Returns:\n",
"        The JSON parsed from the model's reply; the prompt asks for a single\n",
"        'names' key holding the extracted names.\n",
"    \"\"\"\n",
"    logger.info(\"Formatting Influencer Data\")\n",
"    # NOTE(review): the <|...|> markers look like Llama-style chat headers; the\n",
"    # model configured in this notebook is gpt-4o, which receives them as plain\n",
"    # text - consider a chat prompt with real system/user roles.\n",
"    initiator_prompt = PromptTemplate(\n",
"        template=\"\"\"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n",
" You are a Influencer Data Extractor AI Agent tasked with extracting information from a search result\\n\n",
" You are provided with one piece of information: \\n\n",
" 1. Search result of influencers names\n",
" \n",
" Your job is to extract all the influencers names\\n\n",
" Make sure to extract all the names there \\n\n",
" You are to return this as a JSON output. The key should be 'names' in the JSON \\n\n",
" Do not add to the search result, just return the JSON data of all the influencer names as expected.\\n\n",
"\n",
" <|eot_id|><|start_header_id|>user<|end_header_id|>\n",
" INFLUENCER_NAMEs: {influencer_names}\\n\n",
" \n",
" <|eot_id|><|start_header_id|>assistant<|end_header_id|>\"\"\",\n",
"        input_variables=[\"influencer_names\"],\n",
"    )\n",
"\n",
"    # Pipeline: fill the prompt, call the chat model, parse the reply as JSON\n",
"    initiator_router = initiator_prompt | llm | JsonOutputParser()\n",
"    return initiator_router.invoke({\"influencer_names\": influencer_names})"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 28,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\u001b[32m2024-10-25 22:48:23.827\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mextract_names\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mFormatting Influencer Data\u001b[0m\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"{'names': ['Caroline Hirons', 'Hyram Yarbro', 'Shalini Kutti', 'Amy Serrano', 'Cecilia', 'Ozy', 'Divina Wong', 'Shreya Jain', 'Debasree Banerjee', 'Shalini Srivastava', 'Ankita Chaturvedi', 'Adrianna Kalisz', 'Bethany Mota', 'Zoe Elizabeth Sugg (Zoella)', 'Rachel Levin', 'Nikkie De Jager', 'Eva Gutowski', 'Marzia Bisognin', 'Jeffree Star', 'Carli Bybel', 'Mindy McKnight', 'Promise Phan', 'Jessica Lyn', 'Cordelia (Phyrra)', 'Jennifer Mathews', 'Sunny Subramanian', 'Vicky Farrell', 'Mariel', 'Danielle', 'Kasey S']}\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"print(extract_names(result))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Solution Implementation"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Categories passed to get_all_names() in this section\n",
"product_categories = [\n",
"    \"Beauty & Skincare\", \"Fashion & Lifestyle\", \"Health & Fitness\",\n",
"    \"Travel & Adventure\", \"Food & Beverage\", \"Technology & Gadgets\",\n",
"    \"Gaming & Esports\", \"Parenting & Family\", \"Finance & Business\",\n",
"    \"Wellness & Mental Health\", \"Automotive & Motorsports\", \"Entertainment & Pop Culture\",\n",
"    \"Photography & Visual Arts\", \"Education & Learning\", \"Environmental & Sustainability\",\n",
"]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def get_category_influencers(product_category: str):\n",
"    \"\"\"Search for influencers in one product category and return the extracted names.\n",
"\n",
"    The category is interpolated into a search prompt sent through\n",
"    perplexity_data(); the reply text is then handed to extract_names().\n",
"\n",
"    Args:\n",
"        product_category: Category label inserted into the search prompt.\n",
"\n",
"    Returns:\n",
"        Whatever extract_names() returns for the search reply.\n",
"    \"\"\"\n",
"    logger.info(\"Search for influencers names\")\n",
"    raw_listing = perplexity_data(\n",
"        f\"Give me a list of the names of the all the top and popular {product_category} influencers globally. People that are known across the internet. I only need their names. Give me all that's available as much as possible.\"\n",
"    )\n",
"\n",
"    logger.info(\"Extracting influencers names\")\n",
"    return extract_names(raw_listing)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\u001b[32m2024-11-14 23:03:49.685\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mget_category_influencers\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mSearch for influencers names\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:04:18.963\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mget_category_influencers\u001b[0m:\u001b[36m5\u001b[0m - \u001b[1mExtracting influencers names\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:04:18.964\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mextract_names\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mFormatting Influencer Data\u001b[0m\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"{'names': ['Simeon Panda', 'Ulisses Jr.', 'Lazar Angelov', 'Sergi Constance', 'Joey Swoll', 'Brandon Hendrickson', 'Anton Antipov', 'Kayla Itsines', 'Nina Gelbke', 'Shawn Wolfe', 'Asker Jeukendrup', 'Laura de Witte', 'Angie Asche', 'Taylor Ryan', 'Meghann Featherstun', 'Renee McGregor', 'Sarah Patil', 'Abby Grimm', 'Allison Knott', 'mxttcartier', 'Dulce Rodriguez', 'Nayeli Maserati', 'Emma Sosa', 'Sar Hovsepian', 'Hannah LeBoff', 'Cristina Chan', 'Austin Rawlinz', 'Dr. Fernandez, MD', 'Chris Justin', 'Emma Litt', 'Sareen Hovsepian', 'Hannah | Pilates + Strength Workouts', 'Cristina Veronica Fit', 'Marie Spano']}\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"print(get_category_influencers(product_category=\"Health & Fitness\"))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Collect influencers for every product category by running the lookups on a thread pool\n",
"def get_all_names(product_categories: list):\n",
"    \"\"\"Run get_category_influencers() for each category concurrently.\n",
"\n",
"    Args:\n",
"        product_categories: Category labels to look up.\n",
"\n",
"    Returns:\n",
"        Dict mapping each category to its lookup result. A category whose\n",
"        lookup raised is logged and left out of the dict.\n",
"    \"\"\"\n",
"    results_by_category = {}\n",
"\n",
"    with concurrent.futures.ThreadPoolExecutor() as pool:\n",
"        # Remember which category each submitted future belongs to\n",
"        pending = {}\n",
"        for label in product_categories:\n",
"            pending[pool.submit(get_category_influencers, label)] = label\n",
"\n",
"        # Handle lookups in completion order, not submission order\n",
"        for future in concurrent.futures.as_completed(pending):\n",
"            category = pending[future]\n",
"            try:\n",
"                results_by_category[category] = future.result()\n",
"            except Exception as exc:\n",
"                logger.error(f\"{category} generated an exception: {exc}\")\n",
"\n",
"    return results_by_category"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\u001b[32m2024-11-14 23:04:39.155\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mget_category_influencers\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mSearch for influencers names\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:04:39.157\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mget_category_influencers\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mSearch for influencers names\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:04:39.159\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mget_category_influencers\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mSearch for influencers names\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:04:39.163\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mget_category_influencers\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mSearch for influencers names\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:04:39.168\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mget_category_influencers\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mSearch for influencers names\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:04:39.171\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mget_category_influencers\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mSearch for influencers names\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:04:39.174\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mget_category_influencers\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mSearch for influencers names\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:04:39.178\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mget_category_influencers\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mSearch for influencers names\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:04:39.183\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mget_category_influencers\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mSearch for influencers names\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:04:39.184\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mget_category_influencers\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mSearch for influencers names\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:04:39.192\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mget_category_influencers\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mSearch for influencers names\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:04:39.194\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mget_category_influencers\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mSearch for influencers names\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:04:39.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mget_category_influencers\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mSearch for influencers names\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:04:39.204\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mget_category_influencers\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mSearch for influencers names\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:04:39.207\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mget_category_influencers\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mSearch for influencers names\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:05:04.723\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mget_category_influencers\u001b[0m:\u001b[36m5\u001b[0m - \u001b[1mExtracting influencers names\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:05:04.724\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mextract_names\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mFormatting Influencer Data\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:05:07.830\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mget_category_influencers\u001b[0m:\u001b[36m5\u001b[0m - \u001b[1mExtracting influencers names\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:05:07.834\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mextract_names\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mFormatting Influencer Data\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:05:08.055\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mget_category_influencers\u001b[0m:\u001b[36m5\u001b[0m - \u001b[1mExtracting influencers names\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:05:08.056\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mextract_names\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mFormatting Influencer Data\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:05:11.678\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mget_category_influencers\u001b[0m:\u001b[36m5\u001b[0m - \u001b[1mExtracting influencers names\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:05:11.681\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mextract_names\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mFormatting Influencer Data\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:05:11.802\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mget_category_influencers\u001b[0m:\u001b[36m5\u001b[0m - \u001b[1mExtracting influencers names\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:05:11.807\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mextract_names\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mFormatting Influencer Data\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:05:13.979\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mget_category_influencers\u001b[0m:\u001b[36m5\u001b[0m - \u001b[1mExtracting influencers names\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:05:13.980\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mextract_names\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mFormatting Influencer Data\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:05:16.327\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mget_category_influencers\u001b[0m:\u001b[36m5\u001b[0m - \u001b[1mExtracting influencers names\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:05:16.328\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mextract_names\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mFormatting Influencer Data\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:05:16.482\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mget_category_influencers\u001b[0m:\u001b[36m5\u001b[0m - \u001b[1mExtracting influencers names\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:05:16.484\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mextract_names\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mFormatting Influencer Data\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:05:17.129\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mget_category_influencers\u001b[0m:\u001b[36m5\u001b[0m - \u001b[1mExtracting influencers names\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:05:17.130\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mextract_names\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mFormatting Influencer Data\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:05:18.505\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mget_category_influencers\u001b[0m:\u001b[36m5\u001b[0m - \u001b[1mExtracting influencers names\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:05:18.506\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mextract_names\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mFormatting Influencer Data\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:05:19.718\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mget_category_influencers\u001b[0m:\u001b[36m5\u001b[0m - \u001b[1mExtracting influencers names\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:05:19.720\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mextract_names\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mFormatting Influencer Data\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:05:22.464\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mget_category_influencers\u001b[0m:\u001b[36m5\u001b[0m - \u001b[1mExtracting influencers names\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:05:22.465\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mextract_names\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mFormatting Influencer Data\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:05:22.659\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mget_category_influencers\u001b[0m:\u001b[36m5\u001b[0m - \u001b[1mExtracting influencers names\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:05:22.660\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mextract_names\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mFormatting Influencer Data\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:05:37.479\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mget_category_influencers\u001b[0m:\u001b[36m5\u001b[0m - \u001b[1mExtracting influencers names\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:05:37.484\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mextract_names\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mFormatting Influencer Data\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:06:19.797\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mget_category_influencers\u001b[0m:\u001b[36m5\u001b[0m - \u001b[1mExtracting influencers names\u001b[0m\n",
|
|
"\u001b[32m2024-11-14 23:06:19.799\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mextract_names\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mFormatting Influencer Data\u001b[0m\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Fetch all influencer names concurrently\n",
|
|
"all_influencers_data = get_all_names(product_categories)\n",
|
|
"\n",
|
|
"# Convert the result to JSON format\n",
|
|
"all_influencers_json = json.dumps(all_influencers_data, indent=4)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Data saved to influencers_names_data.json\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Write the serialized influencer names to disk\n",
"output_file = 'influencers_names_data.json'\n",
"with open(output_file, 'w') as json_out:\n",
"    json_out.write(all_influencers_json)\n",
"\n",
"# Report where the data was written\n",
"print(f\"Data saved to {output_file}\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "base",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.11.7"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|