experiment.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from openai import OpenAI\n",
    "import os\n",
    "import requests\n",
    "from dotenv import load_dotenv\n",
    "from langchain_openai import ChatOpenAI\n",
    "from langchain_core.prompts.prompt import PromptTemplate\n",
    "from langchain_core.output_parsers import StrOutputParser, JsonOutputParser\n",
    "from loguru import logger\n",
    "load_dotenv()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# ChatOpenAI picks up OPENAI_API_KEY from the environment (load_dotenv() ran above).\n",
    "# Fail early with a clear message instead of the opaque\n",
    "# `TypeError: str expected, not NoneType` raised when None is assigned to os.environ.\n",
    "if not os.getenv(\"OPENAI_API_KEY\"):\n",
    "    raise RuntimeError(\"OPENAI_API_KEY is not set; define it in the environment or in your .env file.\")\n",
    "llm = ChatOpenAI(model=\"gpt-4o\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Perplexity API key, read from the environment / .env file.\n",
    "API_KEY = os.getenv('PERPLEXITY_AI_API')\n",
    "if not API_KEY:\n",
    "    # With no key the Authorization header would be sent as 'Bearer None'.\n",
    "    logger.warning(\"PERPLEXITY_AI_API is not set; Perplexity requests will be sent without a valid key.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_chat_completion(prompt, api_key=API_KEY, timeout=60):\n",
    "    \"\"\"Send `prompt` to the Perplexity chat-completions API and return the reply text.\n",
    "\n",
    "    Args:\n",
    "        prompt: User message; it is sent after a fixed \"Be precise and concise.\" system message.\n",
    "        api_key: Perplexity API key used as the Bearer token. The default is the value\n",
    "            API_KEY had when this cell was run, so re-run the cell after changing API_KEY.\n",
    "        timeout: Seconds to wait for the HTTP response. requests applies no timeout\n",
    "            by default, so without this a stalled connection would block the kernel.\n",
    "\n",
    "    Returns:\n",
    "        The content of the first choice's message on success. Failures are reported\n",
    "        as plain strings (\"Unexpected response format.\" or\n",
    "        \"Request failed with status code: <code>\"), so callers must not assume the\n",
    "        returned text is a real answer.\n",
    "\n",
    "    Raises:\n",
    "        requests.RequestException: on connection errors or when `timeout` expires.\n",
    "    \"\"\"\n",
    "    url = \"https://api.perplexity.ai/chat/completions\"\n",
    "\n",
    "    payload = {\n",
    "        \"model\": \"llama-3.1-sonar-small-128k-online\",\n",
    "        \"messages\": [\n",
    "            {\n",
    "                \"role\": \"system\",\n",
    "                \"content\": \"Be precise and concise.\"\n",
    "            },\n",
    "            {\n",
    "                \"role\": \"user\",\n",
    "                \"content\": prompt\n",
    "            }\n",
    "        ],\n",
    "        \"temperature\": 0.2,\n",
    "        \"top_p\": 0.9,\n",
    "        \"return_citations\": True,\n",
    "        # NOTE(review): presumably restricts sources to the perplexity.ai domain -\n",
    "        # confirm this is intended for general queries.\n",
    "        \"search_domain_filter\": [\"perplexity.ai\"],\n",
    "        \"return_images\": False,\n",
    "        \"return_related_questions\": False,\n",
    "        \"search_recency_filter\": \"month\",\n",
    "        \"top_k\": 0,\n",
    "        \"stream\": False,\n",
    "        \"presence_penalty\": 0,\n",
    "        \"frequency_penalty\": 1\n",
    "    }\n",
    "\n",
    "    headers = {\n",
    "        \"Authorization\": f\"Bearer {api_key}\",\n",
    "        \"Content-Type\": \"application/json\"\n",
    "    }\n",
    "\n",
    "    response = requests.post(url, json=payload, headers=headers, timeout=timeout)\n",
    "\n",
    "    if response.status_code != 200:\n",
    "        # Keep the returned string unchanged for callers; log the body for diagnosis.\n",
    "        logger.error(\"Perplexity request failed ({}): {}\", response.status_code, response.text)\n",
    "        return f\"Request failed with status code: {response.status_code}\"\n",
    "\n",
    "    try:\n",
    "        # Parse inside the try: a 200 reply with a non-JSON body raises ValueError.\n",
    "        response_data = response.json()\n",
    "        return response_data['choices'][0]['message']['content']\n",
    "    except (KeyError, IndexError, TypeError, ValueError):\n",
    "        logger.error(\"Perplexity returned a response in an unexpected format\")\n",
    "        return \"Unexpected response format.\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The Milky Way Galaxy is estimated to contain approximately **100 billion stars**.\n"
     ]
    }
   ],
   "source": [
    "# Smoke-test the Perplexity helper with a simple factual question.\n",
    "prompt = \"How many stars are there in our galaxy?\"\n",
    "result = get_chat_completion(prompt)\n",
    "print(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "```python\n",
      "states_in_us = [\n",
      "    \"Alabama\", \"Alaska\", \"Arizona\", \"Arkansas\", \"California\", \"Colorado\", \"Connecticut\",\n",
      "    \"Delaware\", \"Florida\", \"Georgia\", \"Hawaii\", \"Idaho\", \"Illinois\", \"Indiana\", \"Iowa\",\n",
      "    \"Kansas\", \"Kentucky\", \"Louisiana\", \"Maine\", \"Maryland\", \"Massachusetts\", \"Michigan\",\n",
      "    \"Minnesota\", \"Mississippi\", \"Missouri\", \"Montana\", \"Nebraska\", \"Nevada\", \"New Hampshire\",\n",
      "    \"New Jersey\", \"New Mexico\", \"New York\", \"North Carolina\", \"North Dakota\", \"Ohio\",\n",
      "    \"Oklahoma\", \"Oregon\", \"Pennsylvania\", \"Rhode Island\", \"South Carolina\", \"South Dakota\",\n",
      "    \"Tennessee\", \"Texas\", \"Utah\", \"Vermont\", \"Virginia\", \"Washington\", \"West Virginia\",\n",
      "    \"Wisconsin\", \"Wyoming\"\n",
      "]\n",
      "```\n"
     ]
    }
   ],
   "source": [
    "# Check how a list-style answer comes back (US states).\n",
    "prompt = \"Give me the list of the states in the US. Return as a python list. Don't add any other explanations to it.\"\n",
    "result = get_chat_completion(prompt)\n",
    "print(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "```python\n",
      "[\"England\", \"Scotland\", \"Wales\", \"Northern Ireland\"]\n",
      "```\n"
     ]
    }
   ],
   "source": [
    "# Same list-format check for the UK.\n",
    "prompt = \"Give me the list of the states in the UK or London. Return as a python list. Don't add any other explanations to it.\"\n",
    "result = get_chat_completion(prompt)\n",
    "print(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "```python\n",
      "[\n",
      "    \"Baden-Württemberg\",\n",
      "    \"Bavaria\",\n",
      "    \"Berlin\",\n",
      "    \"Brandenburg\",\n",
      "    \"Bremen\",\n",
      "    \"Hamburg\",\n",
      "    \"Hesse\",\n",
      "    \"Lower Saxony\",\n",
      "    \"Mecklenburg-Vorpommern\",\n",
      "    \"North Rhine-Westphalia\",\n",
      "    \"Rhineland-Palatinate\",\n",
      "    \"Saarland\",\n",
      "    \"Saxony\",\n",
      "    \"Saxony-Anhalt\",\n",
      "    \"Schleswig-Holstein\",\n",
      "    \"Thuringia\"\n",
      "]\n",
      "```\n"
     ]
    }
   ],
   "source": [
    "# Same list-format check for Germany.\n",
    "prompt = \"Give me the list of the states in Germany. Return as a python list. Don't add any other explanations to it.\"\n",
    "result = get_chat_completion(prompt)\n",
    "print(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "```python\n",
      "categories_of_brands = [\n",
      "    \"Corporate Brands\",\n",
      "    \"Product Brands\",\n",
      "    \"Sneakers and Sportswear\",\n",
      "    \"Soft Drinks\",\n",
      "    \"Neobanking and Neobrokerage\",\n",
      "    \"Banks\",\n",
      "    \"Grocery Stores\",\n",
      "    \"Online Grocery Delivery\",\n",
      "    \"Male Cosmetics\",\n",
      "    \"Jewelry\",\n",
      "    \"Apparel\",\n",
      "    \"Clothing\",\n",
      "    \"Accessories\",\n",
      "    \"Footwear\",\n",
      "    \"Online Fashion Stores\",\n",
      "    \"Department Stores\",\n",
      "    \"Supercenters & Hypermarkets\",\n",
      "    \"B2C E-Commerce\",\n",
      "    \"Insurance\",\n",
      "    \"Telecommunications\",\n",
      "    \"Airlines\",\n",
      "    \"Hotels\",\n",
      "    \"Automobiles\",\n",
      "    \"Electronics\",\n",
      "    \"Home Improvement\",\n",
      "    \"Pharmaceuticals\",\n",
      "    \"Health and Wellness\",\n",
      "    \"Food and Beverages\",\n",
      "    \"Retail\"\n",
      "]\n",
      "```\n"
     ]
    }
   ],
   "source": [
    "# Ask for up to 30 US brand categories as a Python list.\n",
    "prompt = \"Give me a list of the categories of brands we have in the US, you can list up to 30 of them. Return as a python list. Don't add any other explanations to it.\"\n",
    "result = get_chat_completion(prompt)\n",
    "print(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Here are the lists of influencers in Alabama that you can consider for promoting your sneakers brand, based on the provided data:\n",
      "\n",
      "## Name\n",
      "- j6lack_\n",
      "- Sharaaa\n",
      "- Jaz\n",
      "- Randall Woodfin\n",
      "- Micah Myricks\n",
      "- KC\n",
      "- THE RAVEN SMITH\n",
      "- Bianca Sha'nel Patton\n",
      "- Mare Thomas\n",
      "- Destinee Ariee\n",
      "- Dewayne Mcbride\n",
      "\n",
      "## Email or Contact\n",
      "- Not available in the source data\n",
      "- Not available in the source data\n",
      "- Not available in the source data\n",
      "- Not available in the source data\n",
      "- Not available in the source data\n",
      "- Not available in the source data\n",
      "- Not available in the source data\n",
      "- Not available in the source data\n",
      "- Not available in the source data\n",
      "- Not available in the source data\n",
      "- Not available in the source data\n",
      "\n",
      "## Location\n",
      "- Birmingham\n",
      "- Birmingham\n",
      "- Birmingham\n",
      "- Birmingham\n",
      "- Birmingham\n",
      "- Birmingham\n",
      "- Birmingham\n",
      "- Birmingham\n",
      "- Birmingham\n",
      "- Birmingham\n",
      "- Birmingham\n",
      "\n",
      "## Facebook Username\n",
      "- Not available in the source data\n",
      "- Not available in the source data\n",
      "- Not available in the source data\n",
      "- Not available in the source data\n",
      "- Not available in the source data\n",
      "- Not available in the source data\n",
      "- Not available in the source data\n",
      "- Not available in the source data\n",
      "- Not available in the source data\n",
      "- Not available in the source data\n",
      "- Not available in the source data\n",
      "\n",
      "## Instagram Username\n",
      "- @j6lack_\n",
      "- @kashararachele\n",
      "- @birmingham_finest\n",
      "- @randall_woodfin\n",
      "- @thecoolestguyuno\n",
      "- @_imonlykc\n",
      "- @noflashplzz\n",
      "- @biancasobad\n",
      "- @savannahbrasfield\n",
      "- @therealddestiny\n",
      "- @queen.slim\n",
      "\n",
      "## Number of Facebook Followers\n",
      "- Not available in the source data\n",
      "- Not available in the source data\n",
      "- Not available in the source data\n",
      "- Not available in the source data\n",
      "- Not available in the source data\n",
      "- Not available in the source data\n",
      "- Not available in the source data\n",
      "- Not available in the source data\n",
      "- Not available in the source data\n",
      "- Not available in the source data\n",
      "- Not available in the source data\n",
      "\n",
      "## Number of Instagram Followers\n",
      "- 15.1k\n",
      "- 11.9k\n",
      "- 11.1k\n",
      "- 17.2k\n",
      "- 10.2k\n",
      "- 12k\n",
      "- 15.8k\n",
      "- 35.1k\n",
      "- 15.5k\n",
      "- 24k\n",
      "- 10.9k\n"
     ]
    }
   ],
   "source": [
    "# First attempt: request seven parallel lists (one per field) for Alabama influencers.\n",
    "prompt = \"\"\"\n",
    "    I'm looking for influencers that i need to promote my sneakers brand. My current location is Alabama in the US. \\n\n",
    "    I need help with looking for influencers in my state. People that i can reach out to, and they will help me promote my sneakers brand. \\n\n",
    "    I need your help with a python list of all the influencers i have in this state. Here's how i want it. \\n\n",
    "        1. Name \\n\n",
    "        2. Email or contant \\n\n",
    "        3. Location \\n\n",
    "        4. Facebook Username\\n\n",
    "        5. Instagram Username \\n\n",
    "        6. Number of facebook followers \\n\n",
    "        7. Number of instagram followers \\n.\n",
    "    This information will help me track who to reach out to. \\n \n",
    "    I'm expecting 7 lists as output, it should follow the pattern i listed.\\n\n",
    "    Do not add other explanations to it, just return the lists alone.\n",
    "\"\"\"\n",
    "result = get_chat_completion(prompt)\n",
    "print(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Here are the lists of influencers in Alabama, formatted as requested:\n",
      "\n",
      "### Name:\n",
      "```\n",
      "['j6lack_', 'Sharaaa', 'Jaz', 'Randall Woodfin', 'Micah Myricks', '_imonlykc', 'THE RAVEN SMITH', 'Bianca Sha'nel Patton', 'Mare Thomas', 'Destinee Ariee', 'Dewayne Mcbride']\n",
      "```\n",
      "\n",
      "### Contact Info:\n",
      "```\n",
      "['Check contact details', 'Check contact details', 'Check contact details', 'Check contact details', 'Check contact details', 'Check contact details', 'Check contact details', 'Check contact details', 'Check contact details', 'Check contact details', 'Check contact details']\n",
      "```\n",
      "\n",
      "### Facebook Username:\n",
      "```\n",
      "['', '', '', '', '', '', '', '', '', '', '']\n",
      "```\n",
      "\n",
      "### Instagram Username:\n",
      "```\n",
      "['@j6lack_', '@kashararachele', '@birmingham_finest', '@randall_woodfin', '@thecoolestguyuno', '@_imonlykc', '@noflashplzz', '@biancasobad', '@savannahbrasfield', '@therealddestiny', '@queen.slim']\n",
      "```\n",
      "\n",
      "### Number of Facebook Followers:\n",
      "```\n",
      "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
      "```\n",
      "\n",
      "### Number of Instagram Followers:\n",
      "```\n",
      "[15.1k, 11.9k, 11.1k, 17.2k, 10.2k, 12k, 15.8k, 35.1k, 15.5k, 24k, 10.9k]\n",
      "```\n"
     ]
    }
   ],
   "source": [
    "# Second attempt: ask for six parallel Python lists, one per requested field.\n",
    "# The requested list count now matches the six fields enumerated in the prompt.\n",
    "prompt = \"\"\"\n",
    "    I'm looking for influencers that i need to promote my sneakers brand. My current location is Alabama in the US. \\n\n",
    "    I need help with looking for influencers in my state. People that i can reach out to, and they will help me promote my sneakers brand. \\n\n",
    "    I need your help with a list of 30  influencers i have in this state. Here's how i want it. \\n\n",
    "        1. Name \\n\n",
    "        2. Contact info \\n\n",
    "        3. Facebook Username\\n\n",
    "        4. Instagram Username \\n\n",
    "        5. Number of facebook followers \\n\n",
    "        6. Number of instagram followers \\n\n",
    "    This information will help me track who to reach out to. \\n \n",
    "    I'm expecting 6 lists as output, it should follow the pattern i listed.\\n\n",
    "    Do not add other explanations to it, just return the lists alone. Make them come out in separate python lists, like this []. So i will have 6 in total. \\n\n",
    "    Place them in order, name: [], Contact info: [], Facebook Username: [], and so on.\\n\n",
    "    Please do this carefully.\n",
    "\"\"\"\n",
    "\n",
    "result = get_chat_completion(prompt)\n",
    "print(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "def influencer_data(search_result: str, prompt: str) -> dict:\n",
    "    \"\"\"Restructure a raw search answer into JSON using the module-level `llm`.\n",
    "\n",
    "    Args:\n",
    "        search_result: Raw text returned by the search step.\n",
    "        prompt: The prompt that produced `search_result`; the model is told to use it\n",
    "            as the guide for the output fields.\n",
    "\n",
    "    Returns:\n",
    "        The model reply parsed by JsonOutputParser. The shape is decided by the model:\n",
    "        saved runs of this notebook show both a single dict of parallel lists and a\n",
    "        list of per-person dicts, so callers should handle either.\n",
    "    \"\"\"\n",
    "    logger.info(\"Formatting Influencer Data\")\n",
    "    # NOTE(review): the <|...|> markers look like Llama-style chat tokens; ChatOpenAI\n",
    "    # presumably receives them as plain text in a single message - confirm they are wanted.\n",
    "    extraction_prompt = PromptTemplate(\n",
    "        template=\"\"\"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n",
    "        You are an Influencer Data Extractor AI Agent tasked with extracting information from a search result\\n\n",
    "        Backstory: \\n\n",
    "            A business brand is looking for some influencer in a specific location or area. He used a tool called Perplexity AI to get this data. \\n\n",
    "            This is an amazing tool, and yet it can struggle a bit (ai isn't perfect you know). \\n \n",
    "            The information returned by this ai can be incomplete, not properly structured and all. \\n \n",
    "            This brand needs your help. \\n \n",
    "        This is how you will help the brand: \\n \n",
    "            1. You will be given two things, the ai search result and the prompt used to query the ai. \\n\n",
    "            2. Your major interest here is formatting and making the structure right. \\n \n",
    "            3. You will process the ai result, create a JSON structure of the key information needed by the user and add the corresponding values to it. \\n\n",
    "            4. The user prompt is your guide on how the JSON should be formatted. \\n\n",
    "            5. If there is missing data in the ai response, send it back as NA. \\n \n",
    "            6. At the end of your processing you want to return a structured response and also make sure it is in the best order as expected by the user. \\n\n",
    "            \n",
    "        Return a structured JSON or dictionary as output. \\n \n",
    "        Avoid an individual dictionary per person; return one dictionary for all. \\n\n",
    "        Ensure that the data is properly arranged and in a good format. \\n\n",
    "        Please do this carefully and excellently. \n",
    "        \n",
    "    <|eot_id|><|start_header_id|>user<|end_header_id|>\n",
    "    AI_SEARCH_RESULT: {search_result} \\n \n",
    "    PROMPT: {prompt}\n",
    "    \n",
    "    <|eot_id|><|start_header_id|>assistant<|end_header_id|>\"\"\",\n",
    "        input_variables=[\"search_result\", \"prompt\"],\n",
    "    )\n",
    "\n",
    "    # Chain: fill the template, call the chat model, parse the reply as JSON.\n",
    "    extraction_chain = extraction_prompt | llm | JsonOutputParser()\n",
    "    return extraction_chain.invoke({\"search_result\": search_result, \"prompt\": prompt})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[32m2024-10-22 23:13:57.781\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36minfluencer_data\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mCategorizing products\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "# Format the most recent search answer, using the prompt that produced it as the guide.\n",
    "data_result = influencer_data(search_result=result, prompt=prompt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Name': ['j6lack_',\n",
       "  'Sharaaa',\n",
       "  'Jaz',\n",
       "  'Randall Woodfin',\n",
       "  'Micah Myricks',\n",
       "  '_imonlykc',\n",
       "  'THE RAVEN SMITH',\n",
       "  \"Bianca Sha'nel Patton\",\n",
       "  'Mare Thomas',\n",
       "  'Destinee Ariee',\n",
       "  'Dewayne Mcbride'],\n",
       " 'Contact Info': ['Check contact details',\n",
       "  'Check contact details',\n",
       "  'Check contact details',\n",
       "  'Check contact details',\n",
       "  'Check contact details',\n",
       "  'Check contact details',\n",
       "  'Check contact details',\n",
       "  'Check contact details',\n",
       "  'Check contact details',\n",
       "  'Check contact details',\n",
       "  'Check contact details'],\n",
       " 'Facebook Username': ['', '', '', '', '', '', '', '', '', '', ''],\n",
       " 'Instagram Username': ['@j6lack_',\n",
       "  '@kashararachele',\n",
       "  '@birmingham_finest',\n",
       "  '@randall_woodfin',\n",
       "  '@thecoolestguyuno',\n",
       "  '@_imonlykc',\n",
       "  '@noflashplzz',\n",
       "  '@biancasobad',\n",
       "  '@savannahbrasfield',\n",
       "  '@therealddestiny',\n",
       "  '@queen.slim'],\n",
       " 'Number of Facebook Followers': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n",
       " 'Number of Instagram Followers': [15100,\n",
       "  11900,\n",
       "  11100,\n",
       "  17200,\n",
       "  10200,\n",
       "  12000,\n",
       "  15800,\n",
       "  35100,\n",
       "  15500,\n",
       "  24000,\n",
       "  10900]}"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the structured data returned by influencer_data.\n",
    "data_result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "def combined_influencer_data(prompt: str) -> dict:\n",
    "    \"\"\"Run the full pipeline: Perplexity search, then OpenAI formatting of the answer.\n",
    "\n",
    "    Args:\n",
    "        prompt: Request sent to get_chat_completion and reused as the formatting guide\n",
    "            passed to influencer_data.\n",
    "\n",
    "    Returns:\n",
    "        Whatever influencer_data returns for the search result.\n",
    "\n",
    "    Raises:\n",
    "        RuntimeError: if the search step reported a failure instead of an answer.\n",
    "    \"\"\"\n",
    "    logger.info(\"Using Perplexity Ai to get the influencer data\")\n",
    "    search_result = get_chat_completion(prompt)\n",
    "\n",
    "    # get_chat_completion reports failures as plain strings. Stop here instead of\n",
    "    # asking the formatter model to structure an error message into influencer data.\n",
    "    search_failed = (\n",
    "        not isinstance(search_result, str)\n",
    "        or search_result == \"Unexpected response format.\"\n",
    "        or search_result.startswith(\"Request failed with status code:\")\n",
    "    )\n",
    "    if search_failed:\n",
    "        raise RuntimeError(f\"Perplexity search failed: {search_result}\")\n",
    "\n",
    "    logger.info(\"Formatting the data with OpenAI\")\n",
    "    return influencer_data(search_result, prompt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[32m2024-10-22 23:27:16.425\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mcombined_influencer_data\u001b[0m:\u001b[36m3\u001b[0m - \u001b[1mUsing Perplexity Ai to get the influencer data\u001b[0m\n",
      "\u001b[32m2024-10-22 23:27:27.610\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mcombined_influencer_data\u001b[0m:\u001b[36m7\u001b[0m - \u001b[1mFormatting the data with OpenAI\u001b[0m\n",
      "\u001b[32m2024-10-22 23:27:27.611\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36minfluencer_data\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mFormatting Influencer Data\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "# End-to-end run: Perplexity search followed by OpenAI formatting, for US-wide influencers.\n",
    "# The requested list count matches the eight fields enumerated in the prompt.\n",
    "prompt = \"\"\"\n",
    "    I'm looking for influencers that i need to promote my sneakers brand. My target is the USA. \\n\n",
    "    I need help with looking for influencers in my target country. People that i can reach out to, and they will help me promote my sneakers brand. \\n\n",
    "    I need your help with a list of 30 top  influencers i have in this country. Here's how i want it. \\n\n",
    "        1. Name \\n\n",
    "        2. Contact_info \\n\n",
    "        3. Facebook username\\n\n",
    "        4. Instagram username \\n\n",
    "        5. Facebook followers \\n\n",
    "        6. Instagram followers \\n\n",
    "        7. TikTok username \\n\n",
    "        8. TikTok followers\\n \n",
    "    This information will help me track who to reach out to. \\n \n",
    "    I'm expecting 8 lists as output, it should follow the pattern i listed.\\n\n",
    "    Please do this carefully.\n",
    "\"\"\"\n",
    "influencers_data = combined_influencer_data(prompt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'Name': 'Noah Beck',\n",
       "  'Contact_info': '@noahbeck',\n",
       "  'Facebook_username': '@noahbeck',\n",
       "  'Instagram_username': '@noahbeck',\n",
       "  'Facebook_followers': '1.5M',\n",
       "  'Instagram_followers': '18.4M',\n",
       "  'TikTok_username': '@noahbeck',\n",
       "  'TikTok_followers': '3.5M'},\n",
       " {'Name': 'Rickey Thompson',\n",
       "  'Contact_info': '@rickeythompson',\n",
       "  'Facebook_username': '@rickeythompson',\n",
       "  'Instagram_username': '@rickeythompson',\n",
       "  'Facebook_followers': '1.2M',\n",
       "  'Instagram_followers': '3.5M',\n",
       "  'TikTok_username': '@rickeythompson',\n",
       "  'TikTok_followers': '2.5M'},\n",
       " {'Name': 'Kristy Scott',\n",
       "  'Contact_info': '@kristy.sarah',\n",
       "  'Facebook_username': '@kristy.sarah',\n",
       "  'Instagram_username': '@kristy.sarah',\n",
       "  'Facebook_followers': '200k',\n",
       "  'Instagram_followers': '2.3M',\n",
       "  'TikTok_username': '@kristy.sarah',\n",
       "  'TikTok_followers': '1.5M'},\n",
       " {'Name': 'Lizzie Armanto',\n",
       "  'Contact_info': '@lizziearmanto',\n",
       "  'Facebook_username': '@lizziearmanto',\n",
       "  'Instagram_username': '@lizziearmanto',\n",
       "  'Facebook_followers': '50k',\n",
       "  'Instagram_followers': '504k',\n",
       "  'TikTok_username': '@lizziearmanto',\n",
       "  'TikTok_followers': '1.2M'},\n",
       " {'Name': 'Allison Bornstein',\n",
       "  'Contact_info': '@anyasreviews',\n",
       "  'Facebook_username': '@anyasreviews',\n",
       "  'Instagram_username': '@anyasreviews',\n",
       "  'Facebook_followers': '20k',\n",
       "  'Instagram_followers': '211k',\n",
       "  'TikTok_username': '@anyasreviews',\n",
       "  'TikTok_followers': '100k'},\n",
       " {'Name': 'Asyraf Ash Samsudin',\n",
       "  'Contact_info': '@iamjaymula',\n",
       "  'Facebook_username': '@iamjaymula',\n",
       "  'Instagram_username': '@iamjaymula',\n",
       "  'Facebook_followers': '10k',\n",
       "  'Instagram_followers': '117k',\n",
       "  'TikTok_username': '@iamjaymula',\n",
       "  'TikTok_followers': '50k'},\n",
       " {'Name': 'Faye Ellaby',\n",
       "  'Contact_info': '@schuylerreeves',\n",
       "  'Facebook_username': '@schuylerreeves',\n",
       "  'Instagram_username': '@schuylerreeves',\n",
       "  'Facebook_followers': '5k',\n",
       "  'Instagram_followers': '90k',\n",
       "  'TikTok_username': '@schuylerreeves',\n",
       "  'TikTok_followers': '20k'},\n",
       " {'Name': 'Josephine Lee',\n",
       "  'Contact_info': '@josephineylee',\n",
       "  'Facebook_username': '@josephineylee',\n",
       "  'Instagram_username': '@josephineylee',\n",
       "  'Facebook_followers': '5k',\n",
       "  'Instagram_followers': '82k',\n",
       "  'TikTok_username': '@josephineylee',\n",
       "  'TikTok_followers': '15k'},\n",
       " {'Name': 'Janelle C Shuttlesworth',\n",
       "  'Contact_info': '@jcheyenne_',\n",
       "  'Facebook_username': '@jcheyenne_',\n",
       "  'Instagram_username': '@jcheyenne_',\n",
       "  'Facebook_followers': '3k',\n",
       "  'Instagram_followers': '78k',\n",
       "  'TikTok_username': '@jcheyenne_',\n",
       "  'TikTok_followers': '10k'},\n",
       " {'Name': 'Terry Biviano',\n",
       "  'Contact_info': '@terrybiviano',\n",
       "  'Facebook_username': '@terrybiviano',\n",
       "  'Instagram_username': '@terrybiviano',\n",
       "  'Facebook_followers': '3k',\n",
       "  'Instagram_followers': '63k',\n",
       "  'TikTok_username': '@terrybiviano',\n",
       "  'TikTok_followers': '5k'},\n",
       " {'Name': 'Angel Celestino',\n",
       "  'Contact_info': '@angelxcelestino',\n",
       "  'Facebook_username': '@angelxcelestino',\n",
       "  'Instagram_username': '@angelxcelestino',\n",
       "  'Facebook_followers': '2k',\n",
       "  'Instagram_followers': '42k',\n",
       "  'TikTok_username': '@angelxcelestino',\n",
       "  'TikTok_followers': '3k'},\n",
       " {'Name': 'Dalton Lanning',\n",
       "  'Contact_info': '@dealingwithdalton',\n",
       "  'Facebook_username': '@dealingwithdalton',\n",
       "  'Instagram_username': '@dealingwithdalton',\n",
       "  'Facebook_followers': '2k',\n",
       "  'Instagram_followers': '39k',\n",
       "  'TikTok_username': '@dealingwithdalton',\n",
       "  'TikTok_followers': '2k'},\n",
       " {'Name': 'Sole Collector',\n",
       "  'Contact_info': 'N/A',\n",
       "  'Facebook_username': 'N/A',\n",
       "  'Instagram_username': 'N/A',\n",
       "  'Facebook_followers': 'N/A',\n",
       "  'Instagram_followers': 'N/A',\n",
       "  'TikTok_username': 'N/A',\n",
       "  'TikTok_followers': 'N/A'},\n",
       " {'Name': 'HYPEBEAST » Footwear',\n",
       "  'Contact_info': 'N/A',\n",
       "  'Facebook_username': 'N/A',\n",
       "  'Instagram_username': 'N/A',\n",
       "  'Facebook_followers': 'N/A',\n",
       "  'Instagram_followers': 'N/A',\n",
       "  'TikTok_username': 'N/A',\n",
       "  'TikTok_followers': 'N/A'},\n",
       " {'Name': 'Nice Kicks',\n",
       "  'Contact_info': 'N/A',\n",
       "  'Facebook_username': 'N/A',\n",
       "  'Instagram_username': 'N/A',\n",
       "  'Facebook_followers': 'N/A',\n",
       "  'Instagram_followers': 'N/A',\n",
       "  'TikTok_username': 'N/A',\n",
       "  'TikTok_followers': 'N/A'},\n",
       " {'Name': 'Sneaker News',\n",
       "  'Contact_info': 'N/A',\n",
       "  'Facebook_username': 'N/A',\n",
       "  'Instagram_username': 'N/A',\n",
       "  'Facebook_followers': 'N/A',\n",
       "  'Instagram_followers': 'N/A',\n",
       "  'TikTok_username': 'N/A',\n",
       "  'TikTok_followers': 'N/A'},\n",
       " {'Name': 'Sneaker Bar Detroit',\n",
       "  'Contact_info': 'N/A',\n",
       "  'Facebook_username': 'N/A',\n",
       "  'Instagram_username': 'N/A',\n",
       "  'Facebook_followers': 'N/A',\n",
       "  'Instagram_followers': 'N/A',\n",
       "  'TikTok_username': 'N/A',\n",
       "  'TikTok_followers': 'N/A'},\n",
       " {'Name': 'Modern Notoriety',\n",
       "  'Contact_info': 'N/A',\n",
       "  'Facebook_username': 'N/A',\n",
       "  'Instagram_username': 'N/A',\n",
       "  'Facebook_followers': 'N/A',\n",
       "  'Instagram_followers': 'N/A',\n",
       "  'TikTok_username': 'N/A',\n",
       "  'TikTok_followers': 'N/A'},\n",
       " {'Name': 'Sneaker Freaker',\n",
       "  'Contact_info': 'N/A',\n",
       "  'Facebook_username': 'N/A',\n",
       "  'Instagram_username': 'N/A',\n",
       "  'Facebook_followers': 'N/A',\n",
       "  'Instagram_followers': 'N/A',\n",
       "  'TikTok_username': 'N/A',\n",
       "  'TikTok_followers': 'N/A'},\n",
       " {'Name': 'SneakerShouts',\n",
       "  'Contact_info': 'N/A',\n",
       "  'Facebook_username': 'N/A',\n",
       "  'Instagram_username': 'N/A',\n",
       "  'Facebook_followers': 'N/A',\n",
       "  'Instagram_followers': 'N/A',\n",
       "  'TikTok_username': 'N/A',\n",
       "  'TikTok_followers': 'N/A'},\n",
       " {'Name': 'Charli D’Amelio',\n",
       "  'Contact_info': '@charlidamelio',\n",
       "  'Facebook_username': '@charlidamelio',\n",
       "  'Instagram_username': '@charlidamelio',\n",
       "  'Facebook_followers': '150M',\n",
       "  'Instagram_followers': '155M',\n",
       "  'TikTok_username': '@charlidamelio',\n",
       "  'TikTok_followers': '50M'},\n",
       " {'Name': 'Chiara Ferragni',\n",
       "  'Contact_info': '@chiaraferragni',\n",
       "  'Facebook_username': '@chiaraferragni',\n",
       "  'Instagram_username': '@chiaraferragni',\n",
       "  'Facebook_followers': '30M',\n",
       "  'Instagram_followers': '28.7M',\n",
       "  'TikTok_username': '@chiaraferragni',\n",
       "  'TikTok_followers': '10M'},\n",
       " {'Name': 'Anya Barefoot',\n",
       "  'Contact_info': '@anyabarefoot',\n",
       "  'Facebook_username': '@anyabarefoot',\n",
       "  'Instagram_username': '@anyabarefoot',\n",
       "  'Facebook_followers': '10k',\n",
       "  'Instagram_followers': '100k',\n",
       "  'TikTok_username': '@anyabarefoot',\n",
       "  'TikTok_followers': '20k'},\n",
       " {'Name': 'Jyothee Murali',\n",
       "  'Contact_info': '@jyotheemurali',\n",
       "  'Facebook_username': '@jyotheemurali',\n",
       "  'Instagram_username': '@jyotheemurali',\n",
       "  'Facebook_followers': '5k',\n",
       "  'Instagram_followers': '50k',\n",
       "  'TikTok_username': '@jyotheemurali',\n",
       "  'TikTok_followers': '10k'},\n",
       " {'Name': 'Asyraf Ash Samsudin',\n",
       "  'Contact_info': '@iamjaymula',\n",
       "  'Facebook_username': '@iamjaymula',\n",
       "  'Instagram_username': '@iamjaymula',\n",
       "  'Facebook_followers': '10k',\n",
       "  'Instagram_followers': '117k',\n",
       "  'TikTok_username': '@iamjaymula',\n",
       "  'TikTok_followers': '50k'},\n",
       " {'Name': 'Lizzie Armanto',\n",
       "  'Contact_info': '@lizziearmanto',\n",
       "  'Facebook_username': '@lizziearmanto',\n",
       "  'Instagram_username': '@lizziearmanto',\n",
       "  'Facebook_followers': '50k',\n",
       "  'Instagram_followers': '504k',\n",
       "  'TikTok_username': '@lizziearmanto',\n",
       "  'TikTok_followers': '1.2M'},\n",
       " {'Name': 'Allison Bornstein',\n",
       "  'Contact_info': '@anyasreviews',\n",
       "  'Facebook_username': '@anyasreviews',\n",
       "  'Instagram_username': '@anyasreviews',\n",
       "  'Facebook_followers': '20k',\n",
       "  'Instagram_followers': '211k',\n",
       "  'TikTok_username': '@anyasreviews',\n",
       "  'TikTok_followers': '100k'},\n",
       " {'Name': 'Kristy Scott',\n",
       "  'Contact_info': '@kristy.sarah',\n",
       "  'Facebook_username': '@kristy.sarah',\n",
       "  'Instagram_username': '@kristy.sarah',\n",
       "  'Facebook_followers': '200k',\n",
       "  'Instagram_followers': '2.3M',\n",
       "  'TikTok_username': '@kristy.sarah',\n",
       "  'TikTok_followers': '1.5M'},\n",
       " {'Name': 'Noah Beck',\n",
       "  'Contact_info': '@noahbeck',\n",
       "  'Facebook_username': '@noahbeck',\n",
       "  'Instagram_username': '@noahbeck',\n",
       "  'Facebook_followers': '1.5M',\n",
       "  'Instagram_followers': '18.4M',\n",
       "  'TikTok_username': '@noahbeck',\n",
       "  'TikTok_followers': '3.5M'},\n",
       " {'Name': 'Rickey Thompson',\n",
       "  'Contact_info': '@rickeythompson',\n",
       "  'Facebook_username': '@rickeythompson',\n",
       "  'Instagram_username': '@rickeythompson',\n",
       "  'Facebook_followers': '1.2M',\n",
       "  'Instagram_followers': '3.5M',\n",
       "  'TikTok_username': '@rickeythompson',\n",
       "  'TikTok_followers': '2.5M'}]"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "influencers_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "product_categories = [\n",
    "    \"Beauty & Skincare\",\n",
    "    \"Fashion & Lifestyle\",\n",
    "    \"Health & Fitness\",\n",
    "    \"Travel & Adventure\",\n",
    "    \"Food & Beverage\",\n",
    "    \"Technology & Gadgets\",\n",
    "    \"Gaming & Esports\",\n",
    "    \"Parenting & Family\",\n",
    "    \"Finance & Business\",\n",
    "    \"Wellness & Mental Health\",\n",
    "    \"Automotive & Motorsports\",\n",
    "    \"Entertainment & Pop Culture\",\n",
    "    \"Photography & Visual Arts\",\n",
    "    \"Education & Learning\",\n",
    "    \"Environmental & Sustainability\"\n",
    "]\n",
    "\n",
    "\n",
    "def product_categorizer(product_lists: list, product_categories=product_categories) -> str:\n",
    "    logger.info(f\"Categorizing products\")\n",
    "    initiator_prompt = PromptTemplate(\n",
    "        template=\"\"\"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n",
    "        You are a Brand AI Agent tasked with categorizing products into categories\\n\n",
    "        There are some categories given by the brand, these are the categories the products are expected to be categorized into.\\n\n",
    "        You will be given a list of products and asked to categorize them.\\n\n",
    "        You are meant to categorize all the given product into one category. \\n \n",
    "        You take the following steps: \n",
    "            1. Looking into all the list of products. \\n\n",
    "            2. Understand where they belong to. \\n \n",
    "            3. Look into the kist of categories. \\n \n",
    "            4. Select the category that this products falls under. \\n\n",
    "            \n",
    "        You should return the selected category as output. \\n \n",
    "        Please do this carefully and correctly. \n",
    "        \n",
    "    <|eot_id|><|start_header_id|>user<|end_header_id|>\n",
    "    PRODUCT_LISTS: {product_lists} \\n \n",
    "    PRODUCT_CATEGORY: {product_categories}\n",
    "    \n",
    "    <|eot_id|><|start_header_id|>assistant<|end_header_id|>\"\"\",\n",
    "            input_variables=[\"product_lists\", \"product_categories\"],\n",
    "        )\n",
    "\n",
    "    initiator_router = initiator_prompt | llm | StrOutputParser()\n",
    "    output = initiator_router.invoke({\"product_lists\":product_lists, \"product_categories\":product_categories})\n",
    "    return output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[32m2024-10-22 22:48:56.971\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mproduct_categorizer\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1mCategorizing products\u001b[0m\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'Parenting & Family'"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "product_categorizer(['baby oil', 'baby socks', 'baby sweaters'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[32m2024-10-22 22:48:58.275\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mproduct_categorizer\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1mCategorizing products\u001b[0m\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'Fashion & Lifestyle'"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "product_categorizer(['sneakers', 'versace', 'nike'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}