diff --git a/.env b/.env index d674969..8596fba 100644 --- a/.env +++ b/.env @@ -1 +1,2 @@ -PERPLEXITY_AI_API = "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" \ No newline at end of file +PERPLEXITY_AI_API = "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" +OPENAI_API_KEY = "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" \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8ae74aa --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +client.json +credentials.json \ No newline at end of file diff --git a/drive_test.ipynb b/drive_test.ipynb new file mode 100644 index 0000000..a9b8cbc --- /dev/null +++ b/drive_test.ipynb @@ -0,0 +1,126 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# !pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from __future__ import print_function\n", + "import os\n", + "import google.auth\n", + "import google_auth_oauthlib.flow\n", + "import googleapiclient.discovery\n", + "import googleapiclient.errors\n", + "from google.oauth2.credentials import Credentials" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=165594406980-5f6scupsune6ise7dqp04ti3mmt2eqe9.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A52470%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.readonly&state=TRbdRt2JQkFtZttqScUDD9N4DW7scP&access_type=offline\n" + ] + } + ], + "source": [ + "# If modifying these SCOPES, delete the file token.json.\n", + "SCOPES = ['https://www.googleapis.com/auth/drive.readonly']\n", + "\n", + "def main():\n", 
+ " \"\"\"Shows basic usage of the Drive v3 API.\n", + " Prints the names and ids of the first 10 files the user has access to.\n", + " \"\"\"\n", + " creds = None\n", + " # The file token.json stores the user's access and refresh tokens, and is\n", + " # created automatically when the authorization flow completes for the first time.\n", + " if os.path.exists('token.json'):\n", + " creds = Credentials.from_authorized_user_file('token.json', SCOPES)\n", + " # If there are no (valid) credentials available, let the user log in.\n", + " if not creds or not creds.valid:\n", + " if creds and creds.expired and creds.refresh_token:\n", + " creds.refresh(google.auth.transport.requests.Request())\n", + " else:\n", + " flow = google_auth_oauthlib.flow.InstalledAppFlow.from_client_secrets_file(\n", + " 'credentials.json', SCOPES)\n", + " creds = flow.run_local_server(port=0)\n", + " # Save the credentials for the next run\n", + " with open('token.json', 'w') as token:\n", + " token.write(creds.to_json())\n", + "\n", + " service = googleapiclient.discovery.build('drive', 'v3', credentials=creds)\n", + "\n", + " # Call the Drive v3 API\n", + " results = service.files().list(\n", + " pageSize=10, fields=\"nextPageToken, files(id, name)\").execute()\n", + " items = results.get('files', [])\n", + "\n", + " if not items:\n", + " print('No files found.')\n", + " else:\n", + " print('Files:')\n", + " for item in items:\n", + " print(u'{0} ({1})'.format(item['name'], item['id']))\n", + "\n", + "if __name__ == '__main__':\n", + " main()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + 
"language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/ds_apis.py b/ds_apis.py new file mode 100644 index 0000000..441850a --- /dev/null +++ b/ds_apis.py @@ -0,0 +1,36 @@ +import sys, os +from fastapi import FastAPI, Form, Body +from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import FileResponse, JSONResponse +from typing import List, Dict +from pydantic import BaseModel +from utils import product_categorizer + + +app = FastAPI() + +# Define allowed origins for CORS +origins = [ + "http://localhost:5300", + "http://localhost:3000", + # Add other allowed origins here +] + +# Add CORS middleware +app.add_middleware( + CORSMiddleware, + allow_origins=origins, + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + + + +class ProductRequest(BaseModel): + products: List[str] + +@app.post("/categorize-products/") +async def categorize_products(request: ProductRequest): + categorized_output = product_categorizer(request.products) + return JSONResponse(content={"categorized_products": categorized_output}) diff --git a/experiment.ipynb b/experiment.ipynb index 742fdc1..9e1c3df 100644 --- a/experiment.ipynb +++ b/experiment.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -11,7 +11,7 @@ "True" ] }, - "execution_count": 8, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -21,13 +21,26 @@ "import os\n", "import requests\n", "from dotenv import load_dotenv\n", - "\n", + "from langchain_openai import ChatOpenAI\n", + "from langchain_core.prompts.prompt import PromptTemplate\n", + "from langchain_core.output_parsers import StrOutputParser, JsonOutputParser\n", + 
"from loguru import logger\n", "load_dotenv()" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "os.environ[\"OPENAI_API_KEY\"] = os.getenv(\"OPENAI_API_KEY\")\n", + "llm = ChatOpenAI(model=\"gpt-4o\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -36,7 +49,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -90,14 +103,14 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "There are approximately 100 billion stars in the Milky Way Galaxy.\n" + "The Milky Way Galaxy is estimated to contain approximately **100 billion stars**.\n" ] } ], @@ -110,7 +123,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -118,15 +131,15 @@ "output_type": "stream", "text": [ "```python\n", - "states = [\n", + "states_in_us = [\n", " \"Alabama\", \"Alaska\", \"Arizona\", \"Arkansas\", \"California\", \"Colorado\", \"Connecticut\",\n", - " \"Delaware\", \"Florida\", \"Georgia\", \"Hawaii\", \"Idaho\", \"Illinois\", \"Indiana\",\n", - " \"Iowa\", \"Kansas\", \"Kentucky\", \"Louisiana\", \"Maine\", \"Maryland\", \"Massachusetts\",\n", - " \"Michigan\", \"Minnesota\", \"Mississippi\", \"Missouri\", \"Montana\", \"Nebraska\",\n", - " \"Nevada\", \"New Hampshire\", \"New Jersey\", \"New Mexico\", \"New York\", \"North Carolina\",\n", - " \"North Dakota\", \"Ohio\", \"Oklahoma\", \"Oregon\", \"Pennsylvania\", \"Rhode Island\",\n", - " \"South Carolina\", \"South Dakota\", \"Tennessee\", \"Texas\", \"Utah\", \"Vermont\",\n", - " \"Virginia\", \"Washington\", \"West Virginia\", \"Wisconsin\", \"Wyoming\"\n", + " \"Delaware\", \"Florida\", \"Georgia\", \"Hawaii\", \"Idaho\", \"Illinois\", \"Indiana\", 
\"Iowa\",\n", + " \"Kansas\", \"Kentucky\", \"Louisiana\", \"Maine\", \"Maryland\", \"Massachusetts\", \"Michigan\",\n", + " \"Minnesota\", \"Mississippi\", \"Missouri\", \"Montana\", \"Nebraska\", \"Nevada\", \"New Hampshire\",\n", + " \"New Jersey\", \"New Mexico\", \"New York\", \"North Carolina\", \"North Dakota\", \"Ohio\",\n", + " \"Oklahoma\", \"Oregon\", \"Pennsylvania\", \"Rhode Island\", \"South Carolina\", \"South Dakota\",\n", + " \"Tennessee\", \"Texas\", \"Utah\", \"Vermont\", \"Virginia\", \"Washington\", \"West Virginia\",\n", + " \"Wisconsin\", \"Wyoming\"\n", "]\n", "```\n" ] @@ -141,7 +154,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -149,7 +162,7 @@ "output_type": "stream", "text": [ "```python\n", - "['England', 'Scotland', 'Wales', 'Northern Ireland']\n", + "[\"England\", \"Scotland\", \"Wales\", \"Northern Ireland\"]\n", "```\n" ] } @@ -163,7 +176,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -171,7 +184,7 @@ "output_type": "stream", "text": [ "```python\n", - "states = [\n", + "[\n", " \"Baden-Württemberg\",\n", " \"Bavaria\",\n", " \"Berlin\",\n", @@ -202,7 +215,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -210,7 +223,37 @@ "output_type": "stream", "text": [ "```python\n", - "['Retail', 'Fashion', 'Department Stores', 'Online Shops', 'Grocery Stores', 'Supercenters', 'Hypermarkets', 'Neobanking', 'Neobrokerage', 'Fintech', 'Automotive', 'Finance', 'Consumer Goods', 'Sneakers', 'Sportswear', 'Soft Drinks', 'Jewelry', 'Cosmetics', 'Insurance', 'Telecommunications', 'Hotels', 'Airlines', 'Banks', 'Online Grocery Delivery', 'Men\\'s Shop', 'NBA', 'NCAA', 'NFL', 'College Football']\n", + "categories_of_brands = [\n", + " \"Corporate Brands\",\n", + " \"Product Brands\",\n", + " \"Sneakers and Sportswear\",\n", + " \"Soft Drinks\",\n", + " 
\"Neobanking and Neobrokerage\",\n", + " \"Banks\",\n", + " \"Grocery Stores\",\n", + " \"Online Grocery Delivery\",\n", + " \"Male Cosmetics\",\n", + " \"Jewelry\",\n", + " \"Apparel\",\n", + " \"Clothing\",\n", + " \"Accessories\",\n", + " \"Footwear\",\n", + " \"Online Fashion Stores\",\n", + " \"Department Stores\",\n", + " \"Supercenters & Hypermarkets\",\n", + " \"B2C E-Commerce\",\n", + " \"Insurance\",\n", + " \"Telecommunications\",\n", + " \"Airlines\",\n", + " \"Hotels\",\n", + " \"Automobiles\",\n", + " \"Electronics\",\n", + " \"Home Improvement\",\n", + " \"Pharmaceuticals\",\n", + " \"Health and Wellness\",\n", + " \"Food and Beverages\",\n", + " \"Retail\"\n", + "]\n", "```\n" ] } @@ -224,26 +267,105 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "1. **Name** | **Email/Contact** | **Location** | **Facebook Username** | **Instagram Username** | **Facebook Followers** | **Instagram Followers**\n", - "2. **Sharaaa** | N/A | Birmingham, AL | N/A | @kashararachele | 11.9k | 11.9k\n", - "3. **Jaz** | N/A | Birmingham, AL | N/A | @birmingham_finest | 11.1k | 11.1k\n", - "4. **Randall Woodfin** | N/A | Birmingham, AL | N/A | @randall_woodfin | N/A | 17.2k\n", - "5. **Micah Myricks** | N/A | Birmingham, AL | N/A | @thecoolestguyuno | N/A | 10.2k\n", - "6. **KC** | N/A | Birmingham, AL | N/A | @_imonlykc | N/A | 12k\n", - "7. **The Raveen Smith** | N/A | Birmingham, AL | N/A | @noflashplzz | N/A | 15.8k\n", - "8. **Bianca Sha'nel Patton** | N/A | Birmingham, AL | N/A | @biancasobad | N/A | 35.1k\n", - "9. **Mare Thomas** | N/A | Birmingham, AL | N/A | @savannahbrasfield | N/A | 15.5k\n", - "10. **Destinee Ariee** | N/A | Birmingham, AL | N/A | @therealddestiny | N/A | 24k\n", - "11. 
**Dewayne Mcbride** | N/A | Birmingham, AL | N/A | @queen.slim | N/A | 10.9k\n", + "Here are the lists of influencers in Alabama that you can consider for promoting your sneakers brand, based on the provided data:\n", "\n", - "Note: The email and contact details are not available in the provided sources.\n" + "## Name\n", + "- j6lack_\n", + "- Sharaaa\n", + "- Jaz\n", + "- Randall Woodfin\n", + "- Micah Myricks\n", + "- KC\n", + "- THE RAVEN SMITH\n", + "- Bianca Sha'nel Patton\n", + "- Mare Thomas\n", + "- Destinee Ariee\n", + "- Dewayne Mcbride\n", + "\n", + "## Email or Contact\n", + "- Not available in the source data\n", + "- Not available in the source data\n", + "- Not available in the source data\n", + "- Not available in the source data\n", + "- Not available in the source data\n", + "- Not available in the source data\n", + "- Not available in the source data\n", + "- Not available in the source data\n", + "- Not available in the source data\n", + "- Not available in the source data\n", + "- Not available in the source data\n", + "\n", + "## Location\n", + "- Birmingham\n", + "- Birmingham\n", + "- Birmingham\n", + "- Birmingham\n", + "- Birmingham\n", + "- Birmingham\n", + "- Birmingham\n", + "- Birmingham\n", + "- Birmingham\n", + "- Birmingham\n", + "- Birmingham\n", + "\n", + "## Facebook Username\n", + "- Not available in the source data\n", + "- Not available in the source data\n", + "- Not available in the source data\n", + "- Not available in the source data\n", + "- Not available in the source data\n", + "- Not available in the source data\n", + "- Not available in the source data\n", + "- Not available in the source data\n", + "- Not available in the source data\n", + "- Not available in the source data\n", + "- Not available in the source data\n", + "\n", + "## Instagram Username\n", + "- @j6lack_\n", + "- @kashararachele\n", + "- @birmingham_finest\n", + "- @randall_woodfin\n", + "- @thecoolestguyuno\n", + "- @_imonlykc\n", + "- 
@noflashplzz\n", + "- @biancasobad\n", + "- @savannahbrasfield\n", + "- @therealddestiny\n", + "- @queen.slim\n", + "\n", + "## Number of Facebook Followers\n", + "- Not available in the source data\n", + "- Not available in the source data\n", + "- Not available in the source data\n", + "- Not available in the source data\n", + "- Not available in the source data\n", + "- Not available in the source data\n", + "- Not available in the source data\n", + "- Not available in the source data\n", + "- Not available in the source data\n", + "- Not available in the source data\n", + "- Not available in the source data\n", + "\n", + "## Number of Instagram Followers\n", + "- 15.1k\n", + "- 11.9k\n", + "- 11.1k\n", + "- 17.2k\n", + "- 10.2k\n", + "- 12k\n", + "- 15.8k\n", + "- 35.1k\n", + "- 15.5k\n", + "- 24k\n", + "- 10.9k\n" ] } ], @@ -251,7 +373,7 @@ "prompt = \"\"\"\n", " I'm looking for influencers that i need to promote my sneakers brand. My current location is Alabama in the US. \\n\n", " I need help with looking for influencers in my state. People that i can reach out to, and they will help me promote my sneakers brand. \\n\n", - " I need your help with a list of all the influencers i have in this state. Here's how i want it. \\n\n", + " I need your help with a python list of all the influencers i have in this state. Here's how i want it. \\n\n", " 1. Name \\n\n", " 2. Email or contant \\n\n", " 3. 
Location \\n\n", @@ -270,116 +392,44 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "```python\n", - "# List of influencers in Alabama\n", + "Here are the lists of influencers in Alabama, formatted as requested:\n", "\n", - "name = [\n", - " \"J6lack_\",\n", - " \"Sharaaa\",\n", - " \"Jaz šŸ–¤\",\n", - " \"Randall Woodfin\",\n", - " \"Micah Myricks\",\n", - " \"_imonlykc\",\n", - " \"THE RAVEN SMITH\",\n", - " \"Bianca Sha'nel Patton\",\n", - " \"Mare Thomas\",\n", - " \"Destinee Ariee\",\n", - " \"Dewayne Mcbride\"\n", - "]\n", - "\n", - "contact_info = [\n", - " \"Check contact details\",\n", - " \"Check contact details\",\n", - " \"Check contact details\",\n", - " \"Check contact details\",\n", - " \"Check contact details\",\n", - " \"Check contact details\",\n", - " \"Check contact details\",\n", - " \"Check contact details\",\n", - " \"Check contact details\",\n", - " \"Check contact details\",\n", - " \"Check contact details\"\n", - "]\n", - "\n", - "location = [\n", - " \"Birmingham\",\n", - " \"Birmingham\",\n", - " \"Birmingham\",\n", - " \"Birmingham\",\n", - " \"Birmingham\",\n", - " \"Birmingham\",\n", - " \"Birmingham\",\n", - " \"Birmingham\",\n", - " \"Birmingham\",\n", - " \"Birmingham\",\n", - " \"Birmingham\"\n", - "]\n", - "\n", - "facebook_username = [\n", - " None,\n", - " None,\n", - " None,\n", - " None,\n", - " None,\n", - " None,\n", - " None,\n", - " None,\n", - " None,\n", - " None,\n", - " None\n", - "]\n", - "\n", - "instagram_username = [\n", - " \"@j6lack_\",\n", - " \"@kashararachele\",\n", - " \"@birmingham_finest\",\n", - " \"@randall_woodfin\",\n", - " \"@thecoolestguyuno\",\n", - " \"@_imonlykc\",\n", - " \"@noflashplzz\",\n", - " \"@biancasobad\",\n", - " \"@savannahbrasfield\",\n", - " \"@therealddestiny\",\n", - " \"@queen.slim\"\n", - "]\n", - "\n", - "number_of_facebook_followers = [\n", - " None,\n", - " 
None,\n", - " None,\n", - " None,\n", - " None,\n", - " None,\n", - " None,\n", - " None,\n", - " None,\n", - " None,\n", - " None\n", - "]\n", - "\n", - "number_of_instagram_followers = [\n", - " \"15.1k\",\n", - " \"11.9k\",\n", - " \"11.1k\",\n", - " \"17.2k\",\n", - " \"10.2k\",\n", - " \"12k\",\n", - " \"15.8k\",\n", - " \"35.1k\",\n", - " \"15.5k\",\n", - " \"24k\",\n", - " \"10.9k\"\n", - "]\n", + "### Name:\n", + "```\n", + "['j6lack_', 'Sharaaa', 'Jaz', 'Randall Woodfin', 'Micah Myricks', '_imonlykc', 'THE RAVEN SMITH', 'Bianca Sha'nel Patton', 'Mare Thomas', 'Destinee Ariee', 'Dewayne Mcbride']\n", "```\n", "\n", - "This list includes the requested information for the top 11 influencers in Alabama, as per the provided data. Note that Facebook usernames and follower counts are not available in the source data, so they are listed as `None` in the respective lists.\n" + "### Contact Info:\n", + "```\n", + "['Check contact details', 'Check contact details', 'Check contact details', 'Check contact details', 'Check contact details', 'Check contact details', 'Check contact details', 'Check contact details', 'Check contact details', 'Check contact details', 'Check contact details']\n", + "```\n", + "\n", + "### Facebook Username:\n", + "```\n", + "['', '', '', '', '', '', '', '', '', '', '']\n", + "```\n", + "\n", + "### Instagram Username:\n", + "```\n", + "['@j6lack_', '@kashararachele', '@birmingham_finest', '@randall_woodfin', '@thecoolestguyuno', '@_imonlykc', '@noflashplzz', '@biancasobad', '@savannahbrasfield', '@therealddestiny', '@queen.slim']\n", + "```\n", + "\n", + "### Number of Facebook Followers:\n", + "```\n", + "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", + "```\n", + "\n", + "### Number of Instagram Followers:\n", + "```\n", + "[15.1k, 11.9k, 11.1k, 17.2k, 10.2k, 12k, 15.8k, 35.1k, 15.5k, 24k, 10.9k]\n", + "```\n" ] } ], @@ -387,14 +437,13 @@ "prompt = \"\"\"\n", " I'm looking for influencers that i need to promote my sneakers brand. 
My current location is Alabama in the US. \\n\n", " I need help with looking for influencers in my state. People that i can reach out to, and they will help me promote my sneakers brand. \\n\n", - " I need your help with a list of 50 influencers i have in this state. Here's how i want it. \\n\n", + " I need your help with a list of 30 influencers i have in this state. Here's how i want it. \\n\n", " 1. Name \\n\n", " 2. Contant info \\n\n", - " 3. Location \\n\n", - " 4. Facebook Username\\n\n", - " 5. Instagram Username \\n\n", - " 6. Number of facebook followers \\n\n", - " 7. Number of instagram followers \\n.\n", + " 3. Facebook Username\\n\n", + " 4. Instagram Username \\n\n", + " 5. Number of facebook followers \\n\n", + " 6. Number of instagram followers \\n.\n", " This information will help me track who to reach out to. \\n \n", " I'm expecting 7 lists as output, it should follow the pattern i listed.\\n\n", " Do not add other explanations to it, just return the lists alone. Make them come out in separate python lists, like this []. So i will have 7 in total. \\n\n", @@ -406,6 +455,558 @@ "print(result)\n" ] }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "def influencer_data(search_result: str, prompt:str) -> dict:\n", + " logger.info(f\"Formatting Influencer Data\")\n", + " initiator_prompt = PromptTemplate(\n", + " template=\"\"\"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n", + " You are a Influencer Data Extractor AI Agent tasked with extracting information from a search result\\n\n", + " Backstory: \\n\n", + " A business brand is looking for some influencer in a specific location or area. He used a tool called Perplexity AI to get this data. \\n\n", + " This is an amazing too, and yet it can struggle a bit (ai isn't perfect you know). \\n \n", + " The information returned by this ai can be incomplete, not properly structured and all. \\n \n", + " This brand needs youe help. 
\\n \n", + " This is how you will help the brand: \\n \n", + " 1. You will be given two things, the ai search result and the prompt used to query the ai. \\n\n", + " 2. Your major interest here is formatting and making the structure right. \\n \n", + " 3. You will process the ai result, create a JSON structure of the key information needed by the user and add the corresponding values to it. \\n\n", + " 4. The user prompt is your guide on of how the JSON should be formatted. \\n\n", + " 5. If there's are missing data or something in the ai response, send it back as NA. \\n \n", + " 6. At the end of your processing you want to return a structured response and also make sure it in the best order as expected by the user. \\n\n", + " \n", + " Return a structured JSON or dictionary as output. \\n \n", + " Ensure that the data is properly arranged and in a good format. \\n\n", + " Please do this carefully and excellently. \n", + " \n", + " <|eot_id|><|start_header_id|>user<|end_header_id|>\n", + " AI_SEARCH_RESULT: {search_result} \\n \n", + " PROMPT: {prompt}\n", + " \n", + " <|eot_id|><|start_header_id|>assistant<|end_header_id|>\"\"\",\n", + " input_variables=[\"search_result\", \"prompt\"],\n", + " )\n", + "\n", + " initiator_router = initiator_prompt | llm | JsonOutputParser()\n", + " output = initiator_router.invoke({\"search_result\":search_result, \"prompt\":prompt})\n", + " return output" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2024-10-22 23:13:57.781\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36minfluencer_data\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mCategorizing products\u001b[0m\n" + ] + } + ], + "source": [ + "data_result = influencer_data(result, prompt)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'Name': 
['j6lack_',\n", + " 'Sharaaa',\n", + " 'Jaz',\n", + " 'Randall Woodfin',\n", + " 'Micah Myricks',\n", + " '_imonlykc',\n", + " 'THE RAVEN SMITH',\n", + " \"Bianca Sha'nel Patton\",\n", + " 'Mare Thomas',\n", + " 'Destinee Ariee',\n", + " 'Dewayne Mcbride'],\n", + " 'Contact Info': ['Check contact details',\n", + " 'Check contact details',\n", + " 'Check contact details',\n", + " 'Check contact details',\n", + " 'Check contact details',\n", + " 'Check contact details',\n", + " 'Check contact details',\n", + " 'Check contact details',\n", + " 'Check contact details',\n", + " 'Check contact details',\n", + " 'Check contact details'],\n", + " 'Facebook Username': ['', '', '', '', '', '', '', '', '', '', ''],\n", + " 'Instagram Username': ['@j6lack_',\n", + " '@kashararachele',\n", + " '@birmingham_finest',\n", + " '@randall_woodfin',\n", + " '@thecoolestguyuno',\n", + " '@_imonlykc',\n", + " '@noflashplzz',\n", + " '@biancasobad',\n", + " '@savannahbrasfield',\n", + " '@therealddestiny',\n", + " '@queen.slim'],\n", + " 'Number of Facebook Followers': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", + " 'Number of Instagram Followers': [15100,\n", + " 11900,\n", + " 11100,\n", + " 17200,\n", + " 10200,\n", + " 12000,\n", + " 15800,\n", + " 35100,\n", + " 15500,\n", + " 24000,\n", + " 10900]}" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data_result" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "def combined_influencer_data(prompt: str) -> dict:\n", + " # Step 1: Get data using the get_chat_completion function\n", + " logger.info(\"Using Perplexity Ai to get the influencer data\")\n", + " search_result = get_chat_completion(prompt)\n", + " \n", + " # Step 2: Process the search result using the influencer_data function\n", + " logger.info(\"Formatting the data with OpenAI\")\n", + " formatted_data = influencer_data(search_result, prompt)\n", + " \n", 
+ " # Step 3: Return the final output\n", + " return formatted_data" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2024-10-22 23:27:16.425\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mcombined_influencer_data\u001b[0m:\u001b[36m3\u001b[0m - \u001b[1mUsing Perplexity Ai to get the influencer data\u001b[0m\n", + "\u001b[32m2024-10-22 23:27:27.610\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mcombined_influencer_data\u001b[0m:\u001b[36m7\u001b[0m - \u001b[1mFormatting the data with OpenAI\u001b[0m\n", + "\u001b[32m2024-10-22 23:27:27.611\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36minfluencer_data\u001b[0m:\u001b[36m2\u001b[0m - \u001b[1mFormatting Influencer Data\u001b[0m\n" + ] + } + ], + "source": [ + "prompt = \"\"\"\n", + " I'm looking for influencers that i need to promote my sneakers brand. My target is the USA. \\n\n", + " I need help with looking for influencers in my target country. People that i can reach out to, and they will help me promote my sneakers brand. \\n\n", + " I need your help with a list of 30 top influencers i have in this country. Here's how i want it. \\n\n", + " 1. Name \\n\n", + " 2. Contant_info \\n\n", + " 3. Facebook username\\n\n", + " 4. Instagram username \\n\n", + " 5. Facebook followers \\n\n", + " 6. Instagram followers \\n.\n", + " 7. TikTok username \\n\n", + " 8. TikTok followers\\n \n", + " This information will help me track who to reach out to. 
\\n \n", + " I'm expecting 7 lists as output, it should follow the pattern i listed.\\n\n", + " Please do this carefully.\n", + "\"\"\"\n", + "influencers_data = combined_influencer_data(prompt)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'Name': 'Noah Beck',\n", + " 'Contact_info': '@noahbeck',\n", + " 'Facebook_username': '@noahbeck',\n", + " 'Instagram_username': '@noahbeck',\n", + " 'Facebook_followers': '1.5M',\n", + " 'Instagram_followers': '18.4M',\n", + " 'TikTok_username': '@noahbeck',\n", + " 'TikTok_followers': '3.5M'},\n", + " {'Name': 'Rickey Thompson',\n", + " 'Contact_info': '@rickeythompson',\n", + " 'Facebook_username': '@rickeythompson',\n", + " 'Instagram_username': '@rickeythompson',\n", + " 'Facebook_followers': '1.2M',\n", + " 'Instagram_followers': '3.5M',\n", + " 'TikTok_username': '@rickeythompson',\n", + " 'TikTok_followers': '2.5M'},\n", + " {'Name': 'Kristy Scott',\n", + " 'Contact_info': '@kristy.sarah',\n", + " 'Facebook_username': '@kristy.sarah',\n", + " 'Instagram_username': '@kristy.sarah',\n", + " 'Facebook_followers': '200k',\n", + " 'Instagram_followers': '2.3M',\n", + " 'TikTok_username': '@kristy.sarah',\n", + " 'TikTok_followers': '1.5M'},\n", + " {'Name': 'Lizzie Armanto',\n", + " 'Contact_info': '@lizziearmanto',\n", + " 'Facebook_username': '@lizziearmanto',\n", + " 'Instagram_username': '@lizziearmanto',\n", + " 'Facebook_followers': '50k',\n", + " 'Instagram_followers': '504k',\n", + " 'TikTok_username': '@lizziearmanto',\n", + " 'TikTok_followers': '1.2M'},\n", + " {'Name': 'Allison Bornstein',\n", + " 'Contact_info': '@anyasreviews',\n", + " 'Facebook_username': '@anyasreviews',\n", + " 'Instagram_username': '@anyasreviews',\n", + " 'Facebook_followers': '20k',\n", + " 'Instagram_followers': '211k',\n", + " 'TikTok_username': '@anyasreviews',\n", + " 'TikTok_followers': '100k'},\n", + " {'Name': 'Asyraf Ash Samsudin',\n", + " 
'Contact_info': '@iamjaymula',\n", + " 'Facebook_username': '@iamjaymula',\n", + " 'Instagram_username': '@iamjaymula',\n", + " 'Facebook_followers': '10k',\n", + " 'Instagram_followers': '117k',\n", + " 'TikTok_username': '@iamjaymula',\n", + " 'TikTok_followers': '50k'},\n", + " {'Name': 'Faye Ellaby',\n", + " 'Contact_info': '@schuylerreeves',\n", + " 'Facebook_username': '@schuylerreeves',\n", + " 'Instagram_username': '@schuylerreeves',\n", + " 'Facebook_followers': '5k',\n", + " 'Instagram_followers': '90k',\n", + " 'TikTok_username': '@schuylerreeves',\n", + " 'TikTok_followers': '20k'},\n", + " {'Name': 'Josephine Lee',\n", + " 'Contact_info': '@josephineylee',\n", + " 'Facebook_username': '@josephineylee',\n", + " 'Instagram_username': '@josephineylee',\n", + " 'Facebook_followers': '5k',\n", + " 'Instagram_followers': '82k',\n", + " 'TikTok_username': '@josephineylee',\n", + " 'TikTok_followers': '15k'},\n", + " {'Name': 'Janelle C Shuttlesworth',\n", + " 'Contact_info': '@jcheyenne_',\n", + " 'Facebook_username': '@jcheyenne_',\n", + " 'Instagram_username': '@jcheyenne_',\n", + " 'Facebook_followers': '3k',\n", + " 'Instagram_followers': '78k',\n", + " 'TikTok_username': '@jcheyenne_',\n", + " 'TikTok_followers': '10k'},\n", + " {'Name': 'Terry Biviano',\n", + " 'Contact_info': '@terrybiviano',\n", + " 'Facebook_username': '@terrybiviano',\n", + " 'Instagram_username': '@terrybiviano',\n", + " 'Facebook_followers': '3k',\n", + " 'Instagram_followers': '63k',\n", + " 'TikTok_username': '@terrybiviano',\n", + " 'TikTok_followers': '5k'},\n", + " {'Name': 'Angel Celestino',\n", + " 'Contact_info': '@angelxcelestino',\n", + " 'Facebook_username': '@angelxcelestino',\n", + " 'Instagram_username': '@angelxcelestino',\n", + " 'Facebook_followers': '2k',\n", + " 'Instagram_followers': '42k',\n", + " 'TikTok_username': '@angelxcelestino',\n", + " 'TikTok_followers': '3k'},\n", + " {'Name': 'Dalton Lanning',\n", + " 'Contact_info': '@dealingwithdalton',\n", + " 
'Facebook_username': '@dealingwithdalton',\n", + " 'Instagram_username': '@dealingwithdalton',\n", + " 'Facebook_followers': '2k',\n", + " 'Instagram_followers': '39k',\n", + " 'TikTok_username': '@dealingwithdalton',\n", + " 'TikTok_followers': '2k'},\n", + " {'Name': 'Sole Collector',\n", + " 'Contact_info': 'N/A',\n", + " 'Facebook_username': 'N/A',\n", + " 'Instagram_username': 'N/A',\n", + " 'Facebook_followers': 'N/A',\n", + " 'Instagram_followers': 'N/A',\n", + " 'TikTok_username': 'N/A',\n", + " 'TikTok_followers': 'N/A'},\n", + " {'Name': 'HYPEBEAST Ā» Footwear',\n", + " 'Contact_info': 'N/A',\n", + " 'Facebook_username': 'N/A',\n", + " 'Instagram_username': 'N/A',\n", + " 'Facebook_followers': 'N/A',\n", + " 'Instagram_followers': 'N/A',\n", + " 'TikTok_username': 'N/A',\n", + " 'TikTok_followers': 'N/A'},\n", + " {'Name': 'Nice Kicks',\n", + " 'Contact_info': 'N/A',\n", + " 'Facebook_username': 'N/A',\n", + " 'Instagram_username': 'N/A',\n", + " 'Facebook_followers': 'N/A',\n", + " 'Instagram_followers': 'N/A',\n", + " 'TikTok_username': 'N/A',\n", + " 'TikTok_followers': 'N/A'},\n", + " {'Name': 'Sneaker News',\n", + " 'Contact_info': 'N/A',\n", + " 'Facebook_username': 'N/A',\n", + " 'Instagram_username': 'N/A',\n", + " 'Facebook_followers': 'N/A',\n", + " 'Instagram_followers': 'N/A',\n", + " 'TikTok_username': 'N/A',\n", + " 'TikTok_followers': 'N/A'},\n", + " {'Name': 'Sneaker Bar Detroit',\n", + " 'Contact_info': 'N/A',\n", + " 'Facebook_username': 'N/A',\n", + " 'Instagram_username': 'N/A',\n", + " 'Facebook_followers': 'N/A',\n", + " 'Instagram_followers': 'N/A',\n", + " 'TikTok_username': 'N/A',\n", + " 'TikTok_followers': 'N/A'},\n", + " {'Name': 'Modern Notoriety',\n", + " 'Contact_info': 'N/A',\n", + " 'Facebook_username': 'N/A',\n", + " 'Instagram_username': 'N/A',\n", + " 'Facebook_followers': 'N/A',\n", + " 'Instagram_followers': 'N/A',\n", + " 'TikTok_username': 'N/A',\n", + " 'TikTok_followers': 'N/A'},\n", + " {'Name': 'Sneaker 
Freaker',\n", + " 'Contact_info': 'N/A',\n", + " 'Facebook_username': 'N/A',\n", + " 'Instagram_username': 'N/A',\n", + " 'Facebook_followers': 'N/A',\n", + " 'Instagram_followers': 'N/A',\n", + " 'TikTok_username': 'N/A',\n", + " 'TikTok_followers': 'N/A'},\n", + " {'Name': 'SneakerShouts',\n", + " 'Contact_info': 'N/A',\n", + " 'Facebook_username': 'N/A',\n", + " 'Instagram_username': 'N/A',\n", + " 'Facebook_followers': 'N/A',\n", + " 'Instagram_followers': 'N/A',\n", + " 'TikTok_username': 'N/A',\n", + " 'TikTok_followers': 'N/A'},\n", + " {'Name': 'Charli D’Amelio',\n", + " 'Contact_info': '@charlidamelio',\n", + " 'Facebook_username': '@charlidamelio',\n", + " 'Instagram_username': '@charlidamelio',\n", + " 'Facebook_followers': '150M',\n", + " 'Instagram_followers': '155M',\n", + " 'TikTok_username': '@charlidamelio',\n", + " 'TikTok_followers': '50M'},\n", + " {'Name': 'Chiara Ferragni',\n", + " 'Contact_info': '@chiaraferragni',\n", + " 'Facebook_username': '@chiaraferragni',\n", + " 'Instagram_username': '@chiaraferragni',\n", + " 'Facebook_followers': '30M',\n", + " 'Instagram_followers': '28.7M',\n", + " 'TikTok_username': '@chiaraferragni',\n", + " 'TikTok_followers': '10M'},\n", + " {'Name': 'Anya Barefoot',\n", + " 'Contact_info': '@anyabarefoot',\n", + " 'Facebook_username': '@anyabarefoot',\n", + " 'Instagram_username': '@anyabarefoot',\n", + " 'Facebook_followers': '10k',\n", + " 'Instagram_followers': '100k',\n", + " 'TikTok_username': '@anyabarefoot',\n", + " 'TikTok_followers': '20k'},\n", + " {'Name': 'Jyothee Murali',\n", + " 'Contact_info': '@jyotheemurali',\n", + " 'Facebook_username': '@jyotheemurali',\n", + " 'Instagram_username': '@jyotheemurali',\n", + " 'Facebook_followers': '5k',\n", + " 'Instagram_followers': '50k',\n", + " 'TikTok_username': '@jyotheemurali',\n", + " 'TikTok_followers': '10k'},\n", + " {'Name': 'Asyraf Ash Samsudin',\n", + " 'Contact_info': '@iamjaymula',\n", + " 'Facebook_username': '@iamjaymula',\n", + " 
'Instagram_username': '@iamjaymula',\n", + " 'Facebook_followers': '10k',\n", + " 'Instagram_followers': '117k',\n", + " 'TikTok_username': '@iamjaymula',\n", + " 'TikTok_followers': '50k'},\n", + " {'Name': 'Lizzie Armanto',\n", + " 'Contact_info': '@lizziearmanto',\n", + " 'Facebook_username': '@lizziearmanto',\n", + " 'Instagram_username': '@lizziearmanto',\n", + " 'Facebook_followers': '50k',\n", + " 'Instagram_followers': '504k',\n", + " 'TikTok_username': '@lizziearmanto',\n", + " 'TikTok_followers': '1.2M'},\n", + " {'Name': 'Allison Bornstein',\n", + " 'Contact_info': '@anyasreviews',\n", + " 'Facebook_username': '@anyasreviews',\n", + " 'Instagram_username': '@anyasreviews',\n", + " 'Facebook_followers': '20k',\n", + " 'Instagram_followers': '211k',\n", + " 'TikTok_username': '@anyasreviews',\n", + " 'TikTok_followers': '100k'},\n", + " {'Name': 'Kristy Scott',\n", + " 'Contact_info': '@kristy.sarah',\n", + " 'Facebook_username': '@kristy.sarah',\n", + " 'Instagram_username': '@kristy.sarah',\n", + " 'Facebook_followers': '200k',\n", + " 'Instagram_followers': '2.3M',\n", + " 'TikTok_username': '@kristy.sarah',\n", + " 'TikTok_followers': '1.5M'},\n", + " {'Name': 'Noah Beck',\n", + " 'Contact_info': '@noahbeck',\n", + " 'Facebook_username': '@noahbeck',\n", + " 'Instagram_username': '@noahbeck',\n", + " 'Facebook_followers': '1.5M',\n", + " 'Instagram_followers': '18.4M',\n", + " 'TikTok_username': '@noahbeck',\n", + " 'TikTok_followers': '3.5M'},\n", + " {'Name': 'Rickey Thompson',\n", + " 'Contact_info': '@rickeythompson',\n", + " 'Facebook_username': '@rickeythompson',\n", + " 'Instagram_username': '@rickeythompson',\n", + " 'Facebook_followers': '1.2M',\n", + " 'Instagram_followers': '3.5M',\n", + " 'TikTok_username': '@rickeythompson',\n", + " 'TikTok_followers': '2.5M'}]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "influencers_data" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "product_categories = [\n", + " \"Beauty & Skincare\",\n", + " \"Fashion & Lifestyle\",\n", + " \"Health & Fitness\",\n", + " \"Travel & Adventure\",\n", + " \"Food & Beverage\",\n", + " \"Technology & Gadgets\",\n", + " \"Gaming & Esports\",\n", + " \"Parenting & Family\",\n", + " \"Finance & Business\",\n", + " \"Wellness & Mental Health\",\n", + " \"Automotive & Motorsports\",\n", + " \"Entertainment & Pop Culture\",\n", + " \"Photography & Visual Arts\",\n", + " \"Education & Learning\",\n", + " \"Environmental & Sustainability\"\n", + "]\n", + "\n", + "\n", + "def product_categorizer(product_lists: list, product_categories=product_categories) -> str:\n", + " logger.info(f\"Categorizing products\")\n", + " initiator_prompt = PromptTemplate(\n", + " template=\"\"\"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n", + " You are a Brand AI Agent tasked with categorizing products into categories\\n\n", + " There are some categories given by the brand, these are the categories the products are expected to be categorized into.\\n\n", + " You will be given a list of products and asked to categorize them.\\n\n", + " You are meant to categorize all the given product into one category. \\n \n", + " You take the following steps: \n", + " 1. Looking into all the list of products. \\n\n", + " 2. Understand where they belong to. \\n \n", + " 3. Look into the kist of categories. \\n \n", + " 4. Select the category that this products falls under. \\n\n", + " \n", + " You should return the selected category as output. \\n \n", + " Please do this carefully and correctly. 
\n", + " \n", + " <|eot_id|><|start_header_id|>user<|end_header_id|>\n", + " PRODUCT_LISTS: {product_lists} \\n \n", + " PRODUCT_CATEGORY: {product_categories}\n", + " \n", + " <|eot_id|><|start_header_id|>assistant<|end_header_id|>\"\"\",\n", + " input_variables=[\"product_lists\", \"product_categories\"],\n", + " )\n", + "\n", + " initiator_router = initiator_prompt | llm | StrOutputParser()\n", + " output = initiator_router.invoke({\"product_lists\":product_lists, \"product_categories\":product_categories})\n", + " return output" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2024-10-22 22:48:56.971\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mproduct_categorizer\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1mCategorizing products\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'Parenting & Family'" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "product_categorizer(['baby oil', 'baby socks', 'baby sweaters'])" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2024-10-22 22:48:58.275\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mproduct_categorizer\u001b[0m:\u001b[36m34\u001b[0m - \u001b[1mCategorizing products\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'Fashion & Lifestyle'" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "product_categorizer(['sneakers', 'versace', 'nike'])" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/utils.py b/utils.py new file mode 100644 index 0000000..f2fc623 --- /dev/null +++ b/utils.py @@ -0,0 +1,160 @@ +from langchain_openai import ChatOpenAI +from 
from langchain_openai import ChatOpenAI
from langchain_core.prompts.prompt import PromptTemplate
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
import os
import requests
from loguru import logger
from dotenv import load_dotenv
load_dotenv()


# os.getenv() reads os.environ, so writing its result back is a no-op when the
# key is set and raises TypeError (None is not a str) when it is missing.
# Warn instead of crashing with an unrelated-looking error at import time.
if not os.getenv("OPENAI_API_KEY"):
    logger.warning("OPENAI_API_KEY is not set; OpenAI calls are expected to fail")
PERPLEXITYAI_API_KEY = os.getenv('PERPLEXITY_AI_API')
llm = ChatOpenAI(model="gpt-4o")


def get_chat_completion(prompt, api_key=PERPLEXITYAI_API_KEY, timeout=30):
    """Send *prompt* to the Perplexity chat-completions API and return the reply.

    Args:
        prompt: User message forwarded to the model.
        api_key: Bearer token for the API. Defaults to the value of the
            ``PERPLEXITY_AI_API`` environment variable read at import time.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The content of the first choice's message on success. On any
        transport, HTTP-status or response-shape failure a human-readable
        error string is returned instead of raising, so callers always get
        a ``str``.
    """
    url = "https://api.perplexity.ai/chat/completions"

    payload = {
        "model": "llama-3.1-sonar-small-128k-online",
        "messages": [
            {
                "role": "system",
                "content": "Be precise and concise."
            },
            {
                "role": "user",
                "content": prompt
            }
        ],
        "temperature": 0.2,
        "top_p": 0.9,
        "return_citations": True,
        # NOTE(review): this looks like it limits search to perplexity.ai
        # only -- confirm that is intended for influencer look-ups.
        "search_domain_filter": ["perplexity.ai"],
        "return_images": False,
        "return_related_questions": False,
        "search_recency_filter": "month",
        "top_k": 0,
        "stream": False,
        "presence_penalty": 0,
        "frequency_penalty": 1
    }

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }

    # Without a timeout requests waits indefinitely on a stalled connection.
    try:
        response = requests.post(url, json=payload, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        logger.error("Perplexity request failed: {}", exc)
        return f"Request failed: {exc}"

    if response.status_code != 200:
        logger.error("Perplexity returned HTTP {}", response.status_code)
        return f"Request failed with status code: {response.status_code}"

    try:
        # ValueError covers a 200 response whose body is not valid JSON;
        # TypeError covers a JSON body that is not the expected nesting.
        return response.json()['choices'][0]['message']['content']
    except (KeyError, IndexError, TypeError, ValueError):
        return "Unexpected response format."


def influencer_data(search_result: str, prompt: str) -> dict:
    """Ask the LLM to restructure a raw search result as JSON.

    Args:
        search_result: Text returned by the search step.
        prompt: The prompt originally used for the search; the LLM is told
            to use it as the guide for the JSON layout.

    Returns:
        Whatever ``JsonOutputParser`` parses from the model reply
        (presumably a dict or a list of dicts -- depends on the model output).
    """
    logger.info("Formatting Influencer Data")
    extraction_prompt = PromptTemplate(
        template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are a Influencer Data Extractor AI Agent tasked with extracting information from a search result\n
    Backstory: \n
    A business brand is looking for some influencer in a specific location or area. He used a tool called Perplexity AI to get this data. \n
    This is an amazing too, and yet it can struggle a bit (ai isn't perfect you know). \n
    The information returned by this ai can be incomplete, not properly structured and all. \n
    This brand needs youe help. \n
    This is how you will help the brand: \n
    1. You will be given two things, the ai search result and the prompt used to query the ai. \n
    2. Your major interest here is formatting and making the structure right. \n
    3. You will process the ai result, create a JSON structure of the key information needed by the user and add the corresponding values to it. \n
    4. The user prompt is your guide on of how the JSON should be formatted. \n
    5. If there's are missing data or something in the ai response, send it back as NA. \n
    6. At the end of your processing you want to return a structured response and also make sure it in the best order as expected by the user. \n

    Return a structured JSON or dictionary as output. \n
    Ensure that the data is properly arranged and in a good format. \n
    Please do this carefully and excellently.

    <|eot_id|><|start_header_id|>user<|end_header_id|>
    AI_SEARCH_RESULT: {search_result} \n
    PROMPT: {prompt}

    <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
        input_variables=["search_result", "prompt"],
    )

    chain = extraction_prompt | llm | JsonOutputParser()
    return chain.invoke({"search_result": search_result, "prompt": prompt})


def combined_influencer_data(prompt: str) -> dict:
    """Run the search step and the formatting step for *prompt*.

    Args:
        prompt: Query sent to Perplexity and reused as the formatting guide.

    Returns:
        The structured data produced by ``influencer_data``.
    """
    # Step 1: fetch the raw answer. get_chat_completion reports failures as
    # strings, so an error message is passed on to the formatter unchanged.
    logger.info("Using Perplexity Ai to get the influencer data")
    search_result = get_chat_completion(prompt)

    # Step 2: let the LLM turn the raw answer into structured data.
    logger.info("Formatting the data with OpenAI")
    return influencer_data(search_result, prompt)


# Closed set of categories offered to the LLM by product_categorizer.
product_categories = [
    "Beauty & Skincare",
    "Fashion & Lifestyle",
    "Health & Fitness",
    "Travel & Adventure",
    "Food & Beverage",
    "Technology & Gadgets",
    "Gaming & Esports",
    "Parenting & Family",
    "Finance & Business",
    "Wellness & Mental Health",
    "Automotive & Motorsports",
    "Entertainment & Pop Culture",
    "Photography & Visual Arts",
    "Education & Learning",
    "Environmental & Sustainability"
]


def product_categorizer(product_lists: list, product_categories=product_categories) -> str:
    """Ask the LLM to pick one category for a list of products.

    Args:
        product_lists: Product names to classify together.
        product_categories: Candidate categories shown to the model.
            Defaults to the module-level list; it is only read, never
            mutated, so sharing the default is safe.

    Returns:
        The model's reply as a string (expected to be one category name).
    """
    logger.info("Categorizing products")
    categorizer_prompt = PromptTemplate(
        template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are a Brand AI Agent tasked with categorizing products into categories\n
    There are some categories given by the brand, these are the categories the products are expected to be categorized into.\n
    You will be given a list of products and asked to categorize them.\n
    You are meant to categorize all the given product into one category. \n
    You take the following steps:
    1. Looking into all the list of products. \n
    2. Understand where they belong to. \n
    3. Look into the kist of categories. \n
    4. Select the category that this products falls under. \n

    You should return the selected category as output. \n
    Please do this carefully and correctly.

    <|eot_id|><|start_header_id|>user<|end_header_id|>
    PRODUCT_LISTS: {product_lists} \n
    PRODUCT_CATEGORY: {product_categories}

    <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
        input_variables=["product_lists", "product_categories"],
    )

    chain = categorizer_prompt | llm | StrOutputParser()
    return chain.invoke({"product_lists": product_lists, "product_categories": product_categories})