progress

2024-10-22 23:37:05 +01:00
parent 0680807bfd
commit 957bac887d
6 changed files with 1070 additions and 144 deletions
@@ -1 +1,2 @@
 PERPLEXITY_AI_API = "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"
 OPENAI_API_KEY = "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"
@@ -0,0 +1,2 @@
 client.json
 credentials.json
@@ -0,0 +1,126 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# !pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from __future__ import print_function\n",
    "import os\n",
    "import google.auth\n",
    "import google_auth_oauthlib.flow\n",
    "import googleapiclient.discovery\n",
    "import googleapiclient.errors\n",
    "from google.oauth2.credentials import Credentials"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=165594406980-5f6scupsune6ise7dqp04ti3mmt2eqe9.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A52470%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.readonly&state=TRbdRt2JQkFtZttqScUDD9N4DW7scP&access_type=offline\n"
     ]
    }
   ],
   "source": [
    "# If modifying these SCOPES, delete the file token.json.\n",
    "SCOPES = ['https://www.googleapis.com/auth/drive.readonly']\n",
    "\n",
    "def main():\n",
    "    \"\"\"Shows basic usage of the Drive v3 API.\n",
    "    Prints the names and ids of the first 10 files the user has access to.\n",
    "    \"\"\"\n",
    "    creds = None\n",
    "    # The file token.json stores the user's access and refresh tokens, and is\n",
    "    # created automatically when the authorization flow completes for the first time.\n",
    "    if os.path.exists('token.json'):\n",
    "        creds = Credentials.from_authorized_user_file('token.json', SCOPES)\n",
    "    # If there are no (valid) credentials available, let the user log in.\n",
    "    if not creds or not creds.valid:\n",
    "        if creds and creds.expired and creds.refresh_token:\n",
    "            creds.refresh(google.auth.transport.requests.Request())\n",
    "        else:\n",
    "            flow = google_auth_oauthlib.flow.InstalledAppFlow.from_client_secrets_file(\n",
    "                'credentials.json', SCOPES)\n",
    "            creds = flow.run_local_server(port=0)\n",
    "        # Save the credentials for the next run\n",
    "        with open('token.json', 'w') as token:\n",
    "            token.write(creds.to_json())\n",
    "\n",
    "    service = googleapiclient.discovery.build('drive', 'v3', credentials=creds)\n",
    "\n",
    "    # Call the Drive v3 API\n",
    "    results = service.files().list(\n",
    "        pageSize=10, fields=\"nextPageToken, files(id, name)\").execute()\n",
    "    items = results.get('files', [])\n",
    "\n",
    "    if not items:\n",
    "        print('No files found.')\n",
    "    else:\n",
    "        print('Files:')\n",
    "        for item in items:\n",
    "            print(u'{0} ({1})'.format(item['name'], item['id']))\n",
    "\n",
    "if __name__ == '__main__':\n",
    "    main()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
@@ -0,0 +1,36 @@
 import sys, os
 from fastapi import FastAPI, Form, Body
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import FileResponse, JSONResponse
 from typing import List, Dict
 from pydantic import BaseModel
 from utils import product_categorizer
 # Application object that the route decorators in this module register on.
 app = FastAPI()

 # Origins allowed to make cross-origin requests; extend this list as needed.
 origins = ["http://localhost:5300", "http://localhost:3000"]

 # Install CORS handling: the listed origins may send credentialed requests
 # using any HTTP method and any request header.
 app.add_middleware(
     CORSMiddleware,
     allow_origins=origins,
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],
 )
 class ProductRequest(BaseModel):
     """Request body accepted by the ``/categorize-products/`` endpoint.

     Attributes:
         products: The product strings that are handed to
             ``product_categorizer`` as a single list.
     """

     products: List[str]
@app.post("/categorize-products/")
async def categorize_products(request: ProductRequest):
    """Categorize the submitted products and return the result as JSON.

    Args:
        request: Parsed request body; only ``request.products`` is used.

    Returns:
        A ``JSONResponse`` whose body is
        ``{"categorized_products": <output of product_categorizer>}``.
    """
    # Imported locally so this handler does not depend on edits to the
    # module's import block.
    from fastapi.concurrency import run_in_threadpool

    # ``product_categorizer`` is a plain synchronous call. Running it directly
    # inside this coroutine would block the event loop (and every other
    # in-flight request) until it returns, so hand it to the worker pool.
    categorized_output = await run_in_threadpool(
        product_categorizer, request.products
    )
    return JSONResponse(content={"categorized_products": categorized_output})
@@ -0,0 +1,160 @@
 from langchain_openai import ChatOpenAI
 from langchain_core.prompts.prompt import PromptTemplate
 from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
 import os
 import requests
 from loguru import logger
 from dotenv import load_dotenv
 # Load variables from a .env file (if any) into the process environment.
 load_dotenv()

 # Assigning None into os.environ raises an opaque
 # "TypeError: str expected, not NoneType", so check for the key explicitly
 # and fail with a message that says what is actually missing.
 _openai_api_key = os.getenv("OPENAI_API_KEY")
 if _openai_api_key is None:
     raise RuntimeError(
         "OPENAI_API_KEY is not set; define it in the environment or in .env"
     )
 os.environ["OPENAI_API_KEY"] = _openai_api_key

 # May be None when PERPLEXITY_AI_API is not configured; it is only used as
 # the default ``api_key`` of get_chat_completion.
 PERPLEXITYAI_API_KEY = os.getenv('PERPLEXITY_AI_API')

 # Shared chat model used by the prompt chains defined in this module.
 llm = ChatOpenAI(model="gpt-4o")
 def get_chat_completion(prompt, api_key=PERPLEXITYAI_API_KEY, timeout=60):
     """Send ``prompt`` to the Perplexity chat-completions API and return the reply.

     Failures are reported through the returned string rather than by raising,
     so callers always receive text.

     Args:
         prompt: User message placed in the request's ``messages`` list.
         api_key: Bearer token for the ``Authorization`` header. Defaults to
             the module-level ``PERPLEXITYAI_API_KEY``.
         timeout: Seconds to wait for the HTTP request before giving up.
             Passed straight to ``requests.post``.

     Returns:
         The content of the first choice's message on success; otherwise one
         of ``"Request failed: <error>"``,
         ``"Request failed with status code: <code>"`` or
         ``"Unexpected response format."``.
     """
     url = "https://api.perplexity.ai/chat/completions"
     payload = {
         "model": "llama-3.1-sonar-small-128k-online",
         "messages": [
             {
                 "role": "system",
                 "content": "Be precise and concise."
             },
             {
                 "role": "user",
                 "content": prompt
             }
         ],
         "temperature": 0.2,
         "top_p": 0.9,
         "return_citations": True,
         # NOTE(review): this looks like it restricts search results to the
         # perplexity.ai domain only -- confirm that is intended.
         "search_domain_filter": ["perplexity.ai"],
         "return_images": False,
         "return_related_questions": False,
         "search_recency_filter": "month",
         "top_k": 0,
         "stream": False,
         "presence_penalty": 0,
         "frequency_penalty": 1
     }
     headers = {
         "Authorization": f"Bearer {api_key}",
         "Content-Type": "application/json"
     }

     # Without a timeout requests waits indefinitely on a stalled connection.
     # Transport errors are turned into a message, matching how HTTP failures
     # are already reported below.
     try:
         response = requests.post(
             url, json=payload, headers=headers, timeout=timeout
         )
     except requests.RequestException as exc:
         return f"Request failed: {exc}"

     if response.status_code != 200:
         return f"Request failed with status code: {response.status_code}"

     try:
         # ValueError covers a 200 response whose body is not valid JSON;
         # TypeError covers JSON whose shape is not the expected nesting.
         return response.json()['choices'][0]['message']['content']
     except (KeyError, IndexError, TypeError, ValueError):
         return "Unexpected response format."
 def influencer_data(search_result: str, prompt: str) -> dict:
     """Ask the module-level ``llm`` to restructure a search result as JSON.

     Args:
         search_result: Text substituted for ``{search_result}`` in the template.
         prompt: Text substituted for ``{prompt}`` in the template.

     Returns:
         Whatever ``JsonOutputParser`` produces from the model's reply.
     """
     logger.info("Formatting Influencer Data")

     # The template text is sent to the model verbatim; keep it unchanged.
     extraction_template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
        You are a Influencer Data Extractor AI Agent tasked with extracting information from a search result\n
        Backstory: \n
            A business brand is looking for some influencer in a specific location or area. He used a tool called Perplexity AI to get this data. \n
            This is an amazing too, and yet it can struggle a bit (ai isn't perfect you know). \n 
            The information returned by this ai can be incomplete, not properly structured and all. \n 
            This brand needs youe help. \n 
        This is how you will help the brand: \n 
            1. You will be given two things, the ai search result and the prompt used to query the ai. \n
            2. Your major interest here is formatting and making the structure right. \n 
            3. You will process the ai result, create a JSON structure of the key information needed by the user and add the corresponding values to it. \n
            4. The user prompt is your guide on of how the JSON should be formatted. \n
            5. If there's are missing data or something in the ai response, send it back as NA. \n 
            6. At the end of your processing you want to return a structured response and also make sure it in the best order as expected by the user. \n
        Return a structured JSON or dictionary as output. \n 
        Ensure that the data is properly arranged and in a good format. \n
        Please do this carefully and excellently. 
    <|eot_id|><|start_header_id|>user<|end_header_id|>
    AI_SEARCH_RESULT: {search_result} \n 
    PROMPT: {prompt}
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

     extraction_prompt = PromptTemplate(
         input_variables=["search_result", "prompt"],
         template=extraction_template,
     )

     # prompt -> chat model -> JSON parser
     chain = extraction_prompt | llm | JsonOutputParser()
     template_values = {"search_result": search_result, "prompt": prompt}
     return chain.invoke(template_values)
 def combined_influencer_data(prompt: str) -> dict:
     """Run the search step and the formatting step back to back.

     Args:
         prompt: Query passed to ``get_chat_completion`` and then forwarded,
             together with the search text, to ``influencer_data``.

     Returns:
         The value returned by ``influencer_data``.
     """
     logger.info("Using Perplexity Ai to get the influencer data")
     raw_answer = get_chat_completion(prompt)

     logger.info("Formatting the data with OpenAI")
     return influencer_data(raw_answer, prompt)
 # Category labels offered to the model by product_categorizer (its default
 # ``product_categories`` argument).
 product_categories = [
     "Beauty & Skincare", "Fashion & Lifestyle", "Health & Fitness",
     "Travel & Adventure", "Food & Beverage", "Technology & Gadgets",
     "Gaming & Esports", "Parenting & Family", "Finance & Business",
     "Wellness & Mental Health", "Automotive & Motorsports",
     "Entertainment & Pop Culture", "Photography & Visual Arts",
     "Education & Learning", "Environmental & Sustainability",
 ]
 def product_categorizer(product_lists: list, product_categories=product_categories) -> str:
     """Ask the module-level ``llm`` to pick a category for a list of products.

     Args:
         product_lists: Products substituted for ``{product_lists}`` in the
             template.
         product_categories: Candidate categories substituted for
             ``{product_categories}``; defaults to the module-level list.

     Returns:
         The model's reply as parsed by ``StrOutputParser``.
     """
     logger.info("Categorizing products")

     # The template text is sent to the model verbatim; keep it unchanged.
     categorizer_template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
        You are a Brand AI Agent tasked with categorizing products into categories\n
        There are some categories given by the brand, these are the categories the products are expected to be categorized into.\n
        You will be given a list of products and asked to categorize them.\n
        You are meant to categorize all the given product into one category. \n 
        You take the following steps: 
            1. Looking into all the list of products. \n
            2. Understand where they belong to. \n 
            3. Look into the kist of categories. \n 
            4. Select the category that this products falls under. \n
        You should return the selected category as output. \n 
        Please do this carefully and correctly. 
    <|eot_id|><|start_header_id|>user<|end_header_id|>
    PRODUCT_LISTS: {product_lists} \n 
    PRODUCT_CATEGORY: {product_categories}
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

     categorizer_prompt = PromptTemplate(
         input_variables=["product_lists", "product_categories"],
         template=categorizer_template,
     )

     # prompt -> chat model -> plain-string parser
     chain = categorizer_prompt | llm | StrOutputParser()
     template_values = {
         "product_lists": product_lists,
         "product_categories": product_categories,
     }
     return chain.invoke(template_values)
`@@ -1 +1,2 @@`
	`PERPLEXITY_AI_API = "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"`	`PERPLEXITY_AI_API = "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"`
		`OPENAI_API_KEY = "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"`