# ds_sabaproject/utils.py

from langchain_openai import ChatOpenAI
from langchain_core.prompts.prompt import PromptTemplate
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
import os
import requests
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
import requests
from loguru import logger
from dotenv import load_dotenv
load_dotenv()


# Re-export the OpenAI key only when it is actually present: assigning None
# into os.environ raises TypeError, which would abort the import of this module
# with an unhelpful message.
_openai_api_key = os.getenv("OPENAI_API_KEY")
if _openai_api_key is not None:
    os.environ["OPENAI_API_KEY"] = _openai_api_key
else:
    logger.warning("OPENAI_API_KEY is not set in the environment")

# Key for the YouTube Data API helpers below (None when the variable is unset).
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
# Bearer token for the Perplexity API. Note the environment variable is named
# PERPLEXITY_AI_API, which differs from the Python constant's name.
PERPLEXITYAI_API_KEY = os.getenv('PERPLEXITY_AI_API')
# Shared chat model used by the LangChain pipelines in this module.
llm = ChatOpenAI(model="gpt-4o")


def get_chat_completion(prompt, api_key=PERPLEXITYAI_API_KEY, timeout=60):
    """Send ``prompt`` to the Perplexity chat-completions API and return the reply text.

    Args:
        prompt: User message forwarded to the model.
        api_key: Bearer token for the API. Defaults to the module-level
            ``PERPLEXITYAI_API_KEY`` as it was when this function was defined.
        timeout: Seconds to wait on the HTTP request before ``requests``
            raises a timeout error, so a stalled connection cannot block the
            caller forever.

    Returns:
        The ``content`` of the first choice's message on success. On failure
        a descriptive string is returned instead:
        ``"Request failed with status code: <code>"`` for a non-200 response,
        or ``"Unexpected response format."`` when the body is not JSON or
        lacks the expected ``choices[0].message.content`` path.

    Raises:
        requests.RequestException: If the request itself cannot be completed
            (for example a connection error or a timeout).
    """
    url = "https://api.perplexity.ai/chat/completions"

    payload = {
        "model": "llama-3.1-sonar-small-128k-online",
        "messages": [
            {
                "role": "system",
                "content": "Be precise and concise."
            },
            {
                "role": "user",
                "content": prompt
            }
        ],
        "temperature": 0.2,
        "top_p": 0.9,
        "return_citations": True,
        # NOTE(review): this presumably restricts the online search to the
        # perplexity.ai domain -- confirm that is intended for general queries.
        "search_domain_filter": ["perplexity.ai"],
        "return_images": False,
        "return_related_questions": False,
        "search_recency_filter": "month",
        "top_k": 0,
        "stream": False,
        "presence_penalty": 0,
        "frequency_penalty": 1
    }

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }

    response = requests.post(url, json=payload, headers=headers, timeout=timeout)

    # Check if the request was successful
    if response.status_code != 200:
        return f"Request failed with status code: {response.status_code}"

    try:
        # Decoding happens inside the try so that a 200 response carrying a
        # non-JSON body (ValueError) or an unexpected structure is reported
        # the same way as a missing key.
        response_data = response.json()
        return response_data['choices'][0]['message']['content']
    except (KeyError, IndexError, TypeError, ValueError):
        return "Unexpected response format."


def influencer_data(search_result: str, prompt:str) -> dict:
    """Ask the shared LLM to restructure a raw search answer as JSON.

    Args:
        search_result: Text produced by the upstream AI search.
        prompt: The query that was used to obtain ``search_result``; the
            system prompt tells the model to use it as the formatting guide.

    Returns:
        Whatever ``JsonOutputParser`` parses from the model's reply.
    """
    logger.info("Formatting Influencer Data")

    # The template text is sent to the model verbatim, so its wording and
    # whitespace are intentionally left untouched.
    extraction_template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
        You are a Influencer Data Extractor AI Agent tasked with extracting information from a search result\n
        Backstory: \n
            A business brand is looking for some influencer in a specific location or area. He used a tool called Perplexity AI to get this data. \n
            This is an amazing too, and yet it can struggle a bit (ai isn't perfect you know). \n
            The information returned by this ai can be incomplete, not properly structured and all. \n
            This brand needs youe help. \n
        This is how you will help the brand: \n
            1. You will be given two things, the ai search result and the prompt used to query the ai. \n
            2. Your major interest here is formatting and making the structure right. \n
            3. You will process the ai result, create a JSON structure of the key information needed by the user and add the corresponding values to it. \n
            4. The user prompt is your guide on of how the JSON should be formatted. \n
            5. If there's are missing data or something in the ai response, send it back as NA. \n
            6. At the end of your processing you want to return a structured response and also make sure it in the best order as expected by the user. \n

        Return a structured JSON or dictionary as output. \n
        Ensure that the data is properly arranged and in a good format. \n
        Please do this carefully and excellently.

    <|eot_id|><|start_header_id|>user<|end_header_id|>
    AI_SEARCH_RESULT: {search_result} \n
    PROMPT: {prompt}

    <|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

    extraction_prompt = PromptTemplate(
        input_variables=["search_result", "prompt"],
        template=extraction_template,
    )

    # prompt -> model -> JSON parser
    extraction_chain = extraction_prompt | llm | JsonOutputParser()
    chain_inputs = {"search_result": search_result, "prompt": prompt}
    return extraction_chain.invoke(chain_inputs)


def combined_influencer_data(prompt: str) -> dict:
    """Run the search-then-format pipeline for an influencer query.

    The prompt is first sent to ``get_chat_completion``; its answer is then
    handed, together with the same prompt, to ``influencer_data``.

    Args:
        prompt: The influencer query.

    Returns:
        The structured result produced by ``influencer_data``.
    """
    # Stage 1: fetch the raw answer from the search model.
    logger.info("Using Perplexity Ai to get the influencer data")
    raw_answer = get_chat_completion(prompt)

    # Stage 2: restructure that answer and return it directly.
    logger.info("Formatting the data with OpenAI")
    return influencer_data(raw_answer, prompt)


# Category labels offered to the model by product_categorizer(), which uses
# this list as the default value of its `product_categories` parameter.
product_categories = [
    "Beauty & Skincare",
    "Fashion & Lifestyle",
    "Health & Fitness",
    "Travel & Adventure",
    "Food & Beverage",
    "Technology & Gadgets",
    "Gaming & Esports",
    "Parenting & Family",
    "Finance & Business",
    "Wellness & Mental Health",
    "Automotive & Motorsports",
    "Entertainment & Pop Culture",
    "Photography & Visual Arts",
    "Education & Learning",
    "Environmental & Sustainability"
]


def product_categorizer(product_lists: list, product_categories=product_categories) -> str:
    """Ask the shared LLM to pick one category for a list of products.

    Args:
        product_lists: The products to classify as a group.
        product_categories: Candidate categories shown to the model; defaults
            to the module-level ``product_categories`` list.

    Returns:
        The model's reply as a plain string (via ``StrOutputParser``).
    """
    logger.info("Categorizing products")

    # The template text is sent to the model verbatim, so its wording and
    # whitespace are intentionally left untouched.
    categorizer_template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
        You are a Brand AI Agent tasked with categorizing products into categories\n
        There are some categories given by the brand, these are the categories the products are expected to be categorized into.\n
        You will be given a list of products and asked to categorize them.\n
        You are meant to categorize all the given product into one category. \n
        You take the following steps:
            1. Looking into all the list of products. \n
            2. Understand where they belong to. \n
            3. Look into the kist of categories. \n
            4. Select the category that this products falls under. \n

        You should return the selected category as output. \n
        Please do this carefully and correctly.

    <|eot_id|><|start_header_id|>user<|end_header_id|>
    PRODUCT_LISTS: {product_lists} \n
    PRODUCT_CATEGORY: {product_categories}

    <|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

    categorizer_prompt = PromptTemplate(
        input_variables=["product_lists", "product_categories"],
        template=categorizer_template,
    )

    # prompt -> model -> plain-string parser
    categorizer_chain = categorizer_prompt | llm | StrOutputParser()
    chain_inputs = {
        "product_lists": product_lists,
        "product_categories": product_categories,
    }
    return categorizer_chain.invoke(chain_inputs)

def get_youtube_channel_info(channel_name: str, timeout: float = 30):
    """Look up a YouTube channel by name and return its basic statistics.

    Two YouTube Data API v3 requests are made: a ``search`` call to resolve
    ``channel_name`` to a channel id (the first hit is used), then a
    ``channels`` call for that id's snippet and statistics.

    Args:
        channel_name: Free-text channel name used as the search query.
        timeout: Seconds to wait on each HTTP request before ``requests``
            raises a timeout error.

    Returns:
        On success, a dict with the keys ``"Title"``, ``"Description"``,
        ``"Subscribers"``, ``"Total Views"`` and ``"Total Videos"``.
        On failure, a string: ``"Channel not found."`` when either request
        returns no items, or ``"Error: <status>"`` for a non-200 response.

    Raises:
        requests.RequestException: If a request cannot be completed.
    """
    # Use the key loaded from the environment at module import. The previous
    # code sent the literal text 'YOUTUBE_API_KEY' as the key, so it never
    # used the configured value.
    api_key = YOUTUBE_API_KEY

    search_response = requests.get(
        'https://www.googleapis.com/youtube/v3/search',
        params={
            'part': 'id',
            'q': channel_name,
            'type': 'channel',
            'key': api_key,
        },
        timeout=timeout,
    )
    if search_response.status_code != 200:
        return f"Error: {search_response.status_code}"

    search_items = search_response.json().get('items')
    if not search_items:
        return "Channel not found."

    # Only the top search hit is considered.
    channel_id = search_items[0]['id']['channelId']

    channel_info_response = requests.get(
        'https://www.googleapis.com/youtube/v3/channels',
        params={
            'part': 'snippet,statistics',
            'id': channel_id,
            'key': api_key,
        },
        timeout=timeout,
    )
    if channel_info_response.status_code != 200:
        return f"Error: {channel_info_response.status_code}"

    channel_items = channel_info_response.json().get('items')
    if not channel_items:
        return "Channel not found."

    channel_info = channel_items[0]
    snippet = channel_info['snippet']
    statistics = channel_info['statistics']
    return {
        "Title": snippet['title'],
        "Description": snippet['description'],
        "Subscribers": statistics['subscriberCount'],
        "Total Views": statistics['viewCount'],
        "Total Videos": statistics['videoCount'],
    }

def get_channel_videos(channel_name, max_results=5):
    """Return watch URLs for videos from a channel's uploads playlist.

    The channel is resolved by a name search (first hit), its uploads
    playlist id is read from ``contentDetails``, and up to ``max_results``
    playlist items are converted to ``https://www.youtube.com/watch?v=...``
    URLs.

    Args:
        channel_name: Free-text channel name used as the search query.
        max_results: Passed as ``maxResults`` to the playlist-items request.

    Returns:
        A list of video URLs. An empty list is returned when no channel
        matches, when the channel lookup returns no items, or when the API
        raises ``HttpError``.
    """
    try:
        # Use the key loaded from the environment at module import. The
        # previous code passed the literal text 'YOUTUBE_API_KEY' as the key.
        youtube = build('youtube', 'v3', developerKey=YOUTUBE_API_KEY)

        # Search for the channel by name
        search_response = youtube.search().list(
            q=channel_name,
            type='channel',
            part='id',
            maxResults=1
        ).execute()

        items = search_response.get('items')
        if not items:
            print("No channel found with that name.")
            return []

        channel_id = items[0]['id']['channelId']

        # Retrieve the uploads playlist ID
        channels_response = youtube.channels().list(
            part='contentDetails',
            id=channel_id
        ).execute()

        channel_items = channels_response.get('items')
        if not channel_items:
            # Treat an empty channel lookup like a failed search instead of
            # raising KeyError/IndexError on the indexing below.
            print("No channel found with that name.")
            return []

        uploads_playlist_id = channel_items[0]['contentDetails']['relatedPlaylists']['uploads']

        # Retrieve the videos from the uploads playlist
        playlist_items_response = youtube.playlistItems().list(
            part='snippet',
            playlistId=uploads_playlist_id,
            maxResults=max_results
        ).execute()

        # Build one watch URL per playlist entry.
        return [
            f"https://www.youtube.com/watch?v={item['snippet']['resourceId']['videoId']}"
            for item in playlist_items_response.get('items', [])
        ]

    except HttpError as e:
        print(f"An HTTP error occurred: {e}")
        return []

def get_channel_details(channel_name):
    """Combine a channel's statistics with its recent video URLs.

    Args:
        channel_name: Free-text channel name passed to both lookups.

    Returns:
        The dict from ``get_youtube_channel_info`` with an added
        ``'recent_videos'`` key holding the list from ``get_channel_videos``.
        If the info lookup fails it returns an error string rather than a
        dict; that string is returned unchanged.
    """
    channel_info = get_youtube_channel_info(channel_name)

    # The info lookup reports failures as plain strings. Item assignment on a
    # string raises TypeError, so return the message as-is and skip the
    # video lookup.
    if not isinstance(channel_info, dict):
        return channel_info

    channel_info['recent_videos'] = get_channel_videos(channel_name)
    return channel_info