Merge pull request #797 from rishi-raj-jain/patch-2
examples: sales web crawler
This commit is contained in:
@@ -0,0 +1,3 @@
|
|||||||
|
OPENAI_API_KEY=
|
||||||
|
FIRECRAWL_API_KEY=
|
||||||
|
SERP_API_KEY=
|
||||||
@@ -0,0 +1,78 @@
|
|||||||
|
import csv
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from firecrawl import FirecrawlApp
|
||||||
|
from openai import OpenAI
|
||||||
|
from serpapi import GoogleSearch
|
||||||
|
from swarm import Agent
|
||||||
|
from swarm.repl import run_demo_loop
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
# Initialize FirecrawlApp and OpenAI
|
||||||
|
app = FirecrawlApp(api_key=os.getenv("FIRECRAWL_API_KEY"))
|
||||||
|
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
||||||
|
|
||||||
|
def crawl_and_analyze_url(url, objective):
|
||||||
|
"""Crawl a website using Firecrawl and analyze the content."""
|
||||||
|
print(f"Parameters: url={url}, objective={objective}")
|
||||||
|
# Crawl the website
|
||||||
|
crawl_status = app.crawl_url(
|
||||||
|
url,
|
||||||
|
params={'limit': 10, 'scrapeOptions': {'formats': ['markdown']}},
|
||||||
|
poll_interval=5
|
||||||
|
)
|
||||||
|
crawl_status = crawl_status['data']
|
||||||
|
# Process each 'markdown' element individually
|
||||||
|
combined_results = []
|
||||||
|
for item in crawl_status:
|
||||||
|
if 'markdown' in item:
|
||||||
|
content = item['markdown']
|
||||||
|
# Analyze the content
|
||||||
|
analysis = generate_completion(
|
||||||
|
"website data extractor",
|
||||||
|
f"Analyze the following website content and extract a JSON object based on the objective. Do not write the ```json and ``` to denote a JSON when returning a response",
|
||||||
|
"Objective: " + objective + "\nContent: " + content
|
||||||
|
)
|
||||||
|
# Parse the JSON result
|
||||||
|
try:
|
||||||
|
result = json.loads(analysis)
|
||||||
|
combined_results.append(result)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
print(f"Could not parse JSON from analysis: {analysis}")
|
||||||
|
# Combine the results
|
||||||
|
return {"objective": objective, "results": combined_results}
|
||||||
|
|
||||||
|
def generate_completion(role, task, content):
|
||||||
|
"""Generate a completion using OpenAI."""
|
||||||
|
print(f"Parameters: role={role}, task={task[:50]}..., content={content[:50]}...")
|
||||||
|
response = client.chat.completions.create(
|
||||||
|
model="gpt-4o",
|
||||||
|
messages=[
|
||||||
|
{"role": "system", "content": f"You are a {role}. {task}"},
|
||||||
|
{"role": "user", "content": content}
|
||||||
|
]
|
||||||
|
)
|
||||||
|
return response.choices[0].message.content
|
||||||
|
|
||||||
|
def handoff_to_crawl_url():
|
||||||
|
"""Hand off the url to the crawl url agent."""
|
||||||
|
return crawl_website_agent
|
||||||
|
|
||||||
|
user_interface_agent = Agent(
|
||||||
|
name="User Interface Agent",
|
||||||
|
instructions="You are a user interface agent that handles all interactions with the user. You need to always start by asking for a URL to crawl and the web data extraction objective. Be concise.",
|
||||||
|
functions=[handoff_to_crawl_url],
|
||||||
|
)
|
||||||
|
|
||||||
|
crawl_website_agent = Agent(
|
||||||
|
name="Crawl Website Agent",
|
||||||
|
instructions="You are a crawl URL agent specialized in crawling web pages and analyzing their content. When you are done, you must print the results to the console.",
|
||||||
|
functions=[crawl_and_analyze_url],
|
||||||
|
)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
# Run the demo loop with the user interface agent
|
||||||
|
run_demo_loop(user_interface_agent, stream=True)
|
||||||
@@ -0,0 +1,4 @@
|
|||||||
|
firecrawl-py
|
||||||
|
openai
|
||||||
|
google-search-results
|
||||||
|
git+https://github.com/openai/swarm.git
|
||||||
Reference in New Issue
Block a user