feat(rust): update rust sdk to support new features (#1446)

* chore(rust-sdk): cargo fmt

* feat(rust-sdk): implement search api + example + test

* feat(rust-sdk): implement crawl cancel api + example + test

* feat(rust-sdk): implement crawl check errors api + example + test

* feat(rust-sdk): implement batch crawl + test + example

+ Fix MapOptions

* feat(rust-sdk): implement extract api + test + example

* feat(rust-sdk): implement llmtxt api + test + example

* chore(rust-sdk): correct mock tests

* chore(rust-sdk): prep for cargo distribution
This commit is contained in:
kkharji
2025-04-18 07:59:59 +03:00
committed by GitHub
parent 33aece8e96
commit f2c01340d1
20 changed files with 4350 additions and 125 deletions
@@ -0,0 +1,175 @@
use clap::{Parser, Subcommand};
use firecrawl::{
batch_scrape::{BatchScrapeParams, WebhookOptions},
map::MapOptions,
scrape::{ScrapeFormats, ScrapeOptions},
FirecrawlApp,
};
use serde_json::Value;
use std::error::Error;
use std::net::SocketAddr;
use std::sync::Arc;
use tokio::sync::Mutex;
// Shared state for the local webhook receiver: every JSON payload posted to
// the server is appended here so `main` can report how many arrived.
struct WebhookState {
// Request bodies, in the order the POST handler pushed them.
responses: Vec<Value>,
}
// Command-line interface for this example, parsed with clap's derive API.
// Plain `//` comments are used on purpose: `///` doc comments on clap types
// are picked up as help text.
#[derive(Parser)]
#[command(version, about, long_about = None)]
struct Cli {
// The scenario to run, given as a subcommand.
#[command(subcommand)]
command: Commands,
}
// Scenarios selectable as subcommands; `main` matches on the chosen variant.
#[derive(Subcommand)]
enum Commands {
/// Multiple URL scraping with webhook monitoring
Basic,
}
/// Builds a [`FirecrawlApp`] for a self-hosted instance.
///
/// The base URL is read from the `FIRECRAWL_API_URL` environment variable and
/// `None` is passed as the second (optional) constructor argument.
///
/// # Panics
/// Panics if `FIRECRAWL_API_URL` is unset or not valid Unicode.
///
/// # Errors
/// Returns the boxed error produced by `FirecrawlApp::new_selfhosted`.
async fn create_firecrawl_app() -> Result<FirecrawlApp, Box<dyn Error>> {
    let base_url = std::env::var("FIRECRAWL_API_URL")
        .expect("Please set the FIRECRAWL_API_URL environment variable");
    let client = FirecrawlApp::new_selfhosted(base_url, None::<&str>)?;
    Ok(client)
}
/// Starts a background HTTP server that records webhook deliveries.
///
/// Listens on `0.0.0.0:port` and serves a single route, `POST /`. Each JSON
/// body received is pretty-printed to stdout and appended to
/// `state.responses`; the handler answers with the plain text `OK`.
///
/// Returns the callback URL to hand to Firecrawl. It is built on the
/// `host.docker.internal` hostname — presumably so a Firecrawl instance
/// running inside Docker can reach this process on the host (confirm for your
/// deployment).
///
/// # Errors
/// Returns the I/O error if the listening socket cannot be bound.
async fn start_webhook_server(
    port: u16,
    state: Arc<Mutex<WebhookState>>,
) -> Result<String, Box<dyn Error>> {
    use axum::routing::post;
    use axum::Json;

    let app = axum::Router::new().route(
        "/",
        post(move |Json(payload): Json<Value>| {
            // Clone the handle per request so the closure stays callable many times.
            let state = state.clone();
            async move {
                // Serialise once: re-serialising the pretty string would print it
                // as a quoted, escaped JSON string instead of readable JSON.
                match serde_json::to_string_pretty(&payload) {
                    Ok(pretty) => println!("Received webhook: {}", pretty),
                    Err(_) => println!("Received webhook: {}", payload),
                }
                state.lock().await.responses.push(payload);
                "OK"
            }
        }),
    );

    // Bind before spawning so a failure (e.g. port already in use) is returned to
    // the caller instead of panicking inside a detached task, and so the socket
    // is already accepting connections when the URL is handed out.
    let addr = SocketAddr::from(([0, 0, 0, 0], port));
    let listener = tokio::net::TcpListener::bind(addr).await?;
    let webhook_url = format!("http://host.docker.internal:{}", port);

    tokio::spawn(async move {
        if let Err(e) = axum::serve(listener, app).await {
            eprintln!("Webhook server error: {}", e);
        }
    });

    println!("Webhook server running at {}", webhook_url);
    Ok(webhook_url)
}
/// Runs the batch-scrape example end to end.
///
/// Starts the local webhook receiver on port 39120, maps two sites to collect
/// URLs, batch-scrapes whatever was found, then waits five seconds and reports
/// how many webhook payloads were recorded.
#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
    let cli = Cli::parse();
    let client = create_firecrawl_app().await?;
    let webhook_log = Arc::new(Mutex::new(WebhookState {
        responses: Vec::new(),
    }));
    let callback_url = start_webhook_server(39120, Arc::clone(&webhook_log)).await?;

    match cli.command {
        Commands::Basic => {
            let mut discovered = Vec::new();

            // First site: mapped without options. A mapping failure is only
            // reported; the example carries on with whatever was collected.
            let first_site = "https://invalid-url.url/";
            println!("Mapping: {}", first_site);
            let first_map = client.map_url(first_site, None).await;
            match first_map {
                Ok(links) => discovered.extend(links),
                Err(err) => println!("Error mapping {}: {}", first_site, err),
            }

            // Second site: mapped with a search term and a result limit.
            let second_site = "https://www.devjobsscanner.com";
            println!("Mapping: {}", second_site);
            let second_options = MapOptions {
                search: Some("rust".into()),
                limit: Some(20),
                ..Default::default()
            };
            let second_map = client.map_url(second_site, Some(second_options)).await;
            match second_map {
                Ok(links) => discovered.extend(links),
                Err(err) => println!("Error mapping {}: {}", second_site, err),
            }

            test_multiple_urls(&client, discovered, &callback_url).await?;

            // Leave a short window for late webhook deliveries before counting.
            tokio::time::sleep(tokio::time::Duration::from_secs(5)).await;
            let received = webhook_log.lock().await.responses.len();
            println!("Received {} webhook responses", received);
        }
    }
    Ok(())
}
/// Submits `urls` as one batch-scrape job and polls it until it is done.
///
/// The job requests the Markdown and Links formats, ignores invalid URLs and
/// registers `webhook_url` as its webhook target. Status is polled every two
/// seconds until `completed` reaches `total`.
///
/// # Errors
/// Returns the boxed error from starting the job or from any status check.
async fn test_multiple_urls(
    app: &FirecrawlApp,
    urls: Vec<String>,
    webhook_url: &str,
) -> Result<(), Box<dyn Error>> {
    println!("Testing batch scraping of {} URLs", urls.len());

    let scrape_options = ScrapeOptions {
        formats: Some(vec![ScrapeFormats::Markdown, ScrapeFormats::Links]),
        ..Default::default()
    };
    let request = BatchScrapeParams {
        urls,
        webhook: Some(WebhookOptions {
            url: webhook_url.to_owned(),
            headers: None,
            auth_token: None,
        }),
        ignore_invalid_urls: true,
        options: Some(scrape_options),
        ..Default::default()
    };

    let job = app.async_batch_scrape_urls(request).await?;
    println!("Batch job started: {}", job.id);

    // Poll until every page is accounted for; the only exits are completion
    // or a failed status request.
    loop {
        let progress = app.check_batch_scrape_status(&job.id).await?;
        println!("Progress: {}/{} pages", progress.completed, progress.total);
        if progress.completed >= progress.total {
            println!("Batch job completed!");
            return Ok(());
        }
        tokio::time::sleep(tokio::time::Duration::from_secs(2)).await;
    }
}
@@ -0,0 +1,33 @@
use firecrawl::FirecrawlApp;
use std::error::Error;
use std::time::Duration;
/// Starts a crawl of `https://example.com`, waits two seconds, then cancels it
/// and prints the status returned by the cancel call.
///
/// # Panics
/// Panics if `FIRECRAWL_API_URL` is unset or not valid Unicode.
#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
    // The self-hosted instance to talk to comes from the environment.
    let base_url = std::env::var("FIRECRAWL_API_URL")
        .expect("Please set the FIRECRAWL_API_URL environment variable");
    let client = FirecrawlApp::new_selfhosted(base_url, None::<&str>)?;

    println!("Starting a crawl job...");
    let job = client.crawl_url_async("https://example.com", None).await?;
    println!("Crawl job started with ID: {}", job.id);

    // Give the job a moment to get going before cancelling it.
    println!("Waiting for a moment...");
    let pause = Duration::from_secs(2);
    tokio::time::sleep(pause).await;

    println!("Cancelling the crawl job...");
    let cancellation = client.cancel_crawl(&job.id).await?;
    println!("Cancellation result:");
    println!(" Status: {:?}", cancellation.status);

    Ok(())
}
@@ -0,0 +1,59 @@
use firecrawl::FirecrawlApp;
use std::error::Error;
/// Starts a crawl that is expected to fail, prints the errors and
/// robots.txt-blocked URLs reported for it, then cancels the job.
///
/// # Panics
/// Panics if `FIRECRAWL_API_URL` is unset or not valid Unicode.
#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
    let base_url = std::env::var("FIRECRAWL_API_URL")
        .expect("Please set the FIRECRAWL_API_URL environment variable");
    let client = FirecrawlApp::new_selfhosted(base_url, None::<&str>)?;

    // Crawl a host under the reserved `.invalid` TLD so the job is likely to
    // record at least one error.
    println!("Starting a crawl job...");
    let job = client
        .crawl_url_async("https://no-wer-agg.invalid", None)
        .await?;
    println!("Crawl job started with ID: {}", job.id);

    println!("Let it do it's thing...");
    tokio::time::sleep(tokio::time::Duration::from_secs(3)).await;

    println!("Checking for crawl errors...");
    match client.check_crawl_errors(&job.id).await {
        // A failed lookup is reported but does not abort the example.
        Err(err) => {
            println!("Failed to check crawl errors: {}", err);
        }
        Ok(report) => {
            println!("Crawl errors response:");
            println!(" Number of errors: {}", report.errors.len());

            for (index, failure) in report.errors.iter().enumerate() {
                // The heading is only printed when there is at least one error.
                if index == 0 {
                    println!("\nDetailed errors:");
                }
                println!("Error #{}", index + 1);
                println!(" ID: {}", failure.id);
                if let Some(when) = failure.timestamp.as_ref() {
                    println!(" Timestamp: {}", when);
                }
                println!(" URL: {}", failure.url);
                println!(" Error: {}", failure.error);
            }

            println!(
                "\nRobots.txt blocked URLs: {}",
                report.robots_blocked.len()
            );
            for (blocked, position) in report.robots_blocked.iter().zip(1usize..) {
                println!(" {}. {}", position, blocked);
            }
        }
    }

    let cancellation = client.cancel_crawl(&job.id).await?;
    println!("Cancel: {}", cancellation.status);
    Ok(())
}
+17 -13
View File
@@ -1,4 +1,8 @@
use firecrawl::{crawl::CrawlOptions, scrape::{ExtractOptions, ScrapeFormats, ScrapeOptions}, FirecrawlApp};
use firecrawl::{
crawl::CrawlOptions,
scrape::{ExtractOptions, ScrapeFormats, ScrapeOptions},
FirecrawlApp,
};
use serde_json::json;
#[tokio::main]
@@ -19,19 +23,20 @@ async fn main() {
// Crawl a website
let crawl_options = CrawlOptions {
exclude_paths: vec![ "blog/*".into() ].into(),
exclude_paths: vec!["blog/*".into()].into(),
..Default::default()
};
let crawl_result = app
.crawl_url("https://mendable.ai", crawl_options)
.await;
let crawl_result = app.crawl_url("https://mendable.ai", crawl_options).await;
match crawl_result {
Ok(data) => println!("Crawl Result (used {} credits):\n{:#?}", data.credits_used, data.data),
Ok(data) => println!(
"Crawl Result (used {} credits):\n{:#?}",
data.credits_used, data.data
),
Err(e) => eprintln!("Crawl failed: {}", e),
}
// Scrape with Extract
let json_schema = json!({
"type": "object",
@@ -57,11 +62,12 @@ async fn main() {
});
let llm_extraction_options = ScrapeOptions {
formats: vec![ ScrapeFormats::Extract ].into(),
formats: vec![ScrapeFormats::Extract].into(),
extract: ExtractOptions {
schema: json_schema.into(),
..Default::default()
}.into(),
}
.into(),
..Default::default()
};
@@ -75,9 +81,7 @@ async fn main() {
}
// Map a website (Alpha)
let map_result = app
.map_url("https://firecrawl.dev", None)
.await;
let map_result = app.map_url("https://firecrawl.dev", None).await;
match map_result {
Ok(data) => println!("Mapped URLs: {:#?}", data),
+237
View File
@@ -0,0 +1,237 @@
use firecrawl::{extract::ExtractParams, FirecrawlApp};
use serde_json::json;
use std::error::Error;
use clap::{Parser, ValueEnum};
// Command-line arguments for the extract example (clap derive).
// Plain `//` comments are used because `///` doc comments become help text.
#[derive(Parser)]
#[command(author, version, about, long_about = None)]
struct Args {
// Positional argument choosing which scenario `main` runs.
#[arg(value_enum)]
command: Examples,
}
// Scenarios available in this example; `main` matches on the chosen one.
#[derive(Copy, Clone, PartialEq, Eq, ValueEnum)]
enum Examples {
// Asynchronous extraction from a prompt, polled for status.
Basic,
// Synchronous extraction driven by a hand-written JSON schema.
Schema,
// Synchronous extraction with a schema derived via `schemars::JsonSchema`.
JsonSchema,
}
/// Runs one extract scenario against a self-hosted Firecrawl instance.
///
/// * `Basic` starts an asynchronous extract job from a prompt and polls its
///   status up to five times, two seconds apart.
/// * `Schema` runs a synchronous extract with a hand-written JSON schema and
///   prints the fields that schema asks for.
/// * `JsonSchema` runs a synchronous extract whose schema is derived from the
///   local `ProductAnalysis` type, then deserialises and prints the result.
///
/// # Panics
/// Panics if `FIRECRAWL_API_URL` is unset or not valid Unicode.
///
/// # Errors
/// Returns client-construction errors and, in `Basic`, errors from starting or
/// polling the job. Extraction failures in the other scenarios are printed.
#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
    let args = Args::parse();
    let api_url = std::env::var("FIRECRAWL_API_URL")
        .expect("Please set the FIRECRAWL_API_URL environment variable");
    let firecrawl = FirecrawlApp::new_selfhosted(api_url, None::<&str>)?;
    let urls = vec![
        "https://www.firecrawl.dev/".to_string(),
        "https://betteruptime.com".to_string(),
    ];

    match args.command {
        Examples::Basic => {
            // Upper bound on status checks so the example always terminates.
            const MAX_STATUS_CHECKS: u32 = 5;

            println!("Example 1: Extracting with URLs and prompt");
            println!("Starting asynchronous extraction job...");
            // Build the request once; `urls` is not needed afterwards, so it is
            // moved rather than cloned.
            let response = firecrawl
                .async_extract(ExtractParams {
                    urls: Some(urls),
                    prompt: Some(
                        "Extract Product promise, concise description and category".to_string(),
                    ),
                    url_trace: Some(true),
                    ..Default::default()
                })
                .await?;
            println!("Extract job initiated:");
            println!(" Job ID: {}", response.id);

            println!("\nChecking extract status...");
            let mut finished = false;
            for _ in 0..MAX_STATUS_CHECKS {
                let status = firecrawl.get_extract_status(&response.id).await?;
                println!("Extract status: {}", status.status);
                if let Some(url_trace) = &status.url_trace {
                    println!("URL traces:");
                    for trace in url_trace {
                        println!(" URL: {}", trace.url);
                        println!(" Status: {}", trace.status);
                    }
                }
                println!("Extract data: {:#?}", status.data);
                if status.status == "completed" {
                    finished = true;
                    break;
                }
                tokio::time::sleep(tokio::time::Duration::from_secs(2)).await;
            }
            if !finished {
                println!(
                    "Extract job did not complete after {} status checks",
                    MAX_STATUS_CHECKS
                );
            }
        }
        Examples::Schema => {
            println!("Example 2: Extracting with schema");
            // Every name listed under "required" must also be declared under
            // "properties".
            let schema = json!({
                "type": "object",
                "properties": {
                    "category": { "type": "string" },
                    "promise": { "type": "string" },
                    "description": { "type": "string" }
                },
                "required": ["category", "promise", "description"]
            });
            println!("Starting synchronous extraction job...");
            match firecrawl
                .extract(ExtractParams {
                    urls: urls.into(),
                    schema: Some(schema),
                    ..Default::default()
                })
                .await
            {
                Ok(result) => {
                    println!("Extraction completed successfully!");
                    println!("Status: {}", result.status);
                    if let Some(data) = result.data {
                        println!("\nExtracted data:");
                        // Print exactly the fields the schema above requests.
                        for (label, key) in [
                            ("Category", "category"),
                            ("Promise", "promise"),
                            ("Description", "description"),
                        ] {
                            match data.get(key) {
                                Some(value) => println!(" {}: {}", label, value),
                                None => println!(" {}: N/A", label),
                            }
                        }
                    }
                    if let Some(sources) = result.sources {
                        println!("\nSources:");
                        for (field, urls) in sources {
                            println!(" {}: {}", field, urls.join(", "));
                        }
                    }
                }
                Err(e) => {
                    println!("Extraction failed: {}", e);
                }
            }
        }
        Examples::JsonSchema => {
            println!("Example 3: Using JsonSchema derive");

            // NOTE: the doc comments on these types are part of the request —
            // schemars turns them into schema descriptions.

            /// A comprehensive analysis of given product
            #[derive(serde::Serialize, serde::Deserialize, schemars::JsonSchema)]
            struct ProductAnalysis {
                /// The full name of the product
                product_name: String,
                /// The company/brand behind the product
                brand: String,
                /// The general price range (e.g. "Premium", "$10-50", "Enterprise")
                price_range: String,
                /// The main customer segments this product targets
                target_audience: Vec<String>,
                /// Primary benefits and value propositions of the product
                key_benefits: Vec<String>,
                /// Distinctive features that set this product apart from competitors
                unique_selling_points: Vec<String>,
                /// Direct comparisons with competing products/services
                competitor_comparison: Vec<String>,
                /// Technologies, frameworks, or platforms used (if applicable)
                tech_stack: Option<Vec<String>>,
                /// Aggregated review data and sentiment analysis
                reviews_summary: ReviewsSummary,
                // /// Score from 0-10 indicating product-market fit based on analysis
                // market_fit_score: f32, // NOTE: Breaks
                /// Assessment of future growth prospects (e.g. "High", "Moderate", "Limited")
                growth_potential: String,
                /// Relevant compliance standards and certifications
                regulatory_compliance: Option<Vec<String>>,
            }

            /// Aggregated analysis of product reviews from multiple sources
            #[derive(serde::Serialize, serde::Deserialize, schemars::JsonSchema)]
            struct ReviewsSummary {
                /// Overall sentiment from review analysis (e.g. "Highly Positive", "Mixed", "Negative")
                sentiment_analysis: String,
                /// Most frequently mentioned positive aspects
                common_praises: Vec<String>,
                /// Most frequently mentioned criticisms or issues
                common_complaints: Vec<String>,
                /// Platforms or websites where reviews were sourced from
                review_sources: Vec<String>,
            }

            println!("Starting extraction with derived schema...");
            match firecrawl
                .extract_with_schemars::<ProductAnalysis>(ExtractParams {
                    urls: urls.into(),
                    ..Default::default()
                })
                .await
            {
                Ok(result) => {
                    println!("Extraction completed!");
                    println!("Status: {}", result.status);
                    if let Some(data) = result.data {
                        match serde_json::from_value::<ProductAnalysis>(data) {
                            Ok(analysis) => {
                                println!("\nExtracted Product Analysis:");
                                println!(" Product: {}", analysis.product_name);
                                println!(" Brand: {}", analysis.brand);
                                println!(" Price Range: {}", analysis.price_range);
                                println!(" Target Audience:");
                                for audience in analysis.target_audience {
                                    println!(" - {}", audience);
                                }
                                println!(" Key Benefits:");
                                for benefit in analysis.key_benefits {
                                    println!(" - {}", benefit);
                                }
                                println!(" USPs:");
                                for usp in analysis.unique_selling_points {
                                    println!(" - {}", usp);
                                }
                                println!("\n Reviews Summary:");
                                println!(
                                    " Sentiment: {}",
                                    analysis.reviews_summary.sentiment_analysis
                                );
                                println!(" Common Praises:");
                                for praise in analysis.reviews_summary.common_praises {
                                    println!(" - {}", praise);
                                }
                                println!(" Common Complaints:");
                                for complaint in analysis.reviews_summary.common_complaints {
                                    println!(" - {}", complaint);
                                }
                            }
                            // Surface the reason instead of discarding it.
                            Err(e) => {
                                println!("Failed to parse extracted data: {}", e);
                            }
                        }
                    }
                    if let Some(sources) = result.sources {
                        println!("\nSources:");
                        for (field, urls) in sources {
                            println!(" {}: {}", field, urls.join(", "));
                        }
                    }
                }
                Err(e) => {
                    println!("Extraction failed: {}", e);
                }
            }
        }
    }
    Ok(())
}
+173
View File
@@ -0,0 +1,173 @@
#![allow(clippy::option_map_unit_fn)]
use bat::{Input, PrettyPrinter};
use firecrawl::{llmstxt::GenerateLLMsTextParams, FirecrawlApp};
use std::error::Error;
use clap::{Parser, ValueEnum};
// Which generation flow `main` runs.
#[derive(Copy, Clone, PartialEq, Eq, ValueEnum)]
enum Mode {
// Single blocking `generate_llms_text` call.
Basic,
// Start a job with `async_generate_llms_text` and poll its status manually.
Pool,
// Blocking call that prints both the compact and the full text.
Fulltext,
}
// Command-line arguments for the LLMs.txt example (clap derive).
#[derive(Parser)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// URL for which to generate LLMs.txt
    #[arg(default_value = "https://www.firecrawl.dev/")]
    url: String,
    // `default_value` is parsed like user input, and "Mode::Basic" is not one of
    // the accepted value names, so the default itself was rejected. Giving the
    // variant through `default_value_t` lets clap derive the value from the enum.
    #[arg(long, short = 'm', value_enum, default_value_t = Mode::Basic)]
    mode: Mode,
    /// Maximum number of URLs to process
    #[arg(long, short = 'd', default_value = "1")]
    max_urls: u32,
    /// Whether to show the full LLMs-full.txt in the response
    #[arg(long, short = 'f', default_value = "false")]
    full_text: bool,
    /// Experimental streaming option
    #[arg(long, short = 's', default_value = "false")]
    stream: bool,
}
/// Generates LLMs.txt for the requested URL using the selected [`Mode`].
///
/// * `Basic` makes one blocking call and prints the compact text, or the full
///   text when `--full-text` was passed.
/// * `Pool` starts an asynchronous job and polls it up to ten times, two
///   seconds apart.
/// * `Fulltext` makes one blocking call that always requests the full text and
///   prints both variants.
///
/// # Panics
/// Panics if `FIRECRAWL_API_URL` is unset or not valid Unicode.
///
/// # Errors
/// Returns client and request errors, printing failures, and an error when the
/// response lacks the text variant that was asked for. A failed request in
/// `Fulltext` mode is printed rather than returned.
#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
    // Upper bound on status checks in `Pool` mode.
    const MAX_STATUS_CHECKS: u32 = 10;

    let args = Args::parse();
    let api_url = std::env::var("FIRECRAWL_API_URL")
        .expect("Please set the FIRECRAWL_API_URL environment variable");
    let firecrawl = FirecrawlApp::new_selfhosted(api_url, None::<&str>)?;
    let params = GenerateLLMsTextParams {
        url: args.url.clone(),
        max_urls: args.max_urls,
        show_full_text: args.full_text,
        experimental_stream: args.stream,
    };

    match args.mode {
        Mode::Basic => {
            println!("Example 1: Basic LLMs.txt generation (synchronous)");
            println!("Generating LLMs.txt for {}...", args.url);
            let result = firecrawl.generate_llms_text(params).await?;
            println!("Expires at: {}", result.expires_at);
            let selected = if args.full_text {
                result.data.full.as_ref()
            } else {
                result.data.compact.as_ref()
            };
            // Report a missing text as an error instead of panicking.
            let text = selected.ok_or("LLM Text")?;
            pretty_print_content("Firecrawl Result", text)?;
        }
        Mode::Pool => {
            println!("Example 2: Asynchronous LLMs.txt generation with manual polling");
            println!("Starting asynchronous LLMs.txt generation job...");
            let response = firecrawl.async_generate_llms_text(params).await?;
            println!("LLMs.txt generation job initiated:");
            println!(" Job ID: {}", response.id);
            println!("\nManually polling for status...");

            let mut settled = false;
            for _ in 0..MAX_STATUS_CHECKS {
                let status = firecrawl
                    .check_generate_llms_text_status(&response.id)
                    .await?;
                match status.status.as_str() {
                    "completed" => {
                        println!("LLMs.txt generation completed!");
                        let selected = if args.full_text {
                            status.data.full.as_ref()
                        } else {
                            status.data.compact.as_ref()
                        };
                        let text = selected.ok_or("LLM Text")?;
                        pretty_print_content("Pool Result", text)?;
                        settled = true;
                        break;
                    }
                    "failed" => {
                        println!(
                            "LLMs.txt generation failed: {}",
                            status.error.unwrap_or_default()
                        );
                        settled = true;
                        break;
                    }
                    other => println!("Generation status: {}", other),
                }
                println!("Waiting 2 seconds before checking again...");
                tokio::time::sleep(tokio::time::Duration::from_secs(2)).await;
            }
            if !settled {
                println!(
                    "LLMs.txt generation still running after {} status checks",
                    MAX_STATUS_CHECKS
                );
            }
        }
        Mode::Fulltext => {
            println!("Example 3: LLMs.txt generation with full text");
            println!("Generating LLMs.txt with full text...");
            // This mode prints the full text, so always ask for it rather than
            // relying on `--full-text` also being passed.
            let params = GenerateLLMsTextParams {
                show_full_text: true,
                ..params
            };
            match firecrawl.generate_llms_text(params).await {
                Ok(result) => {
                    println!("LLMs.txt generation completed successfully!");
                    let llmstxt = result.data.compact.ok_or("LLMs Text Expected")?;
                    let fulltxt = result.data.full.ok_or("Full LLMs Text Expected")?;
                    pretty_print_contents(&[
                        ("LLMs.txt (compact)", llmstxt),
                        ("LLMs.txt (full text)", fulltxt),
                    ])?;
                }
                Err(e) => {
                    println!("LLMs.txt generation failed: {}", e);
                }
            }
        }
    }
    Ok(())
}
/// Prints `content` through `bat` with a header and grid.
///
/// `title` is shown as the input's title; the input is named `file.md`,
/// presumably so it is highlighted as Markdown.
///
/// # Errors
/// Returns the boxed error from `PrettyPrinter::print`.
fn pretty_print_content(title: &str, content: &str) -> Result<(), Box<dyn Error>> {
    let document = Input::from_bytes(content.as_bytes())
        .title(title)
        .name("file.md");

    let mut printer = PrettyPrinter::new();
    printer.header(true).grid(true).input(document);
    printer.print()?;
    Ok(())
}
/// Prints several titled texts through `bat` in a single run.
///
/// Each `(title, content)` pair becomes one input named `file.md`; all inputs
/// are printed with a header and grid, in the order given.
///
/// # Errors
/// Returns the boxed error from `PrettyPrinter::print`.
fn pretty_print_contents(title_contents: &[(&'static str, String)]) -> Result<(), Box<dyn Error>> {
    let documents: Vec<_> = title_contents
        .iter()
        .map(|(title, content)| {
            Input::from_bytes(content.as_bytes())
                .title(*title)
                .name("file.md")
        })
        .collect();

    let mut printer = PrettyPrinter::new();
    printer.header(true).grid(true).inputs(documents);
    printer.print()?;
    Ok(())
}
+186
View File
@@ -0,0 +1,186 @@
use clap::{Parser, ValueEnum};
use firecrawl::{
search::{SearchParams, SearchResponse},
FirecrawlApp,
};
use std::error::Error;
// Command-line arguments for the search example (clap derive).
#[derive(Debug, Parser)]
#[command(author, version, about, long_about = None)]
struct Args {
/// Which example to run
#[arg(value_enum, default_value_t = Examples::All)]
example: Examples,
}
// Search scenarios selectable from the command line. `main` dispatches each
// variant to the matching `run_*_example` function; `All` runs every one in
// sequence.
#[derive(Debug, Clone, ValueEnum)]
enum Examples {
All,
Basic,
Advanced,
Geo,
Temporal,
Social,
News,
Academic,
Commercial,
}
/// Runs the search example chosen on the command line, or all of them.
///
/// # Panics
/// Panics if `FIRECRAWL_API_URL` is unset or not valid Unicode.
///
/// # Errors
/// Returns the first error raised while building the client or running an
/// example; later examples are not run.
#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
    let cli = Args::parse();
    let base_url = std::env::var("FIRECRAWL_API_URL")
        .expect("Please set the FIRECRAWL_API_URL environment variable");
    let client = FirecrawlApp::new_selfhosted(base_url, None::<&str>)?;
    let app = &client;

    match cli.example {
        Examples::Basic => run_basic_example(app).await?,
        Examples::Advanced => run_advanced_example(app).await?,
        Examples::Geo => run_geographic_example(app).await?,
        Examples::Temporal => run_temporal_example(app).await?,
        Examples::Social => run_social_example(app).await?,
        Examples::News => run_news_example(app).await?,
        Examples::Academic => run_academic_example(app).await?,
        Examples::Commercial => run_commercial_example(app).await?,
        // Every scenario, in the same order as the variants above.
        Examples::All => {
            run_basic_example(app).await?;
            run_advanced_example(app).await?;
            run_geographic_example(app).await?;
            run_temporal_example(app).await?;
            run_social_example(app).await?;
            run_news_example(app).await?;
            run_academic_example(app).await?;
            run_commercial_example(app).await?;
        }
    }

    Ok(())
}
/// Runs a plain search, passing `None` for the optional parameters, and
/// prints the results.
async fn run_basic_example(firecrawl: &FirecrawlApp) -> Result<(), Box<dyn Error>> {
    const QUERY: &str = "rust programming language";
    let response = firecrawl.search(QUERY, None).await?;
    print_results("Basic Search", QUERY, &response);
    Ok(())
}
/// Searches with `site:` operators in the query, limited to five results.
async fn run_advanced_example(firecrawl: &FirecrawlApp) -> Result<(), Box<dyn Error>> {
    const QUERY: &str = "rust web framework site:github.com OR site:gitlab.com";
    let response = firecrawl
        .search_with_params(SearchParams {
            query: QUERY.to_owned(),
            limit: Some(5),
            ..Default::default()
        })
        .await?;
    print_results("Advanced Repository Search", QUERY, &response);
    Ok(())
}
/// Searches with a location and country set, limited to five results.
async fn run_geographic_example(firecrawl: &FirecrawlApp) -> Result<(), Box<dyn Error>> {
    const QUERY: &str = "coworking space startup hub";
    // WARN: neither `location` nor `country` works with searxng.
    let request = SearchParams {
        query: QUERY.to_owned(),
        location: Some("Silicon Valley, CA".to_owned()),
        country: Some("us".to_owned()),
        limit: Some(5),
        ..Default::default()
    };
    let response = firecrawl.search_with_params(request).await?;
    print_results("Geographic-Specific Search", QUERY, &response);
    Ok(())
}
/// Searches with a `tbs` time filter set, limited to five results.
async fn run_temporal_example(firecrawl: &FirecrawlApp) -> Result<(), Box<dyn Error>> {
    const QUERY: &str = "artificial intelligence breakthroughs";
    // WARN: `tbs` doesn't work with searxng.
    let request = SearchParams {
        query: QUERY.to_owned(),
        tbs: Some("qdr:m1".to_owned()),
        limit: Some(5),
        ..Default::default()
    };
    let response = firecrawl.search_with_params(request).await?;
    print_results("Recent AI News", QUERY, &response);
    Ok(())
}
/// Searches with a site `filter` and a last-week `tbs` filter, limited to
/// five results.
async fn run_social_example(firecrawl: &FirecrawlApp) -> Result<(), Box<dyn Error>> {
    const QUERY: &str = "viral tech trends site:twitter.com";
    let request = SearchParams {
        query: QUERY.to_owned(),
        // WARN: `filter` doesn't work. Maybe searxng related.
        filter: Some("site:twitter.com OR site:linkedin.com".to_owned()),
        // WARN: `tbs` doesn't work with searxng. "qdr:w" = last week.
        tbs: Some("qdr:w".to_owned()),
        limit: Some(5),
        ..Default::default()
    };
    let response = firecrawl.search_with_params(request).await?;
    print_results("Social Media Tech Trends", QUERY, &response);
    Ok(())
}
/// Searches three news sites via `site:` operators with a last-24-hours `tbs`
/// filter, limited to five results.
async fn run_news_example(firecrawl: &FirecrawlApp) -> Result<(), Box<dyn Error>> {
    const QUERY: &str =
        "cryptocurrency market analysis site:reuters.com OR site:bloomberg.com OR site:ft.com";
    let request = SearchParams {
        query: QUERY.to_owned(),
        // WARN: `tbs` doesn't work with searxng. "qdr:d" = last 24 hours.
        tbs: Some("qdr:d".to_owned()),
        limit: Some(5),
        ..Default::default()
    };
    let response = firecrawl.search_with_params(request).await?;
    print_results("Financial News Search", QUERY, &response);
    Ok(())
}
/// Searches two academic sites via `site:` operators with a last-year `tbs`
/// filter, limited to five results.
async fn run_academic_example(firecrawl: &FirecrawlApp) -> Result<(), Box<dyn Error>> {
    const QUERY: &str =
        "quantum computing research papers site:arxiv.org OR site:scholar.google.com";
    let request = SearchParams {
        query: QUERY.to_owned(),
        // WARN: the `filter` field doesn't work (maybe searxng related), so the
        // sites are restricted through the query string instead.
        // WARN: `tbs` doesn't work with searxng. "qdr:y" = last year.
        tbs: Some("qdr:y".to_owned()),
        limit: Some(5),
        ..Default::default()
    };
    let response = firecrawl.search_with_params(request).await?;
    print_results("Academic Research Search", QUERY, &response);
    Ok(())
}
/// Searches a single review site via a `site:` operator, limited to five
/// results.
async fn run_commercial_example(firecrawl: &FirecrawlApp) -> Result<(), Box<dyn Error>> {
    const QUERY: &str = "enterprise cloud solutions reviews site:g2.com";
    let response = firecrawl
        .search_with_params(SearchParams {
            query: QUERY.to_owned(),
            limit: Some(5),
            ..Default::default()
        })
        .await?;
    print_results("Commercial Product Search", QUERY, &response);
    Ok(())
}
/// Prints one search response as a framed, numbered list.
///
/// `name` and `query` go in the header; each result shows its title, URL and
/// the first 40 characters of its description. A warning attached to the
/// response, if any, is printed after the list.
fn print_results(name: &str, query: &str, results: &SearchResponse) {
    let rule = "=".repeat(70);

    println!("\n{rule}");
    println!("🔍 {name}");
    println!("🔎 Query: \"{query}\"");
    println!("{rule}");

    // Results are numbered from 1.
    for (position, hit) in (1usize..).zip(results.data.iter()) {
        println!("{}. 📌 Title: {}", position, hit.title);
        println!(" - 🔗 URL: {}", hit.url);
        println!(" - 📝 Description: \"{:.40}\"...", hit.description);
    }

    if let Some(note) = results.warning.as_ref() {
        println!("\n⚠️ Warning: {note}");
    }

    println!("{rule}\n");
}