feat(rust): update rust sdk to support new features (#1446)

* chore(rust-sdk): cargo fmt

* feat(rust-sdk): implement search api + example + test

* feat(rust-sdk): implement crawl cancel api + example + test

* feat(rust-sdk): implement crawl check errors api + example + test

* feat(rust-sdk): implement batch crawl + test + example

+ Fix MapOptions

* feat(rust-sdk): implement extract api + test + example

* feat(rust-sdk): implement llmtxt api + test + example

* chore(rust-sdk): correct mock tests

* chore(rust-sdk): prep for cargo distribution
This commit is contained in:
kkharji
2025-04-18 07:59:59 +03:00
committed by GitHub
parent 33aece8e96
commit f2c01340d1
20 changed files with 4350 additions and 125 deletions
+10 -8
View File
@@ -24,26 +24,26 @@ pub enum ScrapeFormats {
Links,
/// Will result in a URL to a screenshot of the page.
///
///
/// Cannot be used in conjunction with `ScrapeFormats::ScreenshotFullPage`.
#[serde(rename = "screenshot")]
Screenshot,
/// Will result in a URL to a full-page screenshot of the page.
///
///
/// Cannot be used in conjunction with `ScrapeFormats::Screenshot`.
#[serde(rename = "screenshot@fullPage")]
ScreenshotFullPage,
/// Will result in the results of an LLM extraction.
///
///
/// See `ScrapeOptions.extract` for more options.
#[serde(rename = "extract")]
Extract,
}
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default)]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct ExtractOptions {
/// Schema the output should adhere to, provided in JSON Schema format.
@@ -56,7 +56,7 @@ pub struct ExtractOptions {
}
#[serde_with::skip_serializing_none]
#[derive(Deserialize, Serialize, Debug, Default)]
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
#[serde(rename_all = "camelCase")]
pub struct ScrapeOptions {
/// Formats to extract from the page. (default: `[ Markdown ]`)
@@ -66,12 +66,12 @@ pub struct ScrapeOptions {
pub only_main_content: Option<bool>,
/// HTML tags to exclusively include.
///
///
/// For example, if you pass `div`, you will only get content from `<div>`s and their children.
pub include_tags: Option<Vec<String>>,
/// HTML tags to exclude.
///
///
/// For example, if you pass `img`, you will never get image URLs in your results.
pub exclude_tags: Option<Vec<String>>,
@@ -131,7 +131,9 @@ impl FirecrawlApp {
.await
.map_err(|e| FirecrawlError::HttpError(format!("Scraping {:?}", url.as_ref()), e))?;
let response = self.handle_response::<ScrapeResponse>(response, "scrape URL").await?;
let response = self
.handle_response::<ScrapeResponse>(response, "scrape URL")
.await?;
Ok(response.data)
}