feat(rust): update rust sdk to support new features (#1446)

* chore(rust-sdk): cargo fmt
* feat(rust-sdk): implement search api + example + test
* feat(rust-sdk): implement crawl cancel api + example + test
* feat(rust-sdk): implement crawl check errors api + example + test
* feat(rust-sdk): implement batch crawl + test + example + Fix MapOptions
* feat(rust-sdk): implement extract api + test + example
* feat(rust-sdk): implement llmtxt api + test + example
* chore(rust-sdk): correct mock tests
* chore(rust-sdk): prep for cargo distribution
2025-04-18 07:59:59 +03:00
parent 33aece8e96
commit f2c01340d1
20 changed files with 4350 additions and 125 deletions
@@ -24,26 +24,26 @@ pub enum ScrapeFormats {
    Links,

    /// Will result in a URL to a screenshot of the page.
-    /// 
+    ///
    /// Can not be used in conjunction with `ScrapeFormats::ScreenshotFullPage`.
    #[serde(rename = "screenshot")]
    Screenshot,

    /// Will result in a URL to a full-page screenshot of the page.
-    /// 
+    ///
    /// Can not be used in conjunction with `ScrapeFormats::Screenshot`.
    #[serde(rename = "screenshot@fullPage")]
    ScreenshotFullPage,

    /// Will result in the results of an LLM extraction.
-    /// 
+    ///
    /// See `ScrapeOptions.extract` for more options.
    #[serde(rename = "extract")]
    Extract,
 }

 #[serde_with::skip_serializing_none]
-#[derive(Deserialize, Serialize, Debug, Default)]
+#[derive(Deserialize, Serialize, Debug, Default, Clone)]
 #[serde(rename_all = "camelCase")]
 pub struct ExtractOptions {
    /// Schema the output should adhere to, provided in JSON Schema format.
@@ -56,7 +56,7 @@ pub struct ExtractOptions {
 }

 #[serde_with::skip_serializing_none]
-#[derive(Deserialize, Serialize, Debug, Default)]
+#[derive(Deserialize, Serialize, Debug, Default, Clone)]
 #[serde(rename_all = "camelCase")]
 pub struct ScrapeOptions {
    /// Formats to extract from the page. (default: `[ Markdown ]`)
@@ -66,12 +66,12 @@ pub struct ScrapeOptions {
    pub only_main_content: Option<bool>,

    /// HTML tags to exclusively include.
-    /// 
+    ///
    /// For example, if you pass `div`, you will only get content from `<div>`s and their children.
    pub include_tags: Option<Vec<String>>,

    /// HTML tags to exclude.
-    /// 
+    ///
    /// For example, if you pass `img`, you will never get image URLs in your results.
    pub exclude_tags: Option<Vec<String>>,

@@ -131,7 +131,9 @@ impl FirecrawlApp {
            .await
            .map_err(|e| FirecrawlError::HttpError(format!("Scraping {:?}", url.as_ref()), e))?;

-        let response = self.handle_response::<ScrapeResponse>(response, "scrape URL").await?;
+        let response = self
+            .handle_response::<ScrapeResponse>(response, "scrape URL")
+            .await?;

        Ok(response.data)
    }