diff --git a/apps/api/src/controllers/auth.ts b/apps/api/src/controllers/auth.ts index 120cc604..9a5933b6 100644 --- a/apps/api/src/controllers/auth.ts +++ b/apps/api/src/controllers/auth.ts @@ -246,6 +246,13 @@ export async function supaAuthenticateUser( subscriptionData.plan ); break; + case RateLimiterMode.Map: + rateLimiter = getRateLimiter( + RateLimiterMode.Map, + token, + subscriptionData.plan + ); + break; case RateLimiterMode.CrawlStatus: rateLimiter = getRateLimiter(RateLimiterMode.CrawlStatus, token); break; @@ -298,6 +305,8 @@ export async function supaAuthenticateUser( token === "this_is_just_a_preview_token" && (mode === RateLimiterMode.Scrape || mode === RateLimiterMode.Preview || + mode === RateLimiterMode.Map || + // mode === RateLimiterMode.Crawl || mode === RateLimiterMode.Search) ) { return { success: true, team_id: "preview" }; diff --git a/apps/js-sdk/example.ts b/apps/js-sdk/example.ts index 76e26db9..9a4d840c 100644 --- a/apps/js-sdk/example.ts +++ b/apps/js-sdk/example.ts @@ -4,14 +4,14 @@ import { CrawlStatusResponse } from './firecrawl/src/index'; const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY"}); // Scrape a website: -const scrapeResult = await app.scrapeUrl('firecrawl.dev') as ScrapeResponse; +const scrapeResult = await app.scrapeUrl('firecrawl.dev'); if (scrapeResult) { console.log(scrapeResult.markdown) } // Crawl a website: -const crawlResult = await app.crawlUrl('mendable.ai', {crawlerOptions: {excludes: ['blog/*'], limit: 5}}, false); +const crawlResult = await app.crawlUrl('mendable.ai', {crawlerOptions: {excludePaths: ['blog/*'], limit: 5}}, false); console.log(crawlResult) const jobId: string = await crawlResult['jobId']; diff --git a/apps/js-sdk/exampleV0.ts b/apps/js-sdk/exampleV0.ts index 58c46b6a..cecaaf24 100644 --- a/apps/js-sdk/exampleV0.ts +++ b/apps/js-sdk/exampleV0.ts @@ -1,10 +1,10 @@ import FirecrawlApp, { ScrapeResponseV0, CrawlStatusResponseV0, SearchResponseV0 } from './firecrawl/src/index' //'@mendable/firecrawl-js'; import { z } from "zod"; -const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY", version: "v0"}); +const app = new FirecrawlApp<"v0">({apiKey: "fc-YOUR_API_KEY", version: "v0"}) // Scrape a website: -const scrapeResult = await app.scrapeUrl('firecrawl.dev') as ScrapeResponseV0; +const scrapeResult = await app.scrapeUrl('firecrawl.dev'); if (scrapeResult.data) { console.log(scrapeResult.data.content) @@ -53,9 +53,7 @@ const zodSchema = z.object({ .describe("Top 5 stories on Hacker News"), }); -let llmExtractionResult = await app.scrapeUrl("https://news.ycombinator.com", { - extractorOptions: { extractionSchema: zodSchema }, -}); +let llmExtractionResult = await app.scrapeUrl("https://news.ycombinator.com"); if (llmExtractionResult.data) { console.log(llmExtractionResult.data[0].llm_extraction); diff --git a/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts b/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts index cd5fc948..d71688b5 100644 --- a/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts +++ b/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts @@ -1,163 +1,331 @@ -import FirecrawlApp, { CrawlResponseV0, CrawlStatusResponse, CrawlStatusResponseV0, FirecrawlDocumentV0, ScrapeResponseV0, SearchResponseV0 } from '../../index'; -import { v4 as uuidv4 } from 'uuid'; -import dotenv from 'dotenv'; -import { describe, test, expect } from '@jest/globals'; +import FirecrawlApp, { + CrawlResponseV0, + CrawlStatusResponse, + CrawlStatusResponseV0, + FirecrawlDocumentV0, + ScrapeResponseV0, + SearchResponseV0, +} from "../../index"; +import { v4 as uuidv4 } from "uuid"; +import dotenv from "dotenv"; +import { describe, test, expect } from "@jest/globals"; dotenv.config(); const TEST_API_KEY = process.env.TEST_API_KEY; const API_URL = "http://127.0.0.1:3002"; -describe('FirecrawlApp E2E Tests', () => { - test.concurrent('should throw error for no API key', async () => { +describe('FirecrawlApp<"v0"> E2E Tests', () => { + test.concurrent("should throw error for no API key", async () => { expect(() => { - new FirecrawlApp({ apiKey: null, apiUrl: API_URL, version: "v0" }); + new FirecrawlApp<"v0">({ apiKey: null, apiUrl: API_URL, version: "v0" }); }).toThrow("No API key provided"); }); - test.concurrent('should throw error for invalid API key on scrape', async () => { - const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL, version: "v0" }); - await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401"); - }); - - test.concurrent('should throw error for blocklisted URL on scrape', async () => { - const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" }); - const blocklistedUrl = "https://facebook.com/fake-test"; - await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403"); - }); - - test.concurrent('should return successful response with valid preview token', async () => { - const app = new FirecrawlApp({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL, version: "v0" }); - const response = await app.scrapeUrl('https://roastmywebsite.ai') as ScrapeResponseV0; - expect(response).not.toBeNull(); - expect(response.data?.content).toContain("_Roast_"); - }, 30000); // 30 seconds timeout - - test.concurrent('should return successful response for valid scrape', async () => { - const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" }); - const response = await app.scrapeUrl('https://roastmywebsite.ai') as ScrapeResponseV0; - expect(response).not.toBeNull(); - expect(response.data?.content).toContain("_Roast_"); - expect(response.data).toHaveProperty('markdown'); - expect(response.data).toHaveProperty('metadata'); - expect(response.data).not.toHaveProperty('html'); - }, 30000); // 30 seconds timeout - - test.concurrent('should return successful response with valid API key and include HTML', async () => { - const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" }); - const response = await app.scrapeUrl('https://roastmywebsite.ai', { pageOptions: { includeHtml: true } }) as ScrapeResponseV0; - expect(response).not.toBeNull(); - expect(response.data?.content).toContain("_Roast_"); - expect(response.data?.markdown).toContain("_Roast_"); - expect(response.data?.html).toContain(" { - const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" }); - const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf') as ScrapeResponseV0; - expect(response).not.toBeNull(); - expect(response.data?.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy'); - }, 30000); // 30 seconds timeout - - test.concurrent('should return successful response for valid scrape with PDF file without explicit extension', async () => { - const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" }); - const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001') as ScrapeResponseV0; - expect(response).not.toBeNull(); - expect(response.data?.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy'); - }, 30000); // 30 seconds timeout - - test.concurrent('should throw error for invalid API key on crawl', async () => { - const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL, version: "v0" }); - await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401"); - }); - - test.concurrent('should throw error for blocklisted URL on crawl', async () => { - const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" }); - const blocklistedUrl = "https://twitter.com/fake-test"; - await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403"); - }); - - test.concurrent('should return successful response for crawl and wait for completion', async () => { - const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" }); - const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, true, 10) as FirecrawlDocumentV0[]; - expect(response).not.toBeNull(); - console.log({response}); - expect(response[0].content).toContain("_Roast_"); - }, 60000); // 60 seconds timeout - - test.concurrent('should handle idempotency key for crawl', async () => { - const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" }); - const uniqueIdempotencyKey = uuidv4(); - const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, false, 2, uniqueIdempotencyKey) as CrawlResponseV0; - expect(response).not.toBeNull(); - expect(response.jobId).toBeDefined(); - - await expect(app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409"); - }); - - test.concurrent('should check crawl status', async () => { - const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" }); - const response: any = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, false) as CrawlResponseV0; - expect(response).not.toBeNull(); - expect(response.jobId).toBeDefined(); - - let statusResponse: any = await app.checkCrawlStatus(response.jobId); - const maxChecks = 15; - let checks = 0; - - while (statusResponse.status === 'active' && checks < maxChecks) { - await new Promise(resolve => setTimeout(resolve, 5000)); - expect(statusResponse.partial_data).not.toBeNull(); - // expect(statusResponse.current).toBeGreaterThanOrEqual(1); - statusResponse = await app.checkCrawlStatus(response.jobId) as CrawlStatusResponseV0; - checks++; + test.concurrent( + "should throw error for invalid API key on scrape", + async () => { + const invalidApp = new FirecrawlApp<"v0">({ + apiKey: "invalid_api_key", + apiUrl: API_URL, + version: "v0", + }); + await expect( + invalidApp.scrapeUrl("https://roastmywebsite.ai") + ).rejects.toThrow("Request failed with status code 401"); } + ); - expect(statusResponse).not.toBeNull(); - expect(statusResponse.success).toBe(true); - expect(statusResponse.status).toBe('completed'); - expect(statusResponse.total).toEqual(statusResponse.current); - expect(statusResponse.current_step).not.toBeNull(); - expect(statusResponse.current).toBeGreaterThanOrEqual(1); + test.concurrent( + "should throw error for blocklisted URL on scrape", + async () => { + const app = new FirecrawlApp<"v0">({ + apiKey: TEST_API_KEY, + apiUrl: API_URL, + version: "v0", + }); + const blocklistedUrl = "https://facebook.com/fake-test"; + await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow( + "Request failed with status code 403" + ); + } + ); - expect(statusResponse?.data?.length).toBeGreaterThan(0); - }, 35000); // 35 seconds timeout + test.concurrent( + "should return successful response with valid preview token", + async () => { + const app = new FirecrawlApp<"v0">({ + apiKey: "this_is_just_a_preview_token", + apiUrl: API_URL, + version: "v0", + }); + const response = (await app.scrapeUrl( + "https://roastmywebsite.ai" + )) as ScrapeResponseV0; + expect(response).not.toBeNull(); + expect(response.data?.content).toContain("_Roast_"); + }, + 30000 + ); // 30 seconds timeout - test.concurrent('should return successful response for search', async () => { - const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" }); - const response = await app.search("test query") as SearchResponseV0; + test.concurrent( + "should return successful response for valid scrape", + async () => { + const app = new FirecrawlApp<"v0">({ + apiKey: TEST_API_KEY, + apiUrl: API_URL, + version: "v0", + }); + const response = (await app.scrapeUrl( + "https://roastmywebsite.ai" + )) as ScrapeResponseV0; + expect(response).not.toBeNull(); + expect(response.data?.content).toContain("_Roast_"); + expect(response.data).toHaveProperty("markdown"); + expect(response.data).toHaveProperty("metadata"); + expect(response.data).not.toHaveProperty("html"); + }, + 30000 + ); // 30 seconds timeout + + test.concurrent( + "should return successful response with valid API key and include HTML", + async () => { + const app = new FirecrawlApp<"v0">({ + apiKey: TEST_API_KEY, + apiUrl: API_URL, + version: "v0", + }); + const response = (await app.scrapeUrl("https://roastmywebsite.ai", { + pageOptions: { includeHtml: true }, + })) as ScrapeResponseV0; + expect(response).not.toBeNull(); + expect(response.data?.content).toContain("_Roast_"); + expect(response.data?.markdown).toContain("_Roast_"); + expect(response.data?.html).toContain(" { + const app = new FirecrawlApp<"v0">({ + apiKey: TEST_API_KEY, + apiUrl: API_URL, + version: "v0", + }); + const response = (await app.scrapeUrl( + "https://arxiv.org/pdf/astro-ph/9301001.pdf" + )) as ScrapeResponseV0; + expect(response).not.toBeNull(); + expect(response.data?.content).toContain( + "We present spectrophotometric observations of the Broad Line Radio Galaxy" + ); + }, + 30000 + ); // 30 seconds timeout + + test.concurrent( + "should return successful response for valid scrape with PDF file without explicit extension", + async () => { + const app = new FirecrawlApp<"v0">({ + apiKey: TEST_API_KEY, + apiUrl: API_URL, + version: "v0", + }); + const response = (await app.scrapeUrl( + "https://arxiv.org/pdf/astro-ph/9301001" + )) as ScrapeResponseV0; + expect(response).not.toBeNull(); + expect(response.data?.content).toContain( + "We present spectrophotometric observations of the Broad Line Radio Galaxy" + ); + }, + 30000 + ); // 30 seconds timeout + + test.concurrent( + "should throw error for invalid API key on crawl", + async () => { + const invalidApp = new FirecrawlApp<"v0">({ + apiKey: "invalid_api_key", + apiUrl: API_URL, + version: "v0", + }); + await expect( + invalidApp.crawlUrl("https://roastmywebsite.ai") + ).rejects.toThrow("Request failed with status code 401"); + } + ); + + test.concurrent( + "should throw error for blocklisted URL on crawl", + async () => { + const app = new FirecrawlApp<"v0">({ + apiKey: TEST_API_KEY, + apiUrl: API_URL, + version: "v0", + }); + const blocklistedUrl = "https://twitter.com/fake-test"; + await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow( + "Request failed with status code 403" + ); + } + ); + + test.concurrent( + "should return successful response for crawl and wait for completion", + async () => { + const app = new FirecrawlApp<"v0">({ + apiKey: TEST_API_KEY, + apiUrl: API_URL, + version: "v0", + }); + const response = (await app.crawlUrl( + "https://roastmywebsite.ai", + { crawlerOptions: { excludes: ["blog/*"] } }, + true, + 10 + )) as FirecrawlDocumentV0[]; + expect(response).not.toBeNull(); + console.log({ response }); + expect(response[0].content).toContain("_Roast_"); + }, + 60000 + ); // 60 seconds timeout + + test.concurrent("should handle idempotency key for crawl", async () => { + const app = new FirecrawlApp<"v0">({ + apiKey: TEST_API_KEY, + apiUrl: API_URL, + version: "v0", + }); + const uniqueIdempotencyKey = uuidv4(); + const response = (await app.crawlUrl( + "https://roastmywebsite.ai", + { crawlerOptions: { excludes: ["blog/*"] } }, + false, + 2, + uniqueIdempotencyKey + )) as CrawlResponseV0; expect(response).not.toBeNull(); - expect(response?.data?.[0]?.content).toBeDefined(); - expect(response?.data?.length).toBeGreaterThan(2); - }, 30000); // 30 seconds timeout + expect(response.jobId).toBeDefined(); - test.concurrent('should throw error for invalid API key on search', async () => { - const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL, version: "v0" }); - await expect(invalidApp.search("test query")).rejects.toThrow("Request failed with status code 401"); + await expect( + app.crawlUrl( + "https://roastmywebsite.ai", + { crawlerOptions: { excludes: ["blog/*"] } }, + true, + 2, + uniqueIdempotencyKey + ) + ).rejects.toThrow("Request failed with status code 409"); }); - test.concurrent('should perform LLM extraction', async () => { - const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" }); - const response = await app.scrapeUrl("https://mendable.ai", { - extractorOptions: { - mode: 'llm-extraction', - extractionPrompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source", - extractionSchema: { - type: 'object', - properties: { - company_mission: { type: 'string' }, - supports_sso: { type: 'boolean' }, - is_open_source: { type: 'boolean' } - }, - required: ['company_mission', 'supports_sso', 'is_open_source'] - } + test.concurrent( + "should check crawl status", + async () => { + const app = new FirecrawlApp<"v0">({ + apiKey: TEST_API_KEY, + apiUrl: API_URL, + version: "v0", + }); + const response: any = (await app.crawlUrl( + "https://roastmywebsite.ai", + { crawlerOptions: { excludes: ["blog/*"] } }, + false + )) as CrawlResponseV0; + expect(response).not.toBeNull(); + expect(response.jobId).toBeDefined(); + + let statusResponse = await app.checkCrawlStatus(response.jobId); + const maxChecks = 15; + let checks = 0; + + while (statusResponse.status === "active" && checks < maxChecks) { + await new Promise((resolve) => setTimeout(resolve, 5000)); + expect(statusResponse.partial_data).not.toBeNull(); + // expect(statusResponse.current).toBeGreaterThanOrEqual(1); + statusResponse = (await app.checkCrawlStatus( + response.jobId + )) as CrawlStatusResponseV0; + checks++; } - }) as ScrapeResponseV0; - expect(response).not.toBeNull(); - expect(response.data?.llm_extraction).toBeDefined(); - const llmExtraction = response.data?.llm_extraction; - expect(llmExtraction?.company_mission).toBeDefined(); - expect(typeof llmExtraction?.supports_sso).toBe('boolean'); - expect(typeof llmExtraction?.is_open_source).toBe('boolean'); - }, 30000); // 30 seconds timeout + + expect(statusResponse).not.toBeNull(); + expect(statusResponse.success).toBe(true); + expect(statusResponse.status).toBe("completed"); + expect(statusResponse.total).toEqual(statusResponse.current); + expect(statusResponse.current_step).not.toBeNull(); + expect(statusResponse.current).toBeGreaterThanOrEqual(1); + + expect(statusResponse?.data?.length).toBeGreaterThan(0); + }, + 35000 + ); // 35 seconds timeout + + test.concurrent( + "should return successful response for search", + async () => { + const app = new FirecrawlApp<"v0">({ + apiKey: TEST_API_KEY, + apiUrl: API_URL, + version: "v0", + }); + const response = (await app.search("test query")) as SearchResponseV0; + expect(response).not.toBeNull(); + expect(response?.data?.[0]?.content).toBeDefined(); + expect(response?.data?.length).toBeGreaterThan(2); + }, + 30000 + ); // 30 seconds timeout + + test.concurrent( + "should throw error for invalid API key on search", + async () => { + const invalidApp = new FirecrawlApp<"v0">({ + apiKey: "invalid_api_key", + apiUrl: API_URL, + version: "v0", + }); + await expect(invalidApp.search("test query")).rejects.toThrow( + "Request failed with status code 401" + ); + } + ); + + test.concurrent( + "should perform LLM extraction", + async () => { + const app = new FirecrawlApp<"v0">({ + apiKey: TEST_API_KEY, + apiUrl: API_URL, + version: "v0", + }); + const response = (await app.scrapeUrl("https://mendable.ai", { + extractorOptions: { + mode: "llm-extraction", + extractionPrompt: + "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source", + extractionSchema: { + type: "object", + properties: { + company_mission: { type: "string" }, + supports_sso: { type: "boolean" }, + is_open_source: { type: "boolean" }, + }, + required: ["company_mission", "supports_sso", "is_open_source"], + }, + }, + })) as ScrapeResponseV0; + expect(response).not.toBeNull(); + expect(response.data?.llm_extraction).toBeDefined(); + const llmExtraction = response.data?.llm_extraction; + expect(llmExtraction?.company_mission).toBeDefined(); + expect(typeof llmExtraction?.supports_sso).toBe("boolean"); + expect(typeof llmExtraction?.is_open_source).toBe("boolean"); + }, + 30000 + ); // 30 seconds timeout }); diff --git a/apps/js-sdk/firecrawl/src/__tests__/index.test.ts b/apps/js-sdk/firecrawl/src/__tests__/index.test.ts index dcda96f7..92951237 100644 --- a/apps/js-sdk/firecrawl/src/__tests__/index.test.ts +++ b/apps/js-sdk/firecrawl/src/__tests__/index.test.ts @@ -31,7 +31,7 @@ describe('the firecrawl JS SDK', () => { }); const apiKey = 'YOUR_API_KEY' - const app = new FirecrawlApp({ apiKey }); + const app = new FirecrawlApp<"v0">({ apiKey }); // Scrape a single URL const url = 'https://mendable.ai'; const scrapedData = await app.scrapeUrl(url); diff --git a/apps/js-sdk/firecrawl/src/__tests__/v1/e2e_withAuth/index.test.ts b/apps/js-sdk/firecrawl/src/__tests__/v1/e2e_withAuth/index.test.ts index 81c870f5..05f9c566 100644 --- a/apps/js-sdk/firecrawl/src/__tests__/v1/e2e_withAuth/index.test.ts +++ b/apps/js-sdk/firecrawl/src/__tests__/v1/e2e_withAuth/index.test.ts @@ -222,7 +222,7 @@ describe('FirecrawlApp E2E Tests', () => { expect(response).not.toBeNull(); expect(response.id).toBeDefined(); - let statusResponse: any = await app.checkCrawlStatus(response.id) as CrawlStatusResponse; + let statusResponse = await app.checkCrawlStatus(response.id); const maxChecks = 15; let checks = 0; diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index d63ee1a9..390b3701 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -315,10 +315,10 @@ export interface SearchResponseV0 { * Main class for interacting with the Firecrawl API. * Provides methods for scraping, searching, crawling, and mapping web content. */ -export default class FirecrawlApp { +export default class FirecrawlApp { private apiKey: string; private apiUrl: string; - private version: "v0" | "v1"; + public version: T; /** * Initializes a new instance of the FirecrawlApp class. @@ -327,7 +327,7 @@ export default class FirecrawlApp { constructor({ apiKey = null, apiUrl = null, version = "v1" }: FirecrawlAppConfig) { this.apiKey = apiKey || ""; this.apiUrl = apiUrl || "https://api.firecrawl.dev"; - this.version = version; + this.version = version as T; if (!this.apiKey) { throw new Error("No API key provided"); } @@ -342,7 +342,7 @@ export default class FirecrawlApp { async scrapeUrl( url: string, params?: ScrapeParams | ScrapeParamsV0 - ): Promise { + ): Promise { const headers: AxiosRequestHeaders = { "Content-Type": "application/json", Authorization: `Bearer ${this.apiKey}`, @@ -372,16 +372,12 @@ export default class FirecrawlApp { if (response.status === 200) { const responseData = response.data; if (responseData.success) { - if (this.version == 'v0') { - return responseData as ScrapeResponseV0; - } else { - return { - success: true, - warning: responseData.warning, - error: responseData.error, - ...responseData.data - } as ScrapeResponse; - } + return (this.version === 'v0' ? responseData as ScrapeResponseV0 : { + success: true, + warning: responseData.warning, + error: responseData.error, + ...responseData.data + }) as ScrapeResponse; } else { throw new Error(`Failed to scrape URL. Error: ${responseData.error}`); } @@ -391,7 +387,7 @@ export default class FirecrawlApp { } catch (error: any) { throw new Error(error.message); } - return { success: false, error: "Internal server error." }; + return { success: false, error: "Internal server error." } as this['version'] extends 'v0' ? ScrapeResponseV0 : ScrapeResponse; } /** @@ -405,7 +401,7 @@ export default class FirecrawlApp { params?: SearchParamsV0 ): Promise { if (this.version === "v1") { - throw new Error("Search is not supported in v1"); + throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0."); } const headers: AxiosRequestHeaders = { @@ -449,11 +445,15 @@ export default class FirecrawlApp { */ async crawlUrl( url: string, - params?: CrawlParams | CrawlParamsV0, + params?: this['version'] extends 'v0' ? CrawlParamsV0 : CrawlParams, waitUntilDone: boolean = true, pollInterval: number = 2, idempotencyKey?: string - ): Promise { + ): Promise< + this['version'] extends 'v0' + ? CrawlResponseV0 | CrawlStatusResponseV0 | FirecrawlDocumentV0[] + : CrawlResponse | CrawlStatusResponse + > { const headers = this.prepareHeaders(idempotencyKey); let jsonData: any = { url, ...params }; try { @@ -463,13 +463,13 @@ export default class FirecrawlApp { headers ); if (response.status === 200) { - const id: string = this.version == 'v0' ? response.data.jobId : response.data.id; + const id: string = this.version === 'v0' ? response.data.jobId : response.data.id; let checkUrl: string | undefined = undefined; if (waitUntilDone) { - if (this.version == 'v1') { checkUrl = response.data.url } + if (this.version === 'v1') { checkUrl = response.data.url } return this.monitorJobStatus(id, headers, pollInterval, checkUrl); } else { - if (this.version == 'v0') { + if (this.version === 'v0') { return { success: true, jobId: id @@ -485,13 +485,13 @@ export default class FirecrawlApp { this.handleError(response, "start crawl job"); } } catch (error: any) { - if (error.response.data.error) { + if (error.response?.data?.error) { throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`); } else { throw new Error(error.message); } } - return { success: false, error: "Internal server error." }; + return { success: false, error: "Internal server error." } as this['version'] extends 'v0' ? CrawlResponseV0 : CrawlResponse; } /** @@ -499,7 +499,7 @@ export default class FirecrawlApp { * @param id - The ID of the crawl operation. * @returns The response containing the job status. */ - async checkCrawlStatus(id?: string): Promise { + async checkCrawlStatus(id?: string): Promise { if (!id) { throw new Error("No crawl ID provided"); } @@ -507,14 +507,14 @@ export default class FirecrawlApp { const headers: AxiosRequestHeaders = this.prepareHeaders(); try { const response: AxiosResponse = await this.getRequest( - this.version == 'v1' ? - this.apiUrl + `/${this.version}/crawl/${id}` : - this.apiUrl + `/${this.version}/crawl/status/${id}`, + this.version === 'v1' ? + `${this.apiUrl}/${this.version}/crawl/${id}` : + `${this.apiUrl}/${this.version}/crawl/status/${id}`, headers ); if (response.status === 200) { - if (this.version == 'v0') { - return { + if (this.version === 'v0') { + return ({ success: true, status: response.data.status, current: response.data.current, @@ -525,9 +525,9 @@ export default class FirecrawlApp { partial_data: !response.data.data ? response.data.partial_data : undefined, - } as CrawlStatusResponseV0; - } else if (this.version == 'v1') { - return { + } as CrawlStatusResponseV0) as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse; + } else { + return ({ success: true, status: response.data.status, totalCount: response.data.totalCount, @@ -536,7 +536,7 @@ export default class FirecrawlApp { next: response.data.next, data: response.data.data, error: response.data.error - } as CrawlStatusResponse; + } as CrawlStatusResponse) as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse; } } else { this.handleError(response, "check crawl status"); @@ -545,8 +545,8 @@ export default class FirecrawlApp { throw new Error(error.message); } - if (this.version == 'v0') { - return { + return this.version === 'v0' ? + ({ success: false, status: "unknown", current: 0, @@ -554,13 +554,11 @@ export default class FirecrawlApp { current_step: "", total: 0, error: "Internal server error.", - } as CrawlStatusResponseV0; - } else { - return { + } as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse) : + ({ success: false, error: "Internal server error.", - } as CrawlStatusResponse; - } + } as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse); } async mapUrl(url: string, params?: MapParams): Promise { @@ -633,6 +631,7 @@ export default class FirecrawlApp { * @param id - The ID of the crawl operation. * @param headers - The headers for the request. * @param checkInterval - Interval in seconds for job status checks. + * @param checkUrl - Optional URL to check the status (used for v1 API) * @returns The final job status or data. */ async monitorJobStatus( @@ -640,13 +639,13 @@ export default class FirecrawlApp { headers: AxiosRequestHeaders, checkInterval: number, checkUrl?: string - ): Promise { + ): Promise { let apiUrl: string = ''; while (true) { - if (this.version == 'v1') { - apiUrl = checkUrl ?? this.apiUrl + `/v1/crawl/${id}`; - } else if (this.version == 'v0') { - apiUrl = checkUrl ?? this.apiUrl + `/v0/crawl/status/${id}`; + if (this.version === 'v1') { + apiUrl = checkUrl ?? `${this.apiUrl}/v1/crawl/${id}`; + } else if (this.version === 'v0') { + apiUrl = `${this.apiUrl}/v0/crawl/status/${id}`; } const statusResponse: AxiosResponse = await this.getRequest( apiUrl, @@ -656,19 +655,17 @@ export default class FirecrawlApp { const statusData = statusResponse.data; if (statusData.status === "completed") { if ("data" in statusData) { - return this.version == 'v0' ? statusData.data : statusData; + return this.version === 'v0' ? statusData.data : statusData; } else { throw new Error("Crawl job completed but no data was returned"); } } else if ( ["active", "paused", "pending", "queued", "scraping"].includes(statusData.status) ) { - if (checkInterval < 2) { - checkInterval = 2; - } + checkInterval = Math.max(checkInterval, 2); await new Promise((resolve) => setTimeout(resolve, checkInterval * 1000) - ); // Wait for the specified timeout before checking again + ); } else { throw new Error( `Crawl job failed or was stopped. Status: ${statusData.status}`