Nick: fixed sdk types and map preview

This commit is contained in:
Nicolas
2024-08-27 20:02:39 -03:00
parent 38ed845b93
commit 7dff5cdf49
7 changed files with 374 additions and 202 deletions
+9
View File
@@ -246,6 +246,13 @@ export async function supaAuthenticateUser(
subscriptionData.plan subscriptionData.plan
); );
break; break;
case RateLimiterMode.Map:
rateLimiter = getRateLimiter(
RateLimiterMode.Map,
token,
subscriptionData.plan
);
break;
case RateLimiterMode.CrawlStatus: case RateLimiterMode.CrawlStatus:
rateLimiter = getRateLimiter(RateLimiterMode.CrawlStatus, token); rateLimiter = getRateLimiter(RateLimiterMode.CrawlStatus, token);
break; break;
@@ -298,6 +305,8 @@ export async function supaAuthenticateUser(
token === "this_is_just_a_preview_token" && token === "this_is_just_a_preview_token" &&
(mode === RateLimiterMode.Scrape || (mode === RateLimiterMode.Scrape ||
mode === RateLimiterMode.Preview || mode === RateLimiterMode.Preview ||
mode === RateLimiterMode.Map ||
// mode === RateLimiterMode.Crawl ||
mode === RateLimiterMode.Search) mode === RateLimiterMode.Search)
) { ) {
return { success: true, team_id: "preview" }; return { success: true, team_id: "preview" };
+2 -2
View File
@@ -4,14 +4,14 @@ import { CrawlStatusResponse } from './firecrawl/src/index';
const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY"}); const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY"});
// Scrape a website: // Scrape a website:
const scrapeResult = await app.scrapeUrl('firecrawl.dev') as ScrapeResponse; const scrapeResult = await app.scrapeUrl('firecrawl.dev');
if (scrapeResult) { if (scrapeResult) {
console.log(scrapeResult.markdown) console.log(scrapeResult.markdown)
} }
// Crawl a website: // Crawl a website:
const crawlResult = await app.crawlUrl('mendable.ai', {crawlerOptions: {excludes: ['blog/*'], limit: 5}}, false); const crawlResult = await app.crawlUrl('mendable.ai', {crawlerOptions: {excludePaths: ['blog/*'], limit: 5}}, false);
console.log(crawlResult) console.log(crawlResult)
const jobId: string = await crawlResult['jobId']; const jobId: string = await crawlResult['jobId'];
+3 -5
View File
@@ -1,10 +1,10 @@
import FirecrawlApp, { ScrapeResponseV0, CrawlStatusResponseV0, SearchResponseV0 } from './firecrawl/src/index' //'@mendable/firecrawl-js'; import FirecrawlApp, { ScrapeResponseV0, CrawlStatusResponseV0, SearchResponseV0 } from './firecrawl/src/index' //'@mendable/firecrawl-js';
import { z } from "zod"; import { z } from "zod";
const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY", version: "v0"}); const app = new FirecrawlApp<"v0">({apiKey: "fc-YOUR_API_KEY", version: "v0"})
// Scrape a website: // Scrape a website:
const scrapeResult = await app.scrapeUrl('firecrawl.dev') as ScrapeResponseV0; const scrapeResult = await app.scrapeUrl('firecrawl.dev');
if (scrapeResult.data) { if (scrapeResult.data) {
console.log(scrapeResult.data.content) console.log(scrapeResult.data.content)
@@ -53,9 +53,7 @@ const zodSchema = z.object({
.describe("Top 5 stories on Hacker News"), .describe("Top 5 stories on Hacker News"),
}); });
let llmExtractionResult = await app.scrapeUrl("https://news.ycombinator.com", { let llmExtractionResult = await app.scrapeUrl("https://news.ycombinator.com");
extractorOptions: { extractionSchema: zodSchema },
});
if (llmExtractionResult.data) { if (llmExtractionResult.data) {
console.log(llmExtractionResult.data[0].llm_extraction); console.log(llmExtractionResult.data[0].llm_extraction);
@@ -1,163 +1,331 @@
import FirecrawlApp, { CrawlResponseV0, CrawlStatusResponse, CrawlStatusResponseV0, FirecrawlDocumentV0, ScrapeResponseV0, SearchResponseV0 } from '../../index'; import FirecrawlApp, {
import { v4 as uuidv4 } from 'uuid'; CrawlResponseV0,
import dotenv from 'dotenv'; CrawlStatusResponse,
import { describe, test, expect } from '@jest/globals'; CrawlStatusResponseV0,
FirecrawlDocumentV0,
ScrapeResponseV0,
SearchResponseV0,
} from "../../index";
import { v4 as uuidv4 } from "uuid";
import dotenv from "dotenv";
import { describe, test, expect } from "@jest/globals";
dotenv.config(); dotenv.config();
const TEST_API_KEY = process.env.TEST_API_KEY; const TEST_API_KEY = process.env.TEST_API_KEY;
const API_URL = "http://127.0.0.1:3002"; const API_URL = "http://127.0.0.1:3002";
describe('FirecrawlApp E2E Tests', () => { describe('FirecrawlApp<"v0"> E2E Tests', () => {
test.concurrent('should throw error for no API key', async () => { test.concurrent("should throw error for no API key", async () => {
expect(() => { expect(() => {
new FirecrawlApp({ apiKey: null, apiUrl: API_URL, version: "v0" }); new FirecrawlApp<"v0">({ apiKey: null, apiUrl: API_URL, version: "v0" });
}).toThrow("No API key provided"); }).toThrow("No API key provided");
}); });
test.concurrent('should throw error for invalid API key on scrape', async () => { test.concurrent(
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL, version: "v0" }); "should throw error for invalid API key on scrape",
await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401"); async () => {
const invalidApp = new FirecrawlApp<"v0">({
apiKey: "invalid_api_key",
apiUrl: API_URL,
version: "v0",
}); });
await expect(
invalidApp.scrapeUrl("https://roastmywebsite.ai")
).rejects.toThrow("Request failed with status code 401");
}
);
test.concurrent('should throw error for blocklisted URL on scrape', async () => { test.concurrent(
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" }); "should throw error for blocklisted URL on scrape",
async () => {
const app = new FirecrawlApp<"v0">({
apiKey: TEST_API_KEY,
apiUrl: API_URL,
version: "v0",
});
const blocklistedUrl = "https://facebook.com/fake-test"; const blocklistedUrl = "https://facebook.com/fake-test";
await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403"); await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow(
"Request failed with status code 403"
);
}
);
test.concurrent(
"should return successful response with valid preview token",
async () => {
const app = new FirecrawlApp<"v0">({
apiKey: "this_is_just_a_preview_token",
apiUrl: API_URL,
version: "v0",
}); });
const response = (await app.scrapeUrl(
test.concurrent('should return successful response with valid preview token', async () => { "https://roastmywebsite.ai"
const app = new FirecrawlApp({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL, version: "v0" }); )) as ScrapeResponseV0;
const response = await app.scrapeUrl('https://roastmywebsite.ai') as ScrapeResponseV0;
expect(response).not.toBeNull(); expect(response).not.toBeNull();
expect(response.data?.content).toContain("_Roast_"); expect(response.data?.content).toContain("_Roast_");
}, 30000); // 30 seconds timeout },
30000
); // 30 seconds timeout
test.concurrent('should return successful response for valid scrape', async () => { test.concurrent(
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" }); "should return successful response for valid scrape",
const response = await app.scrapeUrl('https://roastmywebsite.ai') as ScrapeResponseV0; async () => {
const app = new FirecrawlApp<"v0">({
apiKey: TEST_API_KEY,
apiUrl: API_URL,
version: "v0",
});
const response = (await app.scrapeUrl(
"https://roastmywebsite.ai"
)) as ScrapeResponseV0;
expect(response).not.toBeNull(); expect(response).not.toBeNull();
expect(response.data?.content).toContain("_Roast_"); expect(response.data?.content).toContain("_Roast_");
expect(response.data).toHaveProperty('markdown'); expect(response.data).toHaveProperty("markdown");
expect(response.data).toHaveProperty('metadata'); expect(response.data).toHaveProperty("metadata");
expect(response.data).not.toHaveProperty('html'); expect(response.data).not.toHaveProperty("html");
}, 30000); // 30 seconds timeout },
30000
); // 30 seconds timeout
test.concurrent('should return successful response with valid API key and include HTML', async () => { test.concurrent(
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" }); "should return successful response with valid API key and include HTML",
const response = await app.scrapeUrl('https://roastmywebsite.ai', { pageOptions: { includeHtml: true } }) as ScrapeResponseV0; async () => {
const app = new FirecrawlApp<"v0">({
apiKey: TEST_API_KEY,
apiUrl: API_URL,
version: "v0",
});
const response = (await app.scrapeUrl("https://roastmywebsite.ai", {
pageOptions: { includeHtml: true },
})) as ScrapeResponseV0;
expect(response).not.toBeNull(); expect(response).not.toBeNull();
expect(response.data?.content).toContain("_Roast_"); expect(response.data?.content).toContain("_Roast_");
expect(response.data?.markdown).toContain("_Roast_"); expect(response.data?.markdown).toContain("_Roast_");
expect(response.data?.html).toContain("<h1"); expect(response.data?.html).toContain("<h1");
}, 30000); // 30 seconds timeout },
30000
); // 30 seconds timeout
test.concurrent('should return successful response for valid scrape with PDF file', async () => { test.concurrent(
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" }); "should return successful response for valid scrape with PDF file",
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf') as ScrapeResponseV0; async () => {
expect(response).not.toBeNull(); const app = new FirecrawlApp<"v0">({
expect(response.data?.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy'); apiKey: TEST_API_KEY,
}, 30000); // 30 seconds timeout apiUrl: API_URL,
version: "v0",
test.concurrent('should return successful response for valid scrape with PDF file without explicit extension', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" });
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001') as ScrapeResponseV0;
expect(response).not.toBeNull();
expect(response.data?.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
}, 30000); // 30 seconds timeout
test.concurrent('should throw error for invalid API key on crawl', async () => {
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL, version: "v0" });
await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
}); });
const response = (await app.scrapeUrl(
"https://arxiv.org/pdf/astro-ph/9301001.pdf"
)) as ScrapeResponseV0;
expect(response).not.toBeNull();
expect(response.data?.content).toContain(
"We present spectrophotometric observations of the Broad Line Radio Galaxy"
);
},
30000
); // 30 seconds timeout
test.concurrent('should throw error for blocklisted URL on crawl', async () => { test.concurrent(
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" }); "should return successful response for valid scrape with PDF file without explicit extension",
async () => {
const app = new FirecrawlApp<"v0">({
apiKey: TEST_API_KEY,
apiUrl: API_URL,
version: "v0",
});
const response = (await app.scrapeUrl(
"https://arxiv.org/pdf/astro-ph/9301001"
)) as ScrapeResponseV0;
expect(response).not.toBeNull();
expect(response.data?.content).toContain(
"We present spectrophotometric observations of the Broad Line Radio Galaxy"
);
},
30000
); // 30 seconds timeout
test.concurrent(
"should throw error for invalid API key on crawl",
async () => {
const invalidApp = new FirecrawlApp<"v0">({
apiKey: "invalid_api_key",
apiUrl: API_URL,
version: "v0",
});
await expect(
invalidApp.crawlUrl("https://roastmywebsite.ai")
).rejects.toThrow("Request failed with status code 401");
}
);
test.concurrent(
"should throw error for blocklisted URL on crawl",
async () => {
const app = new FirecrawlApp<"v0">({
apiKey: TEST_API_KEY,
apiUrl: API_URL,
version: "v0",
});
const blocklistedUrl = "https://twitter.com/fake-test"; const blocklistedUrl = "https://twitter.com/fake-test";
await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403"); await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow(
}); "Request failed with status code 403"
);
}
);
test.concurrent('should return successful response for crawl and wait for completion', async () => { test.concurrent(
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" }); "should return successful response for crawl and wait for completion",
const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, true, 10) as FirecrawlDocumentV0[]; async () => {
const app = new FirecrawlApp<"v0">({
apiKey: TEST_API_KEY,
apiUrl: API_URL,
version: "v0",
});
const response = (await app.crawlUrl(
"https://roastmywebsite.ai",
{ crawlerOptions: { excludes: ["blog/*"] } },
true,
10
)) as FirecrawlDocumentV0[];
expect(response).not.toBeNull(); expect(response).not.toBeNull();
console.log({response}); console.log({ response });
expect(response[0].content).toContain("_Roast_"); expect(response[0].content).toContain("_Roast_");
}, 60000); // 60 seconds timeout },
60000
); // 60 seconds timeout
test.concurrent('should handle idempotency key for crawl', async () => { test.concurrent("should handle idempotency key for crawl", async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" }); const app = new FirecrawlApp<"v0">({
apiKey: TEST_API_KEY,
apiUrl: API_URL,
version: "v0",
});
const uniqueIdempotencyKey = uuidv4(); const uniqueIdempotencyKey = uuidv4();
const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, false, 2, uniqueIdempotencyKey) as CrawlResponseV0; const response = (await app.crawlUrl(
"https://roastmywebsite.ai",
{ crawlerOptions: { excludes: ["blog/*"] } },
false,
2,
uniqueIdempotencyKey
)) as CrawlResponseV0;
expect(response).not.toBeNull(); expect(response).not.toBeNull();
expect(response.jobId).toBeDefined(); expect(response.jobId).toBeDefined();
await expect(app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409"); await expect(
app.crawlUrl(
"https://roastmywebsite.ai",
{ crawlerOptions: { excludes: ["blog/*"] } },
true,
2,
uniqueIdempotencyKey
)
).rejects.toThrow("Request failed with status code 409");
}); });
test.concurrent('should check crawl status', async () => { test.concurrent(
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" }); "should check crawl status",
const response: any = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, false) as CrawlResponseV0; async () => {
const app = new FirecrawlApp<"v0">({
apiKey: TEST_API_KEY,
apiUrl: API_URL,
version: "v0",
});
const response: any = (await app.crawlUrl(
"https://roastmywebsite.ai",
{ crawlerOptions: { excludes: ["blog/*"] } },
false
)) as CrawlResponseV0;
expect(response).not.toBeNull(); expect(response).not.toBeNull();
expect(response.jobId).toBeDefined(); expect(response.jobId).toBeDefined();
let statusResponse: any = await app.checkCrawlStatus(response.jobId); let statusResponse = await app.checkCrawlStatus(response.jobId);
const maxChecks = 15; const maxChecks = 15;
let checks = 0; let checks = 0;
while (statusResponse.status === 'active' && checks < maxChecks) { while (statusResponse.status === "active" && checks < maxChecks) {
await new Promise(resolve => setTimeout(resolve, 5000)); await new Promise((resolve) => setTimeout(resolve, 5000));
expect(statusResponse.partial_data).not.toBeNull(); expect(statusResponse.partial_data).not.toBeNull();
// expect(statusResponse.current).toBeGreaterThanOrEqual(1); // expect(statusResponse.current).toBeGreaterThanOrEqual(1);
statusResponse = await app.checkCrawlStatus(response.jobId) as CrawlStatusResponseV0; statusResponse = (await app.checkCrawlStatus(
response.jobId
)) as CrawlStatusResponseV0;
checks++; checks++;
} }
expect(statusResponse).not.toBeNull(); expect(statusResponse).not.toBeNull();
expect(statusResponse.success).toBe(true); expect(statusResponse.success).toBe(true);
expect(statusResponse.status).toBe('completed'); expect(statusResponse.status).toBe("completed");
expect(statusResponse.total).toEqual(statusResponse.current); expect(statusResponse.total).toEqual(statusResponse.current);
expect(statusResponse.current_step).not.toBeNull(); expect(statusResponse.current_step).not.toBeNull();
expect(statusResponse.current).toBeGreaterThanOrEqual(1); expect(statusResponse.current).toBeGreaterThanOrEqual(1);
expect(statusResponse?.data?.length).toBeGreaterThan(0); expect(statusResponse?.data?.length).toBeGreaterThan(0);
}, 35000); // 35 seconds timeout },
35000
); // 35 seconds timeout
test.concurrent('should return successful response for search', async () => { test.concurrent(
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" }); "should return successful response for search",
const response = await app.search("test query") as SearchResponseV0; async () => {
const app = new FirecrawlApp<"v0">({
apiKey: TEST_API_KEY,
apiUrl: API_URL,
version: "v0",
});
const response = (await app.search("test query")) as SearchResponseV0;
expect(response).not.toBeNull(); expect(response).not.toBeNull();
expect(response?.data?.[0]?.content).toBeDefined(); expect(response?.data?.[0]?.content).toBeDefined();
expect(response?.data?.length).toBeGreaterThan(2); expect(response?.data?.length).toBeGreaterThan(2);
}, 30000); // 30 seconds timeout
test.concurrent('should throw error for invalid API key on search', async () => {
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL, version: "v0" });
await expect(invalidApp.search("test query")).rejects.toThrow("Request failed with status code 401");
});
test.concurrent('should perform LLM extraction', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" });
const response = await app.scrapeUrl("https://mendable.ai", {
extractorOptions: {
mode: 'llm-extraction',
extractionPrompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
extractionSchema: {
type: 'object',
properties: {
company_mission: { type: 'string' },
supports_sso: { type: 'boolean' },
is_open_source: { type: 'boolean' }
}, },
required: ['company_mission', 'supports_sso', 'is_open_source'] 30000
); // 30 seconds timeout
test.concurrent(
"should throw error for invalid API key on search",
async () => {
const invalidApp = new FirecrawlApp<"v0">({
apiKey: "invalid_api_key",
apiUrl: API_URL,
version: "v0",
});
await expect(invalidApp.search("test query")).rejects.toThrow(
"Request failed with status code 401"
);
} }
} );
}) as ScrapeResponseV0;
test.concurrent(
"should perform LLM extraction",
async () => {
const app = new FirecrawlApp<"v0">({
apiKey: TEST_API_KEY,
apiUrl: API_URL,
version: "v0",
});
const response = (await app.scrapeUrl("https://mendable.ai", {
extractorOptions: {
mode: "llm-extraction",
extractionPrompt:
"Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
extractionSchema: {
type: "object",
properties: {
company_mission: { type: "string" },
supports_sso: { type: "boolean" },
is_open_source: { type: "boolean" },
},
required: ["company_mission", "supports_sso", "is_open_source"],
},
},
})) as ScrapeResponseV0;
expect(response).not.toBeNull(); expect(response).not.toBeNull();
expect(response.data?.llm_extraction).toBeDefined(); expect(response.data?.llm_extraction).toBeDefined();
const llmExtraction = response.data?.llm_extraction; const llmExtraction = response.data?.llm_extraction;
expect(llmExtraction?.company_mission).toBeDefined(); expect(llmExtraction?.company_mission).toBeDefined();
expect(typeof llmExtraction?.supports_sso).toBe('boolean'); expect(typeof llmExtraction?.supports_sso).toBe("boolean");
expect(typeof llmExtraction?.is_open_source).toBe('boolean'); expect(typeof llmExtraction?.is_open_source).toBe("boolean");
}, 30000); // 30 seconds timeout },
30000
); // 30 seconds timeout
}); });
@@ -31,7 +31,7 @@ describe('the firecrawl JS SDK', () => {
}); });
const apiKey = 'YOUR_API_KEY' const apiKey = 'YOUR_API_KEY'
const app = new FirecrawlApp({ apiKey }); const app = new FirecrawlApp<"v0">({ apiKey });
// Scrape a single URL // Scrape a single URL
const url = 'https://mendable.ai'; const url = 'https://mendable.ai';
const scrapedData = await app.scrapeUrl(url); const scrapedData = await app.scrapeUrl(url);
@@ -222,7 +222,7 @@ describe('FirecrawlApp E2E Tests', () => {
expect(response).not.toBeNull(); expect(response).not.toBeNull();
expect(response.id).toBeDefined(); expect(response.id).toBeDefined();
let statusResponse: any = await app.checkCrawlStatus(response.id) as CrawlStatusResponse; let statusResponse = await app.checkCrawlStatus(response.id);
const maxChecks = 15; const maxChecks = 15;
let checks = 0; let checks = 0;
+43 -46
View File
@@ -315,10 +315,10 @@ export interface SearchResponseV0 {
* Main class for interacting with the Firecrawl API. * Main class for interacting with the Firecrawl API.
* Provides methods for scraping, searching, crawling, and mapping web content. * Provides methods for scraping, searching, crawling, and mapping web content.
*/ */
export default class FirecrawlApp { export default class FirecrawlApp<T extends "v0" | "v1"> {
private apiKey: string; private apiKey: string;
private apiUrl: string; private apiUrl: string;
private version: "v0" | "v1"; public version: T;
/** /**
* Initializes a new instance of the FirecrawlApp class. * Initializes a new instance of the FirecrawlApp class.
@@ -327,7 +327,7 @@ export default class FirecrawlApp {
constructor({ apiKey = null, apiUrl = null, version = "v1" }: FirecrawlAppConfig) { constructor({ apiKey = null, apiUrl = null, version = "v1" }: FirecrawlAppConfig) {
this.apiKey = apiKey || ""; this.apiKey = apiKey || "";
this.apiUrl = apiUrl || "https://api.firecrawl.dev"; this.apiUrl = apiUrl || "https://api.firecrawl.dev";
this.version = version; this.version = version as T;
if (!this.apiKey) { if (!this.apiKey) {
throw new Error("No API key provided"); throw new Error("No API key provided");
} }
@@ -342,7 +342,7 @@ export default class FirecrawlApp {
async scrapeUrl( async scrapeUrl(
url: string, url: string,
params?: ScrapeParams | ScrapeParamsV0 params?: ScrapeParams | ScrapeParamsV0
): Promise<ScrapeResponse | ScrapeResponseV0> { ): Promise<this['version'] extends 'v0' ? ScrapeResponseV0 : ScrapeResponse> {
const headers: AxiosRequestHeaders = { const headers: AxiosRequestHeaders = {
"Content-Type": "application/json", "Content-Type": "application/json",
Authorization: `Bearer ${this.apiKey}`, Authorization: `Bearer ${this.apiKey}`,
@@ -372,16 +372,12 @@ export default class FirecrawlApp {
if (response.status === 200) { if (response.status === 200) {
const responseData = response.data; const responseData = response.data;
if (responseData.success) { if (responseData.success) {
if (this.version == 'v0') { return (this.version === 'v0' ? responseData as ScrapeResponseV0 : {
return responseData as ScrapeResponseV0;
} else {
return {
success: true, success: true,
warning: responseData.warning, warning: responseData.warning,
error: responseData.error, error: responseData.error,
...responseData.data ...responseData.data
} as ScrapeResponse; }) as ScrapeResponse;
}
} else { } else {
throw new Error(`Failed to scrape URL. Error: ${responseData.error}`); throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
} }
@@ -391,7 +387,7 @@ export default class FirecrawlApp {
} catch (error: any) { } catch (error: any) {
throw new Error(error.message); throw new Error(error.message);
} }
return { success: false, error: "Internal server error." }; return { success: false, error: "Internal server error." } as this['version'] extends 'v0' ? ScrapeResponseV0 : ScrapeResponse;
} }
/** /**
@@ -405,7 +401,7 @@ export default class FirecrawlApp {
params?: SearchParamsV0 params?: SearchParamsV0
): Promise<SearchResponseV0> { ): Promise<SearchResponseV0> {
if (this.version === "v1") { if (this.version === "v1") {
throw new Error("Search is not supported in v1"); throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
} }
const headers: AxiosRequestHeaders = { const headers: AxiosRequestHeaders = {
@@ -449,11 +445,15 @@ export default class FirecrawlApp {
*/ */
async crawlUrl( async crawlUrl(
url: string, url: string,
params?: CrawlParams | CrawlParamsV0, params?: this['version'] extends 'v0' ? CrawlParamsV0 : CrawlParams,
waitUntilDone: boolean = true, waitUntilDone: boolean = true,
pollInterval: number = 2, pollInterval: number = 2,
idempotencyKey?: string idempotencyKey?: string
): Promise<CrawlResponse | CrawlResponseV0 | CrawlStatusResponse | CrawlStatusResponseV0 | FirecrawlDocumentV0[]> { ): Promise<
this['version'] extends 'v0'
? CrawlResponseV0 | CrawlStatusResponseV0 | FirecrawlDocumentV0[]
: CrawlResponse | CrawlStatusResponse
> {
const headers = this.prepareHeaders(idempotencyKey); const headers = this.prepareHeaders(idempotencyKey);
let jsonData: any = { url, ...params }; let jsonData: any = { url, ...params };
try { try {
@@ -463,13 +463,13 @@ export default class FirecrawlApp {
headers headers
); );
if (response.status === 200) { if (response.status === 200) {
const id: string = this.version == 'v0' ? response.data.jobId : response.data.id; const id: string = this.version === 'v0' ? response.data.jobId : response.data.id;
let checkUrl: string | undefined = undefined; let checkUrl: string | undefined = undefined;
if (waitUntilDone) { if (waitUntilDone) {
if (this.version == 'v1') { checkUrl = response.data.url } if (this.version === 'v1') { checkUrl = response.data.url }
return this.monitorJobStatus(id, headers, pollInterval, checkUrl); return this.monitorJobStatus(id, headers, pollInterval, checkUrl);
} else { } else {
if (this.version == 'v0') { if (this.version === 'v0') {
return { return {
success: true, success: true,
jobId: id jobId: id
@@ -485,13 +485,13 @@ export default class FirecrawlApp {
this.handleError(response, "start crawl job"); this.handleError(response, "start crawl job");
} }
} catch (error: any) { } catch (error: any) {
if (error.response.data.error) { if (error.response?.data?.error) {
throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`); throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`);
} else { } else {
throw new Error(error.message); throw new Error(error.message);
} }
} }
return { success: false, error: "Internal server error." }; return { success: false, error: "Internal server error." } as this['version'] extends 'v0' ? CrawlResponseV0 : CrawlResponse;
} }
/** /**
@@ -499,7 +499,7 @@ export default class FirecrawlApp {
* @param id - The ID of the crawl operation. * @param id - The ID of the crawl operation.
* @returns The response containing the job status. * @returns The response containing the job status.
*/ */
async checkCrawlStatus(id?: string): Promise<CrawlStatusResponse | CrawlStatusResponseV0> { async checkCrawlStatus(id?: string): Promise<this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse> {
if (!id) { if (!id) {
throw new Error("No crawl ID provided"); throw new Error("No crawl ID provided");
} }
@@ -507,14 +507,14 @@ export default class FirecrawlApp {
const headers: AxiosRequestHeaders = this.prepareHeaders(); const headers: AxiosRequestHeaders = this.prepareHeaders();
try { try {
const response: AxiosResponse = await this.getRequest( const response: AxiosResponse = await this.getRequest(
this.version == 'v1' ? this.version === 'v1' ?
this.apiUrl + `/${this.version}/crawl/${id}` : `${this.apiUrl}/${this.version}/crawl/${id}` :
this.apiUrl + `/${this.version}/crawl/status/${id}`, `${this.apiUrl}/${this.version}/crawl/status/${id}`,
headers headers
); );
if (response.status === 200) { if (response.status === 200) {
if (this.version == 'v0') { if (this.version === 'v0') {
return { return ({
success: true, success: true,
status: response.data.status, status: response.data.status,
current: response.data.current, current: response.data.current,
@@ -525,9 +525,9 @@ export default class FirecrawlApp {
partial_data: !response.data.data partial_data: !response.data.data
? response.data.partial_data ? response.data.partial_data
: undefined, : undefined,
} as CrawlStatusResponseV0; } as CrawlStatusResponseV0) as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse;
} else if (this.version == 'v1') { } else {
return { return ({
success: true, success: true,
status: response.data.status, status: response.data.status,
totalCount: response.data.totalCount, totalCount: response.data.totalCount,
@@ -536,7 +536,7 @@ export default class FirecrawlApp {
next: response.data.next, next: response.data.next,
data: response.data.data, data: response.data.data,
error: response.data.error error: response.data.error
} as CrawlStatusResponse; } as CrawlStatusResponse) as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse;
} }
} else { } else {
this.handleError(response, "check crawl status"); this.handleError(response, "check crawl status");
@@ -545,8 +545,8 @@ export default class FirecrawlApp {
throw new Error(error.message); throw new Error(error.message);
} }
if (this.version == 'v0') { return this.version === 'v0' ?
return { ({
success: false, success: false,
status: "unknown", status: "unknown",
current: 0, current: 0,
@@ -554,13 +554,11 @@ export default class FirecrawlApp {
current_step: "", current_step: "",
total: 0, total: 0,
error: "Internal server error.", error: "Internal server error.",
} as CrawlStatusResponseV0; } as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse) :
} else { ({
return {
success: false, success: false,
error: "Internal server error.", error: "Internal server error.",
} as CrawlStatusResponse; } as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse);
}
} }
async mapUrl(url: string, params?: MapParams): Promise<MapResponse> { async mapUrl(url: string, params?: MapParams): Promise<MapResponse> {
@@ -633,6 +631,7 @@ export default class FirecrawlApp {
* @param id - The ID of the crawl operation. * @param id - The ID of the crawl operation.
* @param headers - The headers for the request. * @param headers - The headers for the request.
* @param checkInterval - Interval in seconds for job status checks. * @param checkInterval - Interval in seconds for job status checks.
* @param checkUrl - Optional URL to check the status (used for v1 API)
* @returns The final job status or data. * @returns The final job status or data.
*/ */
async monitorJobStatus( async monitorJobStatus(
@@ -640,13 +639,13 @@ export default class FirecrawlApp {
headers: AxiosRequestHeaders, headers: AxiosRequestHeaders,
checkInterval: number, checkInterval: number,
checkUrl?: string checkUrl?: string
): Promise<CrawlStatusResponse | FirecrawlDocumentV0[]> { ): Promise<this['version'] extends 'v0' ? CrawlStatusResponseV0 | FirecrawlDocumentV0[] : CrawlStatusResponse> {
let apiUrl: string = ''; let apiUrl: string = '';
while (true) { while (true) {
if (this.version == 'v1') { if (this.version === 'v1') {
apiUrl = checkUrl ?? this.apiUrl + `/v1/crawl/${id}`; apiUrl = checkUrl ?? `${this.apiUrl}/v1/crawl/${id}`;
} else if (this.version == 'v0') { } else if (this.version === 'v0') {
apiUrl = checkUrl ?? this.apiUrl + `/v0/crawl/status/${id}`; apiUrl = `${this.apiUrl}/v0/crawl/status/${id}`;
} }
const statusResponse: AxiosResponse = await this.getRequest( const statusResponse: AxiosResponse = await this.getRequest(
apiUrl, apiUrl,
@@ -656,19 +655,17 @@ export default class FirecrawlApp {
const statusData = statusResponse.data; const statusData = statusResponse.data;
if (statusData.status === "completed") { if (statusData.status === "completed") {
if ("data" in statusData) { if ("data" in statusData) {
return this.version == 'v0' ? statusData.data : statusData; return this.version === 'v0' ? statusData.data : statusData;
} else { } else {
throw new Error("Crawl job completed but no data was returned"); throw new Error("Crawl job completed but no data was returned");
} }
} else if ( } else if (
["active", "paused", "pending", "queued", "scraping"].includes(statusData.status) ["active", "paused", "pending", "queued", "scraping"].includes(statusData.status)
) { ) {
if (checkInterval < 2) { checkInterval = Math.max(checkInterval, 2);
checkInterval = 2;
}
await new Promise((resolve) => await new Promise((resolve) =>
setTimeout(resolve, checkInterval * 1000) setTimeout(resolve, checkInterval * 1000)
); // Wait for the specified timeout before checking again );
} else { } else {
throw new Error( throw new Error(
`Crawl job failed or was stopped. Status: ${statusData.status}` `Crawl job failed or was stopped. Status: ${statusData.status}`