Nick: fixed sdk types and map preview
This commit is contained in:
@@ -246,6 +246,13 @@ export async function supaAuthenticateUser(
|
|||||||
subscriptionData.plan
|
subscriptionData.plan
|
||||||
);
|
);
|
||||||
break;
|
break;
|
||||||
|
case RateLimiterMode.Map:
|
||||||
|
rateLimiter = getRateLimiter(
|
||||||
|
RateLimiterMode.Map,
|
||||||
|
token,
|
||||||
|
subscriptionData.plan
|
||||||
|
);
|
||||||
|
break;
|
||||||
case RateLimiterMode.CrawlStatus:
|
case RateLimiterMode.CrawlStatus:
|
||||||
rateLimiter = getRateLimiter(RateLimiterMode.CrawlStatus, token);
|
rateLimiter = getRateLimiter(RateLimiterMode.CrawlStatus, token);
|
||||||
break;
|
break;
|
||||||
@@ -298,6 +305,8 @@ export async function supaAuthenticateUser(
|
|||||||
token === "this_is_just_a_preview_token" &&
|
token === "this_is_just_a_preview_token" &&
|
||||||
(mode === RateLimiterMode.Scrape ||
|
(mode === RateLimiterMode.Scrape ||
|
||||||
mode === RateLimiterMode.Preview ||
|
mode === RateLimiterMode.Preview ||
|
||||||
|
mode === RateLimiterMode.Map ||
|
||||||
|
// mode === RateLimiterMode.Crawl ||
|
||||||
mode === RateLimiterMode.Search)
|
mode === RateLimiterMode.Search)
|
||||||
) {
|
) {
|
||||||
return { success: true, team_id: "preview" };
|
return { success: true, team_id: "preview" };
|
||||||
|
|||||||
@@ -4,14 +4,14 @@ import { CrawlStatusResponse } from './firecrawl/src/index';
|
|||||||
const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY"});
|
const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY"});
|
||||||
|
|
||||||
// Scrape a website:
|
// Scrape a website:
|
||||||
const scrapeResult = await app.scrapeUrl('firecrawl.dev') as ScrapeResponse;
|
const scrapeResult = await app.scrapeUrl('firecrawl.dev');
|
||||||
|
|
||||||
if (scrapeResult) {
|
if (scrapeResult) {
|
||||||
console.log(scrapeResult.markdown)
|
console.log(scrapeResult.markdown)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Crawl a website:
|
// Crawl a website:
|
||||||
const crawlResult = await app.crawlUrl('mendable.ai', {crawlerOptions: {excludes: ['blog/*'], limit: 5}}, false);
|
const crawlResult = await app.crawlUrl('mendable.ai', {crawlerOptions: {excludePaths: ['blog/*'], limit: 5}}, false);
|
||||||
console.log(crawlResult)
|
console.log(crawlResult)
|
||||||
|
|
||||||
const jobId: string = await crawlResult['jobId'];
|
const jobId: string = await crawlResult['jobId'];
|
||||||
|
|||||||
@@ -1,10 +1,10 @@
|
|||||||
import FirecrawlApp, { ScrapeResponseV0, CrawlStatusResponseV0, SearchResponseV0 } from './firecrawl/src/index' //'@mendable/firecrawl-js';
|
import FirecrawlApp, { ScrapeResponseV0, CrawlStatusResponseV0, SearchResponseV0 } from './firecrawl/src/index' //'@mendable/firecrawl-js';
|
||||||
import { z } from "zod";
|
import { z } from "zod";
|
||||||
|
|
||||||
const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY", version: "v0"});
|
const app = new FirecrawlApp<"v0">({apiKey: "fc-YOUR_API_KEY", version: "v0"})
|
||||||
|
|
||||||
// Scrape a website:
|
// Scrape a website:
|
||||||
const scrapeResult = await app.scrapeUrl('firecrawl.dev') as ScrapeResponseV0;
|
const scrapeResult = await app.scrapeUrl('firecrawl.dev');
|
||||||
|
|
||||||
if (scrapeResult.data) {
|
if (scrapeResult.data) {
|
||||||
console.log(scrapeResult.data.content)
|
console.log(scrapeResult.data.content)
|
||||||
@@ -53,9 +53,7 @@ const zodSchema = z.object({
|
|||||||
.describe("Top 5 stories on Hacker News"),
|
.describe("Top 5 stories on Hacker News"),
|
||||||
});
|
});
|
||||||
|
|
||||||
let llmExtractionResult = await app.scrapeUrl("https://news.ycombinator.com", {
|
let llmExtractionResult = await app.scrapeUrl("https://news.ycombinator.com");
|
||||||
extractorOptions: { extractionSchema: zodSchema },
|
|
||||||
});
|
|
||||||
|
|
||||||
if (llmExtractionResult.data) {
|
if (llmExtractionResult.data) {
|
||||||
console.log(llmExtractionResult.data[0].llm_extraction);
|
console.log(llmExtractionResult.data[0].llm_extraction);
|
||||||
|
|||||||
@@ -1,163 +1,331 @@
|
|||||||
import FirecrawlApp, { CrawlResponseV0, CrawlStatusResponse, CrawlStatusResponseV0, FirecrawlDocumentV0, ScrapeResponseV0, SearchResponseV0 } from '../../index';
|
import FirecrawlApp, {
|
||||||
import { v4 as uuidv4 } from 'uuid';
|
CrawlResponseV0,
|
||||||
import dotenv from 'dotenv';
|
CrawlStatusResponse,
|
||||||
import { describe, test, expect } from '@jest/globals';
|
CrawlStatusResponseV0,
|
||||||
|
FirecrawlDocumentV0,
|
||||||
|
ScrapeResponseV0,
|
||||||
|
SearchResponseV0,
|
||||||
|
} from "../../index";
|
||||||
|
import { v4 as uuidv4 } from "uuid";
|
||||||
|
import dotenv from "dotenv";
|
||||||
|
import { describe, test, expect } from "@jest/globals";
|
||||||
|
|
||||||
dotenv.config();
|
dotenv.config();
|
||||||
|
|
||||||
const TEST_API_KEY = process.env.TEST_API_KEY;
|
const TEST_API_KEY = process.env.TEST_API_KEY;
|
||||||
const API_URL = "http://127.0.0.1:3002";
|
const API_URL = "http://127.0.0.1:3002";
|
||||||
|
|
||||||
describe('FirecrawlApp E2E Tests', () => {
|
describe('FirecrawlApp<"v0"> E2E Tests', () => {
|
||||||
test.concurrent('should throw error for no API key', async () => {
|
test.concurrent("should throw error for no API key", async () => {
|
||||||
expect(() => {
|
expect(() => {
|
||||||
new FirecrawlApp({ apiKey: null, apiUrl: API_URL, version: "v0" });
|
new FirecrawlApp<"v0">({ apiKey: null, apiUrl: API_URL, version: "v0" });
|
||||||
}).toThrow("No API key provided");
|
}).toThrow("No API key provided");
|
||||||
});
|
});
|
||||||
|
|
||||||
test.concurrent('should throw error for invalid API key on scrape', async () => {
|
test.concurrent(
|
||||||
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL, version: "v0" });
|
"should throw error for invalid API key on scrape",
|
||||||
await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
|
async () => {
|
||||||
|
const invalidApp = new FirecrawlApp<"v0">({
|
||||||
|
apiKey: "invalid_api_key",
|
||||||
|
apiUrl: API_URL,
|
||||||
|
version: "v0",
|
||||||
});
|
});
|
||||||
|
await expect(
|
||||||
|
invalidApp.scrapeUrl("https://roastmywebsite.ai")
|
||||||
|
).rejects.toThrow("Request failed with status code 401");
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
test.concurrent('should throw error for blocklisted URL on scrape', async () => {
|
test.concurrent(
|
||||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" });
|
"should throw error for blocklisted URL on scrape",
|
||||||
|
async () => {
|
||||||
|
const app = new FirecrawlApp<"v0">({
|
||||||
|
apiKey: TEST_API_KEY,
|
||||||
|
apiUrl: API_URL,
|
||||||
|
version: "v0",
|
||||||
|
});
|
||||||
const blocklistedUrl = "https://facebook.com/fake-test";
|
const blocklistedUrl = "https://facebook.com/fake-test";
|
||||||
await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
|
await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow(
|
||||||
|
"Request failed with status code 403"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
test.concurrent(
|
||||||
|
"should return successful response with valid preview token",
|
||||||
|
async () => {
|
||||||
|
const app = new FirecrawlApp<"v0">({
|
||||||
|
apiKey: "this_is_just_a_preview_token",
|
||||||
|
apiUrl: API_URL,
|
||||||
|
version: "v0",
|
||||||
});
|
});
|
||||||
|
const response = (await app.scrapeUrl(
|
||||||
test.concurrent('should return successful response with valid preview token', async () => {
|
"https://roastmywebsite.ai"
|
||||||
const app = new FirecrawlApp({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL, version: "v0" });
|
)) as ScrapeResponseV0;
|
||||||
const response = await app.scrapeUrl('https://roastmywebsite.ai') as ScrapeResponseV0;
|
|
||||||
expect(response).not.toBeNull();
|
expect(response).not.toBeNull();
|
||||||
expect(response.data?.content).toContain("_Roast_");
|
expect(response.data?.content).toContain("_Roast_");
|
||||||
}, 30000); // 30 seconds timeout
|
},
|
||||||
|
30000
|
||||||
|
); // 30 seconds timeout
|
||||||
|
|
||||||
test.concurrent('should return successful response for valid scrape', async () => {
|
test.concurrent(
|
||||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" });
|
"should return successful response for valid scrape",
|
||||||
const response = await app.scrapeUrl('https://roastmywebsite.ai') as ScrapeResponseV0;
|
async () => {
|
||||||
|
const app = new FirecrawlApp<"v0">({
|
||||||
|
apiKey: TEST_API_KEY,
|
||||||
|
apiUrl: API_URL,
|
||||||
|
version: "v0",
|
||||||
|
});
|
||||||
|
const response = (await app.scrapeUrl(
|
||||||
|
"https://roastmywebsite.ai"
|
||||||
|
)) as ScrapeResponseV0;
|
||||||
expect(response).not.toBeNull();
|
expect(response).not.toBeNull();
|
||||||
expect(response.data?.content).toContain("_Roast_");
|
expect(response.data?.content).toContain("_Roast_");
|
||||||
expect(response.data).toHaveProperty('markdown');
|
expect(response.data).toHaveProperty("markdown");
|
||||||
expect(response.data).toHaveProperty('metadata');
|
expect(response.data).toHaveProperty("metadata");
|
||||||
expect(response.data).not.toHaveProperty('html');
|
expect(response.data).not.toHaveProperty("html");
|
||||||
}, 30000); // 30 seconds timeout
|
},
|
||||||
|
30000
|
||||||
|
); // 30 seconds timeout
|
||||||
|
|
||||||
test.concurrent('should return successful response with valid API key and include HTML', async () => {
|
test.concurrent(
|
||||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" });
|
"should return successful response with valid API key and include HTML",
|
||||||
const response = await app.scrapeUrl('https://roastmywebsite.ai', { pageOptions: { includeHtml: true } }) as ScrapeResponseV0;
|
async () => {
|
||||||
|
const app = new FirecrawlApp<"v0">({
|
||||||
|
apiKey: TEST_API_KEY,
|
||||||
|
apiUrl: API_URL,
|
||||||
|
version: "v0",
|
||||||
|
});
|
||||||
|
const response = (await app.scrapeUrl("https://roastmywebsite.ai", {
|
||||||
|
pageOptions: { includeHtml: true },
|
||||||
|
})) as ScrapeResponseV0;
|
||||||
expect(response).not.toBeNull();
|
expect(response).not.toBeNull();
|
||||||
expect(response.data?.content).toContain("_Roast_");
|
expect(response.data?.content).toContain("_Roast_");
|
||||||
expect(response.data?.markdown).toContain("_Roast_");
|
expect(response.data?.markdown).toContain("_Roast_");
|
||||||
expect(response.data?.html).toContain("<h1");
|
expect(response.data?.html).toContain("<h1");
|
||||||
}, 30000); // 30 seconds timeout
|
},
|
||||||
|
30000
|
||||||
|
); // 30 seconds timeout
|
||||||
|
|
||||||
test.concurrent('should return successful response for valid scrape with PDF file', async () => {
|
test.concurrent(
|
||||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" });
|
"should return successful response for valid scrape with PDF file",
|
||||||
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf') as ScrapeResponseV0;
|
async () => {
|
||||||
expect(response).not.toBeNull();
|
const app = new FirecrawlApp<"v0">({
|
||||||
expect(response.data?.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
apiKey: TEST_API_KEY,
|
||||||
}, 30000); // 30 seconds timeout
|
apiUrl: API_URL,
|
||||||
|
version: "v0",
|
||||||
test.concurrent('should return successful response for valid scrape with PDF file without explicit extension', async () => {
|
|
||||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" });
|
|
||||||
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001') as ScrapeResponseV0;
|
|
||||||
expect(response).not.toBeNull();
|
|
||||||
expect(response.data?.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
|
||||||
}, 30000); // 30 seconds timeout
|
|
||||||
|
|
||||||
test.concurrent('should throw error for invalid API key on crawl', async () => {
|
|
||||||
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL, version: "v0" });
|
|
||||||
await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
|
|
||||||
});
|
});
|
||||||
|
const response = (await app.scrapeUrl(
|
||||||
|
"https://arxiv.org/pdf/astro-ph/9301001.pdf"
|
||||||
|
)) as ScrapeResponseV0;
|
||||||
|
expect(response).not.toBeNull();
|
||||||
|
expect(response.data?.content).toContain(
|
||||||
|
"We present spectrophotometric observations of the Broad Line Radio Galaxy"
|
||||||
|
);
|
||||||
|
},
|
||||||
|
30000
|
||||||
|
); // 30 seconds timeout
|
||||||
|
|
||||||
test.concurrent('should throw error for blocklisted URL on crawl', async () => {
|
test.concurrent(
|
||||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" });
|
"should return successful response for valid scrape with PDF file without explicit extension",
|
||||||
|
async () => {
|
||||||
|
const app = new FirecrawlApp<"v0">({
|
||||||
|
apiKey: TEST_API_KEY,
|
||||||
|
apiUrl: API_URL,
|
||||||
|
version: "v0",
|
||||||
|
});
|
||||||
|
const response = (await app.scrapeUrl(
|
||||||
|
"https://arxiv.org/pdf/astro-ph/9301001"
|
||||||
|
)) as ScrapeResponseV0;
|
||||||
|
expect(response).not.toBeNull();
|
||||||
|
expect(response.data?.content).toContain(
|
||||||
|
"We present spectrophotometric observations of the Broad Line Radio Galaxy"
|
||||||
|
);
|
||||||
|
},
|
||||||
|
30000
|
||||||
|
); // 30 seconds timeout
|
||||||
|
|
||||||
|
test.concurrent(
|
||||||
|
"should throw error for invalid API key on crawl",
|
||||||
|
async () => {
|
||||||
|
const invalidApp = new FirecrawlApp<"v0">({
|
||||||
|
apiKey: "invalid_api_key",
|
||||||
|
apiUrl: API_URL,
|
||||||
|
version: "v0",
|
||||||
|
});
|
||||||
|
await expect(
|
||||||
|
invalidApp.crawlUrl("https://roastmywebsite.ai")
|
||||||
|
).rejects.toThrow("Request failed with status code 401");
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
test.concurrent(
|
||||||
|
"should throw error for blocklisted URL on crawl",
|
||||||
|
async () => {
|
||||||
|
const app = new FirecrawlApp<"v0">({
|
||||||
|
apiKey: TEST_API_KEY,
|
||||||
|
apiUrl: API_URL,
|
||||||
|
version: "v0",
|
||||||
|
});
|
||||||
const blocklistedUrl = "https://twitter.com/fake-test";
|
const blocklistedUrl = "https://twitter.com/fake-test";
|
||||||
await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
|
await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow(
|
||||||
});
|
"Request failed with status code 403"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
test.concurrent('should return successful response for crawl and wait for completion', async () => {
|
test.concurrent(
|
||||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" });
|
"should return successful response for crawl and wait for completion",
|
||||||
const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, true, 10) as FirecrawlDocumentV0[];
|
async () => {
|
||||||
|
const app = new FirecrawlApp<"v0">({
|
||||||
|
apiKey: TEST_API_KEY,
|
||||||
|
apiUrl: API_URL,
|
||||||
|
version: "v0",
|
||||||
|
});
|
||||||
|
const response = (await app.crawlUrl(
|
||||||
|
"https://roastmywebsite.ai",
|
||||||
|
{ crawlerOptions: { excludes: ["blog/*"] } },
|
||||||
|
true,
|
||||||
|
10
|
||||||
|
)) as FirecrawlDocumentV0[];
|
||||||
expect(response).not.toBeNull();
|
expect(response).not.toBeNull();
|
||||||
console.log({ response });
|
console.log({ response });
|
||||||
expect(response[0].content).toContain("_Roast_");
|
expect(response[0].content).toContain("_Roast_");
|
||||||
}, 60000); // 60 seconds timeout
|
},
|
||||||
|
60000
|
||||||
|
); // 60 seconds timeout
|
||||||
|
|
||||||
test.concurrent('should handle idempotency key for crawl', async () => {
|
test.concurrent("should handle idempotency key for crawl", async () => {
|
||||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" });
|
const app = new FirecrawlApp<"v0">({
|
||||||
|
apiKey: TEST_API_KEY,
|
||||||
|
apiUrl: API_URL,
|
||||||
|
version: "v0",
|
||||||
|
});
|
||||||
const uniqueIdempotencyKey = uuidv4();
|
const uniqueIdempotencyKey = uuidv4();
|
||||||
const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, false, 2, uniqueIdempotencyKey) as CrawlResponseV0;
|
const response = (await app.crawlUrl(
|
||||||
|
"https://roastmywebsite.ai",
|
||||||
|
{ crawlerOptions: { excludes: ["blog/*"] } },
|
||||||
|
false,
|
||||||
|
2,
|
||||||
|
uniqueIdempotencyKey
|
||||||
|
)) as CrawlResponseV0;
|
||||||
expect(response).not.toBeNull();
|
expect(response).not.toBeNull();
|
||||||
expect(response.jobId).toBeDefined();
|
expect(response.jobId).toBeDefined();
|
||||||
|
|
||||||
await expect(app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
|
await expect(
|
||||||
|
app.crawlUrl(
|
||||||
|
"https://roastmywebsite.ai",
|
||||||
|
{ crawlerOptions: { excludes: ["blog/*"] } },
|
||||||
|
true,
|
||||||
|
2,
|
||||||
|
uniqueIdempotencyKey
|
||||||
|
)
|
||||||
|
).rejects.toThrow("Request failed with status code 409");
|
||||||
});
|
});
|
||||||
|
|
||||||
test.concurrent('should check crawl status', async () => {
|
test.concurrent(
|
||||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" });
|
"should check crawl status",
|
||||||
const response: any = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, false) as CrawlResponseV0;
|
async () => {
|
||||||
|
const app = new FirecrawlApp<"v0">({
|
||||||
|
apiKey: TEST_API_KEY,
|
||||||
|
apiUrl: API_URL,
|
||||||
|
version: "v0",
|
||||||
|
});
|
||||||
|
const response: any = (await app.crawlUrl(
|
||||||
|
"https://roastmywebsite.ai",
|
||||||
|
{ crawlerOptions: { excludes: ["blog/*"] } },
|
||||||
|
false
|
||||||
|
)) as CrawlResponseV0;
|
||||||
expect(response).not.toBeNull();
|
expect(response).not.toBeNull();
|
||||||
expect(response.jobId).toBeDefined();
|
expect(response.jobId).toBeDefined();
|
||||||
|
|
||||||
let statusResponse: any = await app.checkCrawlStatus(response.jobId);
|
let statusResponse = await app.checkCrawlStatus(response.jobId);
|
||||||
const maxChecks = 15;
|
const maxChecks = 15;
|
||||||
let checks = 0;
|
let checks = 0;
|
||||||
|
|
||||||
while (statusResponse.status === 'active' && checks < maxChecks) {
|
while (statusResponse.status === "active" && checks < maxChecks) {
|
||||||
await new Promise(resolve => setTimeout(resolve, 5000));
|
await new Promise((resolve) => setTimeout(resolve, 5000));
|
||||||
expect(statusResponse.partial_data).not.toBeNull();
|
expect(statusResponse.partial_data).not.toBeNull();
|
||||||
// expect(statusResponse.current).toBeGreaterThanOrEqual(1);
|
// expect(statusResponse.current).toBeGreaterThanOrEqual(1);
|
||||||
statusResponse = await app.checkCrawlStatus(response.jobId) as CrawlStatusResponseV0;
|
statusResponse = (await app.checkCrawlStatus(
|
||||||
|
response.jobId
|
||||||
|
)) as CrawlStatusResponseV0;
|
||||||
checks++;
|
checks++;
|
||||||
}
|
}
|
||||||
|
|
||||||
expect(statusResponse).not.toBeNull();
|
expect(statusResponse).not.toBeNull();
|
||||||
expect(statusResponse.success).toBe(true);
|
expect(statusResponse.success).toBe(true);
|
||||||
expect(statusResponse.status).toBe('completed');
|
expect(statusResponse.status).toBe("completed");
|
||||||
expect(statusResponse.total).toEqual(statusResponse.current);
|
expect(statusResponse.total).toEqual(statusResponse.current);
|
||||||
expect(statusResponse.current_step).not.toBeNull();
|
expect(statusResponse.current_step).not.toBeNull();
|
||||||
expect(statusResponse.current).toBeGreaterThanOrEqual(1);
|
expect(statusResponse.current).toBeGreaterThanOrEqual(1);
|
||||||
|
|
||||||
expect(statusResponse?.data?.length).toBeGreaterThan(0);
|
expect(statusResponse?.data?.length).toBeGreaterThan(0);
|
||||||
}, 35000); // 35 seconds timeout
|
},
|
||||||
|
35000
|
||||||
|
); // 35 seconds timeout
|
||||||
|
|
||||||
test.concurrent('should return successful response for search', async () => {
|
test.concurrent(
|
||||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" });
|
"should return successful response for search",
|
||||||
const response = await app.search("test query") as SearchResponseV0;
|
async () => {
|
||||||
|
const app = new FirecrawlApp<"v0">({
|
||||||
|
apiKey: TEST_API_KEY,
|
||||||
|
apiUrl: API_URL,
|
||||||
|
version: "v0",
|
||||||
|
});
|
||||||
|
const response = (await app.search("test query")) as SearchResponseV0;
|
||||||
expect(response).not.toBeNull();
|
expect(response).not.toBeNull();
|
||||||
expect(response?.data?.[0]?.content).toBeDefined();
|
expect(response?.data?.[0]?.content).toBeDefined();
|
||||||
expect(response?.data?.length).toBeGreaterThan(2);
|
expect(response?.data?.length).toBeGreaterThan(2);
|
||||||
}, 30000); // 30 seconds timeout
|
|
||||||
|
|
||||||
test.concurrent('should throw error for invalid API key on search', async () => {
|
|
||||||
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL, version: "v0" });
|
|
||||||
await expect(invalidApp.search("test query")).rejects.toThrow("Request failed with status code 401");
|
|
||||||
});
|
|
||||||
|
|
||||||
test.concurrent('should perform LLM extraction', async () => {
|
|
||||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL, version: "v0" });
|
|
||||||
const response = await app.scrapeUrl("https://mendable.ai", {
|
|
||||||
extractorOptions: {
|
|
||||||
mode: 'llm-extraction',
|
|
||||||
extractionPrompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
|
|
||||||
extractionSchema: {
|
|
||||||
type: 'object',
|
|
||||||
properties: {
|
|
||||||
company_mission: { type: 'string' },
|
|
||||||
supports_sso: { type: 'boolean' },
|
|
||||||
is_open_source: { type: 'boolean' }
|
|
||||||
},
|
},
|
||||||
required: ['company_mission', 'supports_sso', 'is_open_source']
|
30000
|
||||||
|
); // 30 seconds timeout
|
||||||
|
|
||||||
|
test.concurrent(
|
||||||
|
"should throw error for invalid API key on search",
|
||||||
|
async () => {
|
||||||
|
const invalidApp = new FirecrawlApp<"v0">({
|
||||||
|
apiKey: "invalid_api_key",
|
||||||
|
apiUrl: API_URL,
|
||||||
|
version: "v0",
|
||||||
|
});
|
||||||
|
await expect(invalidApp.search("test query")).rejects.toThrow(
|
||||||
|
"Request failed with status code 401"
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
);
|
||||||
}) as ScrapeResponseV0;
|
|
||||||
|
test.concurrent(
|
||||||
|
"should perform LLM extraction",
|
||||||
|
async () => {
|
||||||
|
const app = new FirecrawlApp<"v0">({
|
||||||
|
apiKey: TEST_API_KEY,
|
||||||
|
apiUrl: API_URL,
|
||||||
|
version: "v0",
|
||||||
|
});
|
||||||
|
const response = (await app.scrapeUrl("https://mendable.ai", {
|
||||||
|
extractorOptions: {
|
||||||
|
mode: "llm-extraction",
|
||||||
|
extractionPrompt:
|
||||||
|
"Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
|
||||||
|
extractionSchema: {
|
||||||
|
type: "object",
|
||||||
|
properties: {
|
||||||
|
company_mission: { type: "string" },
|
||||||
|
supports_sso: { type: "boolean" },
|
||||||
|
is_open_source: { type: "boolean" },
|
||||||
|
},
|
||||||
|
required: ["company_mission", "supports_sso", "is_open_source"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})) as ScrapeResponseV0;
|
||||||
expect(response).not.toBeNull();
|
expect(response).not.toBeNull();
|
||||||
expect(response.data?.llm_extraction).toBeDefined();
|
expect(response.data?.llm_extraction).toBeDefined();
|
||||||
const llmExtraction = response.data?.llm_extraction;
|
const llmExtraction = response.data?.llm_extraction;
|
||||||
expect(llmExtraction?.company_mission).toBeDefined();
|
expect(llmExtraction?.company_mission).toBeDefined();
|
||||||
expect(typeof llmExtraction?.supports_sso).toBe('boolean');
|
expect(typeof llmExtraction?.supports_sso).toBe("boolean");
|
||||||
expect(typeof llmExtraction?.is_open_source).toBe('boolean');
|
expect(typeof llmExtraction?.is_open_source).toBe("boolean");
|
||||||
}, 30000); // 30 seconds timeout
|
},
|
||||||
|
30000
|
||||||
|
); // 30 seconds timeout
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -31,7 +31,7 @@ describe('the firecrawl JS SDK', () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
const apiKey = 'YOUR_API_KEY'
|
const apiKey = 'YOUR_API_KEY'
|
||||||
const app = new FirecrawlApp({ apiKey });
|
const app = new FirecrawlApp<"v0">({ apiKey });
|
||||||
// Scrape a single URL
|
// Scrape a single URL
|
||||||
const url = 'https://mendable.ai';
|
const url = 'https://mendable.ai';
|
||||||
const scrapedData = await app.scrapeUrl(url);
|
const scrapedData = await app.scrapeUrl(url);
|
||||||
|
|||||||
@@ -222,7 +222,7 @@ describe('FirecrawlApp E2E Tests', () => {
|
|||||||
expect(response).not.toBeNull();
|
expect(response).not.toBeNull();
|
||||||
expect(response.id).toBeDefined();
|
expect(response.id).toBeDefined();
|
||||||
|
|
||||||
let statusResponse: any = await app.checkCrawlStatus(response.id) as CrawlStatusResponse;
|
let statusResponse = await app.checkCrawlStatus(response.id);
|
||||||
const maxChecks = 15;
|
const maxChecks = 15;
|
||||||
let checks = 0;
|
let checks = 0;
|
||||||
|
|
||||||
|
|||||||
@@ -315,10 +315,10 @@ export interface SearchResponseV0 {
|
|||||||
* Main class for interacting with the Firecrawl API.
|
* Main class for interacting with the Firecrawl API.
|
||||||
* Provides methods for scraping, searching, crawling, and mapping web content.
|
* Provides methods for scraping, searching, crawling, and mapping web content.
|
||||||
*/
|
*/
|
||||||
export default class FirecrawlApp {
|
export default class FirecrawlApp<T extends "v0" | "v1"> {
|
||||||
private apiKey: string;
|
private apiKey: string;
|
||||||
private apiUrl: string;
|
private apiUrl: string;
|
||||||
private version: "v0" | "v1";
|
public version: T;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initializes a new instance of the FirecrawlApp class.
|
* Initializes a new instance of the FirecrawlApp class.
|
||||||
@@ -327,7 +327,7 @@ export default class FirecrawlApp {
|
|||||||
constructor({ apiKey = null, apiUrl = null, version = "v1" }: FirecrawlAppConfig) {
|
constructor({ apiKey = null, apiUrl = null, version = "v1" }: FirecrawlAppConfig) {
|
||||||
this.apiKey = apiKey || "";
|
this.apiKey = apiKey || "";
|
||||||
this.apiUrl = apiUrl || "https://api.firecrawl.dev";
|
this.apiUrl = apiUrl || "https://api.firecrawl.dev";
|
||||||
this.version = version;
|
this.version = version as T;
|
||||||
if (!this.apiKey) {
|
if (!this.apiKey) {
|
||||||
throw new Error("No API key provided");
|
throw new Error("No API key provided");
|
||||||
}
|
}
|
||||||
@@ -342,7 +342,7 @@ export default class FirecrawlApp {
|
|||||||
async scrapeUrl(
|
async scrapeUrl(
|
||||||
url: string,
|
url: string,
|
||||||
params?: ScrapeParams | ScrapeParamsV0
|
params?: ScrapeParams | ScrapeParamsV0
|
||||||
): Promise<ScrapeResponse | ScrapeResponseV0> {
|
): Promise<this['version'] extends 'v0' ? ScrapeResponseV0 : ScrapeResponse> {
|
||||||
const headers: AxiosRequestHeaders = {
|
const headers: AxiosRequestHeaders = {
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
Authorization: `Bearer ${this.apiKey}`,
|
Authorization: `Bearer ${this.apiKey}`,
|
||||||
@@ -372,16 +372,12 @@ export default class FirecrawlApp {
|
|||||||
if (response.status === 200) {
|
if (response.status === 200) {
|
||||||
const responseData = response.data;
|
const responseData = response.data;
|
||||||
if (responseData.success) {
|
if (responseData.success) {
|
||||||
if (this.version == 'v0') {
|
return (this.version === 'v0' ? responseData as ScrapeResponseV0 : {
|
||||||
return responseData as ScrapeResponseV0;
|
|
||||||
} else {
|
|
||||||
return {
|
|
||||||
success: true,
|
success: true,
|
||||||
warning: responseData.warning,
|
warning: responseData.warning,
|
||||||
error: responseData.error,
|
error: responseData.error,
|
||||||
...responseData.data
|
...responseData.data
|
||||||
} as ScrapeResponse;
|
}) as ScrapeResponse;
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
|
throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
|
||||||
}
|
}
|
||||||
@@ -391,7 +387,7 @@ export default class FirecrawlApp {
|
|||||||
} catch (error: any) {
|
} catch (error: any) {
|
||||||
throw new Error(error.message);
|
throw new Error(error.message);
|
||||||
}
|
}
|
||||||
return { success: false, error: "Internal server error." };
|
return { success: false, error: "Internal server error." } as this['version'] extends 'v0' ? ScrapeResponseV0 : ScrapeResponse;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -405,7 +401,7 @@ export default class FirecrawlApp {
|
|||||||
params?: SearchParamsV0
|
params?: SearchParamsV0
|
||||||
): Promise<SearchResponseV0> {
|
): Promise<SearchResponseV0> {
|
||||||
if (this.version === "v1") {
|
if (this.version === "v1") {
|
||||||
throw new Error("Search is not supported in v1");
|
throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
|
||||||
}
|
}
|
||||||
|
|
||||||
const headers: AxiosRequestHeaders = {
|
const headers: AxiosRequestHeaders = {
|
||||||
@@ -449,11 +445,15 @@ export default class FirecrawlApp {
|
|||||||
*/
|
*/
|
||||||
async crawlUrl(
|
async crawlUrl(
|
||||||
url: string,
|
url: string,
|
||||||
params?: CrawlParams | CrawlParamsV0,
|
params?: this['version'] extends 'v0' ? CrawlParamsV0 : CrawlParams,
|
||||||
waitUntilDone: boolean = true,
|
waitUntilDone: boolean = true,
|
||||||
pollInterval: number = 2,
|
pollInterval: number = 2,
|
||||||
idempotencyKey?: string
|
idempotencyKey?: string
|
||||||
): Promise<CrawlResponse | CrawlResponseV0 | CrawlStatusResponse | CrawlStatusResponseV0 | FirecrawlDocumentV0[]> {
|
): Promise<
|
||||||
|
this['version'] extends 'v0'
|
||||||
|
? CrawlResponseV0 | CrawlStatusResponseV0 | FirecrawlDocumentV0[]
|
||||||
|
: CrawlResponse | CrawlStatusResponse
|
||||||
|
> {
|
||||||
const headers = this.prepareHeaders(idempotencyKey);
|
const headers = this.prepareHeaders(idempotencyKey);
|
||||||
let jsonData: any = { url, ...params };
|
let jsonData: any = { url, ...params };
|
||||||
try {
|
try {
|
||||||
@@ -463,13 +463,13 @@ export default class FirecrawlApp {
|
|||||||
headers
|
headers
|
||||||
);
|
);
|
||||||
if (response.status === 200) {
|
if (response.status === 200) {
|
||||||
const id: string = this.version == 'v0' ? response.data.jobId : response.data.id;
|
const id: string = this.version === 'v0' ? response.data.jobId : response.data.id;
|
||||||
let checkUrl: string | undefined = undefined;
|
let checkUrl: string | undefined = undefined;
|
||||||
if (waitUntilDone) {
|
if (waitUntilDone) {
|
||||||
if (this.version == 'v1') { checkUrl = response.data.url }
|
if (this.version === 'v1') { checkUrl = response.data.url }
|
||||||
return this.monitorJobStatus(id, headers, pollInterval, checkUrl);
|
return this.monitorJobStatus(id, headers, pollInterval, checkUrl);
|
||||||
} else {
|
} else {
|
||||||
if (this.version == 'v0') {
|
if (this.version === 'v0') {
|
||||||
return {
|
return {
|
||||||
success: true,
|
success: true,
|
||||||
jobId: id
|
jobId: id
|
||||||
@@ -485,13 +485,13 @@ export default class FirecrawlApp {
|
|||||||
this.handleError(response, "start crawl job");
|
this.handleError(response, "start crawl job");
|
||||||
}
|
}
|
||||||
} catch (error: any) {
|
} catch (error: any) {
|
||||||
if (error.response.data.error) {
|
if (error.response?.data?.error) {
|
||||||
throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`);
|
throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`);
|
||||||
} else {
|
} else {
|
||||||
throw new Error(error.message);
|
throw new Error(error.message);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return { success: false, error: "Internal server error." };
|
return { success: false, error: "Internal server error." } as this['version'] extends 'v0' ? CrawlResponseV0 : CrawlResponse;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -499,7 +499,7 @@ export default class FirecrawlApp {
|
|||||||
* @param id - The ID of the crawl operation.
|
* @param id - The ID of the crawl operation.
|
||||||
* @returns The response containing the job status.
|
* @returns The response containing the job status.
|
||||||
*/
|
*/
|
||||||
async checkCrawlStatus(id?: string): Promise<CrawlStatusResponse | CrawlStatusResponseV0> {
|
async checkCrawlStatus(id?: string): Promise<this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse> {
|
||||||
if (!id) {
|
if (!id) {
|
||||||
throw new Error("No crawl ID provided");
|
throw new Error("No crawl ID provided");
|
||||||
}
|
}
|
||||||
@@ -507,14 +507,14 @@ export default class FirecrawlApp {
|
|||||||
const headers: AxiosRequestHeaders = this.prepareHeaders();
|
const headers: AxiosRequestHeaders = this.prepareHeaders();
|
||||||
try {
|
try {
|
||||||
const response: AxiosResponse = await this.getRequest(
|
const response: AxiosResponse = await this.getRequest(
|
||||||
this.version == 'v1' ?
|
this.version === 'v1' ?
|
||||||
this.apiUrl + `/${this.version}/crawl/${id}` :
|
`${this.apiUrl}/${this.version}/crawl/${id}` :
|
||||||
this.apiUrl + `/${this.version}/crawl/status/${id}`,
|
`${this.apiUrl}/${this.version}/crawl/status/${id}`,
|
||||||
headers
|
headers
|
||||||
);
|
);
|
||||||
if (response.status === 200) {
|
if (response.status === 200) {
|
||||||
if (this.version == 'v0') {
|
if (this.version === 'v0') {
|
||||||
return {
|
return ({
|
||||||
success: true,
|
success: true,
|
||||||
status: response.data.status,
|
status: response.data.status,
|
||||||
current: response.data.current,
|
current: response.data.current,
|
||||||
@@ -525,9 +525,9 @@ export default class FirecrawlApp {
|
|||||||
partial_data: !response.data.data
|
partial_data: !response.data.data
|
||||||
? response.data.partial_data
|
? response.data.partial_data
|
||||||
: undefined,
|
: undefined,
|
||||||
} as CrawlStatusResponseV0;
|
} as CrawlStatusResponseV0) as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse;
|
||||||
} else if (this.version == 'v1') {
|
} else {
|
||||||
return {
|
return ({
|
||||||
success: true,
|
success: true,
|
||||||
status: response.data.status,
|
status: response.data.status,
|
||||||
totalCount: response.data.totalCount,
|
totalCount: response.data.totalCount,
|
||||||
@@ -536,7 +536,7 @@ export default class FirecrawlApp {
|
|||||||
next: response.data.next,
|
next: response.data.next,
|
||||||
data: response.data.data,
|
data: response.data.data,
|
||||||
error: response.data.error
|
error: response.data.error
|
||||||
} as CrawlStatusResponse;
|
} as CrawlStatusResponse) as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
this.handleError(response, "check crawl status");
|
this.handleError(response, "check crawl status");
|
||||||
@@ -545,8 +545,8 @@ export default class FirecrawlApp {
|
|||||||
throw new Error(error.message);
|
throw new Error(error.message);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this.version == 'v0') {
|
return this.version === 'v0' ?
|
||||||
return {
|
({
|
||||||
success: false,
|
success: false,
|
||||||
status: "unknown",
|
status: "unknown",
|
||||||
current: 0,
|
current: 0,
|
||||||
@@ -554,13 +554,11 @@ export default class FirecrawlApp {
|
|||||||
current_step: "",
|
current_step: "",
|
||||||
total: 0,
|
total: 0,
|
||||||
error: "Internal server error.",
|
error: "Internal server error.",
|
||||||
} as CrawlStatusResponseV0;
|
} as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse) :
|
||||||
} else {
|
({
|
||||||
return {
|
|
||||||
success: false,
|
success: false,
|
||||||
error: "Internal server error.",
|
error: "Internal server error.",
|
||||||
} as CrawlStatusResponse;
|
} as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async mapUrl(url: string, params?: MapParams): Promise<MapResponse> {
|
async mapUrl(url: string, params?: MapParams): Promise<MapResponse> {
|
||||||
@@ -633,6 +631,7 @@ export default class FirecrawlApp {
|
|||||||
* @param id - The ID of the crawl operation.
|
* @param id - The ID of the crawl operation.
|
||||||
* @param headers - The headers for the request.
|
* @param headers - The headers for the request.
|
||||||
* @param checkInterval - Interval in seconds for job status checks.
|
* @param checkInterval - Interval in seconds for job status checks.
|
||||||
|
* @param checkUrl - Optional URL to check the status (used for v1 API)
|
||||||
* @returns The final job status or data.
|
* @returns The final job status or data.
|
||||||
*/
|
*/
|
||||||
async monitorJobStatus(
|
async monitorJobStatus(
|
||||||
@@ -640,13 +639,13 @@ export default class FirecrawlApp {
|
|||||||
headers: AxiosRequestHeaders,
|
headers: AxiosRequestHeaders,
|
||||||
checkInterval: number,
|
checkInterval: number,
|
||||||
checkUrl?: string
|
checkUrl?: string
|
||||||
): Promise<CrawlStatusResponse | FirecrawlDocumentV0[]> {
|
): Promise<this['version'] extends 'v0' ? CrawlStatusResponseV0 | FirecrawlDocumentV0[] : CrawlStatusResponse> {
|
||||||
let apiUrl: string = '';
|
let apiUrl: string = '';
|
||||||
while (true) {
|
while (true) {
|
||||||
if (this.version == 'v1') {
|
if (this.version === 'v1') {
|
||||||
apiUrl = checkUrl ?? this.apiUrl + `/v1/crawl/${id}`;
|
apiUrl = checkUrl ?? `${this.apiUrl}/v1/crawl/${id}`;
|
||||||
} else if (this.version == 'v0') {
|
} else if (this.version === 'v0') {
|
||||||
apiUrl = checkUrl ?? this.apiUrl + `/v0/crawl/status/${id}`;
|
apiUrl = `${this.apiUrl}/v0/crawl/status/${id}`;
|
||||||
}
|
}
|
||||||
const statusResponse: AxiosResponse = await this.getRequest(
|
const statusResponse: AxiosResponse = await this.getRequest(
|
||||||
apiUrl,
|
apiUrl,
|
||||||
@@ -656,19 +655,17 @@ export default class FirecrawlApp {
|
|||||||
const statusData = statusResponse.data;
|
const statusData = statusResponse.data;
|
||||||
if (statusData.status === "completed") {
|
if (statusData.status === "completed") {
|
||||||
if ("data" in statusData) {
|
if ("data" in statusData) {
|
||||||
return this.version == 'v0' ? statusData.data : statusData;
|
return this.version === 'v0' ? statusData.data : statusData;
|
||||||
} else {
|
} else {
|
||||||
throw new Error("Crawl job completed but no data was returned");
|
throw new Error("Crawl job completed but no data was returned");
|
||||||
}
|
}
|
||||||
} else if (
|
} else if (
|
||||||
["active", "paused", "pending", "queued", "scraping"].includes(statusData.status)
|
["active", "paused", "pending", "queued", "scraping"].includes(statusData.status)
|
||||||
) {
|
) {
|
||||||
if (checkInterval < 2) {
|
checkInterval = Math.max(checkInterval, 2);
|
||||||
checkInterval = 2;
|
|
||||||
}
|
|
||||||
await new Promise((resolve) =>
|
await new Promise((resolve) =>
|
||||||
setTimeout(resolve, checkInterval * 1000)
|
setTimeout(resolve, checkInterval * 1000)
|
||||||
); // Wait for the specified timeout before checking again
|
);
|
||||||
} else {
|
} else {
|
||||||
throw new Error(
|
throw new Error(
|
||||||
`Crawl job failed or was stopped. Status: ${statusData.status}`
|
`Crawl job failed or was stopped. Status: ${statusData.status}`
|
||||||
|
|||||||
Reference in New Issue
Block a user