Merge branch 'main' into fix-attw
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
import FirecrawlApp from '@mendable/firecrawl-js';
|
||||
import FirecrawlApp from 'firecrawl';
|
||||
|
||||
const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY"});
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import FirecrawlApp, { CrawlStatusResponse, ErrorResponse } from '@mendable/firecrawl-js';
|
||||
import FirecrawlApp, { CrawlStatusResponse, ErrorResponse } from 'firecrawl';
|
||||
|
||||
const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY"});
|
||||
|
||||
|
||||
Generated
+5
-3
@@ -10,10 +10,8 @@
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"axios": "^1.6.8",
|
||||
"dotenv": "^16.4.5",
|
||||
"isows": "^1.0.4",
|
||||
"typescript-event-target": "^1.1.1",
|
||||
"uuid": "^9.0.1",
|
||||
"zod": "^3.23.8",
|
||||
"zod-to-json-schema": "^3.23.0"
|
||||
},
|
||||
@@ -25,10 +23,12 @@
|
||||
"@types/mocha": "^10.0.6",
|
||||
"@types/node": "^20.12.12",
|
||||
"@types/uuid": "^9.0.8",
|
||||
"dotenv": "^16.4.5",
|
||||
"jest": "^29.7.0",
|
||||
"ts-jest": "^29.2.2",
|
||||
"tsup": "^8.2.4",
|
||||
"typescript": "^5.4.5"
|
||||
"typescript": "^5.4.5",
|
||||
"uuid": "^9.0.1"
|
||||
}
|
||||
},
|
||||
"node_modules/@ampproject/remapping": {
|
||||
@@ -2502,6 +2502,7 @@
|
||||
"version": "16.4.5",
|
||||
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.5.tgz",
|
||||
"integrity": "sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==",
|
||||
"dev": true,
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
@@ -5290,6 +5291,7 @@
|
||||
"version": "9.0.1",
|
||||
"resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz",
|
||||
"integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==",
|
||||
"dev": true,
|
||||
"funding": [
|
||||
"https://github.com/sponsors/broofa",
|
||||
"https://github.com/sponsors/ctavan"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@mendable/firecrawl-js",
|
||||
"version": "1.2.1",
|
||||
"version": "1.2.3",
|
||||
"description": "JavaScript SDK for Firecrawl API",
|
||||
"main": "dist/index.js",
|
||||
"types": "dist/index.d.ts",
|
||||
@@ -26,10 +26,8 @@
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"axios": "^1.6.8",
|
||||
"dotenv": "^16.4.5",
|
||||
"isows": "^1.0.4",
|
||||
"typescript-event-target": "^1.1.1",
|
||||
"uuid": "^9.0.1",
|
||||
"zod": "^3.23.8",
|
||||
"zod-to-json-schema": "^3.23.0"
|
||||
},
|
||||
@@ -38,6 +36,8 @@
|
||||
},
|
||||
"homepage": "https://github.com/mendableai/firecrawl#readme",
|
||||
"devDependencies": {
|
||||
"uuid": "^9.0.1",
|
||||
"dotenv": "^16.4.5",
|
||||
"@jest/globals": "^29.7.0",
|
||||
"@types/axios": "^0.14.0",
|
||||
"@types/dotenv": "^8.2.0",
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import axios, { type AxiosResponse, type AxiosRequestHeaders } from "axios";
|
||||
import { z } from "zod";
|
||||
import type { ZodSchema } from "zod";
|
||||
import { zodToJsonSchema } from "zod-to-json-schema";
|
||||
import { WebSocket } from "isows";
|
||||
import { TypedEventTarget } from "typescript-event-target";
|
||||
@@ -81,7 +81,7 @@ export interface ScrapeParams {
|
||||
onlyMainContent?: boolean;
|
||||
extract?: {
|
||||
prompt?: string;
|
||||
schema?: z.ZodSchema | any;
|
||||
schema?: ZodSchema | any;
|
||||
systemPrompt?: string;
|
||||
};
|
||||
waitFor?: number;
|
||||
@@ -131,15 +131,14 @@ export interface CrawlResponse {
|
||||
*/
|
||||
export interface CrawlStatusResponse {
|
||||
success: true;
|
||||
total: number;
|
||||
status: "scraping" | "completed" | "failed" | "cancelled";
|
||||
completed: number;
|
||||
total: number;
|
||||
creditsUsed: number;
|
||||
expiresAt: Date;
|
||||
status: "scraping" | "completed" | "failed";
|
||||
next: string;
|
||||
data?: FirecrawlDocument[];
|
||||
error?: string;
|
||||
}
|
||||
next?: string;
|
||||
data: FirecrawlDocument[];
|
||||
};
|
||||
|
||||
/**
|
||||
* Parameters for mapping operations.
|
||||
@@ -329,9 +328,10 @@ export default class FirecrawlApp {
|
||||
/**
|
||||
* Checks the status of a crawl job using the Firecrawl API.
|
||||
* @param id - The ID of the crawl operation.
|
||||
* @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
|
||||
* @returns The response containing the job status.
|
||||
*/
|
||||
async checkCrawlStatus(id?: string): Promise<CrawlStatusResponse | ErrorResponse> {
|
||||
async checkCrawlStatus(id?: string, getAllData = false): Promise<CrawlStatusResponse | ErrorResponse> {
|
||||
if (!id) {
|
||||
throw new Error("No crawl ID provided");
|
||||
}
|
||||
@@ -342,17 +342,29 @@ export default class FirecrawlApp {
|
||||
`${this.apiUrl}/v1/crawl/${id}`,
|
||||
headers
|
||||
);
|
||||
if (response.status === 200) {
|
||||
if (response.status === 200 && getAllData) {
|
||||
let allData = response.data.data;
|
||||
if (response.data.status === "completed") {
|
||||
let statusData = response.data
|
||||
if ("data" in statusData) {
|
||||
let data = statusData.data;
|
||||
while ('next' in statusData) {
|
||||
statusData = (await this.getRequest(statusData.next, headers)).data;
|
||||
data = data.concat(statusData.data);
|
||||
}
|
||||
allData = data;
|
||||
}
|
||||
}
|
||||
return ({
|
||||
success: true,
|
||||
success: response.data.success,
|
||||
status: response.data.status,
|
||||
total: response.data.total,
|
||||
completed: response.data.completed,
|
||||
creditsUsed: response.data.creditsUsed,
|
||||
expiresAt: new Date(response.data.expiresAt),
|
||||
next: response.data.next,
|
||||
data: response.data.data,
|
||||
error: response.data.error
|
||||
data: allData,
|
||||
error: response.data.error,
|
||||
})
|
||||
} else {
|
||||
this.handleError(response, "check crawl status");
|
||||
@@ -452,22 +464,29 @@ export default class FirecrawlApp {
|
||||
id: string,
|
||||
headers: AxiosRequestHeaders,
|
||||
checkInterval: number
|
||||
): Promise<CrawlStatusResponse> {
|
||||
): Promise<CrawlStatusResponse | ErrorResponse> {
|
||||
while (true) {
|
||||
const statusResponse: AxiosResponse = await this.getRequest(
|
||||
let statusResponse: AxiosResponse = await this.getRequest(
|
||||
`${this.apiUrl}/v1/crawl/${id}`,
|
||||
headers
|
||||
);
|
||||
if (statusResponse.status === 200) {
|
||||
const statusData = statusResponse.data;
|
||||
if (statusData.status === "completed") {
|
||||
if ("data" in statusData) {
|
||||
return statusData;
|
||||
} else {
|
||||
throw new Error("Crawl job completed but no data was returned");
|
||||
}
|
||||
} else if (
|
||||
["active", "paused", "pending", "queued", "scraping"].includes(statusData.status)
|
||||
let statusData = statusResponse.data;
|
||||
if (statusData.status === "completed") {
|
||||
if ("data" in statusData) {
|
||||
let data = statusData.data;
|
||||
while ('next' in statusData) {
|
||||
statusResponse = await this.getRequest(statusData.next, headers);
|
||||
statusData = statusResponse.data;
|
||||
data = data.concat(statusData.data);
|
||||
}
|
||||
statusData.data = data;
|
||||
return statusData;
|
||||
} else {
|
||||
throw new Error("Crawl job completed but no data was returned");
|
||||
}
|
||||
} else if (
|
||||
["active", "paused", "pending", "queued", "waiting", "scraping"].includes(statusData.status)
|
||||
) {
|
||||
checkInterval = Math.max(checkInterval, 2);
|
||||
await new Promise((resolve) =>
|
||||
|
||||
Generated
+72
-4
@@ -9,8 +9,8 @@
|
||||
"version": "1.0.0",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"@mendable/firecrawl-js": "^0.0.36",
|
||||
"axios": "^1.6.8",
|
||||
"firecrawl": "^1.2.0",
|
||||
"ts-node": "^10.9.2",
|
||||
"typescript": "^5.4.5",
|
||||
"uuid": "^10.0.0",
|
||||
@@ -422,12 +422,14 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@mendable/firecrawl-js": {
|
||||
"version": "0.0.36",
|
||||
"resolved": "https://registry.npmjs.org/@mendable/firecrawl-js/-/firecrawl-js-0.0.36.tgz",
|
||||
"integrity": "sha512-5zQMWUD49r6Q7cxj+QBthQ964Bm9fMooW4E8E4nIca3BMXCeEuQFVf5C3OEWwZf0SjJvR+5Yx2wUbXJWd1wCOA==",
|
||||
"version": "1.2.2",
|
||||
"resolved": "https://registry.npmjs.org/@mendable/firecrawl-js/-/firecrawl-js-1.2.2.tgz",
|
||||
"integrity": "sha512-2A1GzLD0bczlFIlcjxHcm/x8i76ndtV4EUzOfc81oOJ/HbycE2mbT6EUthoL+r4s5A8yO3bKr9o/GxmEn456VA==",
|
||||
"dependencies": {
|
||||
"axios": "^1.6.8",
|
||||
"dotenv": "^16.4.5",
|
||||
"isows": "^1.0.4",
|
||||
"typescript-event-target": "^1.1.1",
|
||||
"uuid": "^9.0.1",
|
||||
"zod": "^3.23.8",
|
||||
"zod-to-json-schema": "^3.23.0"
|
||||
@@ -594,6 +596,32 @@
|
||||
"@esbuild/win32-x64": "0.20.2"
|
||||
}
|
||||
},
|
||||
"node_modules/firecrawl": {
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/firecrawl/-/firecrawl-1.2.0.tgz",
|
||||
"integrity": "sha512-Sy1BCCvs5FhGc4yxPP7NG9iWnK8RXdvA1ZS/K1Gj+LrEN3iAT2WRzhYET7x8G2bif25F6rHJg57vdVb5sr6RyQ==",
|
||||
"dependencies": {
|
||||
"axios": "^1.6.8",
|
||||
"dotenv": "^16.4.5",
|
||||
"isows": "^1.0.4",
|
||||
"typescript-event-target": "^1.1.1",
|
||||
"uuid": "^9.0.1",
|
||||
"zod": "^3.23.8",
|
||||
"zod-to-json-schema": "^3.23.0"
|
||||
}
|
||||
},
|
||||
"node_modules/firecrawl/node_modules/uuid": {
|
||||
"version": "9.0.1",
|
||||
"resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz",
|
||||
"integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==",
|
||||
"funding": [
|
||||
"https://github.com/sponsors/broofa",
|
||||
"https://github.com/sponsors/ctavan"
|
||||
],
|
||||
"bin": {
|
||||
"uuid": "dist/bin/uuid"
|
||||
}
|
||||
},
|
||||
"node_modules/follow-redirects": {
|
||||
"version": "1.15.6",
|
||||
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz",
|
||||
@@ -652,6 +680,20 @@
|
||||
"url": "https://github.com/privatenumber/get-tsconfig?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/isows": {
|
||||
"version": "1.0.4",
|
||||
"resolved": "https://registry.npmjs.org/isows/-/isows-1.0.4.tgz",
|
||||
"integrity": "sha512-hEzjY+x9u9hPmBom9IIAqdJCwNLax+xrPb51vEPpERoFlIxgmZcHzsT5jKG06nvInKOBGvReAVz80Umed5CczQ==",
|
||||
"funding": [
|
||||
{
|
||||
"type": "github",
|
||||
"url": "https://github.com/sponsors/wagmi-dev"
|
||||
}
|
||||
],
|
||||
"peerDependencies": {
|
||||
"ws": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/make-error": {
|
||||
"version": "1.3.6",
|
||||
"resolved": "https://registry.npmjs.org/make-error/-/make-error-1.3.6.tgz",
|
||||
@@ -763,6 +805,11 @@
|
||||
"node": ">=14.17"
|
||||
}
|
||||
},
|
||||
"node_modules/typescript-event-target": {
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmjs.org/typescript-event-target/-/typescript-event-target-1.1.1.tgz",
|
||||
"integrity": "sha512-dFSOFBKV6uwaloBCCUhxlD3Pr/P1a/tJdcmPrTXCHlEFD3faj0mztjcGn6VBAhQ0/Bdy8K3VWrrqwbt/ffsYsg=="
|
||||
},
|
||||
"node_modules/undici-types": {
|
||||
"version": "5.26.5",
|
||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
|
||||
@@ -786,6 +833,27 @@
|
||||
"resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz",
|
||||
"integrity": "sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg=="
|
||||
},
|
||||
"node_modules/ws": {
|
||||
"version": "8.18.0",
|
||||
"resolved": "https://registry.npmjs.org/ws/-/ws-8.18.0.tgz",
|
||||
"integrity": "sha512-8VbfWfHLbbwu3+N6OKsOMpBdT4kXPDDB9cJk2bJ6mh9ucxdlnNvH1e+roYkKmN9Nxw2yjz7VzeO9oOz2zJ04Pw==",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">=10.0.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"bufferutil": "^4.0.1",
|
||||
"utf-8-validate": ">=5.0.2"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"bufferutil": {
|
||||
"optional": true
|
||||
},
|
||||
"utf-8-validate": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/yn": {
|
||||
"version": "3.1.1",
|
||||
"resolved": "https://registry.npmjs.org/yn/-/yn-3.1.1.tgz",
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
"dependencies": {
|
||||
"@mendable/firecrawl-js": "^1.0.3",
|
||||
"axios": "^1.6.8",
|
||||
"firecrawl": "^1.2.0",
|
||||
"ts-node": "^10.9.2",
|
||||
"typescript": "^5.4.5",
|
||||
"uuid": "^10.0.0",
|
||||
|
||||
Reference in New Issue
Block a user