Merge branch 'main' into fix-attw

This commit is contained in:
Gergő Móricz
2024-09-11 19:45:13 +02:00
committed by GitHub
62 changed files with 898 additions and 280 deletions
+1 -1
View File
@@ -1,4 +1,4 @@
import FirecrawlApp from '@mendable/firecrawl-js';
import FirecrawlApp from 'firecrawl';
const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY"});
+1 -1
View File
@@ -1,4 +1,4 @@
import FirecrawlApp, { CrawlStatusResponse, ErrorResponse } from '@mendable/firecrawl-js';
import FirecrawlApp, { CrawlStatusResponse, ErrorResponse } from 'firecrawl';
const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY"});
+5 -3
View File
@@ -10,10 +10,8 @@
"license": "MIT",
"dependencies": {
"axios": "^1.6.8",
"dotenv": "^16.4.5",
"isows": "^1.0.4",
"typescript-event-target": "^1.1.1",
"uuid": "^9.0.1",
"zod": "^3.23.8",
"zod-to-json-schema": "^3.23.0"
},
@@ -25,10 +23,12 @@
"@types/mocha": "^10.0.6",
"@types/node": "^20.12.12",
"@types/uuid": "^9.0.8",
"dotenv": "^16.4.5",
"jest": "^29.7.0",
"ts-jest": "^29.2.2",
"tsup": "^8.2.4",
"typescript": "^5.4.5"
"typescript": "^5.4.5",
"uuid": "^9.0.1"
}
},
"node_modules/@ampproject/remapping": {
@@ -2502,6 +2502,7 @@
"version": "16.4.5",
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.5.tgz",
"integrity": "sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==",
"dev": true,
"engines": {
"node": ">=12"
},
@@ -5290,6 +5291,7 @@
"version": "9.0.1",
"resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz",
"integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==",
"dev": true,
"funding": [
"https://github.com/sponsors/broofa",
"https://github.com/sponsors/ctavan"
+3 -3
View File
@@ -1,6 +1,6 @@
{
"name": "@mendable/firecrawl-js",
"version": "1.2.1",
"version": "1.2.3",
"description": "JavaScript SDK for Firecrawl API",
"main": "dist/index.js",
"types": "dist/index.d.ts",
@@ -26,10 +26,8 @@
"license": "MIT",
"dependencies": {
"axios": "^1.6.8",
"dotenv": "^16.4.5",
"isows": "^1.0.4",
"typescript-event-target": "^1.1.1",
"uuid": "^9.0.1",
"zod": "^3.23.8",
"zod-to-json-schema": "^3.23.0"
},
@@ -38,6 +36,8 @@
},
"homepage": "https://github.com/mendableai/firecrawl#readme",
"devDependencies": {
"uuid": "^9.0.1",
"dotenv": "^16.4.5",
"@jest/globals": "^29.7.0",
"@types/axios": "^0.14.0",
"@types/dotenv": "^8.2.0",
+43 -24
View File
@@ -1,5 +1,5 @@
import axios, { type AxiosResponse, type AxiosRequestHeaders } from "axios";
import { z } from "zod";
import type { ZodSchema } from "zod";
import { zodToJsonSchema } from "zod-to-json-schema";
import { WebSocket } from "isows";
import { TypedEventTarget } from "typescript-event-target";
@@ -81,7 +81,7 @@ export interface ScrapeParams {
onlyMainContent?: boolean;
extract?: {
prompt?: string;
schema?: z.ZodSchema | any;
schema?: ZodSchema | any;
systemPrompt?: string;
};
waitFor?: number;
@@ -131,15 +131,14 @@ export interface CrawlResponse {
*/
export interface CrawlStatusResponse {
success: true;
total: number;
status: "scraping" | "completed" | "failed" | "cancelled";
completed: number;
total: number;
creditsUsed: number;
expiresAt: Date;
status: "scraping" | "completed" | "failed";
next: string;
data?: FirecrawlDocument[];
error?: string;
}
next?: string;
data: FirecrawlDocument[];
};
/**
* Parameters for mapping operations.
@@ -329,9 +328,10 @@ export default class FirecrawlApp {
/**
* Checks the status of a crawl job using the Firecrawl API.
* @param id - The ID of the crawl operation.
* @param getAllData - When `true` and the crawl has completed, follow each `next` link and return the documents from every page instead of only the first page. (default: `false`)
* @returns The response containing the job status.
*/
async checkCrawlStatus(id?: string): Promise<CrawlStatusResponse | ErrorResponse> {
async checkCrawlStatus(id?: string, getAllData = false): Promise<CrawlStatusResponse | ErrorResponse> {
if (!id) {
throw new Error("No crawl ID provided");
}
@@ -342,17 +342,29 @@ export default class FirecrawlApp {
`${this.apiUrl}/v1/crawl/${id}`,
headers
);
if (response.status === 200) {
if (response.status === 200 && getAllData) {
let allData = response.data.data;
if (response.data.status === "completed") {
let statusData = response.data
if ("data" in statusData) {
let data = statusData.data;
while ('next' in statusData) {
statusData = (await this.getRequest(statusData.next, headers)).data;
data = data.concat(statusData.data);
}
allData = data;
}
}
return ({
success: true,
success: response.data.success,
status: response.data.status,
total: response.data.total,
completed: response.data.completed,
creditsUsed: response.data.creditsUsed,
expiresAt: new Date(response.data.expiresAt),
next: response.data.next,
data: response.data.data,
error: response.data.error
data: allData,
error: response.data.error,
})
} else {
this.handleError(response, "check crawl status");
@@ -452,22 +464,29 @@ export default class FirecrawlApp {
id: string,
headers: AxiosRequestHeaders,
checkInterval: number
): Promise<CrawlStatusResponse> {
): Promise<CrawlStatusResponse | ErrorResponse> {
while (true) {
const statusResponse: AxiosResponse = await this.getRequest(
let statusResponse: AxiosResponse = await this.getRequest(
`${this.apiUrl}/v1/crawl/${id}`,
headers
);
if (statusResponse.status === 200) {
const statusData = statusResponse.data;
if (statusData.status === "completed") {
if ("data" in statusData) {
return statusData;
} else {
throw new Error("Crawl job completed but no data was returned");
}
} else if (
["active", "paused", "pending", "queued", "scraping"].includes(statusData.status)
let statusData = statusResponse.data;
if (statusData.status === "completed") {
if ("data" in statusData) {
let data = statusData.data;
while ('next' in statusData) {
statusResponse = await this.getRequest(statusData.next, headers);
statusData = statusResponse.data;
data = data.concat(statusData.data);
}
statusData.data = data;
return statusData;
} else {
throw new Error("Crawl job completed but no data was returned");
}
} else if (
["active", "paused", "pending", "queued", "waiting", "scraping"].includes(statusData.status)
) {
checkInterval = Math.max(checkInterval, 2);
await new Promise((resolve) =>
+72 -4
View File
@@ -9,8 +9,8 @@
"version": "1.0.0",
"license": "ISC",
"dependencies": {
"@mendable/firecrawl-js": "^0.0.36",
"axios": "^1.6.8",
"firecrawl": "^1.2.0",
"ts-node": "^10.9.2",
"typescript": "^5.4.5",
"uuid": "^10.0.0",
@@ -422,12 +422,14 @@
}
},
"node_modules/@mendable/firecrawl-js": {
"version": "0.0.36",
"resolved": "https://registry.npmjs.org/@mendable/firecrawl-js/-/firecrawl-js-0.0.36.tgz",
"integrity": "sha512-5zQMWUD49r6Q7cxj+QBthQ964Bm9fMooW4E8E4nIca3BMXCeEuQFVf5C3OEWwZf0SjJvR+5Yx2wUbXJWd1wCOA==",
"version": "1.2.2",
"resolved": "https://registry.npmjs.org/@mendable/firecrawl-js/-/firecrawl-js-1.2.2.tgz",
"integrity": "sha512-2A1GzLD0bczlFIlcjxHcm/x8i76ndtV4EUzOfc81oOJ/HbycE2mbT6EUthoL+r4s5A8yO3bKr9o/GxmEn456VA==",
"dependencies": {
"axios": "^1.6.8",
"dotenv": "^16.4.5",
"isows": "^1.0.4",
"typescript-event-target": "^1.1.1",
"uuid": "^9.0.1",
"zod": "^3.23.8",
"zod-to-json-schema": "^3.23.0"
@@ -594,6 +596,32 @@
"@esbuild/win32-x64": "0.20.2"
}
},
"node_modules/firecrawl": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/firecrawl/-/firecrawl-1.2.0.tgz",
"integrity": "sha512-Sy1BCCvs5FhGc4yxPP7NG9iWnK8RXdvA1ZS/K1Gj+LrEN3iAT2WRzhYET7x8G2bif25F6rHJg57vdVb5sr6RyQ==",
"dependencies": {
"axios": "^1.6.8",
"dotenv": "^16.4.5",
"isows": "^1.0.4",
"typescript-event-target": "^1.1.1",
"uuid": "^9.0.1",
"zod": "^3.23.8",
"zod-to-json-schema": "^3.23.0"
}
},
"node_modules/firecrawl/node_modules/uuid": {
"version": "9.0.1",
"resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz",
"integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==",
"funding": [
"https://github.com/sponsors/broofa",
"https://github.com/sponsors/ctavan"
],
"bin": {
"uuid": "dist/bin/uuid"
}
},
"node_modules/follow-redirects": {
"version": "1.15.6",
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz",
@@ -652,6 +680,20 @@
"url": "https://github.com/privatenumber/get-tsconfig?sponsor=1"
}
},
"node_modules/isows": {
"version": "1.0.4",
"resolved": "https://registry.npmjs.org/isows/-/isows-1.0.4.tgz",
"integrity": "sha512-hEzjY+x9u9hPmBom9IIAqdJCwNLax+xrPb51vEPpERoFlIxgmZcHzsT5jKG06nvInKOBGvReAVz80Umed5CczQ==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/wagmi-dev"
}
],
"peerDependencies": {
"ws": "*"
}
},
"node_modules/make-error": {
"version": "1.3.6",
"resolved": "https://registry.npmjs.org/make-error/-/make-error-1.3.6.tgz",
@@ -763,6 +805,11 @@
"node": ">=14.17"
}
},
"node_modules/typescript-event-target": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/typescript-event-target/-/typescript-event-target-1.1.1.tgz",
"integrity": "sha512-dFSOFBKV6uwaloBCCUhxlD3Pr/P1a/tJdcmPrTXCHlEFD3faj0mztjcGn6VBAhQ0/Bdy8K3VWrrqwbt/ffsYsg=="
},
"node_modules/undici-types": {
"version": "5.26.5",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
@@ -786,6 +833,27 @@
"resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz",
"integrity": "sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg=="
},
"node_modules/ws": {
"version": "8.18.0",
"resolved": "https://registry.npmjs.org/ws/-/ws-8.18.0.tgz",
"integrity": "sha512-8VbfWfHLbbwu3+N6OKsOMpBdT4kXPDDB9cJk2bJ6mh9ucxdlnNvH1e+roYkKmN9Nxw2yjz7VzeO9oOz2zJ04Pw==",
"peer": true,
"engines": {
"node": ">=10.0.0"
},
"peerDependencies": {
"bufferutil": "^4.0.1",
"utf-8-validate": ">=5.0.2"
},
"peerDependenciesMeta": {
"bufferutil": {
"optional": true
},
"utf-8-validate": {
"optional": true
}
}
},
"node_modules/yn": {
"version": "3.1.1",
"resolved": "https://registry.npmjs.org/yn/-/yn-3.1.1.tgz",
+1
View File
@@ -13,6 +13,7 @@
"dependencies": {
"@mendable/firecrawl-js": "^1.0.3",
"axios": "^1.6.8",
"firecrawl": "^1.2.0",
"ts-node": "^10.9.2",
"typescript": "^5.4.5",
"uuid": "^10.0.0",