This commit is contained in:
Nicolas
2024-05-08 16:38:49 -07:00
parent 9541ff6b30
commit c89964b230
9 changed files with 954 additions and 97 deletions
+37 -32
View File
@@ -7,9 +7,8 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
import axios from 'axios';
import dotenv from 'dotenv';
dotenv.config();
import axios from "axios";
import { zodToJsonSchema } from "zod-to-json-schema";
/**
* Main class for interacting with the Firecrawl API.
*/
@@ -19,9 +18,9 @@ export default class FirecrawlApp {
* @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
*/
constructor({ apiKey = null }) {
this.apiKey = apiKey || process.env.FIRECRAWL_API_KEY || '';
this.apiKey = apiKey || "";
if (!this.apiKey) {
throw new Error('No API key provided');
throw new Error("No API key provided");
}
}
/**
@@ -32,16 +31,18 @@ export default class FirecrawlApp {
*/
scrapeUrl(url_1) {
return __awaiter(this, arguments, void 0, function* (url, params = null) {
var _a;
const headers = {
'Content-Type': 'application/json',
'Authorization': `Bearer ${this.apiKey}`,
"Content-Type": "application/json",
Authorization: `Bearer ${this.apiKey}`,
};
let jsonData = { url };
if (params) {
jsonData = Object.assign(Object.assign({}, jsonData), params);
let jsonData = Object.assign({ url }, params);
if ((_a = params === null || params === void 0 ? void 0 : params.extractorOptions) === null || _a === void 0 ? void 0 : _a.extractionSchema) {
const schema = zodToJsonSchema(params.extractorOptions.extractionSchema);
jsonData = Object.assign(Object.assign({}, jsonData), { extractorOptions: Object.assign(Object.assign({}, params.extractorOptions), { extractionSchema: schema, mode: params.extractorOptions.mode || "llm-extraction" }) });
}
try {
const response = yield axios.post('https://api.firecrawl.dev/v0/scrape', jsonData, { headers });
const response = yield axios.post("https://api.firecrawl.dev/v0/scrape", jsonData, { headers });
if (response.status === 200) {
const responseData = response.data;
if (responseData.success) {
@@ -52,13 +53,13 @@ export default class FirecrawlApp {
}
}
else {
this.handleError(response, 'scrape URL');
this.handleError(response, "scrape URL");
}
}
catch (error) {
throw new Error(error.message);
}
return { success: false, error: 'Internal server error.' };
return { success: false, error: "Internal server error." };
});
}
/**
@@ -70,15 +71,15 @@ export default class FirecrawlApp {
search(query_1) {
return __awaiter(this, arguments, void 0, function* (query, params = null) {
const headers = {
'Content-Type': 'application/json',
'Authorization': `Bearer ${this.apiKey}`,
"Content-Type": "application/json",
Authorization: `Bearer ${this.apiKey}`,
};
let jsonData = { query };
if (params) {
jsonData = Object.assign(Object.assign({}, jsonData), params);
}
try {
const response = yield axios.post('https://api.firecrawl.dev/v0/search', jsonData, { headers });
const response = yield axios.post("https://api.firecrawl.dev/v0/search", jsonData, { headers });
if (response.status === 200) {
const responseData = response.data;
if (responseData.success) {
@@ -89,13 +90,13 @@ export default class FirecrawlApp {
}
}
else {
this.handleError(response, 'search');
this.handleError(response, "search");
}
}
catch (error) {
throw new Error(error.message);
}
return { success: false, error: 'Internal server error.' };
return { success: false, error: "Internal server error." };
});
}
/**
@@ -114,7 +115,7 @@ export default class FirecrawlApp {
jsonData = Object.assign(Object.assign({}, jsonData), params);
}
try {
const response = yield this.postRequest('https://api.firecrawl.dev/v0/crawl', jsonData, headers);
const response = yield this.postRequest("https://api.firecrawl.dev/v0/crawl", jsonData, headers);
if (response.status === 200) {
const jobId = response.data.jobId;
if (waitUntilDone) {
@@ -125,14 +126,14 @@ export default class FirecrawlApp {
}
}
else {
this.handleError(response, 'start crawl job');
this.handleError(response, "start crawl job");
}
}
catch (error) {
console.log(error);
throw new Error(error.message);
}
return { success: false, error: 'Internal server error.' };
return { success: false, error: "Internal server error." };
});
}
/**
@@ -149,13 +150,17 @@ export default class FirecrawlApp {
return response.data;
}
else {
this.handleError(response, 'check crawl status');
this.handleError(response, "check crawl status");
}
}
catch (error) {
throw new Error(error.message);
}
return { success: false, status: 'unknown', error: 'Internal server error.' };
return {
success: false,
status: "unknown",
error: "Internal server error.",
};
});
}
/**
@@ -164,8 +169,8 @@ export default class FirecrawlApp {
*/
prepareHeaders() {
return {
'Content-Type': 'application/json',
'Authorization': `Bearer ${this.apiKey}`,
"Content-Type": "application/json",
Authorization: `Bearer ${this.apiKey}`,
};
}
/**
@@ -200,26 +205,26 @@ export default class FirecrawlApp {
const statusResponse = yield this.getRequest(`https://api.firecrawl.dev/v0/crawl/status/${jobId}`, headers);
if (statusResponse.status === 200) {
const statusData = statusResponse.data;
if (statusData.status === 'completed') {
if ('data' in statusData) {
if (statusData.status === "completed") {
if ("data" in statusData) {
return statusData.data;
}
else {
throw new Error('Crawl job completed but no data was returned');
throw new Error("Crawl job completed but no data was returned");
}
}
else if (['active', 'paused', 'pending', 'queued'].includes(statusData.status)) {
else if (["active", "paused", "pending", "queued"].includes(statusData.status)) {
if (timeout < 2) {
timeout = 2;
}
yield new Promise(resolve => setTimeout(resolve, timeout * 1000)); // Wait for the specified timeout before checking again
yield new Promise((resolve) => setTimeout(resolve, timeout * 1000)); // Wait for the specified timeout before checking again
}
else {
throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`);
}
}
else {
this.handleError(statusResponse, 'check crawl status');
this.handleError(statusResponse, "check crawl status");
}
}
});
@@ -231,7 +236,7 @@ export default class FirecrawlApp {
*/
handleError(response, action) {
if ([402, 409, 500].includes(response.status)) {
const errorMessage = response.data.error || 'Unknown error occurred';
const errorMessage = response.data.error || "Unknown error occurred";
throw new Error(`Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`);
}
else {
+21 -3
View File
@@ -1,15 +1,17 @@
{
"name": "@mendable/firecrawl-js",
"version": "0.0.13",
"version": "0.0.17-beta.8",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@mendable/firecrawl-js",
"version": "0.0.13",
"version": "0.0.17-beta.8",
"license": "MIT",
"dependencies": {
"axios": "^1.6.8"
"axios": "^1.6.8",
"zod": "^3.23.8",
"zod-to-json-schema": "^3.23.0"
},
"devDependencies": {
"@jest/globals": "^29.7.0",
@@ -3766,6 +3768,22 @@
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/zod": {
"version": "3.23.8",
"resolved": "https://registry.npmjs.org/zod/-/zod-3.23.8.tgz",
"integrity": "sha512-XBx9AXhXktjUqnepgTiE5flcKIYWi/rme0Eaj+5Y0lftuGBq+jyRu/md4WnuxqgP1ubdpNCsYEYPxrzVHD8d6g==",
"funding": {
"url": "https://github.com/sponsors/colinhacks"
}
},
"node_modules/zod-to-json-schema": {
"version": "3.23.0",
"resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.23.0.tgz",
"integrity": "sha512-az0uJ243PxsRIa2x1WmNE/pnuA05gUq/JB8Lwe1EDCCL/Fz9MgjYQ0fPlyc2Tcv6aF2ZA7WM5TWaRZVEFaAIag==",
"peerDependencies": {
"zod": "^3.23.3"
}
}
}
}
+5 -2
View File
@@ -1,6 +1,6 @@
{
"name": "@mendable/firecrawl-js",
"version": "0.0.16",
"version": "0.0.17",
"description": "JavaScript SDK for Firecrawl API",
"main": "build/index.js",
"types": "types/index.d.ts",
@@ -8,6 +8,7 @@
"scripts": {
"build": "tsc",
"publish": "npm run build && npm publish --access public",
"publish-beta": "npm run build && npm publish --access public --tag beta",
"test": "jest src/**/*.test.ts"
},
"repository": {
@@ -17,7 +18,9 @@
"author": "Mendable.ai",
"license": "MIT",
"dependencies": {
"axios": "^1.6.8"
"axios": "^1.6.8",
"zod": "^3.23.8",
"zod-to-json-schema": "^3.23.0"
},
"bugs": {
"url": "https://github.com/mendableai/firecrawl/issues"
+114 -45
View File
@@ -1,5 +1,6 @@
import axios, { AxiosResponse, AxiosRequestHeaders } from 'axios';
import axios, { AxiosResponse, AxiosRequestHeaders } from "axios";
import { z } from "zod";
import { zodToJsonSchema } from "zod-to-json-schema";
/**
* Configuration interface for FirecrawlApp.
*/
@@ -12,6 +13,11 @@ export interface FirecrawlAppConfig {
*/
export interface Params {
/** Any additional request parameter; scrapeUrl and search spread these into the JSON request body. */
[key: string]: any;
/** Optional extraction settings; scrapeUrl reads them before sending the request. */
extractorOptions?: {
/** Schema describing the data to extract; scrapeUrl passes it through zodToJsonSchema before sending. */
extractionSchema: z.ZodSchema | any;
/** Extraction mode; scrapeUrl sends "llm-extraction" when this is omitted. */
mode?: "llm-extraction";
/** NOTE(review): presumably a prompt guiding the extraction; the client forwards it unchanged — confirm semantics against the API docs. */
extractionPrompt?: string;
};
}
/**
@@ -63,9 +69,9 @@ export default class FirecrawlApp {
* @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
*/
constructor({ apiKey = null }: FirecrawlAppConfig) {
  // A missing or empty key can never authenticate, so fail at construction
  // time instead of on the first request.
  if (!apiKey) {
    throw new Error("No API key provided");
  }
  this.apiKey = apiKey;
}
@@ -75,31 +81,48 @@ export default class FirecrawlApp {
* @param {Params | null} params - Additional parameters for the scrape request.
* @returns {Promise<ScrapeResponse>} The response from the scrape operation.
*/
async scrapeUrl(
  url: string,
  params: Params | null = null
): Promise<ScrapeResponse> {
  // Scrapes a single URL via POST /v0/scrape.
  // Resolves with the API payload when it reports success; throws an Error
  // when the request fails or the API reports failure.
  const headers: AxiosRequestHeaders = {
    "Content-Type": "application/json",
    Authorization: `Bearer ${this.apiKey}`,
  } as AxiosRequestHeaders;

  // Spreading `null` is a no-op, so no separate null check is needed.
  let jsonData: Params = { url, ...params };

  const extractorOptions = params?.extractorOptions;
  if (extractorOptions?.extractionSchema) {
    const rawSchema = extractorOptions.extractionSchema;
    // The option is typed `z.ZodSchema | any`, so callers may hand in an
    // already-built JSON schema. Only Zod schemas are converted; anything
    // else is forwarded untouched instead of being mangled by the converter.
    const schema =
      rawSchema instanceof z.ZodSchema ? zodToJsonSchema(rawSchema) : rawSchema;
    jsonData = {
      ...jsonData,
      extractorOptions: {
        ...extractorOptions,
        extractionSchema: schema,
        // Default the mode so that supplying a schema is enough to enable extraction.
        mode: extractorOptions.mode || "llm-extraction",
      },
    };
  }

  try {
    const response: AxiosResponse = await axios.post(
      "https://api.firecrawl.dev/v0/scrape",
      jsonData,
      { headers }
    );
    if (response.status === 200) {
      const responseData = response.data;
      if (responseData.success) {
        return responseData;
      } else {
        throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
      }
    } else {
      // handleError always throws.
      this.handleError(response, "scrape URL");
    }
  } catch (error: any) {
    // Re-wrap so callers always receive a plain Error carrying only the message.
    throw new Error(error.message);
  }
  // Not reached in practice (every path above returns or throws); kept so the
  // declared return type is satisfied.
  return { success: false, error: "Internal server error." };
}
/**
@@ -108,31 +131,38 @@ export default class FirecrawlApp {
* @param {Params | null} params - Additional parameters for the search request.
* @returns {Promise<SearchResponse>} The response from the search operation.
*/
async search(
  query: string,
  params: Params | null = null
): Promise<SearchResponse> {
  // Runs a search via POST /v0/search.
  // Resolves with the API payload when it reports success; throws an Error
  // when the request fails or the API reports failure.
  const headers: AxiosRequestHeaders = {
    "Content-Type": "application/json",
    Authorization: `Bearer ${this.apiKey}`,
  } as AxiosRequestHeaders;

  // Caller-supplied params are merged over the query; spreading `null` is a no-op.
  const jsonData: Params = { query, ...params };

  try {
    const response: AxiosResponse = await axios.post(
      "https://api.firecrawl.dev/v0/search",
      jsonData,
      { headers }
    );
    if (response.status === 200) {
      const responseData = response.data;
      if (responseData.success) {
        return responseData;
      } else {
        throw new Error(`Failed to search. Error: ${responseData.error}`);
      }
    } else {
      // handleError always throws.
      this.handleError(response, "search");
    }
  } catch (error: any) {
    // Re-wrap so callers always receive a plain Error carrying only the message.
    throw new Error(error.message);
  }
  // Not reached in practice; kept so the declared return type is satisfied.
  return { success: false, error: "Internal server error." };
}
/**
@@ -143,14 +173,23 @@ export default class FirecrawlApp {
* @param {number} timeout - Timeout in seconds for job status checks.
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
*/
async crawlUrl(url: string, params: Params | null = null, waitUntilDone: boolean = true, timeout: number = 2): Promise<CrawlResponse | any> {
async crawlUrl(
url: string,
params: Params | null = null,
waitUntilDone: boolean = true,
timeout: number = 2
): Promise<CrawlResponse | any> {
const headers = this.prepareHeaders();
let jsonData: Params = { url };
if (params) {
jsonData = { ...jsonData, ...params };
}
try {
const response: AxiosResponse = await this.postRequest('https://api.firecrawl.dev/v0/crawl', jsonData, headers);
const response: AxiosResponse = await this.postRequest(
"https://api.firecrawl.dev/v0/crawl",
jsonData,
headers
);
if (response.status === 200) {
const jobId: string = response.data.jobId;
if (waitUntilDone) {
@@ -159,13 +198,13 @@ export default class FirecrawlApp {
return { success: true, jobId };
}
} else {
this.handleError(response, 'start crawl job');
this.handleError(response, "start crawl job");
}
} catch (error: any) {
console.log(error)
console.log(error);
throw new Error(error.message);
}
return { success: false, error: 'Internal server error.' };
return { success: false, error: "Internal server error." };
}
/**
@@ -176,16 +215,23 @@ export default class FirecrawlApp {
async checkCrawlStatus(jobId: string): Promise<JobStatusResponse> {
  // Fetches the current status of a crawl job with a single GET request.
  // Resolves with the response body on HTTP 200; throws an Error otherwise.
  const headers: AxiosRequestHeaders = this.prepareHeaders();
  try {
    const response: AxiosResponse = await this.getRequest(
      `https://api.firecrawl.dev/v0/crawl/status/${jobId}`,
      headers
    );
    if (response.status === 200) {
      return response.data;
    } else {
      // handleError always throws.
      this.handleError(response, "check crawl status");
    }
  } catch (error: any) {
    // Re-wrap so callers always receive a plain Error carrying only the message.
    throw new Error(error.message);
  }
  // Not reached in practice; kept so the declared return type is satisfied.
  return {
    success: false,
    status: "unknown",
    error: "Internal server error.",
  };
}
/**
@@ -194,8 +240,8 @@ export default class FirecrawlApp {
*/
prepareHeaders(): AxiosRequestHeaders {
  // Builds the headers sent with API requests: a JSON content type plus the
  // instance's API key as a Bearer token.
  return {
    "Content-Type": "application/json",
    Authorization: `Bearer ${this.apiKey}`,
  } as AxiosRequestHeaders;
}
@@ -206,7 +252,11 @@ export default class FirecrawlApp {
* @param {AxiosRequestHeaders} headers - The headers for the request.
* @returns {Promise<AxiosResponse>} The response from the POST request.
*/
postRequest(
  url: string,
  data: Params,
  headers: AxiosRequestHeaders
): Promise<AxiosResponse> {
  // Thin wrapper over axios.post: sends `data` to `url` with the given
  // headers and returns the pending response.
  return axios.post(url, data, { headers });
}
@@ -216,7 +266,10 @@ export default class FirecrawlApp {
* @param {AxiosRequestHeaders} headers - The headers for the request.
* @returns {Promise<AxiosResponse>} The response from the GET request.
*/
getRequest(
  url: string,
  headers: AxiosRequestHeaders
): Promise<AxiosResponse> {
  // Thin wrapper over axios.get: requests `url` with the given headers and
  // returns the pending response.
  return axios.get(url, { headers });
}
@@ -227,27 +280,38 @@ export default class FirecrawlApp {
* @param {number} timeout - Timeout in seconds for job status checks.
* @returns {Promise<any>} The final job status or data.
*/
async monitorJobStatus(
  jobId: string,
  headers: AxiosRequestHeaders,
  timeout: number
): Promise<any> {
  // Polls the crawl-status endpoint until the job finishes.
  // Resolves with the job's `data` once the status is "completed"; throws if
  // the job completes without data, reaches any status other than
  // active/paused/pending/queued, or a status request does not return HTTP 200.

  // Enforce a minimum 2-second pause between polls. The clamp does not depend
  // on anything inside the loop, so compute it once.
  const pollSeconds = Math.max(timeout, 2);
  const inProgressStatuses = ["active", "paused", "pending", "queued"];

  while (true) {
    const statusResponse: AxiosResponse = await this.getRequest(
      `https://api.firecrawl.dev/v0/crawl/status/${jobId}`,
      headers
    );
    if (statusResponse.status !== 200) {
      // handleError always throws, which ends the loop.
      this.handleError(statusResponse, "check crawl status");
      continue;
    }

    const statusData = statusResponse.data;
    if (statusData.status === "completed") {
      if ("data" in statusData) {
        return statusData.data;
      }
      throw new Error("Crawl job completed but no data was returned");
    }

    if (!inProgressStatuses.includes(statusData.status)) {
      throw new Error(
        `Crawl job failed or was stopped. Status: ${statusData.status}`
      );
    }

    // Still running: wait before checking again.
    await new Promise((resolve) => setTimeout(resolve, pollSeconds * 1000));
  }
}
@@ -259,10 +323,15 @@ export default class FirecrawlApp {
*/
handleError(response: AxiosResponse, action: string): void {
  // Converts a non-success HTTP response into a thrown Error. Always throws.
  // `action` is a short description of the attempted operation and is
  // embedded in the message.
  if ([402, 409, 500].includes(response.status)) {
    // For these statuses the message includes the body's `error` field when
    // present. A response without a body must not mask the real failure with
    // a TypeError, hence the optional chaining.
    const errorMessage: string =
      response.data?.error || "Unknown error occurred";
    throw new Error(
      `Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`
    );
  }
  throw new Error(
    `Unexpected error occurred while trying to ${action}. Status code: ${response.status}`
  );
}
}
+7 -1
View File
@@ -1,4 +1,5 @@
import { AxiosResponse, AxiosRequestHeaders } from 'axios';
import { AxiosResponse, AxiosRequestHeaders } from "axios";
import { z } from "zod";
/**
* Configuration interface for FirecrawlApp.
*/
@@ -10,6 +11,11 @@ export interface FirecrawlAppConfig {
*/
export interface Params {
/** Any additional request parameter, keyed by name. */
[key: string]: any;
/** Optional extraction settings. */
extractorOptions?: {
/** Schema describing the data to extract; typed to accept a Zod schema or any other value. */
extractionSchema: z.ZodSchema | any;
/** Extraction mode; "llm-extraction" is the only value the type allows. */
mode?: "llm-extraction";
/** NOTE(review): presumably a prompt guiding the extraction — confirm semantics against the API docs. */
extractionPrompt?: string;
};
}
/**
* Response interface for scraping operations.