Merge branch 'main' into feat/idempotency-key
This commit is contained in:
@@ -77,6 +77,42 @@ To scrape a single URL with error handling, use the `scrapeUrl` method. It takes
|
||||
scrapeExample();
|
||||
```
|
||||
|
||||
### Extracting structured data from a URL
|
||||
|
||||
With LLM extraction, you can easily extract structured data from any URL. We support zod schemas to make it easier for you too. Here is how you to use it:
|
||||
|
||||
```js
|
||||
import { z } from "zod";
|
||||
|
||||
const zodSchema = z.object({
|
||||
top: z
|
||||
.array(
|
||||
z.object({
|
||||
title: z.string(),
|
||||
points: z.number(),
|
||||
by: z.string(),
|
||||
commentsURL: z.string(),
|
||||
})
|
||||
)
|
||||
.length(5)
|
||||
.describe("Top 5 stories on Hacker News"),
|
||||
});
|
||||
|
||||
let llmExtractionResult = await app.scrapeUrl("https://news.ycombinator.com", {
|
||||
extractorOptions: { extractionSchema: zodSchema },
|
||||
});
|
||||
|
||||
console.log(llmExtractionResult.data.llm_extraction);
|
||||
```
|
||||
|
||||
### Search for a query
|
||||
|
||||
Used to search the web, get the most relevant results, scrap each page and return the markdown.
|
||||
|
||||
```js
|
||||
query = 'what is mendable?'
|
||||
searchResult = app.search(query)
|
||||
```
|
||||
|
||||
### Crawling a Website
|
||||
|
||||
|
||||
@@ -7,7 +7,9 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
|
||||
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
||||
});
|
||||
};
|
||||
import axios from 'axios';
|
||||
import axios from "axios";
|
||||
import { z } from "zod";
|
||||
import { zodToJsonSchema } from "zod-to-json-schema";
|
||||
/**
|
||||
* Main class for interacting with the Firecrawl API.
|
||||
*/
|
||||
@@ -19,7 +21,7 @@ export default class FirecrawlApp {
|
||||
constructor({ apiKey = null }) {
|
||||
this.apiKey = apiKey || '';
|
||||
if (!this.apiKey) {
|
||||
throw new Error('No API key provided');
|
||||
throw new Error("No API key provided");
|
||||
}
|
||||
}
|
||||
/**
|
||||
@@ -30,16 +32,22 @@ export default class FirecrawlApp {
|
||||
*/
|
||||
scrapeUrl(url_1) {
|
||||
return __awaiter(this, arguments, void 0, function* (url, params = null) {
|
||||
var _a;
|
||||
const headers = {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': `Bearer ${this.apiKey}`,
|
||||
"Content-Type": "application/json",
|
||||
Authorization: `Bearer ${this.apiKey}`,
|
||||
};
|
||||
let jsonData = { url };
|
||||
if (params) {
|
||||
jsonData = Object.assign(Object.assign({}, jsonData), params);
|
||||
let jsonData = Object.assign({ url }, params);
|
||||
if ((_a = params === null || params === void 0 ? void 0 : params.extractorOptions) === null || _a === void 0 ? void 0 : _a.extractionSchema) {
|
||||
let schema = params.extractorOptions.extractionSchema;
|
||||
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
||||
if (schema instanceof z.ZodSchema) {
|
||||
schema = zodToJsonSchema(schema);
|
||||
}
|
||||
jsonData = Object.assign(Object.assign({}, jsonData), { extractorOptions: Object.assign(Object.assign({}, params.extractorOptions), { extractionSchema: schema, mode: params.extractorOptions.mode || "llm-extraction" }) });
|
||||
}
|
||||
try {
|
||||
const response = yield axios.post('https://api.firecrawl.dev/v0/scrape', jsonData, { headers });
|
||||
const response = yield axios.post("https://api.firecrawl.dev/v0/scrape", jsonData, { headers });
|
||||
if (response.status === 200) {
|
||||
const responseData = response.data;
|
||||
if (responseData.success) {
|
||||
@@ -50,13 +58,13 @@ export default class FirecrawlApp {
|
||||
}
|
||||
}
|
||||
else {
|
||||
this.handleError(response, 'scrape URL');
|
||||
this.handleError(response, "scrape URL");
|
||||
}
|
||||
}
|
||||
catch (error) {
|
||||
throw new Error(error.message);
|
||||
}
|
||||
return { success: false, error: 'Internal server error.' };
|
||||
return { success: false, error: "Internal server error." };
|
||||
});
|
||||
}
|
||||
/**
|
||||
@@ -68,15 +76,15 @@ export default class FirecrawlApp {
|
||||
search(query_1) {
|
||||
return __awaiter(this, arguments, void 0, function* (query, params = null) {
|
||||
const headers = {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': `Bearer ${this.apiKey}`,
|
||||
"Content-Type": "application/json",
|
||||
Authorization: `Bearer ${this.apiKey}`,
|
||||
};
|
||||
let jsonData = { query };
|
||||
if (params) {
|
||||
jsonData = Object.assign(Object.assign({}, jsonData), params);
|
||||
}
|
||||
try {
|
||||
const response = yield axios.post('https://api.firecrawl.dev/v0/search', jsonData, { headers });
|
||||
const response = yield axios.post("https://api.firecrawl.dev/v0/search", jsonData, { headers });
|
||||
if (response.status === 200) {
|
||||
const responseData = response.data;
|
||||
if (responseData.success) {
|
||||
@@ -87,13 +95,13 @@ export default class FirecrawlApp {
|
||||
}
|
||||
}
|
||||
else {
|
||||
this.handleError(response, 'search');
|
||||
this.handleError(response, "search");
|
||||
}
|
||||
}
|
||||
catch (error) {
|
||||
throw new Error(error.message);
|
||||
}
|
||||
return { success: false, error: 'Internal server error.' };
|
||||
return { success: false, error: "Internal server error." };
|
||||
});
|
||||
}
|
||||
/**
|
||||
@@ -113,7 +121,7 @@ export default class FirecrawlApp {
|
||||
jsonData = Object.assign(Object.assign({}, jsonData), params);
|
||||
}
|
||||
try {
|
||||
const response = yield this.postRequest('https://api.firecrawl.dev/v0/crawl', jsonData, headers);
|
||||
const response = yield this.postRequest("https://api.firecrawl.dev/v0/crawl", jsonData, headers);
|
||||
if (response.status === 200) {
|
||||
const jobId = response.data.jobId;
|
||||
if (waitUntilDone) {
|
||||
@@ -124,14 +132,14 @@ export default class FirecrawlApp {
|
||||
}
|
||||
}
|
||||
else {
|
||||
this.handleError(response, 'start crawl job');
|
||||
this.handleError(response, "start crawl job");
|
||||
}
|
||||
}
|
||||
catch (error) {
|
||||
console.log(error);
|
||||
throw new Error(error.message);
|
||||
}
|
||||
return { success: false, error: 'Internal server error.' };
|
||||
return { success: false, error: "Internal server error." };
|
||||
});
|
||||
}
|
||||
/**
|
||||
@@ -148,13 +156,17 @@ export default class FirecrawlApp {
|
||||
return response.data;
|
||||
}
|
||||
else {
|
||||
this.handleError(response, 'check crawl status');
|
||||
this.handleError(response, "check crawl status");
|
||||
}
|
||||
}
|
||||
catch (error) {
|
||||
throw new Error(error.message);
|
||||
}
|
||||
return { success: false, status: 'unknown', error: 'Internal server error.' };
|
||||
return {
|
||||
success: false,
|
||||
status: "unknown",
|
||||
error: "Internal server error.",
|
||||
};
|
||||
});
|
||||
}
|
||||
/**
|
||||
@@ -196,26 +208,26 @@ export default class FirecrawlApp {
|
||||
const statusResponse = yield this.getRequest(`https://api.firecrawl.dev/v0/crawl/status/${jobId}`, headers);
|
||||
if (statusResponse.status === 200) {
|
||||
const statusData = statusResponse.data;
|
||||
if (statusData.status === 'completed') {
|
||||
if ('data' in statusData) {
|
||||
if (statusData.status === "completed") {
|
||||
if ("data" in statusData) {
|
||||
return statusData.data;
|
||||
}
|
||||
else {
|
||||
throw new Error('Crawl job completed but no data was returned');
|
||||
throw new Error("Crawl job completed but no data was returned");
|
||||
}
|
||||
}
|
||||
else if (['active', 'paused', 'pending', 'queued'].includes(statusData.status)) {
|
||||
else if (["active", "paused", "pending", "queued"].includes(statusData.status)) {
|
||||
if (timeout < 2) {
|
||||
timeout = 2;
|
||||
}
|
||||
yield new Promise(resolve => setTimeout(resolve, timeout * 1000)); // Wait for the specified timeout before checking again
|
||||
yield new Promise((resolve) => setTimeout(resolve, timeout * 1000)); // Wait for the specified timeout before checking again
|
||||
}
|
||||
else {
|
||||
throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`);
|
||||
}
|
||||
}
|
||||
else {
|
||||
this.handleError(statusResponse, 'check crawl status');
|
||||
this.handleError(statusResponse, "check crawl status");
|
||||
}
|
||||
}
|
||||
});
|
||||
@@ -226,8 +238,8 @@ export default class FirecrawlApp {
|
||||
* @param {string} action - The action being performed when the error occurred.
|
||||
*/
|
||||
handleError(response, action) {
|
||||
if ([402, 409, 500].includes(response.status)) {
|
||||
const errorMessage = response.data.error || 'Unknown error occurred';
|
||||
if ([402, 408, 409, 500].includes(response.status)) {
|
||||
const errorMessage = response.data.error || "Unknown error occurred";
|
||||
throw new Error(`Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`);
|
||||
}
|
||||
else {
|
||||
|
||||
Generated
+21
-3
@@ -1,15 +1,17 @@
|
||||
{
|
||||
"name": "@mendable/firecrawl-js",
|
||||
"version": "0.0.13",
|
||||
"version": "0.0.17-beta.8",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "@mendable/firecrawl-js",
|
||||
"version": "0.0.13",
|
||||
"version": "0.0.17-beta.8",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"axios": "^1.6.8"
|
||||
"axios": "^1.6.8",
|
||||
"zod": "^3.23.8",
|
||||
"zod-to-json-schema": "^3.23.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@jest/globals": "^29.7.0",
|
||||
@@ -3766,6 +3768,22 @@
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/zod": {
|
||||
"version": "3.23.8",
|
||||
"resolved": "https://registry.npmjs.org/zod/-/zod-3.23.8.tgz",
|
||||
"integrity": "sha512-XBx9AXhXktjUqnepgTiE5flcKIYWi/rme0Eaj+5Y0lftuGBq+jyRu/md4WnuxqgP1ubdpNCsYEYPxrzVHD8d6g==",
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/colinhacks"
|
||||
}
|
||||
},
|
||||
"node_modules/zod-to-json-schema": {
|
||||
"version": "3.23.0",
|
||||
"resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.23.0.tgz",
|
||||
"integrity": "sha512-az0uJ243PxsRIa2x1WmNE/pnuA05gUq/JB8Lwe1EDCCL/Fz9MgjYQ0fPlyc2Tcv6aF2ZA7WM5TWaRZVEFaAIag==",
|
||||
"peerDependencies": {
|
||||
"zod": "^3.23.3"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@mendable/firecrawl-js",
|
||||
"version": "0.0.16",
|
||||
"version": "0.0.21",
|
||||
"description": "JavaScript SDK for Firecrawl API",
|
||||
"main": "build/index.js",
|
||||
"types": "types/index.d.ts",
|
||||
@@ -8,6 +8,7 @@
|
||||
"scripts": {
|
||||
"build": "tsc",
|
||||
"publish": "npm run build && npm publish --access public",
|
||||
"publish-beta": "npm run build && npm publish --access public --tag beta",
|
||||
"test": "jest src/**/*.test.ts"
|
||||
},
|
||||
"repository": {
|
||||
@@ -17,7 +18,9 @@
|
||||
"author": "Mendable.ai",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"axios": "^1.6.8"
|
||||
"axios": "^1.6.8",
|
||||
"zod": "^3.23.8",
|
||||
"zod-to-json-schema": "^3.23.0"
|
||||
},
|
||||
"bugs": {
|
||||
"url": "https://github.com/mendableai/firecrawl/issues"
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import axios, { AxiosResponse, AxiosRequestHeaders } from 'axios';
|
||||
|
||||
import axios, { AxiosResponse, AxiosRequestHeaders } from "axios";
|
||||
import { z } from "zod";
|
||||
import { zodToJsonSchema } from "zod-to-json-schema";
|
||||
/**
|
||||
* Configuration interface for FirecrawlApp.
|
||||
*/
|
||||
@@ -12,6 +13,11 @@ export interface FirecrawlAppConfig {
|
||||
*/
|
||||
export interface Params {
|
||||
[key: string]: any;
|
||||
extractorOptions?: {
|
||||
extractionSchema: z.ZodSchema | any;
|
||||
mode?: "llm-extraction";
|
||||
extractionPrompt?: string;
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -63,9 +69,9 @@ export default class FirecrawlApp {
|
||||
* @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
|
||||
*/
|
||||
constructor({ apiKey = null }: FirecrawlAppConfig) {
|
||||
this.apiKey = apiKey || '';
|
||||
this.apiKey = apiKey || "";
|
||||
if (!this.apiKey) {
|
||||
throw new Error('No API key provided');
|
||||
throw new Error("No API key provided");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -75,31 +81,50 @@ export default class FirecrawlApp {
|
||||
* @param {Params | null} params - Additional parameters for the scrape request.
|
||||
* @returns {Promise<ScrapeResponse>} The response from the scrape operation.
|
||||
*/
|
||||
async scrapeUrl(url: string, params: Params | null = null): Promise<ScrapeResponse> {
|
||||
async scrapeUrl(
|
||||
url: string,
|
||||
params: Params | null = null
|
||||
): Promise<ScrapeResponse> {
|
||||
const headers: AxiosRequestHeaders = {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': `Bearer ${this.apiKey}`,
|
||||
"Content-Type": "application/json",
|
||||
Authorization: `Bearer ${this.apiKey}`,
|
||||
} as AxiosRequestHeaders;
|
||||
let jsonData: Params = { url };
|
||||
if (params) {
|
||||
jsonData = { ...jsonData, ...params };
|
||||
let jsonData: Params = { url, ...params };
|
||||
if (params?.extractorOptions?.extractionSchema) {
|
||||
let schema = params.extractorOptions.extractionSchema;
|
||||
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
||||
if (schema instanceof z.ZodSchema) {
|
||||
schema = zodToJsonSchema(schema);
|
||||
}
|
||||
jsonData = {
|
||||
...jsonData,
|
||||
extractorOptions: {
|
||||
...params.extractorOptions,
|
||||
extractionSchema: schema,
|
||||
mode: params.extractorOptions.mode || "llm-extraction",
|
||||
},
|
||||
};
|
||||
}
|
||||
try {
|
||||
const response: AxiosResponse = await axios.post('https://api.firecrawl.dev/v0/scrape', jsonData, { headers });
|
||||
const response: AxiosResponse = await axios.post(
|
||||
"https://api.firecrawl.dev/v0/scrape",
|
||||
jsonData,
|
||||
{ headers },
|
||||
);
|
||||
if (response.status === 200) {
|
||||
const responseData = response.data;
|
||||
if (responseData.success) {
|
||||
return responseData;
|
||||
return responseData;
|
||||
} else {
|
||||
throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
|
||||
}
|
||||
} else {
|
||||
this.handleError(response, 'scrape URL');
|
||||
this.handleError(response, "scrape URL");
|
||||
}
|
||||
} catch (error: any) {
|
||||
throw new Error(error.message);
|
||||
}
|
||||
return { success: false, error: 'Internal server error.' };
|
||||
return { success: false, error: "Internal server error." };
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -108,31 +133,38 @@ export default class FirecrawlApp {
|
||||
* @param {Params | null} params - Additional parameters for the search request.
|
||||
* @returns {Promise<SearchResponse>} The response from the search operation.
|
||||
*/
|
||||
async search(query: string, params: Params | null = null): Promise<SearchResponse> {
|
||||
async search(
|
||||
query: string,
|
||||
params: Params | null = null
|
||||
): Promise<SearchResponse> {
|
||||
const headers: AxiosRequestHeaders = {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': `Bearer ${this.apiKey}`,
|
||||
"Content-Type": "application/json",
|
||||
Authorization: `Bearer ${this.apiKey}`,
|
||||
} as AxiosRequestHeaders;
|
||||
let jsonData: Params = { query };
|
||||
if (params) {
|
||||
jsonData = { ...jsonData, ...params };
|
||||
}
|
||||
try {
|
||||
const response: AxiosResponse = await axios.post('https://api.firecrawl.dev/v0/search', jsonData, { headers });
|
||||
const response: AxiosResponse = await axios.post(
|
||||
"https://api.firecrawl.dev/v0/search",
|
||||
jsonData,
|
||||
{ headers }
|
||||
);
|
||||
if (response.status === 200) {
|
||||
const responseData = response.data;
|
||||
if (responseData.success) {
|
||||
return responseData;
|
||||
return responseData;
|
||||
} else {
|
||||
throw new Error(`Failed to search. Error: ${responseData.error}`);
|
||||
}
|
||||
} else {
|
||||
this.handleError(response, 'search');
|
||||
this.handleError(response, "search");
|
||||
}
|
||||
} catch (error: any) {
|
||||
throw new Error(error.message);
|
||||
}
|
||||
return { success: false, error: 'Internal server error.' };
|
||||
return { success: false, error: "Internal server error." };
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -144,14 +176,24 @@ export default class FirecrawlApp {
|
||||
* @param {string} idempotencyKey - Optional idempotency key for the request.
|
||||
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
|
||||
*/
|
||||
async crawlUrl(url: string, params: Params | null = null, waitUntilDone: boolean = true, timeout: number = 2, idempotencyKey?: string): Promise<CrawlResponse | any> {
|
||||
async crawlUrl(
|
||||
url: string,
|
||||
params: Params | null = null,
|
||||
waitUntilDone: boolean = true,
|
||||
timeout: number = 2,
|
||||
idempotencyKey?: string
|
||||
): Promise<CrawlResponse | any> {
|
||||
const headers = this.prepareHeaders(idempotencyKey);
|
||||
let jsonData: Params = { url };
|
||||
if (params) {
|
||||
jsonData = { ...jsonData, ...params };
|
||||
}
|
||||
try {
|
||||
const response: AxiosResponse = await this.postRequest('https://api.firecrawl.dev/v0/crawl', jsonData, headers);
|
||||
const response: AxiosResponse = await this.postRequest(
|
||||
"https://api.firecrawl.dev/v0/crawl",
|
||||
jsonData,
|
||||
headers
|
||||
);
|
||||
if (response.status === 200) {
|
||||
const jobId: string = response.data.jobId;
|
||||
if (waitUntilDone) {
|
||||
@@ -160,13 +202,13 @@ export default class FirecrawlApp {
|
||||
return { success: true, jobId };
|
||||
}
|
||||
} else {
|
||||
this.handleError(response, 'start crawl job');
|
||||
this.handleError(response, "start crawl job");
|
||||
}
|
||||
} catch (error: any) {
|
||||
console.log(error)
|
||||
console.log(error);
|
||||
throw new Error(error.message);
|
||||
}
|
||||
return { success: false, error: 'Internal server error.' };
|
||||
return { success: false, error: "Internal server error." };
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -177,16 +219,23 @@ export default class FirecrawlApp {
|
||||
async checkCrawlStatus(jobId: string): Promise<JobStatusResponse> {
|
||||
const headers: AxiosRequestHeaders = this.prepareHeaders();
|
||||
try {
|
||||
const response: AxiosResponse = await this.getRequest(`https://api.firecrawl.dev/v0/crawl/status/${jobId}`, headers);
|
||||
const response: AxiosResponse = await this.getRequest(
|
||||
`https://api.firecrawl.dev/v0/crawl/status/${jobId}`,
|
||||
headers
|
||||
);
|
||||
if (response.status === 200) {
|
||||
return response.data;
|
||||
} else {
|
||||
this.handleError(response, 'check crawl status');
|
||||
this.handleError(response, "check crawl status");
|
||||
}
|
||||
} catch (error: any) {
|
||||
throw new Error(error.message);
|
||||
}
|
||||
return { success: false, status: 'unknown', error: 'Internal server error.' };
|
||||
return {
|
||||
success: false,
|
||||
status: "unknown",
|
||||
error: "Internal server error.",
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -208,7 +257,11 @@ export default class FirecrawlApp {
|
||||
* @param {AxiosRequestHeaders} headers - The headers for the request.
|
||||
* @returns {Promise<AxiosResponse>} The response from the POST request.
|
||||
*/
|
||||
postRequest(url: string, data: Params, headers: AxiosRequestHeaders): Promise<AxiosResponse> {
|
||||
postRequest(
|
||||
url: string,
|
||||
data: Params,
|
||||
headers: AxiosRequestHeaders
|
||||
): Promise<AxiosResponse> {
|
||||
return axios.post(url, data, { headers });
|
||||
}
|
||||
|
||||
@@ -218,7 +271,10 @@ export default class FirecrawlApp {
|
||||
* @param {AxiosRequestHeaders} headers - The headers for the request.
|
||||
* @returns {Promise<AxiosResponse>} The response from the GET request.
|
||||
*/
|
||||
getRequest(url: string, headers: AxiosRequestHeaders): Promise<AxiosResponse> {
|
||||
getRequest(
|
||||
url: string,
|
||||
headers: AxiosRequestHeaders
|
||||
): Promise<AxiosResponse> {
|
||||
return axios.get(url, { headers });
|
||||
}
|
||||
|
||||
@@ -229,27 +285,38 @@ export default class FirecrawlApp {
|
||||
* @param {number} timeout - Timeout in seconds for job status checks.
|
||||
* @returns {Promise<any>} The final job status or data.
|
||||
*/
|
||||
async monitorJobStatus(jobId: string, headers: AxiosRequestHeaders, timeout: number): Promise<any> {
|
||||
async monitorJobStatus(
|
||||
jobId: string,
|
||||
headers: AxiosRequestHeaders,
|
||||
timeout: number
|
||||
): Promise<any> {
|
||||
while (true) {
|
||||
const statusResponse: AxiosResponse = await this.getRequest(`https://api.firecrawl.dev/v0/crawl/status/${jobId}`, headers);
|
||||
const statusResponse: AxiosResponse = await this.getRequest(
|
||||
`https://api.firecrawl.dev/v0/crawl/status/${jobId}`,
|
||||
headers
|
||||
);
|
||||
if (statusResponse.status === 200) {
|
||||
const statusData = statusResponse.data;
|
||||
if (statusData.status === 'completed') {
|
||||
if ('data' in statusData) {
|
||||
if (statusData.status === "completed") {
|
||||
if ("data" in statusData) {
|
||||
return statusData.data;
|
||||
} else {
|
||||
throw new Error('Crawl job completed but no data was returned');
|
||||
throw new Error("Crawl job completed but no data was returned");
|
||||
}
|
||||
} else if (['active', 'paused', 'pending', 'queued'].includes(statusData.status)) {
|
||||
} else if (
|
||||
["active", "paused", "pending", "queued"].includes(statusData.status)
|
||||
) {
|
||||
if (timeout < 2) {
|
||||
timeout = 2;
|
||||
}
|
||||
await new Promise(resolve => setTimeout(resolve, timeout * 1000)); // Wait for the specified timeout before checking again
|
||||
await new Promise((resolve) => setTimeout(resolve, timeout * 1000)); // Wait for the specified timeout before checking again
|
||||
} else {
|
||||
throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`);
|
||||
throw new Error(
|
||||
`Crawl job failed or was stopped. Status: ${statusData.status}`
|
||||
);
|
||||
}
|
||||
} else {
|
||||
this.handleError(statusResponse, 'check crawl status');
|
||||
this.handleError(statusResponse, "check crawl status");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -260,11 +327,16 @@ export default class FirecrawlApp {
|
||||
* @param {string} action - The action being performed when the error occurred.
|
||||
*/
|
||||
handleError(response: AxiosResponse, action: string): void {
|
||||
if ([402, 409, 500].includes(response.status)) {
|
||||
const errorMessage: string = response.data.error || 'Unknown error occurred';
|
||||
throw new Error(`Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`);
|
||||
if ([402, 408, 409, 500].includes(response.status)) {
|
||||
const errorMessage: string =
|
||||
response.data.error || "Unknown error occurred";
|
||||
throw new Error(
|
||||
`Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`
|
||||
);
|
||||
} else {
|
||||
throw new Error(`Unexpected error occurred while trying to ${action}. Status code: ${response.status}`);
|
||||
throw new Error(
|
||||
`Unexpected error occurred while trying to ${action}. Status code: ${response.status}`
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
+7
-1
@@ -1,4 +1,5 @@
|
||||
import { AxiosResponse, AxiosRequestHeaders } from 'axios';
|
||||
import { AxiosResponse, AxiosRequestHeaders } from "axios";
|
||||
import { z } from "zod";
|
||||
/**
|
||||
* Configuration interface for FirecrawlApp.
|
||||
*/
|
||||
@@ -10,6 +11,11 @@ export interface FirecrawlAppConfig {
|
||||
*/
|
||||
export interface Params {
|
||||
[key: string]: any;
|
||||
extractorOptions?: {
|
||||
extractionSchema: z.ZodSchema | any;
|
||||
mode?: "llm-extraction";
|
||||
extractionPrompt?: string;
|
||||
};
|
||||
}
|
||||
/**
|
||||
* Response interface for scraping operations.
|
||||
|
||||
Reference in New Issue
Block a user