This commit is contained in:
rafaelmmiller
2024-11-14 15:51:27 -03:00
parent a1c018fdb0
commit 80d6cb16fb
7 changed files with 167 additions and 7 deletions
+13
View File
@@ -1,4 +1,5 @@
import FirecrawlApp from 'firecrawl';
import { z } from 'zod';
const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY"});
@@ -42,6 +43,18 @@ const main = async () => {
const mapResult = await app.mapUrl('https://firecrawl.dev');
console.log(mapResult)
// Extract information from a website using LLM:
const extractSchema = z.object({
title: z.string(),
description: z.string(),
links: z.array(z.string())
});
const extractResult = await app.extractUrls(['https://firecrawl.dev'], {
prompt: "Extract the title, description, and links from the website",
schema: extractSchema
});
console.log(extractResult);
// Crawl a website with WebSockets:
const watch = await app.crawlUrlAndWatch('mendable.ai', { excludePaths: ['blog/*'], limit: 5});
+13
View File
@@ -42,6 +42,19 @@ const main = async () => {
const mapResult = await app.mapUrl('https://firecrawl.dev');
console.log(mapResult)
// // Extract information from a website using LLM:
// const extractSchema = z.object({
// title: z.string(),
// description: z.string(),
// links: z.array(z.string())
// });
// const extractResult = await app.extractUrls(['https://firecrawl.dev'], {
// prompt: "Extract the title, description, and links from the website",
// schema: extractSchema
// });
// console.log(extractResult);
// Crawl a website with WebSockets:
const watch = await app.crawlUrlAndWatch('mendable.ai', { excludePaths: ['blog/*'], limit: 5});
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "@mendable/firecrawl-js",
"version": "1.8.2",
"version": "1.9.0",
"description": "JavaScript SDK for Firecrawl API",
"main": "dist/index.js",
"types": "dist/index.d.ts",
+58 -1
View File
@@ -234,6 +234,26 @@ export interface MapResponse {
error?: string;
}
/**
* Options accepted by `extractUrls` alongside the list of URLs.
* Every field is forwarded in the extract request body; `schema` is sent in
* converted form (see below).
*/
export interface ExtractParams {
// Instruction describing what to extract. `extractUrls` throws a
// FirecrawlError (status 400) when this is missing or empty.
prompt: string;
// Optional Zod schema for the extracted data. `extractUrls` converts it with
// `zodToJsonSchema` and sends the converted value instead of the Zod object.
schema?: zt.ZodSchema;
// Optional; passed through to the API unchanged.
// NOTE(review): presumably overrides the LLM system prompt — confirm against the API docs.
systemPrompt?: string;
}
/**
 * Successful response from an extract request.
 *
 * @typeParam T - Zod schema describing the shape of `data`. Defaults to
 *   `zt.ZodSchema`, which keeps `data` as loosely typed as it was before the
 *   type parameter existed, so existing unparameterized uses are unaffected.
 *   Callers that know their schema can write `ExtractResponse<typeof schema>`
 *   to get a typed `data`.
 */
export interface ExtractResponse<T extends zt.ZodSchema = zt.ZodSchema> {
  /** Literal `true`: discriminates this shape from a response with `success: false`. */
  success: true;
  /** Extracted payload, typed as the output type of `T`. */
  data: zt.infer<T>;
  /** Optional error text. NOTE(review): unclear when the API sets this on a success response — confirm. */
  error?: string;
}
/**
* Error response interface.
* Defines the structure of the response received when an error occurs.
@@ -243,7 +263,6 @@ export interface ErrorResponse {
error: string;
}
/**
* Custom error class for Firecrawl.
* Extends the built-in Error class to include a status code.
@@ -675,6 +694,44 @@ export default class FirecrawlApp {
return { success: false, error: "Internal server error." };
}
/**
 * Extracts information from one or more URLs using the Firecrawl API.
 *
 * Posts `urls` together with the given params to `/v1/extract`. When
 * `params.schema` is supplied it is converted with `zodToJsonSchema`, and the
 * converted value is sent in place of the Zod object.
 *
 * @param urls - The URLs to extract information from.
 * @param params - Extract options. Optional in the signature, but a non-empty
 *   `prompt` is required at runtime.
 * @returns The response body when the API answers with HTTP 200. If
 *   `handleError` returns without throwing, a generic
 *   `{ success: false, error: "Internal server error." }` is returned.
 * @throws FirecrawlError with status 400 when the prompt is missing or the
 *   schema cannot be converted. A FirecrawlError raised while handling the
 *   response is propagated unchanged; any other failure is wrapped in a
 *   FirecrawlError with status 500.
 */
async extractUrls(urls: string[], params?: ExtractParams): Promise<ExtractResponse | ErrorResponse> {
  const headers = this.prepareHeaders();

  if (!params?.prompt) {
    throw new FirecrawlError("Prompt is required", 400);
  }

  // Convert the Zod schema up front so an invalid schema is reported as a
  // client error (400) before any request is made.
  let jsonSchema: unknown;
  try {
    jsonSchema = params.schema ? zodToJsonSchema(params.schema) : undefined;
  } catch {
    throw new FirecrawlError("Invalid schema. Use a valid Zod schema.", 400);
  }

  try {
    const response: AxiosResponse = await this.postRequest(
      this.apiUrl + `/v1/extract`,
      // `schema` comes last so the converted schema replaces the raw Zod object.
      { urls, ...params, schema: jsonSchema },
      headers
    );
    if (response.status === 200) {
      return response.data as ExtractResponse;
    } else {
      this.handleError(response, "extract");
    }
  } catch (error: unknown) {
    // Keep the status code of errors that are already FirecrawlErrors
    // (e.g. if handleError throws one) instead of flattening them to 500.
    if (error instanceof FirecrawlError) {
      throw error;
    }
    const message = error instanceof Error ? error.message : String(error);
    throw new FirecrawlError(message, 500);
  }
  return { success: false, error: "Internal server error." };
}
/**
* Prepares the headers for an API request.
* @param idempotencyKey - Optional key to ensure idempotency.