feat(js-sdk): add crawlUrlAndWatch
This commit is contained in:
@@ -29,5 +29,21 @@ if (job.data) {
|
|||||||
console.log(job.data[0].markdown);
|
console.log(job.data[0].markdown);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Map a website:
|
||||||
const mapResult = await app.map('https://firecrawl.dev');
|
const mapResult = await app.map('https://firecrawl.dev');
|
||||||
console.log(mapResult)
|
console.log(mapResult)
|
||||||
|
|
||||||
|
// Crawl a website with WebSockets:
|
||||||
|
const watch = await app.crawlUrlAndWatch('mendable.ai', { excludePaths: ['blog/*'], limit: 5});
|
||||||
|
|
||||||
|
watch.addEventListener("document", doc => {
|
||||||
|
console.log("DOC", doc.detail);
|
||||||
|
});
|
||||||
|
|
||||||
|
watch.addEventListener("error", err => {
|
||||||
|
console.error("ERR", err.detail.error);
|
||||||
|
});
|
||||||
|
|
||||||
|
watch.addEventListener("done", state => {
|
||||||
|
console.log("DONE", state.detail.status);
|
||||||
|
});
|
||||||
|
|||||||
@@ -32,8 +32,24 @@ const main = async () => {
|
|||||||
console.log(checkStatus.data[0].markdown);
|
console.log(checkStatus.data[0].markdown);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Map a website:
|
||||||
const mapResult = await app.mapUrl('https://firecrawl.dev');
|
const mapResult = await app.mapUrl('https://firecrawl.dev');
|
||||||
console.log(mapResult)
|
console.log(mapResult)
|
||||||
|
|
||||||
|
// Crawl a website with WebSockets:
|
||||||
|
const watch = await app.crawlUrlAndWatch('mendable.ai', { excludePaths: ['blog/*'], limit: 5});
|
||||||
|
|
||||||
|
watch.addEventListener("document", doc => {
|
||||||
|
console.log("DOC", doc.detail);
|
||||||
|
});
|
||||||
|
|
||||||
|
watch.addEventListener("error", err => {
|
||||||
|
console.error("ERR", err.detail.error);
|
||||||
|
});
|
||||||
|
|
||||||
|
watch.addEventListener("done", state => {
|
||||||
|
console.log("DONE", state.detail.status);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
main()
|
main()
|
||||||
Generated
+44
-2
@@ -1,16 +1,18 @@
|
|||||||
{
|
{
|
||||||
"name": "@mendable/firecrawl-js",
|
"name": "@mendable/firecrawl-js",
|
||||||
"version": "0.0.36",
|
"version": "1.0.3",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "@mendable/firecrawl-js",
|
"name": "@mendable/firecrawl-js",
|
||||||
"version": "0.0.36",
|
"version": "1.0.3",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"axios": "^1.6.8",
|
"axios": "^1.6.8",
|
||||||
"dotenv": "^16.4.5",
|
"dotenv": "^16.4.5",
|
||||||
|
"isows": "^1.0.4",
|
||||||
|
"typescript-event-target": "^1.1.1",
|
||||||
"uuid": "^9.0.1",
|
"uuid": "^9.0.1",
|
||||||
"zod": "^3.23.8",
|
"zod": "^3.23.8",
|
||||||
"zod-to-json-schema": "^3.23.0"
|
"zod-to-json-schema": "^3.23.0"
|
||||||
@@ -2137,6 +2139,20 @@
|
|||||||
"integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==",
|
"integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
|
"node_modules/isows": {
|
||||||
|
"version": "1.0.4",
|
||||||
|
"resolved": "https://registry.npmjs.org/isows/-/isows-1.0.4.tgz",
|
||||||
|
"integrity": "sha512-hEzjY+x9u9hPmBom9IIAqdJCwNLax+xrPb51vEPpERoFlIxgmZcHzsT5jKG06nvInKOBGvReAVz80Umed5CczQ==",
|
||||||
|
"funding": [
|
||||||
|
{
|
||||||
|
"type": "github",
|
||||||
|
"url": "https://github.com/sponsors/wagmi-dev"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"peerDependencies": {
|
||||||
|
"ws": "*"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/istanbul-lib-coverage": {
|
"node_modules/istanbul-lib-coverage": {
|
||||||
"version": "3.2.2",
|
"version": "3.2.2",
|
||||||
"resolved": "https://registry.npmjs.org/istanbul-lib-coverage/-/istanbul-lib-coverage-3.2.2.tgz",
|
"resolved": "https://registry.npmjs.org/istanbul-lib-coverage/-/istanbul-lib-coverage-3.2.2.tgz",
|
||||||
@@ -3733,6 +3749,11 @@
|
|||||||
"node": ">=14.17"
|
"node": ">=14.17"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/typescript-event-target": {
|
||||||
|
"version": "1.1.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/typescript-event-target/-/typescript-event-target-1.1.1.tgz",
|
||||||
|
"integrity": "sha512-dFSOFBKV6uwaloBCCUhxlD3Pr/P1a/tJdcmPrTXCHlEFD3faj0mztjcGn6VBAhQ0/Bdy8K3VWrrqwbt/ffsYsg=="
|
||||||
|
},
|
||||||
"node_modules/undici-types": {
|
"node_modules/undici-types": {
|
||||||
"version": "5.26.5",
|
"version": "5.26.5",
|
||||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
|
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
|
||||||
@@ -3855,6 +3876,27 @@
|
|||||||
"node": "^12.13.0 || ^14.15.0 || >=16.0.0"
|
"node": "^12.13.0 || ^14.15.0 || >=16.0.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/ws": {
|
||||||
|
"version": "8.18.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/ws/-/ws-8.18.0.tgz",
|
||||||
|
"integrity": "sha512-8VbfWfHLbbwu3+N6OKsOMpBdT4kXPDDB9cJk2bJ6mh9ucxdlnNvH1e+roYkKmN9Nxw2yjz7VzeO9oOz2zJ04Pw==",
|
||||||
|
"peer": true,
|
||||||
|
"engines": {
|
||||||
|
"node": ">=10.0.0"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"bufferutil": "^4.0.1",
|
||||||
|
"utf-8-validate": ">=5.0.2"
|
||||||
|
},
|
||||||
|
"peerDependenciesMeta": {
|
||||||
|
"bufferutil": {
|
||||||
|
"optional": true
|
||||||
|
},
|
||||||
|
"utf-8-validate": {
|
||||||
|
"optional": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/y18n": {
|
"node_modules/y18n": {
|
||||||
"version": "5.0.8",
|
"version": "5.0.8",
|
||||||
"resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz",
|
"resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@mendable/firecrawl-js",
|
"name": "@mendable/firecrawl-js",
|
||||||
"version": "1.0.3",
|
"version": "1.0.4",
|
||||||
"description": "JavaScript SDK for Firecrawl API",
|
"description": "JavaScript SDK for Firecrawl API",
|
||||||
"main": "build/cjs/index.js",
|
"main": "build/cjs/index.js",
|
||||||
"types": "types/index.d.ts",
|
"types": "types/index.d.ts",
|
||||||
@@ -30,6 +30,8 @@
|
|||||||
"dependencies": {
|
"dependencies": {
|
||||||
"axios": "^1.6.8",
|
"axios": "^1.6.8",
|
||||||
"dotenv": "^16.4.5",
|
"dotenv": "^16.4.5",
|
||||||
|
"isows": "^1.0.4",
|
||||||
|
"typescript-event-target": "^1.1.1",
|
||||||
"uuid": "^9.0.1",
|
"uuid": "^9.0.1",
|
||||||
"zod": "^3.23.8",
|
"zod": "^3.23.8",
|
||||||
"zod-to-json-schema": "^3.23.0"
|
"zod-to-json-schema": "^3.23.0"
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
import axios, { AxiosResponse, AxiosRequestHeaders } from "axios";
|
import axios, { AxiosResponse, AxiosRequestHeaders } from "axios";
|
||||||
import { z } from "zod";
|
import { z } from "zod";
|
||||||
import { zodToJsonSchema } from "zod-to-json-schema";
|
import { zodToJsonSchema } from "zod-to-json-schema";
|
||||||
|
import { WebSocket } from "isows";
|
||||||
|
import { TypedEventTarget } from "typescript-event-target";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Configuration interface for FirecrawlApp.
|
* Configuration interface for FirecrawlApp.
|
||||||
@@ -315,8 +317,8 @@ export interface SearchResponseV0 {
|
|||||||
* Provides methods for scraping, searching, crawling, and mapping web content.
|
* Provides methods for scraping, searching, crawling, and mapping web content.
|
||||||
*/
|
*/
|
||||||
export default class FirecrawlApp<T extends "v0" | "v1"> {
|
export default class FirecrawlApp<T extends "v0" | "v1"> {
|
||||||
private apiKey: string;
|
public apiKey: string;
|
||||||
private apiUrl: string;
|
public apiUrl: string;
|
||||||
public version: T;
|
public version: T;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -561,6 +563,21 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|||||||
} as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse);
|
} as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Starts a crawl job and returns a watcher that reports its progress
 * through events delivered over a WebSocket connection.
 *
 * @param url - The URL to crawl.
 * @param params - Optional crawl parameters, forwarded unchanged to `crawlUrl`.
 * @param idempotencyKey - Optional idempotency key, forwarded unchanged to `crawlUrl`.
 * @returns A `CrawlWatcher` bound to the id of the crawl job that was started.
 * @throws Error when this client targets the v0 API, or when the crawl
 *   response does not carry a job id.
 */
async crawlUrlAndWatch(
  url: string,
  params?: this['version'] extends 'v0' ? CrawlParamsV0 : CrawlParams,
  idempotencyKey?: string,
) {
  if (this.version === 'v0') {
    throw new Error("crawlUrlAndWatch is only available on v1");
  }

  // NOTE(review): the `false, 0` arguments presumably tell crawlUrl to return
  // right after the job is created instead of polling it — confirm against
  // crawlUrl's signature.
  const crawl = await this.crawlUrl(url, params, false, 0, idempotencyKey);

  // v0 was rejected above, so only the v1 response shape can reach this point.
  const id = (crawl as CrawlResponse).id;

  // Without an id the watcher would connect to ".../v1/crawl/undefined";
  // fail loudly here instead of handing back a watcher that can never work.
  if (typeof id !== 'string' || id.length === 0) {
    throw new Error("Crawl job failed to start: no job id was returned");
  }

  return new CrawlWatcher(id, this as FirecrawlApp<"v1">);
}
|
||||||
|
|
||||||
async mapUrl(url: string, params?: MapParams): Promise<MapResponse> {
|
async mapUrl(url: string, params?: MapParams): Promise<MapResponse> {
|
||||||
if (this.version == 'v0') {
|
if (this.version == 'v0') {
|
||||||
throw new Error("Map is not supported in v0");
|
throw new Error("Map is not supported in v0");
|
||||||
@@ -696,3 +713,111 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Maps each event name a `CrawlWatcher` can dispatch to the event object
 * its listeners receive.
 */
interface CrawlWatcherEvents {
  /** A single crawled document; `detail` is the document itself. */
  document: CustomEvent<FirecrawlDocument>;

  /** The crawl finished; `detail` carries the watcher's status and documents. */
  done: CustomEvent<{
    status: CrawlStatusResponse["status"];
    data: FirecrawlDocument[];
  }>;

  /** The crawl or the connection failed; `detail.error` describes the failure. */
  error: CustomEvent<{
    status: CrawlStatusResponse["status"];
    data: FirecrawlDocument[];
    error: string;
  }>;
}
|
||||||
|
|
||||||
|
/**
 * Follows a v1 crawl job over a WebSocket and re-emits its progress as typed
 * events:
 *
 * - `document` — once per crawled document (`detail` is the document);
 * - `done` — when the server reports completion (`detail` holds the final
 *   status and every document collected so far);
 * - `error` — when the server reports a failure or the socket itself errors.
 *
 * The `data` and `status` properties mirror the most recent state seen on the
 * connection.
 */
export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
  private ws: WebSocket;
  /** Documents received so far, from both catch-up and live messages. */
  public data: FirecrawlDocument[];
  /** Latest crawl status known to this watcher. */
  public status: CrawlStatusResponse["status"];

  /**
   * Opens the WebSocket for the given crawl job and wires up its handlers.
   *
   * @param id - Id of the crawl job to watch.
   * @param app - Client whose `apiUrl` and `apiKey` are used for the connection.
   */
  constructor(id: string, app: FirecrawlApp<"v1">) {
    super();

    // WebSocket endpoints use ws:// / wss://. `apiUrl` is an http(s) URL, so
    // swap the scheme ("http" -> "ws", "https" -> "wss"); URLs that already
    // use a ws scheme are left untouched.
    const wsUrl = app.apiUrl.replace(/^http/, "ws");
    // The API key is passed as the WebSocket sub-protocol argument.
    this.ws = new WebSocket(`${wsUrl}/v1/crawl/${id}`, app.apiKey);
    this.status = "scraping";
    this.data = [];

    type ErrorMessage = {
      type: "error";
      error: string;
    };

    type CatchupMessage = {
      type: "catchup";
      data: CrawlStatusResponse;
    };

    type DocumentMessage = {
      type: "document";
      data: FirecrawlDocument;
    };

    type DoneMessage = { type: "done" };

    type Message = ErrorMessage | CatchupMessage | DoneMessage | DocumentMessage;

    // Parses a raw text payload into a protocol message. Returns undefined
    // when the text is not JSON or lacks a string `type` tag, so that callers
    // never have to deal with a SyntaxError thrown inside an event handler.
    const parseMessage = (raw: string): Message | undefined => {
      let parsed: unknown;
      try {
        parsed = JSON.parse(raw);
      } catch {
        return undefined;
      }
      if (
        typeof parsed !== "object" ||
        parsed === null ||
        typeof (parsed as { type?: unknown }).type !== "string"
      ) {
        return undefined;
      }
      return parsed as Message;
    };

    const emitDocument = (doc: FirecrawlDocument): void => {
      this.dispatchTypedEvent("document", new CustomEvent("document", {
        detail: doc,
      }));
    };

    const messageHandler = (msg: Message): void => {
      switch (msg.type) {
        case "done":
          this.status = "completed";
          this.dispatchTypedEvent("done", new CustomEvent("done", {
            detail: {
              status: this.status,
              data: this.data,
            },
          }));
          break;

        case "error":
          this.status = "failed";
          this.dispatchTypedEvent("error", new CustomEvent("error", {
            detail: {
              status: this.status,
              data: this.data,
              error: msg.error,
            },
          }));
          break;

        case "catchup": {
          this.status = msg.data.status;
          const caughtUp = msg.data.data ?? [];
          this.data.push(...caughtUp);
          // Announce only the documents delivered by this message, so that
          // documents already reported are not dispatched a second time.
          for (const doc of caughtUp) {
            emitDocument(doc);
          }
          break;
        }

        case "document":
          // Record the document so that `data` (and the `done` payload)
          // includes documents streamed live, not only the catch-up batch.
          this.data.push(msg.data);
          emitDocument(msg.data);
          break;
      }
    };

    this.ws.onmessage = (ev: MessageEvent) => {
      // Only text frames are expected; anything else ends the connection.
      if (typeof ev.data !== "string") {
        this.ws.close();
        return;
      }

      const msg = parseMessage(ev.data);
      if (msg === undefined) {
        // A text frame that is not a valid message is handled like a
        // non-text frame: stop listening rather than throw from the handler.
        this.ws.close();
        return;
      }
      messageHandler(msg);
    };

    this.ws.onclose = (ev: CloseEvent) => {
      // An ordinary close has an empty reason; only a reason that parses as
      // a protocol message carries a final state to process.
      if (!ev.reason) {
        return;
      }
      const msg = parseMessage(ev.reason);
      if (msg !== undefined) {
        messageHandler(msg);
      }
    };

    this.ws.onerror = (_: Event) => {
      this.status = "failed";
      this.dispatchTypedEvent("error", new CustomEvent("error", {
        detail: {
          status: this.status,
          data: this.data,
          error: "WebSocket error",
        },
      }));
    };
  }

  /** Closes the underlying WebSocket connection. */
  close() {
    this.ws.close();
  }
}
|
||||||
|
|||||||
Reference in New Issue
Block a user