added github action workflow
This commit is contained in:
@@ -0,0 +1,3 @@
|
||||
API_URL=http://localhost:3002
|
||||
TEST_API_KEY=fc-YOUR_API_KEY
|
||||
|
||||
Generated
+37
-6
@@ -1,22 +1,25 @@
|
||||
{
|
||||
"name": "@mendable/firecrawl-js",
|
||||
"version": "0.0.17-beta.8",
|
||||
"version": "0.0.22",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "@mendable/firecrawl-js",
|
||||
"version": "0.0.17-beta.8",
|
||||
"version": "0.0.22",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"axios": "^1.6.8",
|
||||
"uuid": "^9.0.1",
|
||||
"zod": "^3.23.8",
|
||||
"zod-to-json-schema": "^3.23.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@jest/globals": "^29.7.0",
|
||||
"@types/axios": "^0.14.0",
|
||||
"@types/node": "^20.12.7",
|
||||
"@types/jest": "^29.5.12",
|
||||
"@types/node": "^20.12.12",
|
||||
"@types/uuid": "^9.0.8",
|
||||
"jest": "^29.7.0",
|
||||
"ts-jest": "^29.1.2",
|
||||
"typescript": "^5.4.5"
|
||||
@@ -1046,10 +1049,20 @@
|
||||
"@types/istanbul-lib-report": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/jest": {
|
||||
"version": "29.5.12",
|
||||
"resolved": "https://registry.npmjs.org/@types/jest/-/jest-29.5.12.tgz",
|
||||
"integrity": "sha512-eDC8bTvT/QhYdxJAulQikueigY5AsdBRH2yDKW3yveW7svY3+DzN84/2NUgkw10RTiJbWqZrTtoGVdYlvFJdLw==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"expect": "^29.0.0",
|
||||
"pretty-format": "^29.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/node": {
|
||||
"version": "20.12.7",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.7.tgz",
|
||||
"integrity": "sha512-wq0cICSkRLVaf3UGLMGItu/PtdY7oaXaI/RVU+xliKVOtRna3PRY57ZDfztpDL0n11vfymMUnXv8QwYCO7L1wg==",
|
||||
"version": "20.12.12",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.12.tgz",
|
||||
"integrity": "sha512-eWLDGF/FOSPtAvEqeRAQ4C8LSA7M1I7i0ky1I8U7kD1J5ITyW3AsRhQrKVoWf5pFKZ2kILsEGJhsI9r93PYnOw==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"undici-types": "~5.26.4"
|
||||
@@ -1061,6 +1074,12 @@
|
||||
"integrity": "sha512-9aEbYZ3TbYMznPdcdr3SmIrLXwC/AKZXQeCf9Pgao5CKb8CyHuEX5jzWPTkvregvhRJHcpRO6BFoGW9ycaOkYw==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/@types/uuid": {
|
||||
"version": "9.0.8",
|
||||
"resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz",
|
||||
"integrity": "sha512-jg+97EGIcY9AGHJJRaaPVgetKDsrTgbRjQ5Msgjh/DQKEFl0DtyRr/VCOyD1T2R1MNeWPK/u7JoGhlDZnKBAfA==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/@types/yargs": {
|
||||
"version": "17.0.32",
|
||||
"resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.32.tgz",
|
||||
@@ -3641,6 +3660,18 @@
|
||||
"browserslist": ">= 4.21.0"
|
||||
}
|
||||
},
|
||||
"node_modules/uuid": {
|
||||
"version": "9.0.1",
|
||||
"resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz",
|
||||
"integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==",
|
||||
"funding": [
|
||||
"https://github.com/sponsors/broofa",
|
||||
"https://github.com/sponsors/ctavan"
|
||||
],
|
||||
"bin": {
|
||||
"uuid": "dist/bin/uuid"
|
||||
}
|
||||
},
|
||||
"node_modules/v8-to-istanbul": {
|
||||
"version": "9.2.0",
|
||||
"resolved": "https://registry.npmjs.org/v8-to-istanbul/-/v8-to-istanbul-9.2.0.tgz",
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"build": "tsc",
|
||||
"publish": "npm run build && npm publish --access public",
|
||||
"publish-beta": "npm run build && npm publish --access public --tag beta",
|
||||
"test": "jest src/**/*.test.ts"
|
||||
"test": "jest src/__tests__/**/*.test.ts"
|
||||
},
|
||||
"repository": {
|
||||
"type": "git",
|
||||
@@ -19,6 +19,7 @@
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"axios": "^1.6.8",
|
||||
"uuid": "^9.0.1",
|
||||
"zod": "^3.23.8",
|
||||
"zod-to-json-schema": "^3.23.0"
|
||||
},
|
||||
@@ -29,7 +30,9 @@
|
||||
"devDependencies": {
|
||||
"@jest/globals": "^29.7.0",
|
||||
"@types/axios": "^0.14.0",
|
||||
"@types/node": "^20.12.7",
|
||||
"@types/jest": "^29.5.12",
|
||||
"@types/node": "^20.12.12",
|
||||
"@types/uuid": "^9.0.8",
|
||||
"jest": "^29.7.0",
|
||||
"ts-jest": "^29.1.2",
|
||||
"typescript": "^5.4.5"
|
||||
|
||||
@@ -0,0 +1,147 @@
|
||||
import FirecrawlApp from '../../index';
|
||||
import { v4 as uuidv4 } from 'uuid';
|
||||
import dotenv from 'dotenv';
|
||||
|
||||
dotenv.config();
|
||||
|
||||
const TEST_API_KEY = process.env.TEST_API_KEY;
|
||||
const API_URL = process.env.API_URL;
|
||||
|
||||
describe('FirecrawlApp E2E Tests', () => {
|
||||
test('should throw error for no API key', () => {
|
||||
expect(() => {
|
||||
new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
|
||||
}).toThrow("No API key provided");
|
||||
});
|
||||
|
||||
test('should throw error for invalid API key on scrape', async () => {
|
||||
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
||||
await expect(invalidApp.scrapeUrl('https://firecrawl.dev')).rejects.toThrow("Request failed with status code 401");
|
||||
});
|
||||
|
||||
test('should throw error for blocklisted URL on scrape', async () => {
|
||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||
const blocklistedUrl = "https://facebook.com/fake-test";
|
||||
await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
|
||||
});
|
||||
|
||||
test('should return successful response with valid preview token', async () => {
|
||||
const app = new FirecrawlApp({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL });
|
||||
const response = await app.scrapeUrl('https://firecrawl.dev');
|
||||
expect(response).not.toBeNull();
|
||||
expect(response.data.content).toContain("🔥 Firecrawl");
|
||||
}, 10000); // 10 seconds timeout
|
||||
|
||||
test('should return successful response for valid scrape', async () => {
|
||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||
const response = await app.scrapeUrl('https://firecrawl.dev');
|
||||
expect(response).not.toBeNull();
|
||||
expect(response.data.content).toContain("🔥 Firecrawl");
|
||||
expect(response.data).toHaveProperty('markdown');
|
||||
expect(response.data).toHaveProperty('metadata');
|
||||
expect(response.data).not.toHaveProperty('html');
|
||||
}, 10000); // 10 seconds timeout
|
||||
|
||||
test('should return successful response with valid API key and include HTML', async () => {
|
||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||
const response = await app.scrapeUrl('https://firecrawl.dev', { pageOptions: { includeHtml: true } });
|
||||
expect(response).not.toBeNull();
|
||||
expect(response.data.content).toContain("🔥 Firecrawl");
|
||||
expect(response.data.markdown).toContain("🔥 Firecrawl");
|
||||
expect(response.data.html).toContain("<h1");
|
||||
}, 10000); // 10 seconds timeout
|
||||
|
||||
test('should return successful response for valid scrape with PDF file', async () => {
|
||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf');
|
||||
expect(response).not.toBeNull();
|
||||
expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
||||
}, 30000); // 30 seconds timeout
|
||||
|
||||
test('should return successful response for valid scrape with PDF file without explicit extension', async () => {
|
||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001');
|
||||
await new Promise(resolve => setTimeout(resolve, 6000)); // wait for 6 seconds
|
||||
expect(response).not.toBeNull();
|
||||
expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
||||
}, 30000); // 30 seconds timeout
|
||||
|
||||
test('should throw error for invalid API key on crawl', async () => {
|
||||
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
||||
await expect(invalidApp.crawlUrl('https://firecrawl.dev')).rejects.toThrow("Request failed with status code 401");
|
||||
});
|
||||
|
||||
test('should throw error for blocklisted URL on crawl', async () => {
|
||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||
const blocklistedUrl = "https://twitter.com/fake-test";
|
||||
await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
|
||||
});
|
||||
|
||||
test('should return successful response for crawl and wait for completion', async () => {
|
||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||
const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true);
|
||||
expect(response).not.toBeNull();
|
||||
expect(response[0].content).toContain("🔥 Firecrawl");
|
||||
}, 60000); // 60 seconds timeout
|
||||
|
||||
test('should handle idempotency key for crawl', async () => {
|
||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||
const uniqueIdempotencyKey = uuidv4();
|
||||
const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey);
|
||||
expect(response).not.toBeNull();
|
||||
expect(response[0].content).toContain("🔥 Firecrawl");
|
||||
|
||||
await expect(app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
|
||||
}, 30000); // 30 seconds timeout
|
||||
|
||||
test('should check crawl status', async () => {
|
||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||
const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, false);
|
||||
expect(response).not.toBeNull();
|
||||
expect(response.jobId).toBeDefined();
|
||||
|
||||
await new Promise(resolve => setTimeout(resolve, 10000)); // wait for 10 seconds
|
||||
const statusResponse = await app.checkCrawlStatus(response.jobId);
|
||||
expect(statusResponse).not.toBeNull();
|
||||
expect(statusResponse.status).toBe('completed');
|
||||
expect(statusResponse.data.length).toBeGreaterThan(0);
|
||||
}, 30000); // 30 seconds timeout
|
||||
|
||||
test('should return successful response for search', async () => {
|
||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||
const response = await app.search("test query");
|
||||
expect(response).not.toBeNull();
|
||||
expect(response.data[0].content).toBeDefined();
|
||||
expect(response.data.length).toBeGreaterThan(2);
|
||||
}, 30000); // 30 seconds timeout
|
||||
|
||||
test('should throw error for invalid API key on search', async () => {
|
||||
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
||||
await expect(invalidApp.search("test query")).rejects.toThrow("Request failed with status code 401");
|
||||
});
|
||||
|
||||
test('should perform LLM extraction', async () => {
|
||||
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
||||
const response = await app.scrapeUrl("https://mendable.ai", {
|
||||
extractorOptions: {
|
||||
mode: 'llm-extraction',
|
||||
extractionPrompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
|
||||
extractionSchema: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
company_mission: { type: 'string' },
|
||||
supports_sso: { type: 'boolean' },
|
||||
is_open_source: { type: 'boolean' }
|
||||
},
|
||||
required: ['company_mission', 'supports_sso', 'is_open_source']
|
||||
}
|
||||
}
|
||||
});
|
||||
expect(response).not.toBeNull();
|
||||
expect(response.data.llm_extraction).toBeDefined();
|
||||
const llmExtraction = response.data.llm_extraction;
|
||||
expect(llmExtraction.company_mission).toBeDefined();
|
||||
expect(typeof llmExtraction.supports_sso).toBe('boolean');
|
||||
expect(typeof llmExtraction.is_open_source).toBe('boolean');
|
||||
}, 30000); // 30 seconds timeout
|
||||
});
|
||||
@@ -6,6 +6,7 @@ import { zodToJsonSchema } from "zod-to-json-schema";
|
||||
*/
|
||||
export interface FirecrawlAppConfig {
|
||||
apiKey?: string | null;
|
||||
apiUrl?: string | null;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -63,6 +64,7 @@ export interface JobStatusResponse {
|
||||
*/
|
||||
export default class FirecrawlApp {
|
||||
private apiKey: string;
|
||||
private apiUrl: string = "https://api.firecrawl.dev";
|
||||
|
||||
/**
|
||||
* Initializes a new instance of the FirecrawlApp class.
|
||||
@@ -107,7 +109,7 @@ export default class FirecrawlApp {
|
||||
}
|
||||
try {
|
||||
const response: AxiosResponse = await axios.post(
|
||||
"https://api.firecrawl.dev/v0/scrape",
|
||||
this.apiUrl + "/v0/scrape",
|
||||
jsonData,
|
||||
{ headers },
|
||||
);
|
||||
@@ -147,7 +149,7 @@ export default class FirecrawlApp {
|
||||
}
|
||||
try {
|
||||
const response: AxiosResponse = await axios.post(
|
||||
"https://api.firecrawl.dev/v0/search",
|
||||
this.apiUrl + "/v0/search",
|
||||
jsonData,
|
||||
{ headers }
|
||||
);
|
||||
@@ -190,7 +192,7 @@ export default class FirecrawlApp {
|
||||
}
|
||||
try {
|
||||
const response: AxiosResponse = await this.postRequest(
|
||||
"https://api.firecrawl.dev/v0/crawl",
|
||||
this.apiUrl + "/v0/crawl",
|
||||
jsonData,
|
||||
headers
|
||||
);
|
||||
@@ -220,7 +222,7 @@ export default class FirecrawlApp {
|
||||
const headers: AxiosRequestHeaders = this.prepareHeaders();
|
||||
try {
|
||||
const response: AxiosResponse = await this.getRequest(
|
||||
`https://api.firecrawl.dev/v0/crawl/status/${jobId}`,
|
||||
this.apiUrl + `/v0/crawl/status/${jobId}`,
|
||||
headers
|
||||
);
|
||||
if (response.status === 200) {
|
||||
@@ -292,7 +294,7 @@ export default class FirecrawlApp {
|
||||
): Promise<any> {
|
||||
while (true) {
|
||||
const statusResponse: AxiosResponse = await this.getRequest(
|
||||
`https://api.firecrawl.dev/v0/crawl/status/${jobId}`,
|
||||
this.apiUrl + `/v0/crawl/status/${jobId}`,
|
||||
headers
|
||||
);
|
||||
if (statusResponse.status === 200) {
|
||||
|
||||
Reference in New Issue
Block a user