From 00335e2ba9a827db9964a9b82c5feaa990633533 Mon Sep 17 00:00:00 2001
From: Nicolas This page is used for end-to-end (e2e) testing with Firecrawl. This page is used for end-to-end (e2e) testing with Firecrawl. {
+ const response = await request(TEST_URL)
+ .post("/v0/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({
+ url: "https://roastmywebsite.ai",
+ pageOptions: { includeHtml: true }
+ });
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("data");
+ expect(response.body.data).toHaveProperty("content");
+ expect(response.body.data).toHaveProperty("markdown");
+ expect(response.body.data).toHaveProperty("html");
+ expect(response.body.data).toHaveProperty("metadata");
+ expect(response.body.data.content).toContain("_Roast_");
+ expect(response.body.data.markdown).toContain("_Roast_");
+ expect(response.body.data.html).toContain("
{
- const response = await request(TEST_URL)
- .post("/v0/scrape")
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
- .set("Content-Type", "application/json")
- .send({
- url: "https://roastmywebsite.ai",
- pageOptions: { includeRawHtml: true },
- });
- expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty("data");
- expect(response.body.data).toHaveProperty("content");
- expect(response.body.data).toHaveProperty("markdown");
- expect(response.body.data).toHaveProperty("rawHtml");
- expect(response.body.data).toHaveProperty("metadata");
- expect(response.body.data.content).toContain("_Roast_");
- expect(response.body.data.markdown).toContain("_Roast_");
- expect(response.body.data.rawHtml).toContain("
{
- const response = await request(TEST_URL)
- .post('/v0/scrape')
- .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
- .set('Content-Type', 'application/json')
- .send({ url: 'https://arxiv.org/pdf/astro-ph/9301001.pdf' });
- await new Promise((r) => setTimeout(r, 6000));
+ it.concurrent(
+ "should return a successful response with a valid API key and includeRawHtml set to true",
+ async () => {
+ const response = await request(TEST_URL)
+ .post("/v0/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({
+ url: "https://roastmywebsite.ai",
+ pageOptions: { includeRawHtml: true }
+ });
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("data");
+ expect(response.body.data).toHaveProperty("content");
+ expect(response.body.data).toHaveProperty("markdown");
+ expect(response.body.data).toHaveProperty("rawHtml");
+ expect(response.body.data).toHaveProperty("metadata");
+ expect(response.body.data.content).toContain("_Roast_");
+ expect(response.body.data.markdown).toContain("_Roast_");
+ expect(response.body.data.rawHtml).toContain("
{
- const response = await request(TEST_URL)
- .post('/v0/scrape')
- .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
- .set('Content-Type', 'application/json')
- .send({ url: 'https://arxiv.org/pdf/astro-ph/9301001' });
- await new Promise((r) => setTimeout(r, 6000));
+ it.concurrent(
+ "should return a successful response for a valid scrape with PDF file",
+ async () => {
+ const response = await request(TEST_URL)
+ .post("/v0/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({ url: "https://arxiv.org/pdf/astro-ph/9301001.pdf" });
+ await new Promise((r) => setTimeout(r, 6000));
- expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty('data');
- expect(response.body.data).toHaveProperty('content');
- expect(response.body.data).toHaveProperty('metadata');
- expect(response.body.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
- expect(response.body.data.metadata.pageStatusCode).toBe(200);
- expect(response.body.data.metadata.pageError).toBeUndefined();
- }, 60000); // 60 seconds
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("data");
+ expect(response.body.data).toHaveProperty("content");
+ expect(response.body.data).toHaveProperty("metadata");
+ expect(response.body.data.content).toContain(
+ "We present spectrophotometric observations of the Broad Line Radio Galaxy"
+ );
+ expect(response.body.data.metadata.pageStatusCode).toBe(200);
+ expect(response.body.data.metadata.pageError).toBeUndefined();
+ },
+ 60000
+ ); // 60 seconds
- it.concurrent('should return a successful response for a valid scrape with PDF file and parsePDF set to false', async () => {
- const response = await request(TEST_URL)
- .post('/v0/scrape')
- .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
- .set('Content-Type', 'application/json')
- .send({ url: 'https://arxiv.org/pdf/astro-ph/9301001.pdf', pageOptions: { parsePDF: false } });
- await new Promise((r) => setTimeout(r, 6000));
+ it.concurrent(
+ "should return a successful response for a valid scrape with PDF file without explicit .pdf extension",
+ async () => {
+ const response = await request(TEST_URL)
+ .post("/v0/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({ url: "https://arxiv.org/pdf/astro-ph/9301001" });
+ await new Promise((r) => setTimeout(r, 6000));
- expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty('data');
- expect(response.body.data).toHaveProperty('content');
- expect(response.body.data).toHaveProperty('metadata');
- expect(response.body.data.content).toContain('/Title(arXiv:astro-ph/9301001v1 7 Jan 1993)>>endobj');
- }, 60000); // 60 seconds
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("data");
+ expect(response.body.data).toHaveProperty("content");
+ expect(response.body.data).toHaveProperty("metadata");
+ expect(response.body.data.content).toContain(
+ "We present spectrophotometric observations of the Broad Line Radio Galaxy"
+ );
+ expect(response.body.data.metadata.pageStatusCode).toBe(200);
+ expect(response.body.data.metadata.pageError).toBeUndefined();
+ },
+ 60000
+ ); // 60 seconds
- it.concurrent("should return a successful response with a valid API key with removeTags option", async () => {
- const responseWithoutRemoveTags = await request(TEST_URL)
- .post("/v0/scrape")
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
- .set("Content-Type", "application/json")
- .send({ url: "https://www.scrapethissite.com/" });
- expect(responseWithoutRemoveTags.statusCode).toBe(200);
- expect(responseWithoutRemoveTags.body).toHaveProperty("data");
- expect(responseWithoutRemoveTags.body.data).toHaveProperty("content");
- expect(responseWithoutRemoveTags.body.data).toHaveProperty("markdown");
- expect(responseWithoutRemoveTags.body.data).toHaveProperty("metadata");
- expect(responseWithoutRemoveTags.body.data).not.toHaveProperty("html");
- expect(responseWithoutRemoveTags.body.data.content).toContain("Scrape This Site");
- expect(responseWithoutRemoveTags.body.data.content).toContain("Lessons and Videos"); // #footer
- expect(responseWithoutRemoveTags.body.data.content).toContain("[Sandbox]("); // .nav
- expect(responseWithoutRemoveTags.body.data.content).toContain("web scraping"); // strong
+ it.concurrent(
+ "should return a successful response for a valid scrape with PDF file and parsePDF set to false",
+ async () => {
+ const response = await request(TEST_URL)
+ .post("/v0/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({
+ url: "https://arxiv.org/pdf/astro-ph/9301001.pdf",
+ pageOptions: { parsePDF: false }
+ });
+ await new Promise((r) => setTimeout(r, 6000));
- const response = await request(TEST_URL)
- .post("/v0/scrape")
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
- .set("Content-Type", "application/json")
- .send({ url: "https://www.scrapethissite.com/", pageOptions: { removeTags: ['.nav', '#footer', 'strong'] } });
- expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty("data");
- expect(response.body.data).toHaveProperty("content");
- expect(response.body.data).toHaveProperty("markdown");
- expect(response.body.data).toHaveProperty("metadata");
- expect(response.body.data).not.toHaveProperty("html");
- expect(response.body.data.content).toContain("Scrape This Site");
- expect(response.body.data.content).not.toContain("Lessons and Videos"); // #footer
- expect(response.body.data.content).not.toContain("[Sandbox]("); // .nav
- expect(response.body.data.content).not.toContain("web scraping"); // strong
- }, 30000); // 30 seconds timeout
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("data");
+ expect(response.body.data).toHaveProperty("content");
+ expect(response.body.data).toHaveProperty("metadata");
+ expect(response.body.data.content).toContain(
+ "/Title(arXiv:astro-ph/9301001v1 7 Jan 1993)>>endobj"
+ );
+ },
+ 60000
+ ); // 60 seconds
+
+ it.concurrent(
+ "should return a successful response with a valid API key with removeTags option",
+ async () => {
+ const responseWithoutRemoveTags = await request(TEST_URL)
+ .post("/v0/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({ url: "https://www.scrapethissite.com/" });
+ expect(responseWithoutRemoveTags.statusCode).toBe(200);
+ expect(responseWithoutRemoveTags.body).toHaveProperty("data");
+ expect(responseWithoutRemoveTags.body.data).toHaveProperty("content");
+ expect(responseWithoutRemoveTags.body.data).toHaveProperty("markdown");
+ expect(responseWithoutRemoveTags.body.data).toHaveProperty("metadata");
+ expect(responseWithoutRemoveTags.body.data).not.toHaveProperty("html");
+ expect(responseWithoutRemoveTags.body.data.content).toContain(
+ "Scrape This Site"
+ );
+ expect(responseWithoutRemoveTags.body.data.content).toContain(
+ "Lessons and Videos"
+ ); // #footer
+ expect(responseWithoutRemoveTags.body.data.content).toContain(
+ "[Sandbox]("
+ ); // .nav
+ expect(responseWithoutRemoveTags.body.data.content).toContain(
+ "web scraping"
+ ); // strong
+
+ const response = await request(TEST_URL)
+ .post("/v0/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({
+ url: "https://www.scrapethissite.com/",
+ pageOptions: { removeTags: [".nav", "#footer", "strong"] }
+ });
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("data");
+ expect(response.body.data).toHaveProperty("content");
+ expect(response.body.data).toHaveProperty("markdown");
+ expect(response.body.data).toHaveProperty("metadata");
+ expect(response.body.data).not.toHaveProperty("html");
+ expect(response.body.data.content).toContain("Scrape This Site");
+ expect(response.body.data.content).not.toContain("Lessons and Videos"); // #footer
+ expect(response.body.data.content).not.toContain("[Sandbox]("); // .nav
+ expect(response.body.data.content).not.toContain("web scraping"); // strong
+ },
+ 30000
+ ); // 30 seconds timeout
// TODO: add this test back once we nail the waitFor option to be more deterministic
// it.concurrent("should return a successful response with a valid API key and waitFor option", async () => {
@@ -258,101 +321,137 @@ describe("E2E Tests for API Routes", () => {
// expect(duration).toBeGreaterThanOrEqual(7000);
// }, 12000); // 12 seconds timeout
- it.concurrent('should return a successful response for a scrape with 400 page', async () => {
- const response = await request(TEST_URL)
- .post('/v0/scrape')
- .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
- .set('Content-Type', 'application/json')
- .send({ url: 'https://httpstat.us/400' });
- await new Promise((r) => setTimeout(r, 5000));
+ it.concurrent(
+ "should return a successful response for a scrape with 400 page",
+ async () => {
+ const response = await request(TEST_URL)
+ .post("/v0/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({ url: "https://httpstat.us/400" });
+ await new Promise((r) => setTimeout(r, 5000));
- expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty('data');
- expect(response.body.data).toHaveProperty('content');
- expect(response.body.data).toHaveProperty('metadata');
- expect(response.body.data.metadata.pageStatusCode).toBe(400);
- expect(response.body.data.metadata.pageError.toLowerCase()).toContain("bad request");
- }, 60000); // 60 seconds
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("data");
+ expect(response.body.data).toHaveProperty("content");
+ expect(response.body.data).toHaveProperty("metadata");
+ expect(response.body.data.metadata.pageStatusCode).toBe(400);
+ expect(response.body.data.metadata.pageError.toLowerCase()).toContain(
+ "bad request"
+ );
+ },
+ 60000
+ ); // 60 seconds
- it.concurrent('should return a successful response for a scrape with 401 page', async () => {
- const response = await request(TEST_URL)
- .post('/v0/scrape')
- .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
- .set('Content-Type', 'application/json')
- .send({ url: 'https://httpstat.us/401' });
- await new Promise((r) => setTimeout(r, 5000));
+ it.concurrent(
+ "should return a successful response for a scrape with 401 page",
+ async () => {
+ const response = await request(TEST_URL)
+ .post("/v0/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({ url: "https://httpstat.us/401" });
+ await new Promise((r) => setTimeout(r, 5000));
- expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty('data');
- expect(response.body.data).toHaveProperty('content');
- expect(response.body.data).toHaveProperty('metadata');
- expect(response.body.data.metadata.pageStatusCode).toBe(401);
- expect(response.body.data.metadata.pageError.toLowerCase()).toContain("unauthorized");
- }, 60000); // 60 seconds
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("data");
+ expect(response.body.data).toHaveProperty("content");
+ expect(response.body.data).toHaveProperty("metadata");
+ expect(response.body.data.metadata.pageStatusCode).toBe(401);
+ expect(response.body.data.metadata.pageError.toLowerCase()).toContain(
+ "unauthorized"
+ );
+ },
+ 60000
+ ); // 60 seconds
- it.concurrent("should return a successful response for a scrape with 403 page", async () => {
- const response = await request(TEST_URL)
- .post('/v0/scrape')
- .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
- .set('Content-Type', 'application/json')
- .send({ url: 'https://httpstat.us/403' });
+ it.concurrent(
+ "should return a successful response for a scrape with 403 page",
+ async () => {
+ const response = await request(TEST_URL)
+ .post("/v0/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({ url: "https://httpstat.us/403" });
- await new Promise((r) => setTimeout(r, 5000));
- expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty('data');
- expect(response.body.data).toHaveProperty('content');
- expect(response.body.data).toHaveProperty('metadata');
- expect(response.body.data.metadata.pageStatusCode).toBe(403);
- expect(response.body.data.metadata.pageError.toLowerCase()).toContain("forbidden");
- }, 60000); // 60 seconds
+ await new Promise((r) => setTimeout(r, 5000));
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("data");
+ expect(response.body.data).toHaveProperty("content");
+ expect(response.body.data).toHaveProperty("metadata");
+ expect(response.body.data.metadata.pageStatusCode).toBe(403);
+ expect(response.body.data.metadata.pageError.toLowerCase()).toContain(
+ "forbidden"
+ );
+ },
+ 60000
+ ); // 60 seconds
- it.concurrent('should return a successful response for a scrape with 404 page', async () => {
- const response = await request(TEST_URL)
- .post('/v0/scrape')
- .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
- .set('Content-Type', 'application/json')
- .send({ url: 'https://httpstat.us/404' });
- await new Promise((r) => setTimeout(r, 5000));
+ it.concurrent(
+ "should return a successful response for a scrape with 404 page",
+ async () => {
+ const response = await request(TEST_URL)
+ .post("/v0/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({ url: "https://httpstat.us/404" });
+ await new Promise((r) => setTimeout(r, 5000));
- expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty('data');
- expect(response.body.data).toHaveProperty('content');
- expect(response.body.data).toHaveProperty('metadata');
- expect(response.body.data.metadata.pageStatusCode).toBe(404);
- expect(response.body.data.metadata.pageError.toLowerCase()).toContain("not found");
- }, 60000); // 60 seconds
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("data");
+ expect(response.body.data).toHaveProperty("content");
+ expect(response.body.data).toHaveProperty("metadata");
+ expect(response.body.data.metadata.pageStatusCode).toBe(404);
+ expect(response.body.data.metadata.pageError.toLowerCase()).toContain(
+ "not found"
+ );
+ },
+ 60000
+ ); // 60 seconds
- it.concurrent('should return a successful response for a scrape with 405 page', async () => {
- const response = await request(TEST_URL)
- .post('/v0/scrape')
- .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
- .set('Content-Type', 'application/json')
- .send({ url: 'https://httpstat.us/405' });
- await new Promise((r) => setTimeout(r, 5000));
+ it.concurrent(
+ "should return a successful response for a scrape with 405 page",
+ async () => {
+ const response = await request(TEST_URL)
+ .post("/v0/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({ url: "https://httpstat.us/405" });
+ await new Promise((r) => setTimeout(r, 5000));
- expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty('data');
- expect(response.body.data).toHaveProperty('content');
- expect(response.body.data).toHaveProperty('metadata');
- expect(response.body.data.metadata.pageStatusCode).toBe(405);
- expect(response.body.data.metadata.pageError.toLowerCase()).toContain("method not allowed");
- }, 60000); // 60 seconds
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("data");
+ expect(response.body.data).toHaveProperty("content");
+ expect(response.body.data).toHaveProperty("metadata");
+ expect(response.body.data.metadata.pageStatusCode).toBe(405);
+ expect(response.body.data.metadata.pageError.toLowerCase()).toContain(
+ "method not allowed"
+ );
+ },
+ 60000
+ ); // 60 seconds
- it.concurrent('should return a successful response for a scrape with 500 page', async () => {
- const response = await request(TEST_URL)
- .post('/v0/scrape')
- .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
- .set('Content-Type', 'application/json')
- .send({ url: 'https://httpstat.us/500' });
- await new Promise((r) => setTimeout(r, 5000));
+ it.concurrent(
+ "should return a successful response for a scrape with 500 page",
+ async () => {
+ const response = await request(TEST_URL)
+ .post("/v0/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({ url: "https://httpstat.us/500" });
+ await new Promise((r) => setTimeout(r, 5000));
- expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty('data');
- expect(response.body.data).toHaveProperty('content');
- expect(response.body.data).toHaveProperty('metadata');
- expect(response.body.data.metadata.pageStatusCode).toBe(500);
- expect(response.body.data.metadata.pageError.toLowerCase()).toContain("internal server error");
- }, 60000); // 60 seconds
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("data");
+ expect(response.body.data).toHaveProperty("content");
+ expect(response.body.data).toHaveProperty("metadata");
+ expect(response.body.data.metadata.pageStatusCode).toBe(500);
+ expect(response.body.data.metadata.pageError.toLowerCase()).toContain(
+ "internal server error"
+ );
+ },
+ 60000
+ ); // 60 seconds
});
describe("POST /v0/crawl", () => {
@@ -361,14 +460,17 @@ describe("E2E Tests for API Routes", () => {
expect(response.statusCode).toBe(401);
});
- it.concurrent("should return an error response with an invalid API key", async () => {
- const response = await request(TEST_URL)
- .post("/v0/crawl")
- .set("Authorization", `Bearer invalid-api-key`)
- .set("Content-Type", "application/json")
- .send({ url: "https://firecrawl.dev" });
- expect(response.statusCode).toBe(401);
- });
+ it.concurrent(
+ "should return an error response with an invalid API key",
+ async () => {
+ const response = await request(TEST_URL)
+ .post("/v0/crawl")
+ .set("Authorization", `Bearer invalid-api-key`)
+ .set("Content-Type", "application/json")
+ .send({ url: "https://firecrawl.dev" });
+ expect(response.statusCode).toBe(401);
+ }
+ );
it.concurrent("should return an error for a blocklisted URL", async () => {
const blocklistedUrl = "https://twitter.com/fake-test";
@@ -383,56 +485,64 @@ describe("E2E Tests for API Routes", () => {
);
});
- it.concurrent("should return a successful response with a valid API key for crawl", async () => {
- const response = await request(TEST_URL)
- .post("/v0/crawl")
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
- .set("Content-Type", "application/json")
- .send({ url: "https://firecrawl.dev" });
- expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty("jobId");
- expect(response.body.jobId).toMatch(
- /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
- );
- });
- it.concurrent('should prevent duplicate requests using the same idempotency key', async () => {
- const uniqueIdempotencyKey = uuidv4();
-
- // First request with the idempotency key
- const firstResponse = await request(TEST_URL)
- .post('/v0/crawl')
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
- .set("Content-Type", "application/json")
- .set("x-idempotency-key", uniqueIdempotencyKey)
- .send({ url: 'https://docs.firecrawl.dev' });
-
- expect(firstResponse.statusCode).toBe(200);
-
- // Second request with the same idempotency key
- const secondResponse = await request(TEST_URL)
- .post('/v0/crawl')
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
- .set("Content-Type", "application/json")
- .set("x-idempotency-key", uniqueIdempotencyKey)
- .send({ url: 'https://docs.firecrawl.dev' });
-
- expect(secondResponse.statusCode).toBe(409);
- expect(secondResponse.body.error).toBe('Idempotency key already used');
- });
+ it.concurrent(
+ "should return a successful response with a valid API key for crawl",
+ async () => {
+ const response = await request(TEST_URL)
+ .post("/v0/crawl")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({ url: "https://firecrawl.dev" });
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("jobId");
+ expect(response.body.jobId).toMatch(
+ /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
+ );
+ }
+ );
+ it.concurrent(
+ "should prevent duplicate requests using the same idempotency key",
+ async () => {
+ const uniqueIdempotencyKey = uuidv4();
+
+ // First request with the idempotency key
+ const firstResponse = await request(TEST_URL)
+ .post("/v0/crawl")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .set("x-idempotency-key", uniqueIdempotencyKey)
+ .send({ url: "https://docs.firecrawl.dev" });
+
+ expect(firstResponse.statusCode).toBe(200);
+
+ // Second request with the same idempotency key
+ const secondResponse = await request(TEST_URL)
+ .post("/v0/crawl")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .set("x-idempotency-key", uniqueIdempotencyKey)
+ .send({ url: "https://docs.firecrawl.dev" });
+
+ expect(secondResponse.statusCode).toBe(409);
+ expect(secondResponse.body.error).toBe("Idempotency key already used");
+ }
+ );
+
+ it.concurrent(
+ "should return a successful response with a valid API key and valid includes option",
+ async () => {
+ const crawlResponse = await request(TEST_URL)
+ .post("/v0/crawl")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({
+ url: "https://mendable.ai",
+ limit: 10,
+ crawlerOptions: {
+ includes: ["blog/*"]
+ }
+ });
- it.concurrent("should return a successful response with a valid API key and valid includes option", async () => {
- const crawlResponse = await request(TEST_URL)
- .post("/v0/crawl")
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
- .set("Content-Type", "application/json")
- .send({
- url: "https://mendable.ai",
- limit: 10,
- crawlerOptions: {
- includes: ["blog/*"],
- },
- });
-
let response;
let isFinished = false;
@@ -453,278 +563,322 @@ describe("E2E Tests for API Routes", () => {
const completedResponse = response;
const urls = completedResponse.body.data.map(
- (item: any) => item.metadata?.sourceURL
- );
- expect(urls.length).toBeGreaterThan(5);
- urls.forEach((url: string) => {
- expect(url.startsWith("https://www.mendable.ai/blog/")).toBeTruthy();
- });
-
- expect(completedResponse.statusCode).toBe(200);
- expect(completedResponse.body).toHaveProperty("status");
- expect(completedResponse.body.status).toBe("completed");
- expect(completedResponse.body).toHaveProperty("data");
- expect(completedResponse.body.data[0]).toHaveProperty("content");
- expect(completedResponse.body.data[0]).toHaveProperty("markdown");
- expect(completedResponse.body.data[0]).toHaveProperty("metadata");
- expect(completedResponse.body.data[0].content).toContain("Mendable");
- expect(completedResponse.body.data[0].metadata.pageStatusCode).toBe(200);
- expect(completedResponse.body.data[0].metadata.pageError).toBeUndefined();
- }, 60000); // 60 seconds
-
- it.concurrent("should return a successful response with a valid API key and valid excludes option", async () => {
- const crawlResponse = await request(TEST_URL)
- .post("/v0/crawl")
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
- .set("Content-Type", "application/json")
- .send({
- url: "https://mendable.ai",
- limit: 10,
- crawlerOptions: {
- excludes: ["blog/*"],
- },
+ (item: any) => item.metadata?.sourceURL
+ );
+ expect(urls.length).toBeGreaterThan(5);
+ urls.forEach((url: string) => {
+ expect(url.startsWith("https://www.mendable.ai/blog/")).toBeTruthy();
});
-
- let isFinished = false;
- let response;
- while (!isFinished) {
- response = await request(TEST_URL)
+ expect(completedResponse.statusCode).toBe(200);
+ expect(completedResponse.body).toHaveProperty("status");
+ expect(completedResponse.body.status).toBe("completed");
+ expect(completedResponse.body).toHaveProperty("data");
+ expect(completedResponse.body.data[0]).toHaveProperty("content");
+ expect(completedResponse.body.data[0]).toHaveProperty("markdown");
+ expect(completedResponse.body.data[0]).toHaveProperty("metadata");
+ expect(completedResponse.body.data[0].content).toContain("Mendable");
+ expect(completedResponse.body.data[0].metadata.pageStatusCode).toBe(
+ 200
+ );
+ expect(
+ completedResponse.body.data[0].metadata.pageError
+ ).toBeUndefined();
+ },
+ 60000
+ ); // 60 seconds
+
+ it.concurrent(
+ "should return a successful response with a valid API key and valid excludes option",
+ async () => {
+ const crawlResponse = await request(TEST_URL)
+ .post("/v0/crawl")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({
+ url: "https://mendable.ai",
+ limit: 10,
+ crawlerOptions: {
+ excludes: ["blog/*"]
+ }
+ });
+
+ let isFinished = false;
+ let response;
+
+ while (!isFinished) {
+ response = await request(TEST_URL)
+ .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
+
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("status");
+ isFinished = response.body.status === "completed";
+
+ if (!isFinished) {
+ await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again
+ }
+ }
+
+ const completedResponse = response;
+
+ const urls = completedResponse.body.data.map(
+ (item: any) => item.metadata?.sourceURL
+ );
+ expect(urls.length).toBeGreaterThan(5);
+ urls.forEach((url: string) => {
+ expect(url.startsWith("https://wwww.mendable.ai/blog/")).toBeFalsy();
+ });
+ },
+ 90000
+ ); // 90 seconds
+
+ it.concurrent(
+ "should return a successful response with a valid API key and limit to 3",
+ async () => {
+ const crawlResponse = await request(TEST_URL)
+ .post("/v0/crawl")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({
+ url: "https://mendable.ai",
+ crawlerOptions: { limit: 3 }
+ });
+
+ let isFinished = false;
+ let response;
+
+ while (!isFinished) {
+ response = await request(TEST_URL)
+ .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
+
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("status");
+ isFinished = response.body.status === "completed";
+
+ if (!isFinished) {
+ await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again
+ }
+ }
+
+ const completedResponse = response;
+
+ expect(completedResponse.statusCode).toBe(200);
+ expect(completedResponse.body).toHaveProperty("status");
+ expect(completedResponse.body.status).toBe("completed");
+ expect(completedResponse.body).toHaveProperty("data");
+ expect(completedResponse.body.data.length).toBe(3);
+ expect(completedResponse.body.data[0]).toHaveProperty("content");
+ expect(completedResponse.body.data[0]).toHaveProperty("markdown");
+ expect(completedResponse.body.data[0]).toHaveProperty("metadata");
+ expect(completedResponse.body.data[0].content).toContain("Mendable");
+ expect(completedResponse.body.data[0].metadata.pageStatusCode).toBe(
+ 200
+ );
+ expect(
+ completedResponse.body.data[0].metadata.pageError
+ ).toBeUndefined();
+ },
+ 60000
+ ); // 60 seconds
+
+ it.concurrent(
+ "should return a successful response with max depth option for a valid crawl job",
+ async () => {
+ const crawlResponse = await request(TEST_URL)
+ .post("/v0/crawl")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({
+ url: "https://www.scrapethissite.com",
+ crawlerOptions: { maxDepth: 1 }
+ });
+ expect(crawlResponse.statusCode).toBe(200);
+
+ const response = await request(TEST_URL)
.get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
-
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("status");
- isFinished = response.body.status === "completed";
-
- if (!isFinished) {
- await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again
+ expect(["active", "waiting"]).toContain(response.body.status);
+ // wait for 60 seconds
+ let isCompleted = false;
+ while (!isCompleted) {
+ const statusCheckResponse = await request(TEST_URL)
+ .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
+ expect(statusCheckResponse.statusCode).toBe(200);
+ isCompleted = statusCheckResponse.body.status === "completed";
+ if (!isCompleted) {
+ await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again
+ }
}
- }
-
- const completedResponse = response;
-
- const urls = completedResponse.body.data.map(
- (item: any) => item.metadata?.sourceURL
- );
- expect(urls.length).toBeGreaterThan(5);
- urls.forEach((url: string) => {
- expect(url.startsWith("https://wwww.mendable.ai/blog/")).toBeFalsy();
- });
- }, 90000); // 90 seconds
-
- it.concurrent("should return a successful response with a valid API key and limit to 3", async () => {
- const crawlResponse = await request(TEST_URL)
- .post("/v0/crawl")
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
- .set("Content-Type", "application/json")
- .send({
- url: "https://mendable.ai",
- crawlerOptions: { limit: 3 },
- });
-
- let isFinished = false;
- let response;
-
- while (!isFinished) {
- response = await request(TEST_URL)
+ const completedResponse = await request(TEST_URL)
.get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
+ expect(completedResponse.statusCode).toBe(200);
+ expect(completedResponse.body).toHaveProperty("status");
+ expect(completedResponse.body.status).toBe("completed");
+ expect(completedResponse.body).toHaveProperty("data");
+ expect(completedResponse.body.data[0]).toHaveProperty("content");
+ expect(completedResponse.body.data[0]).toHaveProperty("markdown");
+ expect(completedResponse.body.data[0]).toHaveProperty("metadata");
+ expect(completedResponse.body.data[0].metadata.pageStatusCode).toBe(
+ 200
+ );
+ expect(
+ completedResponse.body.data[0].metadata.pageError
+ ).toBeUndefined();
+ const urls = completedResponse.body.data.map(
+ (item: any) => item.metadata?.sourceURL
+ );
+ expect(urls.length).toBeGreaterThan(1);
+
+ // Check that no URL is more than 1 level below the root (computed depth <= 2, since the root path itself counts as 1)
+ urls.forEach((url: string) => {
+ const pathSplits = new URL(url).pathname.split("/");
+ const depth =
+ pathSplits.length -
+ (pathSplits[0].length === 0 &&
+ pathSplits[pathSplits.length - 1].length === 0
+ ? 1
+ : 0);
+ expect(depth).toBeLessThanOrEqual(2);
+ });
+ },
+ 180000
+ );
+
+ it.concurrent(
+ "should return a successful response with relative max depth option for a valid crawl job",
+ async () => {
+ const crawlResponse = await request(TEST_URL)
+ .post("/v0/crawl")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({
+ url: "https://www.scrapethissite.com/pages/",
+ crawlerOptions: { maxDepth: 1 }
+ });
+ expect(crawlResponse.statusCode).toBe(200);
+
+ const response = await request(TEST_URL)
+ .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("status");
- isFinished = response.body.status === "completed";
-
- if (!isFinished) {
- await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again
+ expect(["active", "waiting"]).toContain(response.body.status);
+ // Poll once per second until the crawl completes; only the 180 s test timeout bounds the wait
+ let isCompleted = false;
+ while (!isCompleted) {
+ const statusCheckResponse = await request(TEST_URL)
+ .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
+ expect(statusCheckResponse.statusCode).toBe(200);
+ isCompleted = statusCheckResponse.body.status === "completed";
+ if (!isCompleted) {
+ await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again
+ }
}
- }
-
- const completedResponse = response;
-
- expect(completedResponse.statusCode).toBe(200);
- expect(completedResponse.body).toHaveProperty("status");
- expect(completedResponse.body.status).toBe("completed");
- expect(completedResponse.body).toHaveProperty("data");
- expect(completedResponse.body.data.length).toBe(3);
- expect(completedResponse.body.data[0]).toHaveProperty("content");
- expect(completedResponse.body.data[0]).toHaveProperty("markdown");
- expect(completedResponse.body.data[0]).toHaveProperty("metadata");
- expect(completedResponse.body.data[0].content).toContain("Mendable");
- expect(completedResponse.body.data[0].metadata.pageStatusCode).toBe(200);
- expect(completedResponse.body.data[0].metadata.pageError).toBeUndefined();
- }, 60000); // 60 seconds
-
- it.concurrent("should return a successful response with max depth option for a valid crawl job", async () => {
- const crawlResponse = await request(TEST_URL)
- .post("/v0/crawl")
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
- .set("Content-Type", "application/json")
- .send({
- url: "https://www.scrapethissite.com",
- crawlerOptions: { maxDepth: 1 },
- });
- expect(crawlResponse.statusCode).toBe(200);
-
- const response = await request(TEST_URL)
- .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
- expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty("status");
- expect(["active", "waiting"]).toContain(response.body.status);
- // wait for 60 seconds
- let isCompleted = false;
- while (!isCompleted) {
- const statusCheckResponse = await request(TEST_URL)
+ const completedResponse = await request(TEST_URL)
.get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
- expect(statusCheckResponse.statusCode).toBe(200);
- isCompleted = statusCheckResponse.body.status === "completed";
- if (!isCompleted) {
- await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again
- }
- }
- const completedResponse = await request(TEST_URL)
- .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
- expect(completedResponse.statusCode).toBe(200);
- expect(completedResponse.body).toHaveProperty("status");
- expect(completedResponse.body.status).toBe("completed");
- expect(completedResponse.body).toHaveProperty("data");
- expect(completedResponse.body.data[0]).toHaveProperty("content");
- expect(completedResponse.body.data[0]).toHaveProperty("markdown");
- expect(completedResponse.body.data[0]).toHaveProperty("metadata");
- expect(completedResponse.body.data[0].metadata.pageStatusCode).toBe(200);
- expect(completedResponse.body.data[0].metadata.pageError).toBeUndefined();
- const urls = completedResponse.body.data.map(
- (item: any) => item.metadata?.sourceURL
- );
- expect(urls.length).toBeGreaterThan(1);
+ expect(completedResponse.statusCode).toBe(200);
+ expect(completedResponse.body).toHaveProperty("status");
+ expect(completedResponse.body.status).toBe("completed");
+ expect(completedResponse.body).toHaveProperty("data");
+ expect(completedResponse.body.data[0]).toHaveProperty("content");
+ expect(completedResponse.body.data[0]).toHaveProperty("markdown");
+ expect(completedResponse.body.data[0]).toHaveProperty("metadata");
+ const urls = completedResponse.body.data.map(
+ (item: any) => item.metadata?.sourceURL
+ );
+ expect(urls.length).toBeGreaterThan(1);
- // Check if all URLs have a maximum depth of 1
- urls.forEach((url: string) => {
- const pathSplits = new URL(url).pathname.split('/');
- const depth = pathSplits.length - (pathSplits[0].length === 0 && pathSplits[pathSplits.length - 1].length === 0 ? 1 : 0);
- expect(depth).toBeLessThanOrEqual(2);
- });
- }, 180000);
-
- it.concurrent("should return a successful response with relative max depth option for a valid crawl job", async () => {
- const crawlResponse = await request(TEST_URL)
- .post("/v0/crawl")
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
- .set("Content-Type", "application/json")
- .send({
- url: "https://www.scrapethissite.com/pages/",
- crawlerOptions: { maxDepth: 1 },
+ // Check if all URLs have an absolute maximum depth of 3 after the base URL depth was 2 and the maxDepth was 1
+ urls.forEach((url: string) => {
+ const pathSplits = new URL(url).pathname.split("/");
+ const depth =
+ pathSplits.length -
+ (pathSplits[0].length === 0 &&
+ pathSplits[pathSplits.length - 1].length === 0
+ ? 1
+ : 0);
+ expect(depth).toBeLessThanOrEqual(3);
});
- expect(crawlResponse.statusCode).toBe(200);
+ },
+ 180000
+ );
- const response = await request(TEST_URL)
- .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
- expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty("status");
- expect(["active", "waiting"]).toContain(response.body.status);
- // wait for 60 seconds
- let isCompleted = false;
- while (!isCompleted) {
- const statusCheckResponse = await request(TEST_URL)
+ it.concurrent(
+ "should return a successful response with relative max depth option for a valid crawl job with maxDepths equals to zero",
+ async () => {
+ const crawlResponse = await request(TEST_URL)
+ .post("/v0/crawl")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({
+ url: "https://www.mendable.ai",
+ crawlerOptions: { maxDepth: 0 }
+ });
+ expect(crawlResponse.statusCode).toBe(200);
+
+ const response = await request(TEST_URL)
.get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
- expect(statusCheckResponse.statusCode).toBe(200);
- isCompleted = statusCheckResponse.body.status === "completed";
- if (!isCompleted) {
- await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("status");
+ expect(["active", "waiting"]).toContain(response.body.status);
+ // Poll once per second until the crawl completes; only the 180 s test timeout bounds the wait
+ let isCompleted = false;
+ while (!isCompleted) {
+ const statusCheckResponse = await request(TEST_URL)
+ .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
+ expect(statusCheckResponse.statusCode).toBe(200);
+ isCompleted = statusCheckResponse.body.status === "completed";
+ if (!isCompleted) {
+ await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again
+ }
}
- }
- const completedResponse = await request(TEST_URL)
- .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
-
- expect(completedResponse.statusCode).toBe(200);
- expect(completedResponse.body).toHaveProperty("status");
- expect(completedResponse.body.status).toBe("completed");
- expect(completedResponse.body).toHaveProperty("data");
- expect(completedResponse.body.data[0]).toHaveProperty("content");
- expect(completedResponse.body.data[0]).toHaveProperty("markdown");
- expect(completedResponse.body.data[0]).toHaveProperty("metadata");
- const urls = completedResponse.body.data.map(
- (item: any) => item.metadata?.sourceURL
- );
- expect(urls.length).toBeGreaterThan(1);
-
- // Check if all URLs have an absolute maximum depth of 3 after the base URL depth was 2 and the maxDepth was 1
- urls.forEach((url: string) => {
- const pathSplits = new URL(url).pathname.split('/');
- const depth = pathSplits.length - (pathSplits[0].length === 0 && pathSplits[pathSplits.length - 1].length === 0 ? 1 : 0);
- expect(depth).toBeLessThanOrEqual(3);
- });
- }, 180000);
-
- it.concurrent("should return a successful response with relative max depth option for a valid crawl job with maxDepths equals to zero", async () => {
-
- const crawlResponse = await request(TEST_URL)
- .post("/v0/crawl")
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
- .set("Content-Type", "application/json")
- .send({
- url: "https://www.mendable.ai",
- crawlerOptions: { maxDepth: 0 },
- });
- expect(crawlResponse.statusCode).toBe(200);
-
- const response = await request(TEST_URL)
- .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
- expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty("status");
- expect(["active", "waiting"]).toContain(response.body.status);
- // wait for 60 seconds
- let isCompleted = false;
- while (!isCompleted) {
- const statusCheckResponse = await request(TEST_URL)
+ const completedResponse = await request(TEST_URL)
.get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
- expect(statusCheckResponse.statusCode).toBe(200);
- isCompleted = statusCheckResponse.body.status === "completed";
- if (!isCompleted) {
- await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again
- }
- }
- const completedResponse = await request(TEST_URL)
- .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
const testurls = completedResponse.body.data.map(
(item: any) => item.metadata?.sourceURL
);
//console.log(testurls)
- expect(completedResponse.statusCode).toBe(200);
- expect(completedResponse.body).toHaveProperty("status");
- expect(completedResponse.body.status).toBe("completed");
- expect(completedResponse.body).toHaveProperty("data");
- expect(completedResponse.body.data[0]).toHaveProperty("content");
- expect(completedResponse.body.data[0]).toHaveProperty("markdown");
- expect(completedResponse.body.data[0]).toHaveProperty("metadata");
- const urls = completedResponse.body.data.map(
- (item: any) => item.metadata?.sourceURL
- );
- expect(urls.length).toBeGreaterThanOrEqual(1);
+ expect(completedResponse.statusCode).toBe(200);
+ expect(completedResponse.body).toHaveProperty("status");
+ expect(completedResponse.body.status).toBe("completed");
+ expect(completedResponse.body).toHaveProperty("data");
+ expect(completedResponse.body.data[0]).toHaveProperty("content");
+ expect(completedResponse.body.data[0]).toHaveProperty("markdown");
+ expect(completedResponse.body.data[0]).toHaveProperty("metadata");
+ const urls = completedResponse.body.data.map(
+ (item: any) => item.metadata?.sourceURL
+ );
+ expect(urls.length).toBeGreaterThanOrEqual(1);
- // Check if all URLs have an absolute maximum depth of 3 after the base URL depth was 2 and the maxDepth was 1
- urls.forEach((url: string) => {
- const pathSplits = new URL(url).pathname.split('/');
- const depth = pathSplits.length - (pathSplits[0].length === 0 && pathSplits[pathSplits.length - 1].length === 0 ? 1 : 0);
- expect(depth).toBeLessThanOrEqual(1);
- });
- }, 180000);
-
-
-
-
+ // Check that every URL stays at the base URL's own depth (computed depth <= 1), since the base URL is the site root and maxDepth was 0
+ urls.forEach((url: string) => {
+ const pathSplits = new URL(url).pathname.split("/");
+ const depth =
+ pathSplits.length -
+ (pathSplits[0].length === 0 &&
+ pathSplits[pathSplits.length - 1].length === 0
+ ? 1
+ : 0);
+ expect(depth).toBeLessThanOrEqual(1);
+ });
+ },
+ 180000
+ );
// it.concurrent("should return a successful response with a valid API key and valid limit option", async () => {
// const crawlResponse = await request(TEST_URL)
@@ -735,7 +889,7 @@ describe("E2E Tests for API Routes", () => {
// url: "https://mendable.ai",
// crawlerOptions: { limit: 10 },
// });
-
+
// const response = await request(TEST_URL)
// .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
// .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
@@ -771,100 +925,126 @@ describe("E2E Tests for API Routes", () => {
// expect(completedResponse.body.data[0].content).not.toContain("main menu");
// }, 60000); // 60 seconds
- it.concurrent("should return a successful response for a valid crawl job with includeHtml set to true option", async () => {
- const crawlResponse = await request(TEST_URL)
- .post("/v0/crawl")
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
- .set("Content-Type", "application/json")
- .send({
- url: "https://roastmywebsite.ai",
- pageOptions: { includeHtml: true },
- });
- expect(crawlResponse.statusCode).toBe(200);
+ it.concurrent(
+ "should return a successful response for a valid crawl job with includeHtml set to true option",
+ async () => {
+ const crawlResponse = await request(TEST_URL)
+ .post("/v0/crawl")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({
+ url: "https://roastmywebsite.ai",
+ pageOptions: { includeHtml: true }
+ });
+ expect(crawlResponse.statusCode).toBe(200);
- const response = await request(TEST_URL)
- .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
- expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty("status");
- expect(["active", "waiting"]).toContain(response.body.status);
-
- let isCompleted = false;
- while (!isCompleted) {
- const statusCheckResponse = await request(TEST_URL)
+ const response = await request(TEST_URL)
.get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
- expect(statusCheckResponse.statusCode).toBe(200);
- isCompleted = statusCheckResponse.body.status === "completed";
- if (!isCompleted) {
- await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("status");
+ expect(["active", "waiting"]).toContain(response.body.status);
+
+ let isCompleted = false;
+ while (!isCompleted) {
+ const statusCheckResponse = await request(TEST_URL)
+ .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
+ expect(statusCheckResponse.statusCode).toBe(200);
+ isCompleted = statusCheckResponse.body.status === "completed";
+ if (!isCompleted) {
+ await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again
+ }
}
- }
- const completedResponse = await request(TEST_URL)
- .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
-
- expect(completedResponse.statusCode).toBe(200);
- expect(completedResponse.body).toHaveProperty("status");
- expect(completedResponse.body.status).toBe("completed");
- expect(completedResponse.body).toHaveProperty("data");
- expect(completedResponse.body.data[0]).toHaveProperty("content");
- expect(completedResponse.body.data[0]).toHaveProperty("markdown");
- expect(completedResponse.body.data[0]).toHaveProperty("metadata");
- expect(completedResponse.body.data[0].metadata.pageStatusCode).toBe(200);
- expect(completedResponse.body.data[0].metadata.pageError).toBeUndefined();
-
- // 120 seconds
- expect(completedResponse.body.data[0]).toHaveProperty("html");
- expect(completedResponse.body.data[0]).toHaveProperty("metadata");
- expect(completedResponse.body.data[0].content).toContain("_Roast_");
- expect(completedResponse.body.data[0].markdown).toContain("_Roast_");
- expect(completedResponse.body.data[0].html).toContain("
{
- const crawlInitResponse = await request(TEST_URL)
- .post("/v0/crawl")
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
- .set("Content-Type", "application/json")
- .send({
- url: "https://mendable.ai",
- crawlerOptions: {
- allowExternalContentLinks: true,
- ignoreSitemap: true,
- returnOnlyUrls: true,
- limit: 50
- }
- });
-
- expect(crawlInitResponse.statusCode).toBe(200);
- expect(crawlInitResponse.body).toHaveProperty("jobId");
-
- let crawlStatus: string = "scraping";
- let crawlData = [];
- while (crawlStatus !== "completed") {
- const statusResponse = await request(TEST_URL)
- .get(`/v0/crawl/status/${crawlInitResponse.body.jobId}`)
+ const completedResponse = await request(TEST_URL)
+ .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
- crawlStatus = statusResponse.body.status;
- if (statusResponse.body.data) {
- crawlData = statusResponse.body.data;
+
+ expect(completedResponse.statusCode).toBe(200);
+ expect(completedResponse.body).toHaveProperty("status");
+ expect(completedResponse.body.status).toBe("completed");
+ expect(completedResponse.body).toHaveProperty("data");
+ expect(completedResponse.body.data[0]).toHaveProperty("content");
+ expect(completedResponse.body.data[0]).toHaveProperty("markdown");
+ expect(completedResponse.body.data[0]).toHaveProperty("metadata");
+ expect(completedResponse.body.data[0].metadata.pageStatusCode).toBe(
+ 200
+ );
+ expect(
+ completedResponse.body.data[0].metadata.pageError
+ ).toBeUndefined();
+
+ // includeHtml was requested, so each document must also expose an html field alongside content and markdown
+ expect(completedResponse.body.data[0]).toHaveProperty("html");
+ expect(completedResponse.body.data[0]).toHaveProperty("metadata");
+ expect(completedResponse.body.data[0].content).toContain("_Roast_");
+ expect(completedResponse.body.data[0].markdown).toContain("_Roast_");
+ expect(completedResponse.body.data[0].html).toContain("
{
+ const crawlInitResponse = await request(TEST_URL)
+ .post("/v0/crawl")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({
+ url: "https://mendable.ai",
+ crawlerOptions: {
+ allowExternalContentLinks: true,
+ ignoreSitemap: true,
+ returnOnlyUrls: true,
+ limit: 50
+ }
+ });
+
+ expect(crawlInitResponse.statusCode).toBe(200);
+ expect(crawlInitResponse.body).toHaveProperty("jobId");
+
+ let crawlStatus: string = "scraping";
+ let crawlData = [];
+ while (crawlStatus !== "completed") {
+ const statusResponse = await request(TEST_URL)
+ .get(`/v0/crawl/status/${crawlInitResponse.body.jobId}`)
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
+ crawlStatus = statusResponse.body.status;
+ if (statusResponse.body.data) {
+ crawlData = statusResponse.body.data;
+ }
+ if (crawlStatus !== "completed") {
+ await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again
+ }
}
- if (crawlStatus !== "completed") {
- await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again
- }
- }
- expect(crawlData.length).toBeGreaterThan(0);
- expect(crawlData).toEqual(expect.arrayContaining([
- expect.objectContaining({ url: expect.stringContaining("https://firecrawl.dev/?ref=mendable+banner") }),
- expect.objectContaining({ url: expect.stringContaining("https://mendable.ai/pricing") }),
- expect.objectContaining({ url: expect.stringContaining("https://x.com/CalebPeffer") })
- ]));
- }, 180000); // 3 minutes timeout
+ expect(crawlData.length).toBeGreaterThan(0);
+ expect(crawlData).toEqual(
+ expect.arrayContaining([
+ expect.objectContaining({
+ url: expect.stringContaining(
+ "https://firecrawl.dev/?ref=mendable+banner"
+ )
+ }),
+ expect.objectContaining({
+ url: expect.stringContaining("https://mendable.ai/pricing")
+ }),
+ expect.objectContaining({
+ url: expect.stringContaining("https://x.com/CalebPeffer")
+ })
+ ])
+ );
+ },
+ 180000
+ ); // 3 minutes timeout
});
describe("POST /v0/crawlWebsitePreview", () => {
@@ -873,14 +1053,17 @@ describe("E2E Tests for API Routes", () => {
expect(response.statusCode).toBe(401);
});
- it.concurrent("should return an error response with an invalid API key", async () => {
- const response = await request(TEST_URL)
- .post("/v0/crawlWebsitePreview")
- .set("Authorization", `Bearer invalid-api-key`)
- .set("Content-Type", "application/json")
- .send({ url: "https://firecrawl.dev" });
- expect(response.statusCode).toBe(401);
- });
+ it.concurrent(
+ "should return an error response with an invalid API key",
+ async () => {
+ const response = await request(TEST_URL)
+ .post("/v0/crawlWebsitePreview")
+ .set("Authorization", `Bearer invalid-api-key`)
+ .set("Content-Type", "application/json")
+ .send({ url: "https://firecrawl.dev" });
+ expect(response.statusCode).toBe(401);
+ }
+ );
// it.concurrent("should return an error for a blocklisted URL", async () => {
// const blocklistedUrl = "https://instagram.com/fake-test";
@@ -894,15 +1077,19 @@ describe("E2E Tests for API Routes", () => {
// expect(response.body.error).toContain("Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.");
// });
- it.concurrent("should return a timeout error when scraping takes longer than the specified timeout", async () => {
- const response = await request(TEST_URL)
- .post("/v0/scrape")
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
- .set("Content-Type", "application/json")
- .send({ url: "https://firecrawl.dev", timeout: 1000 });
+ it.concurrent(
+ "should return a timeout error when scraping takes longer than the specified timeout",
+ async () => {
+ const response = await request(TEST_URL)
+ .post("/v0/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({ url: "https://firecrawl.dev", timeout: 1000 });
- expect(response.statusCode).toBe(408);
- }, 3000);
+ expect(response.statusCode).toBe(408);
+ },
+ 3000
+ );
// it.concurrent("should return a successful response with a valid API key for crawlWebsitePreview", async () => {
// const response = await request(TEST_URL)
@@ -924,26 +1111,33 @@ describe("E2E Tests for API Routes", () => {
expect(response.statusCode).toBe(401);
});
- it.concurrent("should return an error response with an invalid API key", async () => {
- const response = await request(TEST_URL)
- .post("/v0/search")
- .set("Authorization", `Bearer invalid-api-key`)
- .set("Content-Type", "application/json")
- .send({ query: "test" });
- expect(response.statusCode).toBe(401);
- });
+ it.concurrent(
+ "should return an error response with an invalid API key",
+ async () => {
+ const response = await request(TEST_URL)
+ .post("/v0/search")
+ .set("Authorization", `Bearer invalid-api-key`)
+ .set("Content-Type", "application/json")
+ .send({ query: "test" });
+ expect(response.statusCode).toBe(401);
+ }
+ );
- it.concurrent("should return a successful response with a valid API key for search", async () => {
- const response = await request(TEST_URL)
- .post("/v0/search")
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
- .set("Content-Type", "application/json")
- .send({ query: "test" });
- expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty("success");
- expect(response.body.success).toBe(true);
- expect(response.body).toHaveProperty("data");
- }, 30000); // 30 seconds timeout
+ it.concurrent(
+ "should return a successful response with a valid API key for search",
+ async () => {
+ const response = await request(TEST_URL)
+ .post("/v0/search")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({ query: "test" });
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("success");
+ expect(response.body.success).toBe(true);
+ expect(response.body).toHaveProperty("data");
+ },
+ 30000
+ ); // 30 seconds timeout
});
describe("GET /v0/crawl/status/:jobId", () => {
@@ -952,123 +1146,217 @@ describe("E2E Tests for API Routes", () => {
expect(response.statusCode).toBe(401);
});
- it.concurrent("should return an error response with an invalid API key", async () => {
- const response = await request(TEST_URL)
- .get("/v0/crawl/status/123")
- .set("Authorization", `Bearer invalid-api-key`);
- expect(response.statusCode).toBe(401);
- });
-
- it.concurrent("should return Job not found for invalid job ID", async () => {
- const response = await request(TEST_URL)
- .get("/v0/crawl/status/invalidJobId")
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
- expect(response.statusCode).toBe(404);
- });
-
- it.concurrent("should return a successful crawl status response for a valid crawl job", async () => {
- const crawlResponse = await request(TEST_URL)
- .post("/v0/crawl")
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
- .set("Content-Type", "application/json")
- .send({ url: "https://mendable.ai/blog" });
- expect(crawlResponse.statusCode).toBe(200);
-
- let isCompleted = false;
- let completedResponse;
-
- while (!isCompleted) {
+ it.concurrent(
+ "should return an error response with an invalid API key",
+ async () => {
const response = await request(TEST_URL)
- .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
+ .get("/v0/crawl/status/123")
+ .set("Authorization", `Bearer invalid-api-key`);
+ expect(response.statusCode).toBe(401);
+ }
+ );
+
+ it.concurrent(
+ "should return Job not found for invalid job ID",
+ async () => {
+ const response = await request(TEST_URL)
+ .get("/v0/crawl/status/invalidJobId")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
- expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty("status");
-
- if (response.body.status === "completed") {
- isCompleted = true;
- completedResponse = response;
- } else {
- await new Promise((r) => setTimeout(r, 1000)); // Wait for 1 second before checking again
- }
+ expect(response.statusCode).toBe(404);
}
- expect(completedResponse.body).toHaveProperty("status");
- expect(completedResponse.body.status).toBe("completed");
- expect(completedResponse.body).toHaveProperty("data");
- expect(completedResponse.body.data[0]).toHaveProperty("content");
- expect(completedResponse.body.data[0]).toHaveProperty("markdown");
- expect(completedResponse.body.data[0]).toHaveProperty("metadata");
- expect(completedResponse.body.data[0].content).toContain("Mendable");
- expect(completedResponse.body.data[0].metadata.pageStatusCode).toBe(200);
- expect(completedResponse.body.data[0].metadata.pageError).toBeUndefined();
+ );
- const childrenLinks = completedResponse.body.data.filter(doc =>
- doc.metadata && doc.metadata.sourceURL && doc.metadata.sourceURL.includes("mendable.ai/blog")
- );
+ it.concurrent(
+ "should return a successful crawl status response for a valid crawl job",
+ async () => {
+ const crawlResponse = await request(TEST_URL)
+ .post("/v0/crawl")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({ url: "https://mendable.ai/blog" });
+ expect(crawlResponse.statusCode).toBe(200);
- expect(childrenLinks.length).toBe(completedResponse.body.data.length);
- }, 180000); // 120 seconds
-
- it.concurrent('should return a successful response for a valid crawl job with PDF files without explicit .pdf extension ', async () => {
- const crawlResponse = await request(TEST_URL)
- .post('/v0/crawl')
- .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
- .set('Content-Type', 'application/json')
- .send({ url: 'https://arxiv.org/pdf/astro-ph/9301001', crawlerOptions: { limit: 10, excludes: [ 'list/*', 'login', 'abs/*', 'static/*', 'about/*', 'archive/*' ] }});
- expect(crawlResponse.statusCode).toBe(200);
+ let isCompleted = false;
+ let completedResponse;
- let isCompleted = false;
- let completedResponse;
+ while (!isCompleted) {
+ const response = await request(TEST_URL)
+ .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("status");
- while (!isCompleted) {
- const response = await request(TEST_URL)
- .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
- .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`);
- expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty('status');
-
- if (response.body.status === 'completed') {
- isCompleted = true;
- completedResponse = response;
- } else {
- await new Promise((r) => setTimeout(r, 1000)); // Wait for 1 second before checking again
+ if (response.body.status === "completed") {
+ isCompleted = true;
+ completedResponse = response;
+ } else {
+ await new Promise((r) => setTimeout(r, 1000)); // Wait for 1 second before checking again
+ }
}
- }
- expect(completedResponse.body.status).toBe('completed');
- expect(completedResponse.body).toHaveProperty('data');
+ expect(completedResponse.body).toHaveProperty("status");
+ expect(completedResponse.body.status).toBe("completed");
+ expect(completedResponse.body).toHaveProperty("data");
+ expect(completedResponse.body.data[0]).toHaveProperty("content");
+ expect(completedResponse.body.data[0]).toHaveProperty("markdown");
+ expect(completedResponse.body.data[0]).toHaveProperty("metadata");
+ expect(completedResponse.body.data[0].content).toContain("Mendable");
+ expect(completedResponse.body.data[0].metadata.pageStatusCode).toBe(
+ 200
+ );
+ expect(
+ completedResponse.body.data[0].metadata.pageError
+ ).toBeUndefined();
+
+ const childrenLinks = completedResponse.body.data.filter(
+ (doc) =>
+ doc.metadata &&
+ doc.metadata.sourceURL &&
+ doc.metadata.sourceURL.includes("mendable.ai/blog")
+ );
+
+ expect(childrenLinks.length).toBe(completedResponse.body.data.length);
+ },
+ 180000
+ ); // 180 seconds
+
+ it.concurrent(
+ "should return a successful response for a valid crawl job with PDF files without explicit .pdf extension ",
+ async () => {
+ const crawlResponse = await request(TEST_URL)
+ .post("/v0/crawl")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({
+ url: "https://arxiv.org/pdf/astro-ph/9301001",
+ crawlerOptions: {
+ limit: 10,
+ excludes: [
+ "list/*",
+ "login",
+ "abs/*",
+ "static/*",
+ "about/*",
+ "archive/*"
+ ]
+ }
+ });
+ expect(crawlResponse.statusCode).toBe(200);
+
+ let isCompleted = false;
+ let completedResponse;
+
+ while (!isCompleted) {
+ const response = await request(TEST_URL)
+ .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("status");
+
+ if (response.body.status === "completed") {
+ isCompleted = true;
+ completedResponse = response;
+ } else {
+ await new Promise((r) => setTimeout(r, 1000)); // Wait for 1 second before checking again
+ }
+ }
+ expect(completedResponse.body.status).toBe("completed");
+ expect(completedResponse.body).toHaveProperty("data");
expect(completedResponse.body.data.length).toEqual(1);
expect(completedResponse.body.data).toEqual(
expect.arrayContaining([
expect.objectContaining({
- content: expect.stringContaining('asymmetries might represent, for instance, preferred source orientations to our line of sight.')
+ content: expect.stringContaining(
+ "asymmetries might represent, for instance, preferred source orientations to our line of sight."
+ )
})
])
);
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
- expect(completedResponse.body.data[0].metadata.pageStatusCode).toBe(200);
- expect(completedResponse.body.data[0].metadata.pageError).toBeUndefined();
- }, 180000); // 120 seconds
+ expect(completedResponse.body.data[0].metadata.pageStatusCode).toBe(
+ 200
+ );
+ expect(
+ completedResponse.body.data[0].metadata.pageError
+ ).toBeUndefined();
+ },
+ 180000
+ ); // 180 seconds
+ it.concurrent(
+ "should return a successful response for a valid crawl job with includeHtml set to true option (2)",
+ async () => {
+ const crawlResponse = await request(TEST_URL)
+ .post("/v0/crawl")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({
+ url: "https://roastmywebsite.ai",
+ pageOptions: { includeHtml: true }
+ });
+ expect(crawlResponse.statusCode).toBe(200);
+ const response = await request(TEST_URL)
+ .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("status");
+ expect(["active", "waiting"]).toContain(response.body.status);
- it.concurrent("should return a successful response for a valid crawl job with includeHtml set to true option (2)", async () => {
+ let isFinished = false;
+ let completedResponse;
+
+ while (!isFinished) {
+ const response = await request(TEST_URL)
+ .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("status");
+
+ if (response.body.status === "completed") {
+ isFinished = true;
+ completedResponse = response;
+ } else {
+ await new Promise((r) => setTimeout(r, 1000)); // Wait for 1 second before checking again
+ }
+ }
+
+ expect(completedResponse.statusCode).toBe(200);
+ expect(completedResponse.body).toHaveProperty("status");
+ expect(completedResponse.body.status).toBe("completed");
+ expect(completedResponse.body).toHaveProperty("data");
+ expect(completedResponse.body.data[0]).toHaveProperty("content");
+ expect(completedResponse.body.data[0]).toHaveProperty("markdown");
+ expect(completedResponse.body.data[0]).toHaveProperty("metadata");
+ expect(completedResponse.body.data[0]).toHaveProperty("html");
+ expect(completedResponse.body.data[0].content).toContain("_Roast_");
+ expect(completedResponse.body.data[0].markdown).toContain("_Roast_");
+ expect(completedResponse.body.data[0].html).toContain("
{
const crawlResponse = await request(TEST_URL)
.post("/v0/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send({
- url: "https://roastmywebsite.ai",
+ url: "https://mendable.ai/blog",
pageOptions: { includeHtml: true },
+ crawlerOptions: { allowBackwardCrawling: true }
});
expect(crawlResponse.statusCode).toBe(200);
- const response = await request(TEST_URL)
- .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
- expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty("status");
- expect(["active", "waiting"]).toContain(response.body.status);
-
let isFinished = false;
let completedResponse;
@@ -1095,190 +1383,167 @@ describe("E2E Tests for API Routes", () => {
expect(completedResponse.body.data[0]).toHaveProperty("markdown");
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
expect(completedResponse.body.data[0]).toHaveProperty("html");
- expect(completedResponse.body.data[0].content).toContain("_Roast_");
- expect(completedResponse.body.data[0].markdown).toContain("_Roast_");
- expect(completedResponse.body.data[0].html).toContain("
{
- const crawlResponse = await request(TEST_URL)
- .post("/v0/crawl")
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
- .set("Content-Type", "application/json")
- .send({
- url: "https://mendable.ai/blog",
- pageOptions: { includeHtml: true },
- crawlerOptions: { allowBackwardCrawling: true },
+ const onlyChildrenLinks = completedResponse.body.data.filter((doc) => {
+ return (
+ doc.metadata &&
+ doc.metadata.sourceURL &&
+ doc.metadata.sourceURL.includes("mendable.ai/blog")
+ );
});
- expect(crawlResponse.statusCode).toBe(200);
-
- let isFinished = false;
- let completedResponse;
- while (!isFinished) {
- const response = await request(TEST_URL)
+ expect(completedResponse.body.data.length).toBeGreaterThan(
+ onlyChildrenLinks.length
+ );
+ },
+ 60000
+ );
+
+ it.concurrent(
+ "If someone cancels a crawl job, it should turn into failed status",
+ async () => {
+ const crawlResponse = await request(TEST_URL)
+ .post("/v0/crawl")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({ url: "https://jestjs.io" });
+
+ expect(crawlResponse.statusCode).toBe(200);
+
+ await new Promise((r) => setTimeout(r, 20000));
+
+ const responseCancel = await request(TEST_URL)
+ .delete(`/v0/crawl/cancel/${crawlResponse.body.jobId}`)
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
+ expect(responseCancel.statusCode).toBe(200);
+ expect(responseCancel.body).toHaveProperty("status");
+ expect(responseCancel.body.status).toBe("cancelled");
+
+ await new Promise((r) => setTimeout(r, 10000));
+ const completedResponse = await request(TEST_URL)
.get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
- expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty("status");
- if (response.body.status === "completed") {
- isFinished = true;
- completedResponse = response;
- } else {
- await new Promise((r) => setTimeout(r, 1000)); // Wait for 1 second before checking again
- }
- }
-
- expect(completedResponse.statusCode).toBe(200);
- expect(completedResponse.body).toHaveProperty("status");
- expect(completedResponse.body.status).toBe("completed");
- expect(completedResponse.body).toHaveProperty("data");
- expect(completedResponse.body.data[0]).toHaveProperty("content");
- expect(completedResponse.body.data[0]).toHaveProperty("markdown");
- expect(completedResponse.body.data[0]).toHaveProperty("metadata");
- expect(completedResponse.body.data[0]).toHaveProperty("html");
- expect(completedResponse.body.data[0].content).toContain("Mendable");
- expect(completedResponse.body.data[0].markdown).toContain("Mendable");
- expect(completedResponse.body.data[0].metadata.pageStatusCode).toBe(200);
- expect(completedResponse.body.data[0].metadata.pageError).toBeUndefined();
-
- const onlyChildrenLinks = completedResponse.body.data.filter(doc => {
- return doc.metadata && doc.metadata.sourceURL && doc.metadata.sourceURL.includes("mendable.ai/blog")
- });
-
- expect(completedResponse.body.data.length).toBeGreaterThan(onlyChildrenLinks.length);
- }, 60000);
-
- it.concurrent("If someone cancels a crawl job, it should turn into failed status", async () => {
- const crawlResponse = await request(TEST_URL)
- .post("/v0/crawl")
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
- .set("Content-Type", "application/json")
- .send({ url: "https://jestjs.io" });
-
- expect(crawlResponse.statusCode).toBe(200);
-
- await new Promise((r) => setTimeout(r, 20000));
-
- const responseCancel = await request(TEST_URL)
- .delete(`/v0/crawl/cancel/${crawlResponse.body.jobId}`)
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
- expect(responseCancel.statusCode).toBe(200);
- expect(responseCancel.body).toHaveProperty("status");
- expect(responseCancel.body.status).toBe("cancelled");
-
- await new Promise((r) => setTimeout(r, 10000));
- const completedResponse = await request(TEST_URL)
- .get(`/v0/crawl/status/${crawlResponse.body.jobId}`)
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
-
- expect(completedResponse.statusCode).toBe(200);
- expect(completedResponse.body).toHaveProperty("status");
- expect(completedResponse.body.status).toBe("failed");
- expect(completedResponse.body).toHaveProperty("data");
- expect(completedResponse.body.data).toBeNull();
- expect(completedResponse.body).toHaveProperty("partial_data");
- expect(completedResponse.body.partial_data[0]).toHaveProperty("content");
- expect(completedResponse.body.partial_data[0]).toHaveProperty("markdown");
- expect(completedResponse.body.partial_data[0]).toHaveProperty("metadata");
- expect(completedResponse.body.partial_data[0].metadata.pageStatusCode).toBe(200);
- expect(completedResponse.body.partial_data[0].metadata.pageError).toBeUndefined();
- }, 60000); // 60 seconds
+ expect(completedResponse.statusCode).toBe(200);
+ expect(completedResponse.body).toHaveProperty("status");
+ expect(completedResponse.body.status).toBe("failed");
+ expect(completedResponse.body).toHaveProperty("data");
+ expect(completedResponse.body.data).toBeNull();
+ expect(completedResponse.body).toHaveProperty("partial_data");
+ expect(completedResponse.body.partial_data[0]).toHaveProperty("content");
+ expect(completedResponse.body.partial_data[0]).toHaveProperty("markdown");
+ expect(completedResponse.body.partial_data[0]).toHaveProperty("metadata");
+ expect(
+ completedResponse.body.partial_data[0].metadata.pageStatusCode
+ ).toBe(200);
+ expect(
+ completedResponse.body.partial_data[0].metadata.pageError
+ ).toBeUndefined();
+ },
+ 60000
+ ); // 60 seconds
describe("POST /v0/scrape with LLM Extraction", () => {
- it.concurrent("should extract data using LLM extraction mode", async () => {
- const response = await request(TEST_URL)
- .post("/v0/scrape")
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
- .set("Content-Type", "application/json")
- .send({
- url: "https://mendable.ai",
- pageOptions: {
- onlyMainContent: true,
- },
- extractorOptions: {
- mode: "llm-extraction",
- extractionPrompt:
- "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
- extractionSchema: {
- type: "object",
- properties: {
- company_mission: {
- type: "string",
- },
- supports_sso: {
- type: "boolean",
- },
- is_open_source: {
- type: "boolean",
- },
- },
- required: ["company_mission", "supports_sso", "is_open_source"],
+ it.concurrent(
+ "should extract data using LLM extraction mode",
+ async () => {
+ const response = await request(TEST_URL)
+ .post("/v0/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({
+ url: "https://mendable.ai",
+ pageOptions: {
+ onlyMainContent: true
},
- },
- });
-
- // Ensure that the job was successfully created before proceeding with LLM extraction
- expect(response.statusCode).toBe(200);
-
- // Assuming the LLM extraction object is available in the response body under `data.llm_extraction`
- let llmExtraction = response.body.data.llm_extraction;
-
- // Check if the llm_extraction object has the required properties with correct types and values
- expect(llmExtraction).toHaveProperty("company_mission");
- expect(typeof llmExtraction.company_mission).toBe("string");
- expect(llmExtraction).toHaveProperty("supports_sso");
- expect(llmExtraction.supports_sso).toBe(true);
- expect(typeof llmExtraction.supports_sso).toBe("boolean");
- expect(llmExtraction).toHaveProperty("is_open_source");
- expect(llmExtraction.is_open_source).toBe(false);
- expect(typeof llmExtraction.is_open_source).toBe("boolean");
- }, 60000); // 60 secs
-
- it.concurrent("should extract data using LLM extraction mode with RawHtml", async () => {
- const response = await request(TEST_URL)
- .post("/v0/scrape")
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
- .set("Content-Type", "application/json")
- .send({
- url: "https://mendable.ai",
-
- extractorOptions: {
- mode: "llm-extraction-from-raw-html",
- extractionPrompt:
- "Based on the information on the page, what are the primary and secondary CTA buttons?",
- extractionSchema: {
- type: "object",
- properties: {
- primary_cta: {
- type: "string",
+ extractorOptions: {
+ mode: "llm-extraction",
+ extractionPrompt:
+ "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
+ extractionSchema: {
+ type: "object",
+ properties: {
+ company_mission: {
+ type: "string"
+ },
+ supports_sso: {
+ type: "boolean"
+ },
+ is_open_source: {
+ type: "boolean"
+ }
},
- secondary_cta: {
- type: "string",
+ required: ["company_mission", "supports_sso", "is_open_source"]
+ }
+ }
+ });
+
+ // Ensure that the job was successfully created before proceeding with LLM extraction
+ expect(response.statusCode).toBe(200);
+
+ // Assuming the LLM extraction object is available in the response body under `data.llm_extraction`
+ let llmExtraction = response.body.data.llm_extraction;
+
+ // Check if the llm_extraction object has the required properties with correct types and values
+ expect(llmExtraction).toHaveProperty("company_mission");
+ expect(typeof llmExtraction.company_mission).toBe("string");
+ expect(llmExtraction).toHaveProperty("supports_sso");
+ expect(llmExtraction.supports_sso).toBe(true);
+ expect(typeof llmExtraction.supports_sso).toBe("boolean");
+ expect(llmExtraction).toHaveProperty("is_open_source");
+ expect(llmExtraction.is_open_source).toBe(false);
+ expect(typeof llmExtraction.is_open_source).toBe("boolean");
+ },
+ 60000
+ ); // 60 secs
+
+ it.concurrent(
+ "should extract data using LLM extraction mode with RawHtml",
+ async () => {
+ const response = await request(TEST_URL)
+ .post("/v0/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({
+ url: "https://mendable.ai",
+
+ extractorOptions: {
+ mode: "llm-extraction-from-raw-html",
+ extractionPrompt:
+ "Based on the information on the page, what are the primary and secondary CTA buttons?",
+ extractionSchema: {
+ type: "object",
+ properties: {
+ primary_cta: {
+ type: "string"
+ },
+ secondary_cta: {
+ type: "string"
+ }
},
- },
- required: ["primary_cta", "secondary_cta"],
- },
- },
- });
+ required: ["primary_cta", "secondary_cta"]
+ }
+ }
+ });
- // Ensure that the job was successfully created before proceeding with LLM extraction
- expect(response.statusCode).toBe(200);
+ // Ensure that the job was successfully created before proceeding with LLM extraction
+ expect(response.statusCode).toBe(200);
- // Assuming the LLM extraction object is available in the response body under `data.llm_extraction`
- let llmExtraction = response.body.data.llm_extraction;
+ // Assuming the LLM extraction object is available in the response body under `data.llm_extraction`
+ let llmExtraction = response.body.data.llm_extraction;
- // Check if the llm_extraction object has the required properties with correct types and values
- expect(llmExtraction).toHaveProperty("primary_cta");
- expect(typeof llmExtraction.primary_cta).toBe("string");
- expect(llmExtraction).toHaveProperty("secondary_cta");
- expect(typeof llmExtraction.secondary_cta).toBe("string");
-
- }, 60000); // 60 secs
+ // Check if the llm_extraction object has the required properties with correct types and values
+ expect(llmExtraction).toHaveProperty("primary_cta");
+ expect(typeof llmExtraction.primary_cta).toBe("string");
+ expect(llmExtraction).toHaveProperty("secondary_cta");
+ expect(typeof llmExtraction.secondary_cta).toBe("string");
+ },
+ 60000
+ ); // 60 secs
});
// describe("POST /v0/scrape for Top 100 Companies", () => {
@@ -1340,60 +1605,63 @@ describe("E2E Tests for API Routes", () => {
// });
describe("POST /v0/crawl with fast mode", () => {
- it.concurrent("should complete the crawl under 20 seconds", async () => {
- const startTime = Date.now();
+ it.concurrent(
+ "should complete the crawl under 20 seconds",
+ async () => {
+ const startTime = Date.now();
- const crawlResponse = await request(TEST_URL)
- .post("/v0/crawl")
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
- .set("Content-Type", "application/json")
- .send({
- url: "https://flutterbricks.com",
- crawlerOptions: {
- mode: "fast"
+ const crawlResponse = await request(TEST_URL)
+ .post("/v0/crawl")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({
+ url: "https://flutterbricks.com",
+ crawlerOptions: {
+ mode: "fast"
+ }
+ });
+
+ expect(crawlResponse.statusCode).toBe(200);
+
+ const jobId = crawlResponse.body.jobId;
+ let statusResponse;
+ let isFinished = false;
+
+ while (!isFinished) {
+ statusResponse = await request(TEST_URL)
+ .get(`/v0/crawl/status/${jobId}`)
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
+
+ expect(statusResponse.statusCode).toBe(200);
+ isFinished = statusResponse.body.status === "completed";
+
+ if (!isFinished) {
+ await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again
}
- });
-
- expect(crawlResponse.statusCode).toBe(200);
-
- const jobId = crawlResponse.body.jobId;
- let statusResponse;
- let isFinished = false;
-
- while (!isFinished) {
- statusResponse = await request(TEST_URL)
- .get(`/v0/crawl/status/${jobId}`)
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
-
- expect(statusResponse.statusCode).toBe(200);
- isFinished = statusResponse.body.status === "completed";
-
- if (!isFinished) {
- await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again
}
- }
- // const endTime = Date.now();
- // const timeElapsed = (endTime - startTime) / 1000; // Convert to seconds
+ // const endTime = Date.now();
+ // const timeElapsed = (endTime - startTime) / 1000; // Convert to seconds
- // console.log(`Time elapsed: ${timeElapsed} seconds`);
+ // console.log(`Time elapsed: ${timeElapsed} seconds`);
- expect(statusResponse.body.status).toBe("completed");
- expect(statusResponse.body).toHaveProperty("data");
- expect(statusResponse.body.data[0]).toHaveProperty("content");
- expect(statusResponse.body.data[0]).toHaveProperty("markdown");
- expect(statusResponse.body.data[0]).toHaveProperty("metadata");
- expect(statusResponse.body.data[0].metadata.pageStatusCode).toBe(200);
- expect(statusResponse.body.data[0].metadata.pageError).toBeUndefined();
+ expect(statusResponse.body.status).toBe("completed");
+ expect(statusResponse.body).toHaveProperty("data");
+ expect(statusResponse.body.data[0]).toHaveProperty("content");
+ expect(statusResponse.body.data[0]).toHaveProperty("markdown");
+ expect(statusResponse.body.data[0]).toHaveProperty("metadata");
+ expect(statusResponse.body.data[0].metadata.pageStatusCode).toBe(200);
+ expect(statusResponse.body.data[0].metadata.pageError).toBeUndefined();
- const results = statusResponse.body.data;
- // results.forEach((result, i) => {
- // console.log(result.metadata.sourceURL);
- // });
- expect(results.length).toBeGreaterThanOrEqual(10);
- expect(results.length).toBeLessThanOrEqual(15);
-
- }, 20000);
+ const results = statusResponse.body.data;
+ // results.forEach((result, i) => {
+ // console.log(result.metadata.sourceURL);
+ // });
+ expect(results.length).toBeGreaterThanOrEqual(10);
+ expect(results.length).toBeLessThanOrEqual(15);
+ },
+ 20000
+ );
// it.concurrent("should complete the crawl in more than 10 seconds", async () => {
// const startTime = Date.now();
@@ -1440,7 +1708,7 @@ describe("E2E Tests for API Routes", () => {
// // });
// expect(results.length).toBeGreaterThanOrEqual(10);
// expect(results.length).toBeLessThanOrEqual(15);
-
+
// }, 50000);// 15 seconds timeout to account for network delays
});
@@ -1453,24 +1721,28 @@ describe("E2E Tests for API Routes", () => {
});
describe("Rate Limiter", () => {
- it.concurrent("should return 429 when rate limit is exceeded for preview token", async () => {
- for (let i = 0; i < 5; i++) {
+ it.concurrent(
+ "should return 429 when rate limit is exceeded for preview token",
+ async () => {
+ for (let i = 0; i < 5; i++) {
+ const response = await request(TEST_URL)
+ .post("/v0/scrape")
+ .set("Authorization", `Bearer this_is_just_a_preview_token`)
+ .set("Content-Type", "application/json")
+ .send({ url: "https://www.scrapethissite.com" });
+
+ expect(response.statusCode).toBe(200);
+ }
const response = await request(TEST_URL)
.post("/v0/scrape")
.set("Authorization", `Bearer this_is_just_a_preview_token`)
.set("Content-Type", "application/json")
.send({ url: "https://www.scrapethissite.com" });
- expect(response.statusCode).toBe(200);
- }
- const response = await request(TEST_URL)
- .post("/v0/scrape")
- .set("Authorization", `Bearer this_is_just_a_preview_token`)
- .set("Content-Type", "application/json")
- .send({ url: "https://www.scrapethissite.com" });
-
- expect(response.statusCode).toBe(429);
- }, 90000);
+ expect(response.statusCode).toBe(429);
+ },
+ 90000
+ );
});
// it.concurrent("should return 429 when rate limit is exceeded for API key", async () => {
diff --git a/apps/api/src/__tests__/e2e_map/index.test.ts b/apps/api/src/__tests__/e2e_map/index.test.ts
index b065dff1..948f097e 100644
--- a/apps/api/src/__tests__/e2e_map/index.test.ts
+++ b/apps/api/src/__tests__/e2e_map/index.test.ts
@@ -15,7 +15,7 @@ describe("E2E Tests for Map API Routes", () => {
.send({
url: "https://firecrawl.dev",
sitemapOnly: false,
- search: "smart-crawl",
+ search: "smart-crawl"
});
console.log(response.body);
@@ -37,7 +37,7 @@ describe("E2E Tests for Map API Routes", () => {
.send({
url: "https://firecrawl.dev",
sitemapOnly: false,
- includeSubdomains: true,
+ includeSubdomains: true
});
console.log(response.body);
@@ -60,7 +60,7 @@ describe("E2E Tests for Map API Routes", () => {
.set("Content-Type", "application/json")
.send({
url: "https://firecrawl.dev",
- sitemapOnly: true,
+ sitemapOnly: true
});
console.log(response.body);
@@ -84,7 +84,7 @@ describe("E2E Tests for Map API Routes", () => {
.send({
url: "https://firecrawl.dev",
sitemapOnly: false,
- limit: 10,
+ limit: 10
});
console.log(response.body);
@@ -104,7 +104,7 @@ describe("E2E Tests for Map API Routes", () => {
.set("Content-Type", "application/json")
.send({
url: "https://geekflare.com/sitemap_index.xml",
- sitemapOnly: true,
+ sitemapOnly: true
});
console.log(response.body);
diff --git a/apps/api/src/__tests__/e2e_noAuth/index.test.ts b/apps/api/src/__tests__/e2e_noAuth/index.test.ts
index 83f676b8..9c3ddf33 100644
--- a/apps/api/src/__tests__/e2e_noAuth/index.test.ts
+++ b/apps/api/src/__tests__/e2e_noAuth/index.test.ts
@@ -32,7 +32,6 @@ describe("E2E Tests for API Routes with No Authentication", () => {
process.env = originalEnv;
});
-
describe("GET /", () => {
it("should return Hello, world! message", async () => {
const response = await request(TEST_URL).get("/");
@@ -62,7 +61,9 @@ describe("E2E Tests for API Routes with No Authentication", () => {
.set("Content-Type", "application/json")
.send({ url: blocklistedUrl });
expect(response.statusCode).toBe(403);
- expect(response.body.error).toContain("Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.");
+ expect(response.body.error).toContain(
+ "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it."
+ );
});
it("should return a successful response", async () => {
@@ -87,7 +88,9 @@ describe("E2E Tests for API Routes with No Authentication", () => {
.set("Content-Type", "application/json")
.send({ url: blocklistedUrl });
expect(response.statusCode).toBe(403);
- expect(response.body.error).toContain("Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.");
+ expect(response.body.error).toContain(
+ "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it."
+ );
});
it("should return a successful response", async () => {
@@ -116,7 +119,9 @@ describe("E2E Tests for API Routes with No Authentication", () => {
.set("Content-Type", "application/json")
.send({ url: blocklistedUrl });
expect(response.statusCode).toBe(403);
- expect(response.body.error).toContain("Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.");
+ expect(response.body.error).toContain(
+ "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it."
+ );
});
it("should return a successful response", async () => {
@@ -199,8 +204,6 @@ describe("E2E Tests for API Routes with No Authentication", () => {
expect(completedResponse.body.data[0]).toHaveProperty("content");
expect(completedResponse.body.data[0]).toHaveProperty("markdown");
expect(completedResponse.body.data[0]).toHaveProperty("metadata");
-
-
}, 60000); // 60 seconds
});
diff --git a/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts b/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts
index e1f5f3fa..33e3be5d 100644
--- a/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts
+++ b/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts
@@ -2,7 +2,7 @@ import request from "supertest";
import { configDotenv } from "dotenv";
import {
ScrapeRequestInput,
- ScrapeResponseRequestTest,
+ ScrapeResponseRequestTest
} from "../../controllers/v1/types";
configDotenv();
@@ -19,15 +19,17 @@ describe("E2E Tests for v1 API Routes", () => {
describe("GET /is-production", () => {
it.concurrent("should return the production status", async () => {
- const response: ScrapeResponseRequestTest = await request(TEST_URL).get(
- "/is-production"
- );
+ const response: ScrapeResponseRequestTest =
+ await request(TEST_URL).get("/is-production");
- console.log('process.env.USE_DB_AUTHENTICATION', process.env.USE_DB_AUTHENTICATION);
- console.log('?', process.env.USE_DB_AUTHENTICATION === 'true');
- const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
- console.log('!!useDbAuthentication', !!useDbAuthentication);
- console.log('!useDbAuthentication', !useDbAuthentication);
+ console.log(
+ "process.env.USE_DB_AUTHENTICATION",
+ process.env.USE_DB_AUTHENTICATION
+ );
+ console.log("?", process.env.USE_DB_AUTHENTICATION === "true");
+ const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === "true";
+ console.log("!!useDbAuthentication", !!useDbAuthentication);
+ console.log("!useDbAuthentication", !useDbAuthentication);
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("isProduction");
@@ -37,15 +39,15 @@ describe("E2E Tests for v1 API Routes", () => {
describe("POST /v1/scrape", () => {
it.concurrent("should require authorization", async () => {
const response: ScrapeResponseRequestTest = await request(TEST_URL)
- .post("/v1/scrape")
- .send({ url: "https://firecrawl.dev"})
+ .post("/v1/scrape")
+ .send({ url: "https://firecrawl.dev" });
expect(response.statusCode).toBe(401);
});
it.concurrent("should throw error for blocklisted URL", async () => {
const scrapeRequest: ScrapeRequestInput = {
- url: "https://facebook.com/fake-test",
+ url: "https://facebook.com/fake-test"
};
const response = await request(TEST_URL)
@@ -55,7 +57,9 @@ describe("E2E Tests for v1 API Routes", () => {
.send(scrapeRequest);
expect(response.statusCode).toBe(403);
- expect(response.body.error).toBe("URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions.");
+ expect(response.body.error).toBe(
+ "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions."
+ );
});
it.concurrent(
@@ -74,7 +78,7 @@ describe("E2E Tests for v1 API Routes", () => {
"should return a successful response with a valid API key",
async () => {
const scrapeRequest: ScrapeRequestInput = {
- url: "https://roastmywebsite.ai",
+ url: "https://roastmywebsite.ai"
};
const response: ScrapeResponseRequestTest = await request(TEST_URL)
@@ -126,7 +130,7 @@ describe("E2E Tests for v1 API Routes", () => {
"should return a successful response with a valid API key",
async () => {
const scrapeRequest: ScrapeRequestInput = {
- url: "https://arxiv.org/abs/2410.04840",
+ url: "https://arxiv.org/abs/2410.04840"
};
const response: ScrapeResponseRequestTest = await request(TEST_URL)
@@ -146,8 +150,12 @@ describe("E2E Tests for v1 API Routes", () => {
expect(response.body.data).not.toHaveProperty("html");
expect(response.body.data.markdown).toContain("Strong Model Collapse");
expect(response.body.data.metadata.error).toBeUndefined();
- expect(response.body.data.metadata.description).toContain("Abstract page for arXiv paper 2410.04840: Strong Model Collapse");
- expect(response.body.data.metadata.citation_title).toBe("Strong Model Collapse");
+ expect(response.body.data.metadata.description).toContain(
+ "Abstract page for arXiv paper 2410.04840: Strong Model Collapse"
+ );
+ expect(response.body.data.metadata.citation_title).toBe(
+ "Strong Model Collapse"
+ );
expect(response.body.data.metadata.citation_author).toEqual([
"Dohmatob, Elvis",
"Feng, Yunzhen",
@@ -155,11 +163,21 @@ describe("E2E Tests for v1 API Routes", () => {
"Kempe, Julia"
]);
expect(response.body.data.metadata.citation_date).toBe("2024/10/07");
- expect(response.body.data.metadata.citation_online_date).toBe("2024/10/08");
- expect(response.body.data.metadata.citation_pdf_url).toBe("http://arxiv.org/pdf/2410.04840");
- expect(response.body.data.metadata.citation_arxiv_id).toBe("2410.04840");
- expect(response.body.data.metadata.citation_abstract).toContain("Within the scaling laws paradigm");
- expect(response.body.data.metadata.sourceURL).toBe("https://arxiv.org/abs/2410.04840");
+ expect(response.body.data.metadata.citation_online_date).toBe(
+ "2024/10/08"
+ );
+ expect(response.body.data.metadata.citation_pdf_url).toBe(
+ "http://arxiv.org/pdf/2410.04840"
+ );
+ expect(response.body.data.metadata.citation_arxiv_id).toBe(
+ "2410.04840"
+ );
+ expect(response.body.data.metadata.citation_abstract).toContain(
+ "Within the scaling laws paradigm"
+ );
+ expect(response.body.data.metadata.sourceURL).toBe(
+ "https://arxiv.org/abs/2410.04840"
+ );
expect(response.body.data.metadata.statusCode).toBe(200);
},
30000
@@ -169,7 +187,7 @@ describe("E2E Tests for v1 API Routes", () => {
async () => {
const scrapeRequest: ScrapeRequestInput = {
url: "https://roastmywebsite.ai",
- formats: ["markdown", "html"],
+ formats: ["markdown", "html"]
};
const response: ScrapeResponseRequestTest = await request(TEST_URL)
@@ -177,7 +195,7 @@ describe("E2E Tests for v1 API Routes", () => {
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send(scrapeRequest);
-
+
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("data");
if (!("data" in response.body)) {
@@ -193,62 +211,77 @@ describe("E2E Tests for v1 API Routes", () => {
},
30000
);
- it.concurrent('should return a successful response for a valid scrape with PDF file', async () => {
+ it.concurrent(
+ "should return a successful response for a valid scrape with PDF file",
+ async () => {
const scrapeRequest: ScrapeRequestInput = {
url: "https://arxiv.org/pdf/astro-ph/9301001.pdf"
- // formats: ["markdown", "html"],
+ // formats: ["markdown", "html"],
};
const response: ScrapeResponseRequestTest = await request(TEST_URL)
- .post('/v1/scrape')
- .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
- .set('Content-Type', 'application/json')
- .send(scrapeRequest);
- await new Promise((r) => setTimeout(r, 6000));
-
- expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty('data');
- if (!("data" in response.body)) {
- throw new Error("Expected response body to have 'data' property");
- }
- expect(response.body.data).toHaveProperty('metadata');
- expect(response.body.data.markdown).toContain('Broad Line Radio Galaxy');
- expect(response.body.data.metadata.statusCode).toBe(200);
- expect(response.body.data.metadata.error).toBeUndefined();
- }, 60000);
-
- it.concurrent('should return a successful response for a valid scrape with PDF file without explicit .pdf extension', async () => {
- const scrapeRequest: ScrapeRequestInput = {
- url: "https://arxiv.org/pdf/astro-ph/9301001"
- };
- const response: ScrapeResponseRequestTest = await request(TEST_URL)
- .post('/v1/scrape')
- .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
- .set('Content-Type', 'application/json')
- .send(scrapeRequest);
- await new Promise((r) => setTimeout(r, 6000));
-
- expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty('data');
- if (!("data" in response.body)) {
- throw new Error("Expected response body to have 'data' property");
- }
- expect(response.body.data).toHaveProperty('markdown');
- expect(response.body.data).toHaveProperty('metadata');
- expect(response.body.data.markdown).toContain('Broad Line Radio Galaxy');
- expect(response.body.data.metadata.statusCode).toBe(200);
- expect(response.body.data.metadata.error).toBeUndefined();
- }, 60000);
-
- it.concurrent("should return a successful response with a valid API key with removeTags option", async () => {
- const scrapeRequest: ScrapeRequestInput = {
- url: "https://www.scrapethissite.com/",
- onlyMainContent: false // default is true
- };
- const responseWithoutRemoveTags: ScrapeResponseRequestTest = await request(TEST_URL)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send(scrapeRequest);
+ await new Promise((r) => setTimeout(r, 6000));
+
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("data");
+ if (!("data" in response.body)) {
+ throw new Error("Expected response body to have 'data' property");
+ }
+ expect(response.body.data).toHaveProperty("metadata");
+ expect(response.body.data.markdown).toContain(
+ "Broad Line Radio Galaxy"
+ );
+ expect(response.body.data.metadata.statusCode).toBe(200);
+ expect(response.body.data.metadata.error).toBeUndefined();
+ },
+ 60000
+ );
+
+ it.concurrent(
+ "should return a successful response for a valid scrape with PDF file without explicit .pdf extension",
+ async () => {
+ const scrapeRequest: ScrapeRequestInput = {
+ url: "https://arxiv.org/pdf/astro-ph/9301001"
+ };
+ const response: ScrapeResponseRequestTest = await request(TEST_URL)
+ .post("/v1/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send(scrapeRequest);
+ await new Promise((r) => setTimeout(r, 6000));
+
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("data");
+ if (!("data" in response.body)) {
+ throw new Error("Expected response body to have 'data' property");
+ }
+ expect(response.body.data).toHaveProperty("markdown");
+ expect(response.body.data).toHaveProperty("metadata");
+ expect(response.body.data.markdown).toContain(
+ "Broad Line Radio Galaxy"
+ );
+ expect(response.body.data.metadata.statusCode).toBe(200);
+ expect(response.body.data.metadata.error).toBeUndefined();
+ },
+ 60000
+ );
+
+ it.concurrent(
+ "should return a successful response with a valid API key with removeTags option",
+ async () => {
+ const scrapeRequest: ScrapeRequestInput = {
+ url: "https://www.scrapethissite.com/",
+ onlyMainContent: false // default is true
+ };
+ const responseWithoutRemoveTags: ScrapeResponseRequestTest =
+ await request(TEST_URL)
+ .post("/v1/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send(scrapeRequest);
expect(responseWithoutRemoveTags.statusCode).toBe(200);
expect(responseWithoutRemoveTags.body).toHaveProperty("data");
@@ -258,13 +291,17 @@ describe("E2E Tests for v1 API Routes", () => {
expect(responseWithoutRemoveTags.body.data).toHaveProperty("markdown");
expect(responseWithoutRemoveTags.body.data).toHaveProperty("metadata");
expect(responseWithoutRemoveTags.body.data).not.toHaveProperty("html");
- expect(responseWithoutRemoveTags.body.data.markdown).toContain("[FAQ](/faq/)"); // .nav
- expect(responseWithoutRemoveTags.body.data.markdown).toContain("Hartley Brody 2023"); // #footer
-
+ expect(responseWithoutRemoveTags.body.data.markdown).toContain(
+ "[FAQ](/faq/)"
+ ); // .nav
+ expect(responseWithoutRemoveTags.body.data.markdown).toContain(
+ "Hartley Brody 2023"
+ ); // #footer
+
const scrapeRequestWithRemoveTags: ScrapeRequestInput = {
- url: "https://www.scrapethissite.com/",
- excludeTags: ['.nav', '#footer', 'strong'],
- onlyMainContent: false // default is true
+ url: "https://www.scrapethissite.com/",
+ excludeTags: [".nav", "#footer", "strong"],
+ onlyMainContent: false // default is true
};
const response: ScrapeResponseRequestTest = await request(TEST_URL)
.post("/v1/scrape")
@@ -281,725 +318,757 @@ describe("E2E Tests for v1 API Routes", () => {
expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data).not.toHaveProperty("html");
expect(response.body.data.markdown).not.toContain("Hartley Brody 2023");
- expect(response.body.data.markdown).not.toContain("[FAQ](/faq/)"); //
- }, 30000);
+ expect(response.body.data.markdown).not.toContain("[FAQ](/faq/)"); //
+ },
+ 30000
+ );
- it.concurrent('should return a successful response for a scrape with 400 page', async () => {
+ it.concurrent(
+ "should return a successful response for a scrape with 400 page",
+ async () => {
const response: ScrapeResponseRequestTest = await request(TEST_URL)
- .post('/v1/scrape')
- .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
- .set('Content-Type', 'application/json')
- .send({ url: 'https://httpstat.us/400' });
+ .post("/v1/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({ url: "https://httpstat.us/400" });
await new Promise((r) => setTimeout(r, 5000));
-
+
expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty('data');
+ expect(response.body).toHaveProperty("data");
if (!("data" in response.body)) {
throw new Error("Expected response body to have 'data' property");
}
- expect(response.body.data).toHaveProperty('markdown');
- expect(response.body.data).toHaveProperty('metadata');
+ expect(response.body.data).toHaveProperty("markdown");
+ expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data.metadata.statusCode).toBe(400);
- }, 60000);
+ },
+ 60000
+ );
-
- it.concurrent('should return a successful response for a scrape with 401 page', async () => {
+ it.concurrent(
+ "should return a successful response for a scrape with 401 page",
+ async () => {
const response: ScrapeResponseRequestTest = await request(TEST_URL)
- .post('/v1/scrape')
- .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
- .set('Content-Type', 'application/json')
- .send({ url: 'https://httpstat.us/401' });
+ .post("/v1/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({ url: "https://httpstat.us/401" });
await new Promise((r) => setTimeout(r, 5000));
-
+
expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty('data');
+ expect(response.body).toHaveProperty("data");
if (!("data" in response.body)) {
throw new Error("Expected response body to have 'data' property");
}
- expect(response.body.data).toHaveProperty('markdown');
- expect(response.body.data).toHaveProperty('metadata');
+ expect(response.body.data).toHaveProperty("markdown");
+ expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data.metadata.statusCode).toBe(401);
- }, 60000);
+ },
+ 60000
+ );
- // Removed it as we want to retry fallback to the next scraper
- // it.concurrent('should return a successful response for a scrape with 403 page', async () => {
- // const response: ScrapeResponseRequestTest = await request(TEST_URL)
- // .post('/v1/scrape')
- // .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
- // .set('Content-Type', 'application/json')
- // .send({ url: 'https://httpstat.us/403' });
- // await new Promise((r) => setTimeout(r, 5000));
-
- // expect(response.statusCode).toBe(200);
- // expect(response.body).toHaveProperty('data');
- // if (!("data" in response.body)) {
- // throw new Error("Expected response body to have 'data' property");
- // }
- // expect(response.body.data).toHaveProperty('markdown');
- // expect(response.body.data).toHaveProperty('metadata');
- // expect(response.body.data.metadata.statusCode).toBe(403);
- // }, 60000);
+ // Removed it as we want to retry fallback to the next scraper
+ // it.concurrent('should return a successful response for a scrape with 403 page', async () => {
+ // const response: ScrapeResponseRequestTest = await request(TEST_URL)
+ // .post('/v1/scrape')
+ // .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
+ // .set('Content-Type', 'application/json')
+ // .send({ url: 'https://httpstat.us/403' });
+ // await new Promise((r) => setTimeout(r, 5000));
- it.concurrent('should return a successful response for a scrape with 404 page', async () => {
+ // expect(response.statusCode).toBe(200);
+ // expect(response.body).toHaveProperty('data');
+ // if (!("data" in response.body)) {
+ // throw new Error("Expected response body to have 'data' property");
+ // }
+ // expect(response.body.data).toHaveProperty('markdown');
+ // expect(response.body.data).toHaveProperty('metadata');
+ // expect(response.body.data.metadata.statusCode).toBe(403);
+ // }, 60000);
+
+ it.concurrent(
+ "should return a successful response for a scrape with 404 page",
+ async () => {
const response: ScrapeResponseRequestTest = await request(TEST_URL)
- .post('/v1/scrape')
- .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
- .set('Content-Type', 'application/json')
- .send({ url: 'https://httpstat.us/404' });
+ .post("/v1/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({ url: "https://httpstat.us/404" });
await new Promise((r) => setTimeout(r, 5000));
-
+
expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty('data');
+ expect(response.body).toHaveProperty("data");
if (!("data" in response.body)) {
throw new Error("Expected response body to have 'data' property");
}
- expect(response.body.data).toHaveProperty('markdown');
- expect(response.body.data).toHaveProperty('metadata');
+ expect(response.body.data).toHaveProperty("markdown");
+ expect(response.body.data).toHaveProperty("metadata");
expect(response.body.data.metadata.statusCode).toBe(404);
- }, 60000);
+ },
+ 60000
+ );
- // it.concurrent('should return a successful response for a scrape with 405 page', async () => {
- // const response: ScrapeResponseRequestTest = await request(TEST_URL)
- // .post('/v1/scrape')
- // .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
- // .set('Content-Type', 'application/json')
- // .send({ url: 'https://httpstat.us/405' });
- // await new Promise((r) => setTimeout(r, 5000));
-
- // expect(response.statusCode).toBe(200);
- // expect(response.body).toHaveProperty('data');
- // if (!("data" in response.body)) {
- // throw new Error("Expected response body to have 'data' property");
- // }
- // expect(response.body.data).toHaveProperty('markdown');
- // expect(response.body.data).toHaveProperty('metadata');
- // expect(response.body.data.metadata.statusCode).toBe(405);
- // }, 60000);
+ // it.concurrent('should return a successful response for a scrape with 405 page', async () => {
+ // const response: ScrapeResponseRequestTest = await request(TEST_URL)
+ // .post('/v1/scrape')
+ // .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
+ // .set('Content-Type', 'application/json')
+ // .send({ url: 'https://httpstat.us/405' });
+ // await new Promise((r) => setTimeout(r, 5000));
- // it.concurrent('should return a successful response for a scrape with 500 page', async () => {
- // const response: ScrapeResponseRequestTest = await request(TEST_URL)
- // .post('/v1/scrape')
- // .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
- // .set('Content-Type', 'application/json')
- // .send({ url: 'https://httpstat.us/500' });
- // await new Promise((r) => setTimeout(r, 5000));
-
- // expect(response.statusCode).toBe(200);
- // expect(response.body).toHaveProperty('data');
- // if (!("data" in response.body)) {
- // throw new Error("Expected response body to have 'data' property");
- // }
- // expect(response.body.data).toHaveProperty('markdown');
- // expect(response.body.data).toHaveProperty('metadata');
- // expect(response.body.data.metadata.statusCode).toBe(500);
- // }, 60000);
+ // expect(response.statusCode).toBe(200);
+ // expect(response.body).toHaveProperty('data');
+ // if (!("data" in response.body)) {
+ // throw new Error("Expected response body to have 'data' property");
+ // }
+ // expect(response.body.data).toHaveProperty('markdown');
+ // expect(response.body.data).toHaveProperty('metadata');
+ // expect(response.body.data.metadata.statusCode).toBe(405);
+ // }, 60000);
- it.concurrent("should return a timeout error when scraping takes longer than the specified timeout", async () => {
+ // it.concurrent('should return a successful response for a scrape with 500 page', async () => {
+ // const response: ScrapeResponseRequestTest = await request(TEST_URL)
+ // .post('/v1/scrape')
+ // .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
+ // .set('Content-Type', 'application/json')
+ // .send({ url: 'https://httpstat.us/500' });
+ // await new Promise((r) => setTimeout(r, 5000));
+
+ // expect(response.statusCode).toBe(200);
+ // expect(response.body).toHaveProperty('data');
+ // if (!("data" in response.body)) {
+ // throw new Error("Expected response body to have 'data' property");
+ // }
+ // expect(response.body.data).toHaveProperty('markdown');
+ // expect(response.body.data).toHaveProperty('metadata');
+ // expect(response.body.data.metadata.statusCode).toBe(500);
+ // }, 60000);
+
+ it.concurrent(
+ "should return a timeout error when scraping takes longer than the specified timeout",
+ async () => {
const response: ScrapeResponseRequestTest = await request(TEST_URL)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send({ url: "https://firecrawl.dev", timeout: 1000 });
-
+
expect(response.statusCode).toBe(408);
- }, 3000);
+ },
+ 3000
+ );
- it.concurrent(
- "should return a successful response with a valid API key and includeHtml set to true",
- async () => {
- const scrapeRequest: ScrapeRequestInput = {
- url: "https://roastmywebsite.ai",
- formats: ["html","rawHtml"],
- };
-
- const response: ScrapeResponseRequestTest = await request(TEST_URL)
- .post("/v1/scrape")
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
- .set("Content-Type", "application/json")
- .send(scrapeRequest);
-
- expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty("data");
- if (!("data" in response.body)) {
- throw new Error("Expected response body to have 'data' property");
- }
- expect(response.body.data).not.toHaveProperty("markdown");
- expect(response.body.data).toHaveProperty("html");
- expect(response.body.data).toHaveProperty("rawHtml");
- expect(response.body.data).toHaveProperty("metadata");
- expect(response.body.data.html).toContain("
{
+ const scrapeRequest: ScrapeRequestInput = {
+ url: "https://roastmywebsite.ai",
+ formats: ["html", "rawHtml"]
+ };
- it.concurrent(
- "should return a successful response with waitFor",
- async () => {
- const scrapeRequest: ScrapeRequestInput = {
- url: "https://ycombinator.com/companies",
- formats: ["markdown"],
- waitFor: 8000
- };
-
- const response: ScrapeResponseRequestTest = await request(TEST_URL)
- .post("/v1/scrape")
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
- .set("Content-Type", "application/json")
- .send(scrapeRequest);
-
- expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty("data");
- if (!("data" in response.body)) {
- throw new Error("Expected response body to have 'data' property");
- }
- expect(response.body.data).toHaveProperty("markdown");
- expect(response.body.data).not.toHaveProperty("html");
- expect(response.body.data).not.toHaveProperty("links");
- expect(response.body.data).not.toHaveProperty("rawHtml");
- expect(response.body.data).toHaveProperty("metadata");
- expect(response.body.data.markdown).toContain("PagerDuty");
- expect(response.body.data.metadata.statusCode).toBe(200);
- expect(response.body.data.metadata.error).toBeUndefined();
+ const response: ScrapeResponseRequestTest = await request(TEST_URL)
+ .post("/v1/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send(scrapeRequest);
- },
- 30000
- );
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("data");
+ if (!("data" in response.body)) {
+ throw new Error("Expected response body to have 'data' property");
+ }
+ expect(response.body.data).not.toHaveProperty("markdown");
+ expect(response.body.data).toHaveProperty("html");
+ expect(response.body.data).toHaveProperty("rawHtml");
+ expect(response.body.data).toHaveProperty("metadata");
+ expect(response.body.data.html).toContain("
{
- const scrapeRequest: ScrapeRequestInput = {
- url: "https://roastmywebsite.ai",
- formats: ["links"],
- };
-
- const response: ScrapeResponseRequestTest = await request(TEST_URL)
- .post("/v1/scrape")
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
- .set("Content-Type", "application/json")
- .send(scrapeRequest);
-
- expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty("data");
- if (!("data" in response.body)) {
- throw new Error("Expected response body to have 'data' property");
- }
- expect(response.body.data).not.toHaveProperty("html");
- expect(response.body.data).not.toHaveProperty("rawHtml");
- expect(response.body.data).toHaveProperty("links");
- expect(response.body.data).toHaveProperty("metadata");
- expect(response.body.data.links).toContain("https://firecrawl.dev");
- expect(response.body.data.metadata.statusCode).toBe(200);
- expect(response.body.data.metadata.error).toBeUndefined();
- },
- 30000
- );
-
+ it.concurrent(
+ "should return a successful response with waitFor",
+ async () => {
+ const scrapeRequest: ScrapeRequestInput = {
+ url: "https://ycombinator.com/companies",
+ formats: ["markdown"],
+ waitFor: 8000
+ };
+ const response: ScrapeResponseRequestTest = await request(TEST_URL)
+ .post("/v1/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send(scrapeRequest);
+
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("data");
+ if (!("data" in response.body)) {
+ throw new Error("Expected response body to have 'data' property");
+ }
+ expect(response.body.data).toHaveProperty("markdown");
+ expect(response.body.data).not.toHaveProperty("html");
+ expect(response.body.data).not.toHaveProperty("links");
+ expect(response.body.data).not.toHaveProperty("rawHtml");
+ expect(response.body.data).toHaveProperty("metadata");
+ expect(response.body.data.markdown).toContain("PagerDuty");
+ expect(response.body.data.metadata.statusCode).toBe(200);
+ expect(response.body.data.metadata.error).toBeUndefined();
+ },
+ 30000
+ );
+
+ it.concurrent(
+ "should return a successful response with a valid links on page",
+ async () => {
+ const scrapeRequest: ScrapeRequestInput = {
+ url: "https://roastmywebsite.ai",
+ formats: ["links"]
+ };
+
+ const response: ScrapeResponseRequestTest = await request(TEST_URL)
+ .post("/v1/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send(scrapeRequest);
+
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("data");
+ if (!("data" in response.body)) {
+ throw new Error("Expected response body to have 'data' property");
+ }
+ expect(response.body.data).not.toHaveProperty("html");
+ expect(response.body.data).not.toHaveProperty("rawHtml");
+ expect(response.body.data).toHaveProperty("links");
+ expect(response.body.data).toHaveProperty("metadata");
+ expect(response.body.data.links).toContain("https://firecrawl.dev");
+ expect(response.body.data.metadata.statusCode).toBe(200);
+ expect(response.body.data.metadata.error).toBeUndefined();
+ },
+ 30000
+ );
});
-describe("POST /v1/map", () => {
- it.concurrent("should require authorization", async () => {
- const response: ScrapeResponseRequestTest = await request(TEST_URL)
- .post("/v1/map")
- .send({ url: "https://firecrawl.dev" });
- expect(response.statusCode).toBe(401);
- });
-
- it.concurrent("should return an error response with an invalid API key", async () => {
- const response: ScrapeResponseRequestTest = await request(TEST_URL)
- .post("/v1/map")
- .set("Authorization", `Bearer invalid-api-key`)
- .set("Content-Type", "application/json")
- .send({ url: "https://firecrawl.dev" });
- expect(response.statusCode).toBe(401);
- });
-
- it.concurrent("should return a successful response with a valid API key", async () => {
- const mapRequest = {
- url: "https://roastmywebsite.ai"
- };
-
- const response: ScrapeResponseRequestTest = await request(TEST_URL)
- .post("/v1/map")
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
- .set("Content-Type", "application/json")
- .send(mapRequest);
-
- expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty("success", true);
- expect(response.body).toHaveProperty("links");
- if (!("links" in response.body)) {
- throw new Error("Expected response body to have 'links' property");
- }
- const links = response.body.links as unknown[];
- expect(Array.isArray(links)).toBe(true);
- expect(links.length).toBeGreaterThan(0);
- });
-
- it.concurrent("should return a successful response with a valid API key and search", async () => {
- const mapRequest = {
- url: "https://usemotion.com",
- search: "pricing"
- };
-
- const response: ScrapeResponseRequestTest = await request(TEST_URL)
- .post("/v1/map")
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
- .set("Content-Type", "application/json")
- .send(mapRequest);
-
- expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty("success", true);
- expect(response.body).toHaveProperty("links");
- if (!("links" in response.body)) {
- throw new Error("Expected response body to have 'links' property");
- }
- const links = response.body.links as unknown[];
- expect(Array.isArray(links)).toBe(true);
- expect(links.length).toBeGreaterThan(0);
- expect(links[0]).toContain("usemotion.com/pricing");
- });
-
- it.concurrent("should return a successful response with a valid API key and search and allowSubdomains", async () => {
- const mapRequest = {
- url: "https://firecrawl.dev",
- search: "docs",
- includeSubdomains: true
- };
-
- const response: ScrapeResponseRequestTest = await request(TEST_URL)
- .post("/v1/map")
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
- .set("Content-Type", "application/json")
- .send(mapRequest);
-
- expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty("success", true);
- expect(response.body).toHaveProperty("links");
- if (!("links" in response.body)) {
- throw new Error("Expected response body to have 'links' property");
- }
- const links = response.body.links as unknown[];
- expect(Array.isArray(links)).toBe(true);
- expect(links.length).toBeGreaterThan(0);
-
- const containsDocsFirecrawlDev = links.some((link: string) => link.includes("docs.firecrawl.dev"));
- expect(containsDocsFirecrawlDev).toBe(true);
- });
-
- it.concurrent("should return a successful response with a valid API key and search and allowSubdomains and www", async () => {
- const mapRequest = {
- url: "https://www.firecrawl.dev",
- search: "docs",
- includeSubdomains: true
- };
-
- const response: ScrapeResponseRequestTest = await request(TEST_URL)
- .post("/v1/map")
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
- .set("Content-Type", "application/json")
- .send(mapRequest);
-
- expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty("success", true);
- expect(response.body).toHaveProperty("links");
- if (!("links" in response.body)) {
- throw new Error("Expected response body to have 'links' property");
- }
- const links = response.body.links as unknown[];
- expect(Array.isArray(links)).toBe(true);
- expect(links.length).toBeGreaterThan(0);
-
- const containsDocsFirecrawlDev = links.some((link: string) => link.includes("docs.firecrawl.dev"));
- expect(containsDocsFirecrawlDev).toBe(true);
- }, 10000)
-
- it.concurrent("should return a successful response with a valid API key and search and not allowSubdomains and www", async () => {
- const mapRequest = {
- url: "https://www.firecrawl.dev",
- search: "docs",
- includeSubdomains: false
- };
-
- const response: ScrapeResponseRequestTest = await request(TEST_URL)
- .post("/v1/map")
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
- .set("Content-Type", "application/json")
- .send(mapRequest);
-
- expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty("success", true);
- expect(response.body).toHaveProperty("links");
- if (!("links" in response.body)) {
- throw new Error("Expected response body to have 'links' property");
- }
- const links = response.body.links as unknown[];
- expect(Array.isArray(links)).toBe(true);
- expect(links.length).toBeGreaterThan(0);
- expect(links[0]).not.toContain("docs.firecrawl.dev");
- })
-
- it.concurrent("should return an error for invalid URL", async () => {
- const mapRequest = {
- url: "invalid-url",
- includeSubdomains: true,
- search: "test",
- };
-
- const response: ScrapeResponseRequestTest = await request(TEST_URL)
- .post("/v1/map")
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
- .set("Content-Type", "application/json")
- .send(mapRequest);
-
- expect(response.statusCode).toBe(400);
- expect(response.body).toHaveProperty("success", false);
- expect(response.body).toHaveProperty("error");
- });
-});
-
-
-describe("POST /v1/crawl", () => {
- it.concurrent("should require authorization", async () => {
- const response: ScrapeResponseRequestTest = await request(TEST_URL)
- .post("/v1/crawl")
- .send({ url: "https://firecrawl.dev" });
- expect(response.statusCode).toBe(401);
- });
-
- it.concurrent("should throw error for blocklisted URL", async () => {
- const scrapeRequest: ScrapeRequestInput = {
- url: "https://facebook.com/fake-test",
- };
-
- const response = await request(TEST_URL)
- .post("/v1/crawl")
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
- .set("Content-Type", "application/json")
- .send(scrapeRequest);
-
- expect(response.statusCode).toBe(403);
- expect(response.body.error).toBe("URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions.");
- });
-
- it.concurrent(
- "should return an error response with an invalid API key",
- async () => {
+ describe("POST /v1/map", () => {
+ it.concurrent("should require authorization", async () => {
const response: ScrapeResponseRequestTest = await request(TEST_URL)
- .post("/v1/crawl")
- .set("Authorization", `Bearer invalid-api-key`)
- .set("Content-Type", "application/json")
+ .post("/v1/map")
.send({ url: "https://firecrawl.dev" });
expect(response.statusCode).toBe(401);
- }
- );
+ });
- it.concurrent("should return a successful response", async () => {
- const response = await request(TEST_URL)
- .post("/v1/crawl")
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
- .set("Content-Type", "application/json")
- .send({ url: "https://firecrawl.dev" });
-
- expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty("id");
- expect(response.body.id).toMatch(
- /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
+ it.concurrent(
+ "should return an error response with an invalid API key",
+ async () => {
+ const response: ScrapeResponseRequestTest = await request(TEST_URL)
+ .post("/v1/map")
+ .set("Authorization", `Bearer invalid-api-key`)
+ .set("Content-Type", "application/json")
+ .send({ url: "https://firecrawl.dev" });
+ expect(response.statusCode).toBe(401);
+ }
);
- expect(response.body).toHaveProperty("success", true);
- expect(response.body).toHaveProperty("url");
- expect(response.body.url).toContain("/v1/crawl/");
- });
- it.concurrent(
- "should return a successful response with a valid API key and valid includes option",
- async () => {
- const crawlResponse = await request(TEST_URL)
- .post("/v1/crawl")
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
- .set("Content-Type", "application/json")
- .send({
- url: "https://firecrawl.dev",
- limit: 40,
- includePaths: ["blog/*"],
- });
+ it.concurrent(
+ "should return a successful response with a valid API key",
+ async () => {
+ const mapRequest = {
+ url: "https://roastmywebsite.ai"
+ };
- let response;
- let isFinished = false;
-
- while (!isFinished) {
- response = await request(TEST_URL)
- .get(`/v1/crawl/${crawlResponse.body.id}`)
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
+ const response: ScrapeResponseRequestTest = await request(TEST_URL)
+ .post("/v1/map")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send(mapRequest);
expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty("status");
- isFinished = response.body.status === "completed";
-
- if (!isFinished) {
- await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again
+ expect(response.body).toHaveProperty("success", true);
+ expect(response.body).toHaveProperty("links");
+ if (!("links" in response.body)) {
+ throw new Error("Expected response body to have 'links' property");
}
+ const links = response.body.links as unknown[];
+ expect(Array.isArray(links)).toBe(true);
+ expect(links.length).toBeGreaterThan(0);
}
+ );
- await new Promise((resolve) => setTimeout(resolve, 1000)); // wait for data to be saved on the database
- const completedResponse = await request(TEST_URL)
- .get(`/v1/crawl/${crawlResponse.body.id}`)
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
+ it.concurrent(
+ "should return a successful response with a valid API key and search",
+ async () => {
+ const mapRequest = {
+ url: "https://usemotion.com",
+ search: "pricing"
+ };
- const urls = completedResponse.body.data.map(
- (item: any) => item.metadata?.sourceURL
- );
- expect(urls.length).toBeGreaterThan(5);
- urls.forEach((url: string) => {
- expect(url).toContain("firecrawl.dev/blog");
- });
-
- expect(completedResponse.statusCode).toBe(200);
- expect(completedResponse.body).toHaveProperty("status");
- expect(completedResponse.body.status).toBe("completed");
- expect(completedResponse.body).toHaveProperty("data");
- expect(completedResponse.body.data[0]).toHaveProperty("markdown");
- expect(completedResponse.body.data[0]).toHaveProperty("metadata");
- expect(completedResponse.body.data[0]).not.toHaveProperty("content"); // v0
- expect(completedResponse.body.data[0].metadata.statusCode).toBe(200);
- expect(completedResponse.body.data[0].metadata.error).toBeUndefined();
- },
- 180000
- ); // 180 seconds
-
- it.concurrent(
- "should return a successful response with a valid API key and valid excludes option",
- async () => {
- const crawlResponse = await request(TEST_URL)
- .post("/v1/crawl")
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
- .set("Content-Type", "application/json")
- .send({
- url: "https://firecrawl.dev",
- limit: 40,
- excludePaths: ["blog/*"],
- });
-
- let isFinished = false;
- let response;
-
- while (!isFinished) {
- response = await request(TEST_URL)
- .get(`/v1/crawl/${crawlResponse.body.id}`)
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
+ const response: ScrapeResponseRequestTest = await request(TEST_URL)
+ .post("/v1/map")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send(mapRequest);
expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty("status");
- isFinished = response.body.status === "completed";
-
- if (!isFinished) {
- await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again
+ expect(response.body).toHaveProperty("success", true);
+ expect(response.body).toHaveProperty("links");
+ if (!("links" in response.body)) {
+ throw new Error("Expected response body to have 'links' property");
}
+ const links = response.body.links as unknown[];
+ expect(Array.isArray(links)).toBe(true);
+ expect(links.length).toBeGreaterThan(0);
+ expect(links[0]).toContain("usemotion.com/pricing");
}
+ );
- await new Promise((resolve) => setTimeout(resolve, 1000)); // wait for data to be saved on the database
- const completedResponse = await request(
- TEST_URL
- )
- .get(`/v1/crawl/${crawlResponse.body.id}`)
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
+ it.concurrent(
+ "should return a successful response with a valid API key and search and allowSubdomains",
+ async () => {
+ const mapRequest = {
+ url: "https://firecrawl.dev",
+ search: "docs",
+ includeSubdomains: true
+ };
- const urls = completedResponse.body.data.map(
- (item: any) => item.metadata?.sourceURL
- );
- expect(urls.length).toBeGreaterThan(3);
- urls.forEach((url: string) => {
- expect(url.startsWith("https://www.firecrawl.dev/blog/")).toBeFalsy();
- });
- },
- 90000
- ); // 90 seconds
+ const response: ScrapeResponseRequestTest = await request(TEST_URL)
+ .post("/v1/map")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send(mapRequest);
- it.concurrent(
- "should return a successful response with max depth option for a valid crawl job",
- async () => {
- const crawlResponse = await request(TEST_URL)
- .post("/v1/crawl")
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("success", true);
+ expect(response.body).toHaveProperty("links");
+ if (!("links" in response.body)) {
+ throw new Error("Expected response body to have 'links' property");
+ }
+ const links = response.body.links as unknown[];
+ expect(Array.isArray(links)).toBe(true);
+ expect(links.length).toBeGreaterThan(0);
+
+ const containsDocsFirecrawlDev = links.some((link: string) =>
+ link.includes("docs.firecrawl.dev")
+ );
+ expect(containsDocsFirecrawlDev).toBe(true);
+ }
+ );
+
+ it.concurrent(
+ "should return a successful response with a valid API key and search and allowSubdomains and www",
+ async () => {
+ const mapRequest = {
+ url: "https://www.firecrawl.dev",
+ search: "docs",
+ includeSubdomains: true
+ };
+
+ const response: ScrapeResponseRequestTest = await request(TEST_URL)
+ .post("/v1/map")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send(mapRequest);
+
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("success", true);
+ expect(response.body).toHaveProperty("links");
+ if (!("links" in response.body)) {
+ throw new Error("Expected response body to have 'links' property");
+ }
+ const links = response.body.links as unknown[];
+ expect(Array.isArray(links)).toBe(true);
+ expect(links.length).toBeGreaterThan(0);
+
+ const containsDocsFirecrawlDev = links.some((link: string) =>
+ link.includes("docs.firecrawl.dev")
+ );
+ expect(containsDocsFirecrawlDev).toBe(true);
+ },
+ 10000
+ );
+
+ it.concurrent(
+ "should return a successful response with a valid API key and search and not allowSubdomains and www",
+ async () => {
+ const mapRequest = {
+ url: "https://www.firecrawl.dev",
+ search: "docs",
+ includeSubdomains: false
+ };
+
+ const response: ScrapeResponseRequestTest = await request(TEST_URL)
+ .post("/v1/map")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send(mapRequest);
+
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("success", true);
+ expect(response.body).toHaveProperty("links");
+ if (!("links" in response.body)) {
+ throw new Error("Expected response body to have 'links' property");
+ }
+ const links = response.body.links as unknown[];
+ expect(Array.isArray(links)).toBe(true);
+ expect(links.length).toBeGreaterThan(0);
+ expect(links[0]).not.toContain("docs.firecrawl.dev");
+ }
+ );
+
+ it.concurrent("should return an error for invalid URL", async () => {
+ const mapRequest = {
+ url: "invalid-url",
+ includeSubdomains: true,
+ search: "test"
+ };
+
+ const response: ScrapeResponseRequestTest = await request(TEST_URL)
+ .post("/v1/map")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
- .send({
- url: "https://www.scrapethissite.com",
- maxDepth: 1,
- });
- expect(crawlResponse.statusCode).toBe(200);
+ .send(mapRequest);
- const response = await request(TEST_URL)
- .get(`/v1/crawl/${crawlResponse.body.id}`)
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
- expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty("status");
- expect(["active", "waiting", "completed", "scraping"]).toContain(response.body.status);
- // wait for 60 seconds
- let isCompleted = false;
- while (!isCompleted) {
- const statusCheckResponse = await request(TEST_URL)
- .get(`/v1/crawl/${crawlResponse.body.id}`)
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
- expect(statusCheckResponse.statusCode).toBe(200);
- isCompleted = statusCheckResponse.body.status === "completed";
- if (!isCompleted) {
- await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again
- }
- }
- const completedResponse = await request(
- TEST_URL
- )
- .get(`/v1/crawl/${crawlResponse.body.id}`)
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
-
- expect(completedResponse.statusCode).toBe(200);
- expect(completedResponse.body).toHaveProperty("status");
- expect(completedResponse.body.status).toBe("completed");
- expect(completedResponse.body).toHaveProperty("data");
- expect(completedResponse.body.data[0]).not.toHaveProperty("content");
- expect(completedResponse.body.data[0]).toHaveProperty("markdown");
- expect(completedResponse.body.data[0]).toHaveProperty("metadata");
- expect(completedResponse.body.data[0].metadata.statusCode).toBe(200);
- expect(completedResponse.body.data[0].metadata.error).toBeUndefined();
- const urls = completedResponse.body.data.map(
- (item: any) => item.metadata?.sourceURL
- );
- expect(urls.length).toBeGreaterThan(1);
-
- // Check if all URLs have a maximum depth of 1
- urls.forEach((url: string) => {
- const pathSplits = new URL(url).pathname.split("/");
- const depth =
- pathSplits.length -
- (pathSplits[0].length === 0 &&
- pathSplits[pathSplits.length - 1].length === 0
- ? 1
- : 0);
- expect(depth).toBeLessThanOrEqual(2);
- });
- },
- 180000
- );
-})
-
-describe("GET /v1/crawl/:jobId", () => {
- it.concurrent("should require authorization", async () => {
- const response = await request(TEST_URL).get("/v1/crawl/123");
- expect(response.statusCode).toBe(401);
+ expect(response.statusCode).toBe(400);
+ expect(response.body).toHaveProperty("success", false);
+ expect(response.body).toHaveProperty("error");
+ });
});
- it.concurrent(
- "should return an error response with an invalid API key",
- async () => {
- const response = await request(TEST_URL)
- .get("/v1/crawl/123")
- .set("Authorization", `Bearer invalid-api-key`);
+ describe("POST /v1/crawl", () => {
+ it.concurrent("should require authorization", async () => {
+ const response: ScrapeResponseRequestTest = await request(TEST_URL)
+ .post("/v1/crawl")
+ .send({ url: "https://firecrawl.dev" });
expect(response.statusCode).toBe(401);
- }
- );
+ });
+
+ it.concurrent("should throw error for blocklisted URL", async () => {
+ const scrapeRequest: ScrapeRequestInput = {
+ url: "https://facebook.com/fake-test"
+ };
- it.concurrent(
- "should return Job not found for invalid job ID",
- async () => {
const response = await request(TEST_URL)
- .get("/v1/crawl/invalidJobId")
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
- expect(response.statusCode).toBe(404);
- }
- );
-
- it.concurrent(
- "should return a successful crawl status response for a valid crawl job",
- async () => {
- const crawlResponse = await request(TEST_URL)
.post("/v1/crawl")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
- .send({ url: "https://docs.firecrawl.dev" });
- expect(crawlResponse.statusCode).toBe(200);
+ .send(scrapeRequest);
- let isCompleted = false;
+ expect(response.statusCode).toBe(403);
+ expect(response.body.error).toBe(
+ "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions."
+ );
+ });
+
+ it.concurrent(
+ "should return an error response with an invalid API key",
+ async () => {
+ const response: ScrapeResponseRequestTest = await request(TEST_URL)
+ .post("/v1/crawl")
+ .set("Authorization", `Bearer invalid-api-key`)
+ .set("Content-Type", "application/json")
+ .send({ url: "https://firecrawl.dev" });
+ expect(response.statusCode).toBe(401);
+ }
+ );
+
+ it.concurrent("should return a successful response", async () => {
+ const response = await request(TEST_URL)
+ .post("/v1/crawl")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({ url: "https://firecrawl.dev" });
+
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("id");
+ expect(response.body.id).toMatch(
+ /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
+ );
+ expect(response.body).toHaveProperty("success", true);
+ expect(response.body).toHaveProperty("url");
+ expect(response.body.url).toContain("/v1/crawl/");
+ });
+
+ it.concurrent(
+ "should return a successful response with a valid API key and valid includes option",
+ async () => {
+ const crawlResponse = await request(TEST_URL)
+ .post("/v1/crawl")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({
+ url: "https://firecrawl.dev",
+ limit: 40,
+ includePaths: ["blog/*"]
+ });
+
+ let response;
+ let isFinished = false;
+
+ while (!isFinished) {
+ response = await request(TEST_URL)
+ .get(`/v1/crawl/${crawlResponse.body.id}`)
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
+
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("status");
+ isFinished = response.body.status === "completed";
+
+ if (!isFinished) {
+ await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again
+ }
+ }
+
+ await new Promise((resolve) => setTimeout(resolve, 1000)); // wait for data to be saved on the database
+ const completedResponse = await request(TEST_URL)
+ .get(`/v1/crawl/${crawlResponse.body.id}`)
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
+
+ const urls = completedResponse.body.data.map(
+ (item: any) => item.metadata?.sourceURL
+ );
+ expect(urls.length).toBeGreaterThan(5);
+ urls.forEach((url: string) => {
+ expect(url).toContain("firecrawl.dev/blog");
+ });
+
+ expect(completedResponse.statusCode).toBe(200);
+ expect(completedResponse.body).toHaveProperty("status");
+ expect(completedResponse.body.status).toBe("completed");
+ expect(completedResponse.body).toHaveProperty("data");
+ expect(completedResponse.body.data[0]).toHaveProperty("markdown");
+ expect(completedResponse.body.data[0]).toHaveProperty("metadata");
+ expect(completedResponse.body.data[0]).not.toHaveProperty("content"); // v0
+ expect(completedResponse.body.data[0].metadata.statusCode).toBe(200);
+ expect(completedResponse.body.data[0].metadata.error).toBeUndefined();
+ },
+ 180000
+ ); // 180 seconds
+
+ it.concurrent(
+ "should return a successful response with a valid API key and valid excludes option",
+ async () => {
+ const crawlResponse = await request(TEST_URL)
+ .post("/v1/crawl")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({
+ url: "https://firecrawl.dev",
+ limit: 40,
+ excludePaths: ["blog/*"]
+ });
+
+ let isFinished = false;
+ let response;
+
+ while (!isFinished) {
+ response = await request(TEST_URL)
+ .get(`/v1/crawl/${crawlResponse.body.id}`)
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
+
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("status");
+ isFinished = response.body.status === "completed";
+
+ if (!isFinished) {
+ await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again
+ }
+ }
+
+ await new Promise((resolve) => setTimeout(resolve, 1000)); // wait for data to be saved on the database
+ const completedResponse = await request(TEST_URL)
+ .get(`/v1/crawl/${crawlResponse.body.id}`)
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
+
+ const urls = completedResponse.body.data.map(
+ (item: any) => item.metadata?.sourceURL
+ );
+ expect(urls.length).toBeGreaterThan(3);
+ urls.forEach((url: string) => {
+ expect(url.startsWith("https://www.firecrawl.dev/blog/")).toBeFalsy();
+ });
+ },
+ 90000
+ ); // 90 seconds
+
+ it.concurrent(
+ "should return a successful response with max depth option for a valid crawl job",
+ async () => {
+ const crawlResponse = await request(TEST_URL)
+ .post("/v1/crawl")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({
+ url: "https://www.scrapethissite.com",
+ maxDepth: 1
+ });
+ expect(crawlResponse.statusCode).toBe(200);
- while (!isCompleted) {
const response = await request(TEST_URL)
.get(`/v1/crawl/${crawlResponse.body.id}`)
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty("status");
-
- if (response.body.status === "completed") {
- isCompleted = true;
- } else {
- await new Promise((r) => setTimeout(r, 1000)); // Wait for 1 second before checking again
+ expect(["active", "waiting", "completed", "scraping"]).toContain(
+ response.body.status
+ );
+ // poll once per second until the crawl reports "completed" (bounded only by the 180000 ms test timeout)
+ let isCompleted = false;
+ while (!isCompleted) {
+ const statusCheckResponse = await request(TEST_URL)
+ .get(`/v1/crawl/${crawlResponse.body.id}`)
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
+ expect(statusCheckResponse.statusCode).toBe(200);
+ isCompleted = statusCheckResponse.body.status === "completed";
+ if (!isCompleted) {
+ await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second before checking again
+ }
}
+ const completedResponse = await request(TEST_URL)
+ .get(`/v1/crawl/${crawlResponse.body.id}`)
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
+
+ expect(completedResponse.statusCode).toBe(200);
+ expect(completedResponse.body).toHaveProperty("status");
+ expect(completedResponse.body.status).toBe("completed");
+ expect(completedResponse.body).toHaveProperty("data");
+ expect(completedResponse.body.data[0]).not.toHaveProperty("content");
+ expect(completedResponse.body.data[0]).toHaveProperty("markdown");
+ expect(completedResponse.body.data[0]).toHaveProperty("metadata");
+ expect(completedResponse.body.data[0].metadata.statusCode).toBe(200);
+ expect(completedResponse.body.data[0].metadata.error).toBeUndefined();
+ const urls = completedResponse.body.data.map(
+ (item: any) => item.metadata?.sourceURL
+ );
+ expect(urls.length).toBeGreaterThan(1);
+
+ // Check if all URLs have a maximum depth of 1
+ urls.forEach((url: string) => {
+ const pathSplits = new URL(url).pathname.split("/");
+ const depth =
+ pathSplits.length -
+ (pathSplits[0].length === 0 &&
+ pathSplits[pathSplits.length - 1].length === 0
+ ? 1
+ : 0);
+ expect(depth).toBeLessThanOrEqual(2);
+ });
+ },
+ 180000
+ );
+ });
+
+ describe("GET /v1/crawl/:jobId", () => {
+ it.concurrent("should require authorization", async () => {
+ const response = await request(TEST_URL).get("/v1/crawl/123");
+ expect(response.statusCode).toBe(401);
+ });
+
+ it.concurrent(
+ "should return an error response with an invalid API key",
+ async () => {
+ const response = await request(TEST_URL)
+ .get("/v1/crawl/123")
+ .set("Authorization", `Bearer invalid-api-key`);
+ expect(response.statusCode).toBe(401);
}
+ );
- await new Promise((resolve) => setTimeout(resolve, 1000)); // wait for data to be saved on the database
- const completedResponse = await request(TEST_URL)
- .get(`/v1/crawl/${crawlResponse.body.id}`)
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
+ it.concurrent(
+ "should return Job not found for invalid job ID",
+ async () => {
+ const response = await request(TEST_URL)
+ .get("/v1/crawl/invalidJobId")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
+ expect(response.statusCode).toBe(404);
+ }
+ );
- expect(completedResponse.body).toHaveProperty("status");
- expect(completedResponse.body.status).toBe("completed");
- expect(completedResponse.body).toHaveProperty("data");
- expect(completedResponse.body.data[0]).not.toHaveProperty("content");
- expect(completedResponse.body.data[0]).toHaveProperty("markdown");
- expect(completedResponse.body.data[0]).toHaveProperty("metadata");
- expect(completedResponse.body.data[0].metadata.statusCode).toBe(200);
- expect(
- completedResponse.body.data[0].metadata.error
- ).toBeUndefined();
+ it.concurrent(
+ "should return a successful crawl status response for a valid crawl job",
+ async () => {
+ const crawlResponse = await request(TEST_URL)
+ .post("/v1/crawl")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({ url: "https://docs.firecrawl.dev" });
+ expect(crawlResponse.statusCode).toBe(200);
- const childrenLinks = completedResponse.body.data.filter(
- (doc) =>
- doc.metadata &&
- doc.metadata.sourceURL
- );
+ let isCompleted = false;
- expect(childrenLinks.length).toBe(completedResponse.body.data.length);
- },
- 180000
- ); // 120 seconds
+ while (!isCompleted) {
+ const response = await request(TEST_URL)
+ .get(`/v1/crawl/${crawlResponse.body.id}`)
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("status");
- it.concurrent(
- "If someone cancels a crawl job, it should turn into failed status",
- async () => {
- const crawlResponse = await request(TEST_URL)
- .post("/v1/crawl")
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
- .set("Content-Type", "application/json")
- .send({ url: "https://docs.firecrawl.dev", limit: 10 });
+ if (response.body.status === "completed") {
+ isCompleted = true;
+ } else {
+ await new Promise((r) => setTimeout(r, 1000)); // Wait for 1 second before checking again
+ }
+ }
- expect(crawlResponse.statusCode).toBe(200);
+ await new Promise((resolve) => setTimeout(resolve, 1000)); // wait for data to be saved on the database
+ const completedResponse = await request(TEST_URL)
+ .get(`/v1/crawl/${crawlResponse.body.id}`)
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
- await new Promise((r) => setTimeout(r, 10000));
+ expect(completedResponse.body).toHaveProperty("status");
+ expect(completedResponse.body.status).toBe("completed");
+ expect(completedResponse.body).toHaveProperty("data");
+ expect(completedResponse.body.data[0]).not.toHaveProperty("content");
+ expect(completedResponse.body.data[0]).toHaveProperty("markdown");
+ expect(completedResponse.body.data[0]).toHaveProperty("metadata");
+ expect(completedResponse.body.data[0].metadata.statusCode).toBe(200);
+ expect(completedResponse.body.data[0].metadata.error).toBeUndefined();
- const responseCancel = await request(TEST_URL)
- .delete(`/v1/crawl/${crawlResponse.body.id}`)
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
- expect(responseCancel.statusCode).toBe(200);
- expect(responseCancel.body).toHaveProperty("status");
- expect(responseCancel.body.status).toBe("cancelled");
+ const childrenLinks = completedResponse.body.data.filter(
+ (doc) => doc.metadata && doc.metadata.sourceURL
+ );
- await new Promise((r) => setTimeout(r, 10000));
- const completedResponse = await request(TEST_URL)
- .get(`/v1/crawl/${crawlResponse.body.id}`)
- .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
-
- expect(completedResponse.statusCode).toBe(200);
- expect(completedResponse.body).toHaveProperty("status");
- expect(completedResponse.body.status).toBe("cancelled");
- expect(completedResponse.body).toHaveProperty("data");
- expect(completedResponse.body.data[0]).toHaveProperty("markdown");
- expect(completedResponse.body.data[0]).toHaveProperty("metadata");
- expect(completedResponse.body.data[0].metadata.statusCode).toBe(200);
- expect(completedResponse.body.data[0].metadata.error).toBeUndefined();
- },
- 60000
- ); // 60 seconds
-})
+ expect(childrenLinks.length).toBe(completedResponse.body.data.length);
+ },
+ 180000
+ ); // 180 seconds
+
+ it.concurrent(
+ "If someone cancels a crawl job, it should turn into failed status",
+ async () => {
+ const crawlResponse = await request(TEST_URL)
+ .post("/v1/crawl")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({ url: "https://docs.firecrawl.dev", limit: 10 });
+
+ expect(crawlResponse.statusCode).toBe(200);
+
+ await new Promise((r) => setTimeout(r, 10000));
+
+ const responseCancel = await request(TEST_URL)
+ .delete(`/v1/crawl/${crawlResponse.body.id}`)
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
+ expect(responseCancel.statusCode).toBe(200);
+ expect(responseCancel.body).toHaveProperty("status");
+ expect(responseCancel.body.status).toBe("cancelled");
+
+ await new Promise((r) => setTimeout(r, 10000));
+ const completedResponse = await request(TEST_URL)
+ .get(`/v1/crawl/${crawlResponse.body.id}`)
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`);
+
+ expect(completedResponse.statusCode).toBe(200);
+ expect(completedResponse.body).toHaveProperty("status");
+ expect(completedResponse.body.status).toBe("cancelled");
+ expect(completedResponse.body).toHaveProperty("data");
+ expect(completedResponse.body.data[0]).toHaveProperty("markdown");
+ expect(completedResponse.body.data[0]).toHaveProperty("metadata");
+ expect(completedResponse.body.data[0].metadata.statusCode).toBe(200);
+ expect(completedResponse.body.data[0].metadata.error).toBeUndefined();
+ },
+ 60000
+ ); // 60 seconds
+ });
});
diff --git a/apps/api/src/__tests__/e2e_v1_withAuth_all_params/index.test.ts b/apps/api/src/__tests__/e2e_v1_withAuth_all_params/index.test.ts
index 5c7feb1f..e297f7c8 100644
--- a/apps/api/src/__tests__/e2e_v1_withAuth_all_params/index.test.ts
+++ b/apps/api/src/__tests__/e2e_v1_withAuth_all_params/index.test.ts
@@ -2,7 +2,7 @@ import request from "supertest";
import { configDotenv } from "dotenv";
import {
ScrapeRequest,
- ScrapeResponseRequestTest,
+ ScrapeResponseRequestTest
} from "../../controllers/v1/types";
configDotenv();
@@ -10,31 +10,39 @@ const FIRECRAWL_API_URL = "http://127.0.0.1:3002";
const E2E_TEST_SERVER_URL = "http://firecrawl-e2e-test.vercel.app"; // @rafaelsideguide/firecrawl-e2e-test
describe("E2E Tests for v1 API Routes", () => {
+ it.concurrent(
+ "should return a successful response for a scrape with 403 page",
+ async () => {
+ const response: ScrapeResponseRequestTest = await request(
+ FIRECRAWL_API_URL
+ )
+ .post("/v1/scrape")
+ .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+ .set("Content-Type", "application/json")
+ .send({ url: "https://httpstat.us/403" });
- it.concurrent('should return a successful response for a scrape with 403 page', async () => {
- const response: ScrapeResponseRequestTest = await request(FIRECRAWL_API_URL)
- .post('/v1/scrape')
- .set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
- .set('Content-Type', 'application/json')
- .send({ url: 'https://httpstat.us/403' });
+ expect(response.statusCode).toBe(200);
+ expect(response.body).toHaveProperty("data");
+ if (!("data" in response.body)) {
+ throw new Error("Expected response body to have 'data' property");
+ }
+ expect(response.body.data).toHaveProperty("markdown");
+ expect(response.body.data).toHaveProperty("metadata");
+ expect(response.body.data.metadata.statusCode).toBe(403);
+ },
+ 30000
+ );
- expect(response.statusCode).toBe(200);
- expect(response.body).toHaveProperty('data');
- if (!("data" in response.body)) {
- throw new Error("Expected response body to have 'data' property");
- }
- expect(response.body.data).toHaveProperty('markdown');
- expect(response.body.data).toHaveProperty('metadata');
- expect(response.body.data.metadata.statusCode).toBe(403);
- }, 30000);
-
- it.concurrent("should handle 'formats:markdown (default)' parameter correctly",
+ it.concurrent(
+ "should handle 'formats:markdown (default)' parameter correctly",
async () => {
const scrapeRequest = {
url: E2E_TEST_SERVER_URL
} as ScrapeRequest;
- const response: ScrapeResponseRequestTest = await request(FIRECRAWL_API_URL)
+ const response: ScrapeResponseRequestTest = await request(
+ FIRECRAWL_API_URL
+ )
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
@@ -47,27 +55,41 @@ describe("E2E Tests for v1 API Routes", () => {
}
expect(response.body.data).toHaveProperty("markdown");
-
- expect(response.body.data.markdown).toContain("This page is used for end-to-end (e2e) testing with Firecrawl.");
- expect(response.body.data.markdown).toContain("Content with id #content-1");
+
+ expect(response.body.data.markdown).toContain(
+ "This page is used for end-to-end (e2e) testing with Firecrawl."
+ );
+ expect(response.body.data.markdown).toContain(
+ "Content with id #content-1"
+ );
// expect(response.body.data.markdown).toContain("Loading...");
expect(response.body.data.markdown).toContain("Click me!");
- expect(response.body.data.markdown).toContain("Power your AI apps with clean data crawled from any website. It's also open-source."); // firecrawl.dev inside an iframe
- expect(response.body.data.markdown).toContain("This content loads only when you see it. Don't blink! 👼"); // the browser always scroll to the bottom
+ expect(response.body.data.markdown).toContain(
+ "Power your AI apps with clean data crawled from any website. It's also open-source."
+ ); // firecrawl.dev inside an iframe
+ expect(response.body.data.markdown).toContain(
+ "This content loads only when you see it. Don't blink! 👼"
+ ); // the browser always scrolls to the bottom
expect(response.body.data.markdown).not.toContain("Header"); // Only main content is returned by default
expect(response.body.data.markdown).not.toContain("footer"); // Only main content is returned by default
- expect(response.body.data.markdown).not.toContain("This content is only visible on mobile");
+ expect(response.body.data.markdown).not.toContain(
+ "This content is only visible on mobile"
+ );
},
- 30000);
+ 30000
+ );
- it.concurrent("should handle 'formats:html' parameter correctly",
+ it.concurrent(
+ "should handle 'formats:html' parameter correctly",
async () => {
const scrapeRequest = {
url: E2E_TEST_SERVER_URL,
formats: ["html"]
} as ScrapeRequest;
- const response: ScrapeResponseRequestTest = await request(FIRECRAWL_API_URL)
+ const response: ScrapeResponseRequestTest = await request(
+ FIRECRAWL_API_URL
+ )
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
@@ -79,23 +101,30 @@ describe("E2E Tests for v1 API Routes", () => {
throw new Error("Expected response body to have 'data' property");
}
-
expect(response.body.data).not.toHaveProperty("markdown");
expect(response.body.data).toHaveProperty("html");
- expect(response.body.data.html).not.toContain("
This page is used for end-to-end (e2e) testing with Firecrawl.
"); - expect(response.body.data.markdown).toContain("Content with id #content-1"); + expect(response.body.data.markdown).toContain( + "e2e-header-test: firecrawl" + ); }, - 30000); - - it.concurrent("should handle 'excludeTags' parameter correctly", + 30000 + ); + + it.concurrent( + "should handle 'includeTags' parameter correctly", async () => { const scrapeRequest = { url: E2E_TEST_SERVER_URL, - excludeTags: ['#content-1'] + includeTags: ["#content-1"] } as ScrapeRequest; - - const response: ScrapeResponseRequestTest = await request(FIRECRAWL_API_URL) + + const response: ScrapeResponseRequestTest = await request( + FIRECRAWL_API_URL + ) .post("/v1/scrape") .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) .set("Content-Type", "application/json") .send(scrapeRequest); - + expect(response.statusCode).toBe(200); expect(response.body).toHaveProperty("data"); if (!("data" in response.body)) { throw new Error("Expected response body to have 'data' property"); } - expect(response.body.data.markdown).toContain("This page is used for end-to-end (e2e) testing with Firecrawl."); - expect(response.body.data.markdown).not.toContain("Content with id #content-1"); + expect(response.body.data.markdown).not.toContain( + "This page is used for end-to-end (e2e) testing with Firecrawl.
" + ); + expect(response.body.data.markdown).toContain( + "Content with id #content-1" + ); }, - 30000); - - it.concurrent("should handle 'onlyMainContent' parameter correctly", + 30000 + ); + + it.concurrent( + "should handle 'excludeTags' parameter correctly", + async () => { + const scrapeRequest = { + url: E2E_TEST_SERVER_URL, + excludeTags: ["#content-1"] + } as ScrapeRequest; + + const response: ScrapeResponseRequestTest = await request( + FIRECRAWL_API_URL + ) + .post("/v1/scrape") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send(scrapeRequest); + + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("data"); + if (!("data" in response.body)) { + throw new Error("Expected response body to have 'data' property"); + } + + expect(response.body.data.markdown).toContain( + "This page is used for end-to-end (e2e) testing with Firecrawl." + ); + expect(response.body.data.markdown).not.toContain( + "Content with id #content-1" + ); + }, + 30000 + ); + + it.concurrent( + "should handle 'onlyMainContent' parameter correctly", async () => { const scrapeRequest = { url: E2E_TEST_SERVER_URL, formats: ["html", "markdown"], onlyMainContent: false } as ScrapeRequest; - - const response: ScrapeResponseRequestTest = await request(FIRECRAWL_API_URL) + + const response: ScrapeResponseRequestTest = await request( + FIRECRAWL_API_URL + ) .post("/v1/scrape") .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) .set("Content-Type", "application/json") .send(scrapeRequest); - + expect(response.statusCode).toBe(200); expect(response.body).toHaveProperty("data"); if (!("data" in response.body)) { throw new Error("Expected response body to have 'data' property"); } - - expect(response.body.data.markdown).toContain("This page is used for end-to-end (e2e) testing with Firecrawl."); - expect(response.body.data.html).toContain("Hello, world!
'; - const expectedMarkdown = 'Hello, world!'; +describe("parseMarkdown", () => { + it("should correctly convert simple HTML to Markdown", async () => { + const html = "Hello, world!
"; + const expectedMarkdown = "Hello, world!"; await expect(parseMarkdown(html)).resolves.toBe(expectedMarkdown); }); - it('should convert complex HTML with nested elements to Markdown', async () => { - const html = 'Hello bold world!
Hello bold world!
Unclosed tag', expected: 'Unclosed tag' }, - { html: '
Wrong nesting
', expected: '**Wrong nesting**' }, - { html: 'Link without closing tag', expected: '[Link without closing tag](http://example.com)' } + { html: "Unclosed tag", expected: "Unclosed tag" }, + { + html: "