2024-11-13 18:06:20 -03:00
|
|
|
import request from "supertest";
|
|
|
|
|
import dotenv from "dotenv";
|
|
|
|
|
import {
|
|
|
|
|
FirecrawlCrawlResponse,
|
|
|
|
|
FirecrawlCrawlStatusResponse,
|
|
|
|
|
FirecrawlScrapeResponse,
|
|
|
|
|
} from "../../types";
|
|
|
|
|
|
|
|
|
|
// Load variables from a local .env file into process.env so that
// process.env.TEST_API_KEY (used for the Authorization header in the tests
// below) can be supplied without exporting it in the shell.
dotenv.config();
|
|
|
|
|
// Base URL that every request in this suite is sent to.
// NOTE(review): assumes the API server is already running locally on port 3002
// before the suite starts — nothing in this file boots it.
const TEST_URL = "http://127.0.0.1:3002";
|
|
|
|
|
|
|
|
|
|
/**
 * End-to-end tests for the `POST /v1/extract` route.
 *
 * Each test sends a real HTTP request to the server at `TEST_URL`,
 * authenticated with `process.env.TEST_API_KEY`, and checks the structured
 * data returned for a given prompt + JSON schema. The expected names, job
 * titles and flags are hard-coded below, so the assertions depend on the
 * current content of the target websites and on what the server extracts.
 *
 * Tests are registered with `it.concurrent`, so they run in parallel and must
 * not share mutable state.
 */
describe("E2E Tests for Extract API Routes", () => {
  // Per-test timeout in milliseconds; applied to every test in this suite.
  const EXTRACT_TIMEOUT_MS = 60000;

  // Builds a fresh, authenticated POST /v1/extract request. Callers finish it
  // with `.send(payload)`. The API key is read at call time, not at import time.
  const extractRequest = () =>
    request(TEST_URL)
      .post("/v1/extract")
      .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
      .set("Content-Type", "application/json");

  it.concurrent("should return authors of blog posts on firecrawl.dev", async () => {
    const response = await extractRequest().send({
      urls: ["https://firecrawl.dev/*"],
      prompt: "Who are the authors of the blog posts?",
      schema: {
        type: "object",
        properties: { authors: { type: "array", items: { type: "string" } } },
      },
    });

    expect(response.statusCode).toBe(200);
    expect(response.body).toHaveProperty("data");
    expect(response.body.data).toHaveProperty("authors");

    // Fail with an assertion (not a TypeError from for...of) if the shape is wrong.
    const authors = response.body.data.authors;
    expect(Array.isArray(authors)).toBe(true);

    const knownAuthors = [
      "Caleb Peffer",
      "Gergő Móricz",
      "Eric Ciarla",
      "Nicolas Camara",
      "Jon",
      "Wendong",
    ];

    // Counts every (returned author, known name) substring match; duplicates
    // in the response count more than once.
    let gotItRight = 0;
    for (const author of authors) {
      for (const name of knownAuthors) {
        if (typeof author === "string" && author.includes(name)) gotItRight++;
      }
    }

    expect(gotItRight).toBeGreaterThan(1);
  }, EXTRACT_TIMEOUT_MS);

  it.concurrent("should return founders of firecrawl.dev (allowExternalLinks = true)", async () => {
    const response = await extractRequest().send({
      urls: ["firecrawl.dev/*"],
      prompt: "Who are the founders of the company?",
      allowExternalLinks: true,
      schema: {
        type: "object",
        properties: { founders: { type: "array", items: { type: "string" } } },
      },
    });

    expect(response.statusCode).toBe(200);
    expect(response.body).toHaveProperty("data");
    expect(response.body.data).toHaveProperty("founders");

    const founders = response.body.data.founders;
    expect(Array.isArray(founders)).toBe(true);

    const knownFounders = ["Caleb", "Eric", "Nicolas"];

    let gotItRight = 0;
    for (const founder of founders) {
      for (const name of knownFounders) {
        if (typeof founder === "string" && founder.includes(name)) gotItRight++;
      }
    }

    expect(gotItRight).toBeGreaterThanOrEqual(2);
  }, EXTRACT_TIMEOUT_MS);

  it.concurrent("should return hiring opportunities on firecrawl.dev (allowExternalLinks = true)", async () => {
    const response = await extractRequest().send({
      urls: ["https://firecrawl.dev/*"],
      prompt: "What are they hiring for?",
      allowExternalLinks: true,
      // NOTE(review): `required` is an object-schema keyword and has no effect
      // on an array-typed schema, yet the result is read from `data.items`
      // below. Confirm the intended schema / response shape with the route.
      schema: {
        type: "array",
        items: {
          type: "string"
        },
        required: ["items"]
      },
    });

    expect(response.statusCode).toBe(200);
    expect(response.body).toHaveProperty("data");

    // Optional chaining keeps a null `data` from throwing; the assertion
    // below then reports the wrong shape.
    const openings = response.body.data?.items;
    expect(Array.isArray(openings)).toBe(true);

    const knownOpenings = [
      "Developer Support Engineer",
      "Dev Ops Engineer",
      "Founding Web Automation Engineer",
    ];

    let gotItRight = 0;
    for (const hiring of openings) {
      for (const title of knownOpenings) {
        if (typeof hiring === "string" && hiring.includes(title)) gotItRight++;
      }
    }

    // Strictly more than 2 matches, i.e. effectively all three titles.
    expect(gotItRight).toBeGreaterThan(2);
  }, EXTRACT_TIMEOUT_MS);

  it.concurrent("should return PCI DSS compliance for Fivetran", async () => {
    const response = await extractRequest().send({
      urls: ["fivetran.com/*"],
      prompt: "Does Fivetran have PCI DSS compliance?",
      allowExternalLinks: true,
      schema: {
        type: "object",
        properties: {
          pciDssCompliance: { type: "boolean" }
        }
      },
    });

    expect(response.statusCode).toBe(200);
    expect(response.body).toHaveProperty("data");
    expect(response.body.data?.pciDssCompliance).toBe(true);
  }, EXTRACT_TIMEOUT_MS);

  it.concurrent("should return Azure Data Connectors for Fivetran", async () => {
    const response = await extractRequest().send({
      urls: ["fivetran.com/*"],
      prompt: "What are the Azure Data Connectors they offer?",
      schema: {
        type: "array",
        items: {
          type: "object",
          properties: {
            connector: { type: "string" },
            description: { type: "string" },
            supportsCaptureDelete: { type: "boolean" }
          }
        }
      }
    });

    // This test previously had every assertion commented out (including a
    // `pciDssCompliance` check copied from the PCI DSS test that does not
    // match this schema), so it could never fail. Assert at least the basic
    // response contract.
    expect(response.statusCode).toBe(200);
    expect(response.body).toHaveProperty("data");
    // TODO: assert on the returned connectors once the expected response
    // shape for an array schema is pinned down.
  }, EXTRACT_TIMEOUT_MS);

  it.concurrent("should return Greenhouse Applicant Tracking System for Abnormal Security", async () => {
    const response = await extractRequest().send({
      urls: ["https://careers.abnormalsecurity.com/jobs/6119456003?gh_jid=6119456003"],
      prompt: "what applicant tracking system is this company using?",
      schema: {
        type: "object",
        properties: {
          isGreenhouseATS: { type: "boolean" },
          answer: { type: "string" }
        }
      },
      allowExternalLinks: true
    });

    expect(response.statusCode).toBe(200);
    expect(response.body).toHaveProperty("data");
    expect(response.body.data?.isGreenhouseATS).toBe(true);
  }, EXTRACT_TIMEOUT_MS);
});
|