Compare commits
10 Commits
7e73b01599
...
1396451d31
| Author | SHA1 | Date | |
|---|---|---|---|
| 1396451d31 | |||
| 07fb651a91 | |||
| 6a76ccfacb | |||
| 9297afd1ff | |||
| a8e0482718 | |||
| a2f41fb650 | |||
| 3ea221b093 | |||
| c9dd0e609a | |||
| 93655b5c0b | |||
| 38c96b524f |
@@ -373,5 +373,14 @@ describe("Scrape tests", () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
expect(response.metadata.sourceURL).toBe("https://firecrawl.dev/?pagewanted=all&et_blog");
|
expect(response.metadata.sourceURL).toBe("https://firecrawl.dev/?pagewanted=all&et_blog");
|
||||||
}, 35000);
|
}, 30000);
|
||||||
|
|
||||||
|
it.concurrent("application/json content type is markdownified properly", async () => {
|
||||||
|
const response = await scrape({
|
||||||
|
url: "https://jsonplaceholder.typicode.com/todos/1",
|
||||||
|
formats: ["markdown"],
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(response.markdown).toContain("```json");
|
||||||
|
}, 30000);
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ import { addScrapeJob, waitForJob } from "../../services/queue-jobs";
|
|||||||
import { getJobPriority } from "../../lib/job-priority";
|
import { getJobPriority } from "../../lib/job-priority";
|
||||||
import { getScrapeQueue } from "../../services/queue-service";
|
import { getScrapeQueue } from "../../services/queue-service";
|
||||||
import { supabaseGetJobById } from "../../lib/supabase-jobs";
|
import { supabaseGetJobById } from "../../lib/supabase-jobs";
|
||||||
|
import { calculateCreditsToBeBilled } from "../../lib/scrape-billing";
|
||||||
|
|
||||||
export async function scrapeController(
|
export async function scrapeController(
|
||||||
req: RequestWithAuth<{}, ScrapeResponse, ScrapeRequest>,
|
req: RequestWithAuth<{}, ScrapeResponse, ScrapeRequest>,
|
||||||
@@ -132,33 +133,12 @@ export async function scrapeController(
|
|||||||
0 // TODO: fix
|
0 // TODO: fix
|
||||||
: 0;
|
: 0;
|
||||||
|
|
||||||
let creditsToBeBilled = 1; // Assuming 1 credit per document
|
|
||||||
if (earlyReturn) {
|
if (earlyReturn) {
|
||||||
// Don't bill if we're early returning
|
// Don't bill if we're early returning
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if ((req.body.extract && req.body.formats?.includes("extract")) || (req.body.formats?.includes("changeTracking") && req.body.changeTrackingOptions?.modes?.includes("json"))) {
|
|
||||||
creditsToBeBilled = 5;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (req.body.agent?.model?.toLowerCase() === "fire-1" || req.body.extract?.agent?.model?.toLowerCase() === "fire-1" || req.body.jsonOptions?.agent?.model?.toLowerCase() === "fire-1") {
|
let creditsToBeBilled = await calculateCreditsToBeBilled(req.body, doc, jobId);
|
||||||
if (process.env.USE_DB_AUTHENTICATION === "true") {
|
|
||||||
// @Nick this is a hack pushed at 2AM pls help - mogery
|
|
||||||
const job = await supabaseGetJobById(jobId);
|
|
||||||
if (!job?.cost_tracking) {
|
|
||||||
logger.warn("No cost tracking found for job", {
|
|
||||||
jobId,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
creditsToBeBilled = Math.ceil((job?.cost_tracking?.totalCost ?? 1) * 1800);
|
|
||||||
} else {
|
|
||||||
creditsToBeBilled = 150;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (doc?.metadata?.proxyUsed === "stealth") {
|
|
||||||
creditsToBeBilled += 4;
|
|
||||||
}
|
|
||||||
|
|
||||||
billTeam(req.auth.team_id, req.acuc?.sub_id, creditsToBeBilled).catch(
|
billTeam(req.auth.team_id, req.acuc?.sub_id, creditsToBeBilled).catch(
|
||||||
(error) => {
|
(error) => {
|
||||||
|
|||||||
@@ -255,7 +255,13 @@ export async function searchController(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Bill team once for all successful results
|
// Bill team once for all successful results
|
||||||
billTeam(req.auth.team_id, req.acuc?.sub_id, responseData.data.length).catch((error) => {
|
billTeam(req.auth.team_id, req.acuc?.sub_id, responseData.data.reduce((a,x) => {
|
||||||
|
if (x.metadata?.numPages !== undefined && x.metadata.numPages > 0) {
|
||||||
|
return a + x.metadata.numPages;
|
||||||
|
} else {
|
||||||
|
return a + 1;
|
||||||
|
}
|
||||||
|
}, 0)).catch((error) => {
|
||||||
logger.error(
|
logger.error(
|
||||||
`Failed to bill team ${req.auth.team_id} for ${responseData.data.length} credits: ${error}`,
|
`Failed to bill team ${req.auth.team_id} for ${responseData.data.length} credits: ${error}`,
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -750,6 +750,7 @@ export type Document = {
|
|||||||
scrapeId?: string;
|
scrapeId?: string;
|
||||||
error?: string;
|
error?: string;
|
||||||
numPages?: number;
|
numPages?: number;
|
||||||
|
contentType?: string;
|
||||||
proxyUsed: "basic" | "stealth";
|
proxyUsed: "basic" | "stealth";
|
||||||
// [key: string]: string | string[] | number | { smartScrape: number; other: number; total: number } | undefined;
|
// [key: string]: string | string[] | number | { smartScrape: number; other: number; total: number } | undefined;
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -95,8 +95,13 @@ function startServer(port = DEFAULT_PORT) {
|
|||||||
logger.info(`Worker ${process.pid} listening on port ${port}`);
|
logger.info(`Worker ${process.pid} listening on port ${port}`);
|
||||||
});
|
});
|
||||||
|
|
||||||
const exitHandler = () => {
|
const exitHandler = async () => {
|
||||||
logger.info("SIGTERM signal received: closing HTTP server");
|
logger.info("SIGTERM signal received: closing HTTP server");
|
||||||
|
if (process.env.IS_KUBERNETES === "true") {
|
||||||
|
// Account for GCE load balancer drain timeout
|
||||||
|
logger.info("Waiting 60s for GCE load balancer drain timeout");
|
||||||
|
await new Promise((resolve) => setTimeout(resolve, 60000));
|
||||||
|
}
|
||||||
server.close(() => {
|
server.close(() => {
|
||||||
logger.info("Server closed.");
|
logger.info("Server closed.");
|
||||||
process.exit(0);
|
process.exit(0);
|
||||||
|
|||||||
@@ -0,0 +1,49 @@
|
|||||||
|
import { Document, ScrapeOptions } from "../controllers/v1/types";
|
||||||
|
import { supabaseGetJobById } from "./supabase-jobs";
|
||||||
|
import { logger } from "./logger";
|
||||||
|
import { CostTracking } from "./extract/extraction-service";
|
||||||
|
|
||||||
|
const creditsPerPDFPage = 1;
|
||||||
|
const stealthProxyCostBonus = 4;
|
||||||
|
|
||||||
|
export async function calculateCreditsToBeBilled(options: ScrapeOptions, document: Document, jobId: string, costTracking?: any) {
|
||||||
|
let creditsToBeBilled = 1; // Assuming 1 credit per document
|
||||||
|
if ((options.extract && options.formats?.includes("extract")) || (options.formats?.includes("changeTracking") && options.changeTrackingOptions?.modes?.includes("json"))) {
|
||||||
|
creditsToBeBilled = 5;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (options.agent?.model?.toLowerCase() === "fire-1" || options.extract?.agent?.model?.toLowerCase() === "fire-1" || options.jsonOptions?.agent?.model?.toLowerCase() === "fire-1") {
|
||||||
|
if (process.env.USE_DB_AUTHENTICATION === "true") {
|
||||||
|
// @Nick this is a hack pushed at 2AM pls help - mogery
|
||||||
|
if (!costTracking) {
|
||||||
|
const job = await supabaseGetJobById(jobId);
|
||||||
|
costTracking = job?.cost_tracking;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!costTracking) {
|
||||||
|
logger.warn("No cost tracking found for job", {
|
||||||
|
jobId,
|
||||||
|
scrapeId: jobId
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (costTracking instanceof CostTracking) {
|
||||||
|
costTracking = costTracking.toJSON();
|
||||||
|
}
|
||||||
|
|
||||||
|
creditsToBeBilled = Math.ceil((costTracking?.totalCost ?? 1) * 1800);
|
||||||
|
} else {
|
||||||
|
creditsToBeBilled = 150;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (document.metadata.numPages !== undefined && document.metadata.numPages > 1) {
|
||||||
|
creditsToBeBilled += creditsPerPDFPage * (document.metadata.numPages - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (document?.metadata?.proxyUsed === "stealth") {
|
||||||
|
creditsToBeBilled += stealthProxyCostBonus;
|
||||||
|
}
|
||||||
|
|
||||||
|
return creditsToBeBilled;
|
||||||
|
}
|
||||||
@@ -30,7 +30,7 @@ export async function scrapeURLWithFetch(
|
|||||||
url: string;
|
url: string;
|
||||||
body: string,
|
body: string,
|
||||||
status: number;
|
status: number;
|
||||||
headers: any;
|
headers: [string, string][];
|
||||||
};
|
};
|
||||||
|
|
||||||
if (meta.mock !== null) {
|
if (meta.mock !== null) {
|
||||||
@@ -117,5 +117,8 @@ export async function scrapeURLWithFetch(
|
|||||||
url: response.url,
|
url: response.url,
|
||||||
html: response.body,
|
html: response.body,
|
||||||
statusCode: response.status,
|
statusCode: response.status,
|
||||||
|
contentType: (response.headers.find(
|
||||||
|
(x) => x[0].toLowerCase() === "content-type",
|
||||||
|
) ?? [])[1] ?? undefined,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -273,6 +273,10 @@ export async function scrapeURLWithFireEngineChromeCDP(
|
|||||||
error: response.pageError,
|
error: response.pageError,
|
||||||
statusCode: response.pageStatusCode,
|
statusCode: response.pageStatusCode,
|
||||||
|
|
||||||
|
contentType: (Object.entries(response.responseHeaders ?? {}).find(
|
||||||
|
(x) => x[0].toLowerCase() === "content-type",
|
||||||
|
) ?? [])[1] ?? undefined,
|
||||||
|
|
||||||
screenshot: response.screenshot,
|
screenshot: response.screenshot,
|
||||||
...(actions.length > 0
|
...(actions.length > 0
|
||||||
? {
|
? {
|
||||||
@@ -336,6 +340,10 @@ export async function scrapeURLWithFireEnginePlaywright(
|
|||||||
error: response.pageError,
|
error: response.pageError,
|
||||||
statusCode: response.pageStatusCode,
|
statusCode: response.pageStatusCode,
|
||||||
|
|
||||||
|
contentType: (Object.entries(response.responseHeaders ?? {}).find(
|
||||||
|
(x) => x[0].toLowerCase() === "content-type",
|
||||||
|
) ?? [])[1] ?? undefined,
|
||||||
|
|
||||||
...(response.screenshots !== undefined && response.screenshots.length > 0
|
...(response.screenshots !== undefined && response.screenshots.length > 0
|
||||||
? {
|
? {
|
||||||
screenshot: response.screenshots[0],
|
screenshot: response.screenshots[0],
|
||||||
@@ -391,5 +399,9 @@ export async function scrapeURLWithFireEngineTLSClient(
|
|||||||
html: response.content,
|
html: response.content,
|
||||||
error: response.pageError,
|
error: response.pageError,
|
||||||
statusCode: response.pageStatusCode,
|
statusCode: response.pageStatusCode,
|
||||||
|
|
||||||
|
contentType: (Object.entries(response.responseHeaders ?? {}).find(
|
||||||
|
(x) => x[0].toLowerCase() === "content-type",
|
||||||
|
) ?? [])[1] ?? undefined,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -111,6 +111,8 @@ export type EngineScrapeResult = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
numPages?: number;
|
numPages?: number;
|
||||||
|
|
||||||
|
contentType?: string;
|
||||||
};
|
};
|
||||||
|
|
||||||
const engineHandlers: {
|
const engineHandlers: {
|
||||||
|
|||||||
@@ -379,6 +379,7 @@ async function scrapeURLLoop(meta: Meta): Promise<ScrapeUrlResponse> {
|
|||||||
statusCode: result.result.statusCode,
|
statusCode: result.result.statusCode,
|
||||||
error: result.result.error,
|
error: result.result.error,
|
||||||
numPages: result.result.numPages,
|
numPages: result.result.numPages,
|
||||||
|
contentType: result.result.contentType,
|
||||||
proxyUsed: meta.featureFlags.has("stealthProxy") ? "stealth" : "basic",
|
proxyUsed: meta.featureFlags.has("stealthProxy") ? "stealth" : "basic",
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -61,6 +61,17 @@ export async function deriveMarkdownFromHTML(
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (document.metadata.contentType?.includes("application/json")) {
|
||||||
|
if (document.rawHtml === undefined) {
|
||||||
|
throw new Error(
|
||||||
|
"rawHtml is undefined -- this transformer is being called out of order",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
document.markdown = "```json\n" + document.rawHtml + "\n```";
|
||||||
|
return document;
|
||||||
|
}
|
||||||
|
|
||||||
document.markdown = await parseMarkdown(document.html);
|
document.markdown = await parseMarkdown(document.html);
|
||||||
return document;
|
return document;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,6 +5,58 @@ import { logger } from "../lib/logger";
|
|||||||
|
|
||||||
dotenv.config();
|
dotenv.config();
|
||||||
|
|
||||||
|
|
||||||
|
export async function fire_engine_search(
|
||||||
|
q: string,
|
||||||
|
options: {
|
||||||
|
tbs?: string;
|
||||||
|
filter?: string;
|
||||||
|
lang?: string;
|
||||||
|
country?: string;
|
||||||
|
location?: string;
|
||||||
|
numResults: number;
|
||||||
|
page?: number;
|
||||||
|
},
|
||||||
|
abort?: AbortSignal,
|
||||||
|
): Promise<SearchResult[]> {
|
||||||
|
try {
|
||||||
|
let data = JSON.stringify({
|
||||||
|
query: q,
|
||||||
|
lang: options.lang,
|
||||||
|
country: options.country,
|
||||||
|
location: options.location,
|
||||||
|
tbs: options.tbs,
|
||||||
|
numResults: options.numResults,
|
||||||
|
page: options.page ?? 1,
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!process.env.FIRE_ENGINE_BETA_URL) {
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
|
||||||
|
const response = await fetch(`${process.env.FIRE_ENGINE_BETA_URL}/search`, {
|
||||||
|
method: "POST",
|
||||||
|
headers: {
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
"X-Disable-Cache": "true",
|
||||||
|
},
|
||||||
|
body: data,
|
||||||
|
signal: abort,
|
||||||
|
});
|
||||||
|
|
||||||
|
if (response.ok) {
|
||||||
|
const responseData = await response.json();
|
||||||
|
return responseData;
|
||||||
|
} else {
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
logger.error(error);
|
||||||
|
Sentry.captureException(error);
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
export async function fireEngineMap(
|
export async function fireEngineMap(
|
||||||
q: string,
|
q: string,
|
||||||
options: {
|
options: {
|
||||||
@@ -34,7 +86,7 @@ export async function fireEngineMap(
|
|||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
const response = await fetch(`${process.env.FIRE_ENGINE_BETA_URL}/search`, {
|
const response = await fetch(`${process.env.FIRE_ENGINE_BETA_URL}/map`, {
|
||||||
method: "POST",
|
method: "POST",
|
||||||
headers: {
|
headers: {
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import { googleSearch } from "./googlesearch";
|
|||||||
import { searchapi_search } from "./searchapi";
|
import { searchapi_search } from "./searchapi";
|
||||||
import { serper_search } from "./serper";
|
import { serper_search } from "./serper";
|
||||||
import { searxng_search } from "./searxng";
|
import { searxng_search } from "./searxng";
|
||||||
|
import { fire_engine_search } from "./fireEngine";
|
||||||
|
|
||||||
export async function search({
|
export async function search({
|
||||||
query,
|
query,
|
||||||
@@ -31,8 +32,19 @@ export async function search({
|
|||||||
timeout?: number;
|
timeout?: number;
|
||||||
}): Promise<SearchResult[]> {
|
}): Promise<SearchResult[]> {
|
||||||
try {
|
try {
|
||||||
|
if (process.env.FIRE_ENGINE_BETA_URL) {
|
||||||
|
const results = await fire_engine_search(query, {
|
||||||
|
numResults: num_results,
|
||||||
|
tbs,
|
||||||
|
filter,
|
||||||
|
lang,
|
||||||
|
country,
|
||||||
|
location,
|
||||||
|
});
|
||||||
|
if (results.length > 0) return results;
|
||||||
|
}
|
||||||
if (process.env.SERPER_API_KEY) {
|
if (process.env.SERPER_API_KEY) {
|
||||||
return await serper_search(query, {
|
const results = await serper_search(query, {
|
||||||
num_results,
|
num_results,
|
||||||
tbs,
|
tbs,
|
||||||
filter,
|
filter,
|
||||||
@@ -40,9 +52,10 @@ export async function search({
|
|||||||
country,
|
country,
|
||||||
location,
|
location,
|
||||||
});
|
});
|
||||||
|
if (results.length > 0) return results;
|
||||||
}
|
}
|
||||||
if (process.env.SEARCHAPI_API_KEY) {
|
if (process.env.SEARCHAPI_API_KEY) {
|
||||||
return await searchapi_search(query, {
|
const results = await searchapi_search(query, {
|
||||||
num_results,
|
num_results,
|
||||||
tbs,
|
tbs,
|
||||||
filter,
|
filter,
|
||||||
@@ -50,9 +63,10 @@ export async function search({
|
|||||||
country,
|
country,
|
||||||
location,
|
location,
|
||||||
});
|
});
|
||||||
|
if (results.length > 0) return results;
|
||||||
}
|
}
|
||||||
if (process.env.SEARXNG_ENDPOINT) {
|
if (process.env.SEARXNG_ENDPOINT) {
|
||||||
return await searxng_search(query, {
|
const results = await searxng_search(query, {
|
||||||
num_results,
|
num_results,
|
||||||
tbs,
|
tbs,
|
||||||
filter,
|
filter,
|
||||||
@@ -60,6 +74,7 @@ export async function search({
|
|||||||
country,
|
country,
|
||||||
location,
|
location,
|
||||||
});
|
});
|
||||||
|
if (results.length > 0) return results;
|
||||||
}
|
}
|
||||||
return await googleSearch(
|
return await googleSearch(
|
||||||
query,
|
query,
|
||||||
|
|||||||
@@ -66,10 +66,10 @@ export function getIndexQueue() {
|
|||||||
connection: redisConnection,
|
connection: redisConnection,
|
||||||
defaultJobOptions: {
|
defaultJobOptions: {
|
||||||
removeOnComplete: {
|
removeOnComplete: {
|
||||||
age: 90000, // 25 hours
|
age: 3600, // 1 hour
|
||||||
},
|
},
|
||||||
removeOnFail: {
|
removeOnFail: {
|
||||||
age: 90000, // 25 hours
|
age: 3600, // 1 hour
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
@@ -120,10 +120,10 @@ export function getBillingQueue() {
|
|||||||
connection: redisConnection,
|
connection: redisConnection,
|
||||||
defaultJobOptions: {
|
defaultJobOptions: {
|
||||||
removeOnComplete: {
|
removeOnComplete: {
|
||||||
age: 90000, // 25 hours
|
age: 3600, // 1 hour
|
||||||
},
|
},
|
||||||
removeOnFail: {
|
removeOnFail: {
|
||||||
age: 90000, // 25 hours
|
age: 3600, // 1 hour
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -85,6 +85,7 @@ import https from "https";
|
|||||||
import { cacheableLookup } from "../scraper/scrapeURL/lib/cacheableLookup";
|
import { cacheableLookup } from "../scraper/scrapeURL/lib/cacheableLookup";
|
||||||
import { robustFetch } from "../scraper/scrapeURL/lib/fetch";
|
import { robustFetch } from "../scraper/scrapeURL/lib/fetch";
|
||||||
import { RateLimiterMode } from "../types";
|
import { RateLimiterMode } from "../types";
|
||||||
|
import { calculateCreditsToBeBilled } from "../lib/scrape-billing";
|
||||||
import { redisEvictConnection } from "./redis";
|
import { redisEvictConnection } from "./redis";
|
||||||
|
|
||||||
configDotenv();
|
configDotenv();
|
||||||
@@ -1384,22 +1385,7 @@ async function processJob(job: Job & { id: string }, token: string) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (job.data.is_scrape !== true) {
|
if (job.data.is_scrape !== true) {
|
||||||
let creditsToBeBilled = 1; // Assuming 1 credit per document
|
let creditsToBeBilled = await calculateCreditsToBeBilled(job.data.scrapeOptions, doc, job.id, costTracking);
|
||||||
if ((job.data.scrapeOptions.extract && job.data.scrapeOptions.formats?.includes("extract")) || (job.data.scrapeOptions.formats?.includes("changeTracking") && job.data.scrapeOptions.changeTrackingOptions?.modes?.includes("json"))) {
|
|
||||||
creditsToBeBilled = 5;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (job.data.scrapeOptions.agent?.model?.toLowerCase() === "fire-1" || job.data.scrapeOptions.extract?.agent?.model?.toLowerCase() === "fire-1" || job.data.scrapeOptions.jsonOptions?.agent?.model?.toLowerCase() === "fire-1") {
|
|
||||||
if (process.env.USE_DB_AUTHENTICATION === "true") {
|
|
||||||
creditsToBeBilled = Math.ceil((costTracking.toJSON().totalCost ?? 1) * 1800);
|
|
||||||
} else {
|
|
||||||
creditsToBeBilled = 150;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (doc.metadata?.proxyUsed === "stealth") {
|
|
||||||
creditsToBeBilled += 4;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (
|
if (
|
||||||
job.data.team_id !== process.env.BACKGROUND_INDEX_TEAM_ID! &&
|
job.data.team_id !== process.env.BACKGROUND_INDEX_TEAM_ID! &&
|
||||||
|
|||||||
Generated
+1
-1
@@ -680,7 +680,7 @@ checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "firecrawl"
|
name = "firecrawl"
|
||||||
version = "1.1.0"
|
version = "1.2.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"assert_matches",
|
"assert_matches",
|
||||||
"axum",
|
"axum",
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "firecrawl"
|
name = "firecrawl"
|
||||||
author= "Mendable.ai"
|
author= "Mendable.ai"
|
||||||
version = "1.1.0"
|
version = "1.2.0"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
homepage = "https://www.firecrawl.dev/"
|
homepage = "https://www.firecrawl.dev/"
|
||||||
|
|||||||
@@ -99,6 +99,49 @@ impl From<CrawlScrapeOptions> for ScrapeOptions {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Options for webhook notifications
|
||||||
|
#[serde_with::skip_serializing_none]
|
||||||
|
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct WebhookOptions {
|
||||||
|
/// URL to send webhook notifications to
|
||||||
|
pub url: String,
|
||||||
|
|
||||||
|
/// Custom headers to include in webhook requests
|
||||||
|
pub headers: Option<HashMap<String, String>>,
|
||||||
|
|
||||||
|
/// Custom data included in all webhook payloads
|
||||||
|
pub metadata: Option<HashMap<String, String>>,
|
||||||
|
|
||||||
|
/// Event types to receive
|
||||||
|
pub events: Option<Vec<WebhookEvent>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<String> for WebhookOptions {
|
||||||
|
fn from(value: String) -> Self {
|
||||||
|
Self {
|
||||||
|
url: value,
|
||||||
|
..Default::default()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize, Serialize, Debug, PartialEq, Eq, Clone, Copy)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub enum WebhookEvent {
|
||||||
|
/// Crawl finished successfully
|
||||||
|
Completed,
|
||||||
|
|
||||||
|
/// Crawl encountered an error
|
||||||
|
Failed,
|
||||||
|
|
||||||
|
/// Individual page scraped
|
||||||
|
Page,
|
||||||
|
|
||||||
|
/// Crawl job initiated
|
||||||
|
Started,
|
||||||
|
}
|
||||||
|
|
||||||
#[serde_with::skip_serializing_none]
|
#[serde_with::skip_serializing_none]
|
||||||
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
|
#[derive(Deserialize, Serialize, Debug, Default, Clone)]
|
||||||
#[serde(rename_all = "camelCase")]
|
#[serde(rename_all = "camelCase")]
|
||||||
@@ -132,7 +175,7 @@ pub struct CrawlOptions {
|
|||||||
pub allow_external_links: Option<bool>,
|
pub allow_external_links: Option<bool>,
|
||||||
|
|
||||||
/// URL to send Webhook crawl events to.
|
/// URL to send Webhook crawl events to.
|
||||||
pub webhook: Option<String>,
|
pub webhook: Option<WebhookOptions>,
|
||||||
|
|
||||||
/// Idempotency key to send to the crawl endpoint.
|
/// Idempotency key to send to the crawl endpoint.
|
||||||
#[serde(skip)]
|
#[serde(skip)]
|
||||||
|
|||||||
Reference in New Issue
Block a user