Files
firecrawl/apps/api/src/scraper/scrapeURL/lib/fetch.ts
T
Gergő Móricz 2200f084f3 SELFHOST FIXES (#1207)
* fix(extract): construct OpenAI on demand

Fixes hard-crash if api key not specified in a self-hosting environment.

* fix(ci): try sleeping

* fix(ci): override host

* fix(ci): wait for server to start

* Support /extract and /crawl for self-hosted (FIR-1097) (#1137)

* Support /extract for self-hosted

This returns the job response from redis rather than supabase when db auth is disabled (self hosted mode)

* Use getJob for extract and use correct types

* fix(v1/crawl-status): only poll DB for total count if DB is enabled

* feat(snips): TEST_SUITE_SELF_HOSTED

* fix(ci/test-server-self-host): use pr trigger

* fix(scrapeURL): f-e mocking in selfhosted env

* fix(snips): do not try to eval json format on selfhost

* fix(scrapeURL): further f-e mocking

* fix(snips): don't timeout on hard fail polling

* fix(v1/extract-status): fix-up the db-agnostic impl

unfortunately had to separate the functions since the schema
was too divergent :(

* fix(snips): boost screenshot delay

* feat(ci): test with openai

* feat(ci): extract, search testing

* fix(ci): matrix

* fix(ci): bleh

* Update: fix default google search (#1174)

* fix log title

* search should always work

* asd

* fix ci

---------

Co-authored-by: Nick Roth <nlr06886@gmail.com>
Co-authored-by: William <sdustusun@gmail.com>
2025-02-20 00:41:22 +01:00

268 lines
6.0 KiB
TypeScript

import { Logger } from "winston";
import { z, ZodError } from "zod";
import * as Sentry from "@sentry/node";
import { MockState, saveMock } from "./mock";
import { fireEngineURL } from "../engines/fire-engine/scrape";
/**
 * Options accepted by `robustFetch`.
 *
 * `Schema` is the zod schema type used to validate (and type) the decoded
 * JSON response body.
 */
export type RobustFetchParams<Schema extends z.Schema<any>> = {
/** Target URL; passed to `fetch` and used to match recorded mock requests. */
url: string;
/** Logger that receives debug entries for failed, retried and malformed requests. */
logger: Logger;
/** HTTP method sent with the request. */
method: "GET" | "POST" | "DELETE" | "PUT";
/**
 * Request payload. A `FormData` instance is sent as-is; any other defined
 * value is serialized with `JSON.stringify` and sent with a
 * `Content-Type: application/json` header.
 */
body?: any;
/** Extra request headers; spread after the default `Content-Type`, so they win on conflict. */
headers?: Record<string, string>;
/** Optional zod schema the JSON-decoded response body is parsed with. */
schema?: Schema;
/** NOTE(review): not read by `robustFetch` as visible in this file -- confirm whether it is still needed. */
dontParseResponse?: boolean;
/** When true, `robustFetch` resolves to `null` without inspecting the response. */
ignoreResponse?: boolean;
/**
 * When true, `robustFetch` resolves to `null` instead of throwing if the
 * request itself cannot be completed. Failure status codes are not covered
 * by this flag.
 */
ignoreFailure?: boolean;
/** Identifier attached to log entries and error causes; `robustFetch` defaults it to `crypto.randomUUID()`. */
requestId?: string;
/** Number of attempts remaining, including the current one; `robustFetch` defaults it to 1. */
tryCount?: number;
/** Delay in milliseconds (used with `setTimeout`) before a retry; see `robustFetch` for which failures honor it. */
tryCooldown?: number;
/** `null` performs a live request; otherwise the response is taken from the recorded requests in this mock state. */
mock: MockState | null;
};
/**
 * Performs an HTTP request (or replays a recorded mock) and returns the
 * JSON-decoded response body, validated against `schema` when one is given.
 *
 * Live mode (`mock === null`):
 *  - `FormData` bodies are sent untouched; any other defined body is
 *    JSON-encoded and sent with `Content-Type: application/json`.
 *  - A rejected `fetch` or a rejected body read counts as a request failure:
 *    it is reported to Sentry and then retried (while `tryCount > 1`) or
 *    rethrown as `Error("Request failed")`. With `ignoreFailure` set the
 *    function resolves to `null` instead.
 *  - Successful responses are handed to `saveMock` (without logger, schema
 *    and headers).
 *
 * Mock mode (`mock !== null`): the response is taken from `mock.requests`,
 * matching on a request-type id and consuming matches in recording-time
 * order via `mock.tracker`.
 *
 * In both modes a status `>= 300` is treated as a failure and retried while
 * `tryCount > 1`. `tryCooldown`, when set, is awaited before every retry.
 *
 * @returns The parsed (and, with `schema`, validated) body, or `null` when
 *   `ignoreResponse` is set or an ignored request failure occurred.
 * @throws Error "Request failed" when the request cannot be completed and
 *   no retries remain.
 * @throws Error "Failed to mock request -- no mock targets found." when no
 *   recorded request is left for this request type.
 * @throws Error "Request sent failure status" when the final attempt returns
 *   a status `>= 300` (not suppressed by `ignoreFailure`).
 * @throws Error "Request sent malformed JSON" when the body is not JSON.
 * @throws Error "Response does not match provided schema" /
 *   "Parsing response with provided schema failed" when validation fails.
 */
export async function robustFetch<
  Schema extends z.Schema<any>,
  Output = z.infer<Schema>,
>({
  url,
  logger,
  method = "GET",
  body,
  headers,
  schema,
  ignoreResponse = false,
  ignoreFailure = false,
  requestId = crypto.randomUUID(),
  tryCount = 1,
  tryCooldown,
  mock,
}: RobustFetchParams<Schema>): Promise<Output> {
  const params = {
    url,
    logger,
    method,
    body,
    headers,
    schema,
    ignoreResponse,
    ignoreFailure,
    tryCount,
    tryCooldown,
  };

  // Single retry path so that every kind of retry honors `tryCooldown` and
  // keeps the same `requestId` for log correlation.
  const retry = async (): Promise<Output> => {
    if (tryCooldown !== undefined) {
      await new Promise((resolve) =>
        setTimeout(() => resolve(null), tryCooldown),
      );
    }
    return await robustFetch({
      ...params,
      requestId,
      tryCount: tryCount - 1,
      mock,
    });
  };

  let response: {
    status: number;
    headers: Headers;
    body: string;
  };

  if (mock === null) {
    let request: Response;
    let responseBody = "";
    try {
      request = await fetch(url, {
        method,
        headers: {
          ...(body instanceof FormData
            ? {}
            : body !== undefined
              ? {
                  "Content-Type": "application/json",
                }
              : {}),
          ...(headers !== undefined ? headers : {}),
        },
        ...(body instanceof FormData
          ? {
              body,
            }
          : body !== undefined
            ? {
                body: JSON.stringify(body),
              }
            : {}),
      });
      if (!ignoreResponse) {
        // Reading the body can reject (e.g. the connection drops mid-stream),
        // so it is kept inside the try block and handled like any other
        // request failure.
        responseBody = await request.text();
      }
    } catch (error) {
      if (!ignoreFailure) {
        Sentry.captureException(error);
        if (tryCount > 1) {
          logger.debug(
            "Request failed, trying " + (tryCount - 1) + " more times",
            { params, error, requestId },
          );
          return await retry();
        } else {
          logger.debug("Request failed", { params, error, requestId });
          throw new Error("Request failed", {
            cause: {
              params,
              requestId,
              error,
            },
          });
        }
      } else {
        return null as Output;
      }
    }

    if (ignoreResponse === true) {
      return null as Output;
    }

    response = {
      status: request.status,
      headers: request.headers,
      body: responseBody,
    };
  } else {
    if (ignoreResponse === true) {
      return null as Output;
    }

    // Builds the key used to pair this call with recorded requests: URL (with
    // the fire-engine base replaced by a placeholder) plus method, and for
    // fire-engine POSTs also the requested engine and target URL.
    const makeRequestTypeId = (
      request: (typeof mock)["requests"][number]["options"],
    ) => {
      const trueUrl = request.url.startsWith(fireEngineURL)
        ? request.url.replace(fireEngineURL, "<fire-engine>")
        : request.url;
      let out = trueUrl + ";" + request.method;
      if (
        trueUrl.startsWith("<fire-engine>") &&
        request.method === "POST"
      ) {
        out += "f-e;" + request.body?.engine + ";" + request.body?.url;
      }
      return out;
    };

    const thisId = makeRequestTypeId(params);
    const matchingMocks = mock.requests
      .filter((x) => makeRequestTypeId(x.options) === thisId)
      .sort((a, b) => a.time - b.time);

    // The tracker counts how many recordings of this request type have been
    // consumed, so repeated calls replay successive recordings.
    const nextI = mock.tracker[thisId] ?? 0;
    mock.tracker[thisId] = nextI + 1;

    if (!matchingMocks[nextI]) {
      throw new Error("Failed to mock request -- no mock targets found.");
    }

    response = {
      ...matchingMocks[nextI].result,
      headers: new Headers(matchingMocks[nextI].result.headers),
    };
  }

  if (response.status >= 300) {
    if (tryCount > 1) {
      logger.debug(
        "Request sent failure status, trying " + (tryCount - 1) + " more times",
        { params, response, requestId },
      );
      return await retry();
    } else {
      logger.debug("Request sent failure status", {
        params,
        response,
        requestId,
      });
      throw new Error("Request sent failure status", {
        cause: {
          params,
          response,
          requestId,
        },
      });
    }
  }

  if (mock === null) {
    // Record the live exchange; logger, schema and headers are blanked out
    // before the request options are handed to saveMock.
    await saveMock(
      {
        ...params,
        logger: undefined,
        schema: undefined,
        headers: undefined,
      },
      response,
    );
  }

  let data: Output;
  try {
    data = JSON.parse(response.body);
  } catch (error) {
    // Keep the parser's own error so the log and cause say what was wrong
    // with the payload, not only that it was malformed.
    logger.debug("Request sent malformed JSON", {
      params,
      response,
      requestId,
      error,
    });
    throw new Error("Request sent malformed JSON", {
      cause: {
        params,
        response,
        requestId,
        error,
      },
    });
  }

  if (schema) {
    try {
      data = schema.parse(data);
    } catch (error) {
      if (error instanceof ZodError) {
        logger.debug("Response does not match provided schema", {
          params,
          response,
          requestId,
          error,
          schema,
        });
        throw new Error("Response does not match provided schema", {
          cause: {
            params,
            response,
            requestId,
            error,
            schema,
          },
        });
      } else {
        logger.debug("Parsing response with provided schema failed", {
          params,
          response,
          requestId,
          error,
          schema,
        });
        throw new Error("Parsing response with provided schema failed", {
          cause: {
            params,
            response,
            requestId,
            error,
            schema,
          },
        });
      }
    }
  }

  return data;
}