Files
firecrawl/apps/api/src/scraper/scrapeURL/lib/fetch.ts
T

214 lines
4.5 KiB
TypeScript
Raw Normal View History

2024-11-07 20:57:33 +01:00
import { Logger } from "winston";
import { z, ZodError } from "zod";
import { v4 as uuid } from "uuid";
import * as Sentry from "@sentry/node";
/**
 * Options accepted by `robustFetch`.
 */
export type RobustFetchParams<Schema extends z.Schema<any>> = {
  /** Target URL handed to `fetch`. */
  url: string;
  /** Logger that receives debug-level entries describing failures and retries. */
  logger: Logger;
  /**
   * HTTP method. Optional because `robustFetch` falls back to `"GET"` when it
   * is omitted.
   */
  method?: "GET" | "POST" | "DELETE" | "PUT";
  /**
   * Request payload. A `FormData` instance is sent as-is; any other defined
   * value is serialized with `JSON.stringify` and sent with a JSON
   * `Content-Type` header.
   */
  body?: any;
  /** Extra request headers; spread after the default `Content-Type`, so they win on conflict. */
  headers?: Record<string, string>;
  /** Optional zod schema the parsed JSON response is validated against. */
  schema?: Schema;
  /**
   * NOTE(review): by its name this is meant to return the raw response body
   * instead of parsing it as JSON — confirm that `robustFetch` honors it.
   */
  dontParseResponse?: boolean;
  /** When true, the call resolves to `null` once the request completes, without reading the body. */
  ignoreResponse?: boolean;
  /** When true, an exception thrown by `fetch` itself resolves to `null` instead of being rethrown. */
  ignoreFailure?: boolean;
  /** Correlation id attached to log entries and error causes; generated when omitted. */
  requestId?: string;
  /** Total number of attempts allowed, including the first one. */
  tryCount?: number;
  /** Delay in milliseconds (passed to `setTimeout`) before a retry is issued. */
  tryCooldown?: number;
};
2024-12-11 19:46:11 -03:00
/**
 * Issues an HTTP request via `fetch` with retry support, contextual debug
 * logging, and optional zod validation of the JSON response.
 *
 * Behavior, in order:
 * 1. Sends the request. A `FormData` body is sent untouched; any other defined
 *    body is JSON-encoded and a JSON `Content-Type` header is added (caller
 *    `headers` are spread last and therefore override it).
 * 2. If `fetch` throws: resolves to `null` when `ignoreFailure` is set,
 *    otherwise reports to Sentry and retries while `tryCount > 1`.
 * 3. If `ignoreResponse` is set, resolves to `null` without reading the body.
 * 4. A status `>= 300` is retried while `tryCount > 1`, else throws.
 * 5. If `dontParseResponse` is set, resolves to the raw body text.
 * 6. Otherwise parses the body as JSON and, when `schema` is given, validates
 *    it with `schema.parse`.
 *
 * Every retry waits `tryCooldown` milliseconds first when it is provided.
 *
 * @returns The parsed (and optionally validated) response, the raw body text
 *   when `dontParseResponse` is set, or `null` when `ignoreResponse` /
 *   `ignoreFailure` short-circuits.
 * @throws Error whose `cause` carries `params`, `requestId` and, where
 *   available, the `request`, `response`, underlying `error` and `schema`.
 */
export async function robustFetch<
  Schema extends z.Schema<any>,
  Output = z.infer<Schema>,
>({
  url,
  logger,
  method = "GET",
  body,
  headers,
  schema,
  dontParseResponse = false,
  ignoreResponse = false,
  ignoreFailure = false,
  requestId = uuid(),
  tryCount = 1,
  tryCooldown,
}: RobustFetchParams<Schema>): Promise<Output> {
  // Snapshot of the effective options; logged with every failure and reused
  // verbatim (minus one try) when retrying.
  const params = {
    url,
    logger,
    method,
    body,
    headers,
    schema,
    dontParseResponse,
    ignoreResponse,
    ignoreFailure,
    tryCount,
    tryCooldown,
  };

  // Single retry path so that network failures and failure statuses both
  // honor the cooldown and keep the same requestId.
  const retry = async (): Promise<Output> => {
    if (tryCooldown !== undefined) {
      await new Promise<void>((resolve) => setTimeout(resolve, tryCooldown));
    }
    return robustFetch<Schema, Output>({
      ...params,
      requestId,
      tryCount: tryCount - 1,
    });
  };

  const isFormData = body instanceof FormData;
  const hasJsonBody = !isFormData && body !== undefined;

  let request: Response;
  try {
    request = await fetch(url, {
      method,
      headers: {
        // FormData must set its own multipart boundary, so no Content-Type there.
        ...(hasJsonBody ? { "Content-Type": "application/json" } : {}),
        ...headers,
      },
      ...(isFormData
        ? { body }
        : hasJsonBody
          ? { body: JSON.stringify(body) }
          : {}),
    });
  } catch (error) {
    if (ignoreFailure) {
      return null as Output;
    }

    Sentry.captureException(error);
    if (tryCount > 1) {
      logger.debug(
        "Request failed, trying " + (tryCount - 1) + " more times",
        { params, error, requestId },
      );
      return await retry();
    }

    logger.debug("Request failed", { params, error, requestId });
    throw new Error("Request failed", {
      cause: {
        params,
        requestId,
        error,
      },
    });
  }

  if (ignoreResponse === true) {
    return null as Output;
  }

  // Reading the body can reject (e.g. the connection drops mid-stream); wrap
  // it so the failure carries the same context as every other error path.
  let bodyText: string;
  try {
    bodyText = await request.text();
  } catch (error) {
    logger.debug("Request body could not be read", {
      params,
      request,
      requestId,
      error,
    });
    throw new Error("Request body could not be read", {
      cause: {
        params,
        request,
        requestId,
        error,
      },
    });
  }

  const response = {
    status: request.status,
    headers: request.headers,
    body: bodyText,
  };

  if (request.status >= 300) {
    if (tryCount > 1) {
      logger.debug(
        "Request sent failure status, trying " + (tryCount - 1) + " more times",
        { params, request, response, requestId },
      );
      return await retry();
    }

    logger.debug("Request sent failure status", {
      params,
      request,
      response,
      requestId,
    });
    throw new Error("Request sent failure status", {
      cause: {
        params,
        request,
        response,
        requestId,
      },
    });
  }

  // Caller asked for the raw payload: skip JSON parsing and schema validation.
  if (dontParseResponse) {
    return response.body as Output;
  }

  let data: Output;
  try {
    data = JSON.parse(response.body);
  } catch (error) {
    logger.debug("Request sent malformed JSON", {
      params,
      request,
      response,
      requestId,
    });
    throw new Error("Request sent malformed JSON", {
      cause: {
        params,
        request,
        response,
        requestId,
      },
    });
  }

  if (schema) {
    try {
      data = schema.parse(data);
    } catch (error) {
      // Distinguish a genuine schema mismatch from an unexpected failure
      // inside the schema itself (e.g. a throwing transform/refinement).
      const message =
        error instanceof ZodError
          ? "Response does not match provided schema"
          : "Parsing response with provided schema failed";
      const context = {
        params,
        request,
        response,
        requestId,
        error,
        schema,
      };
      logger.debug(message, context);
      throw new Error(message, { cause: context });
    }
  }

  return data;
}