Nick: fixed prettier
This commit is contained in:
@@ -7,60 +7,82 @@ import { EngineError } from "../../error";
|
||||
|
||||
// ScrapingBee client used by every engine function that scrapeURLWithScrapingBee returns.
// The trailing `!` only silences the type checker: nothing on this line verifies that
// SCRAPING_BEE_API_KEY is actually set.
const client = new ScrapingBeeClient(process.env.SCRAPING_BEE_API_KEY!);
|
||||
|
||||
// NOTE(review): every statement from here on appears twice — first in its original
// layout, then re-wrapped — which matches the "@@ -7,60 +7,82 @@" hunk header above.
// The two copies differ only in line wrapping, trailing commas and redundant
// parentheses. Explanatory comments are attached to the second (re-wrapped) copy.
export function scrapeURLWithScrapingBee(wait_browser: "domcontentloaded" | "networkidle2"): ((meta: Meta) => Promise<EngineScrapeResult>) {
|
||||
return async (meta: Meta): Promise<EngineScrapeResult> => {
|
||||
let response: AxiosResponse<any>;
|
||||
try {
|
||||
response = await client.get({
|
||||
url: meta.url,
|
||||
params: {
|
||||
timeout: 15000, // TODO: dynamic timeout based on request timeout
|
||||
wait_browser: wait_browser,
|
||||
wait: Math.min(meta.options.waitFor, 35000),
|
||||
transparent_status_code: true,
|
||||
json_response: true,
|
||||
screenshot: meta.options.formats.includes("screenshot"),
|
||||
screenshot_full_page: meta.options.formats.includes("screenshot@fullPage"),
|
||||
},
|
||||
headers: {
|
||||
"ScrapingService-Request": "TRUE", // this is sent to the page, not to ScrapingBee - mogery
|
||||
},
|
||||
});
|
||||
} catch (error) {
|
||||
if (error instanceof AxiosError && error.response !== undefined) {
|
||||
response = error.response;
|
||||
} else {
|
||||
throw error;
|
||||
}
|
||||
/**
 * Builds a scrape engine function that fetches a page through the shared
 * ScrapingBee `client`.
 *
 * @param wait_browser - Forwarded unchanged as the `wait_browser` request param.
 * @returns An async function that requests `meta.url` via `client.get` and resolves
 *   to the page HTML, the HTTP status code, the resolved URL and — only when the
 *   response body carries one — a base64 PNG screenshot data URI.
 * @throws EngineError "Engine error #34" when the parsed body reports an error or
 *   exposes no Date / Content-Type header; "Engine error #35" when `body.body` is
 *   not a string. Errors that are not an AxiosError with a response are rethrown as-is.
 */
export function scrapeURLWithScrapingBee(
|
||||
wait_browser: "domcontentloaded" | "networkidle2"
|
||||
): (meta: Meta) => Promise<EngineScrapeResult> {
|
||||
return async (meta: Meta): Promise<EngineScrapeResult> => {
|
||||
let response: AxiosResponse<any>;
|
||||
try {
|
||||
response = await client.get({
|
||||
url: meta.url,
|
||||
params: {
|
||||
timeout: 15000, // TODO: dynamic timeout based on request timeout
|
||||
wait_browser: wait_browser,
|
||||
// The caller-supplied waitFor is capped at 35000.
// NOTE(review): that cap is larger than the fixed 15000 `timeout` above — confirm
// both values use the same unit and that a wait longer than the timeout is intended.
wait: Math.min(meta.options.waitFor, 35000),
|
||||
transparent_status_code: true,
|
||||
json_response: true,
|
||||
// "screenshot" and "screenshot@fullPage" are looked up as two separate format
// entries, so each flag depends only on its own entry being present.
screenshot: meta.options.formats.includes("screenshot"),
|
||||
screenshot_full_page: meta.options.formats.includes(
|
||||
"screenshot@fullPage"
|
||||
)
|
||||
},
|
||||
headers: {
|
||||
"ScrapingService-Request": "TRUE" // this is sent to the page, not to ScrapingBee - mogery
|
||||
}
|
||||
});
|
||||
// An Axios error that still carries a response is not treated as a failure here:
// its response is kept and goes through the same body checks as a successful call.
} catch (error) {
|
||||
if (error instanceof AxiosError && error.response !== undefined) {
|
||||
response = error.response;
|
||||
} else {
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
const data: Buffer = response.data;
|
||||
const body = JSON.parse(new TextDecoder().decode(data));
|
||||
// The raw response payload is decoded to text and parsed as JSON.
// NOTE(review): JSON.parse is not guarded, so a non-JSON payload surfaces as a
// SyntaxError rather than an EngineError — confirm that is acceptable to callers.
const data: Buffer = response.data;
|
||||
const body = JSON.parse(new TextDecoder().decode(data));
|
||||
|
||||
const headers = body.headers ?? {};
|
||||
const isHiddenEngineError = !(headers["Date"] ?? headers["date"] ?? headers["Content-Type"] ?? headers["content-type"]);
|
||||
// A body whose reported headers contain neither Date nor Content-Type (in either
// casing), or whose first such header found is falsy, is treated as an engine
// failure even when no explicit error field is set.
const headers = body.headers ?? {};
|
||||
const isHiddenEngineError = !(
|
||||
headers["Date"] ??
|
||||
headers["date"] ??
|
||||
headers["Content-Type"] ??
|
||||
headers["content-type"]
|
||||
);
|
||||
|
||||
if (body.errors || body.body?.error || isHiddenEngineError) {
|
||||
meta.logger.error("ScrapingBee threw an error", { body: body.body?.error ?? body.errors ?? body.body ?? body });
|
||||
throw new EngineError("Engine error #34", { cause: { body, statusCode: response.status } });
|
||||
}
|
||||
// Logs the most specific error payload available, then throws with the whole
// parsed body and the HTTP status attached as the error cause.
if (body.errors || body.body?.error || isHiddenEngineError) {
|
||||
meta.logger.error("ScrapingBee threw an error", {
|
||||
body: body.body?.error ?? body.errors ?? body.body ?? body
|
||||
});
|
||||
throw new EngineError("Engine error #34", {
|
||||
cause: { body, statusCode: response.status }
|
||||
});
|
||||
}
|
||||
|
||||
if (typeof body.body !== "string") {
|
||||
meta.logger.error("ScrapingBee: Body is not string??", { body });
|
||||
throw new EngineError("Engine error #35", { cause: { body, statusCode: response.status } });
|
||||
}
|
||||
// body.body is returned as `html` below, so anything other than a string is rejected.
if (typeof body.body !== "string") {
|
||||
meta.logger.error("ScrapingBee: Body is not string??", { body });
|
||||
throw new EngineError("Engine error #35", {
|
||||
cause: { body, statusCode: response.status }
|
||||
});
|
||||
}
|
||||
|
||||
specialtyScrapeCheck(meta.logger.child({ method: "scrapeURLWithScrapingBee/specialtyScrapeCheck" }), body.headers);
|
||||
// NOTE(review): this passes body.headers rather than the defaulted `headers` local.
// A missing body.headers would already have thrown above via isHiddenEngineError,
// so it cannot be nullish at this point.
specialtyScrapeCheck(
|
||||
meta.logger.child({
|
||||
method: "scrapeURLWithScrapingBee/specialtyScrapeCheck"
|
||||
}),
|
||||
body.headers
|
||||
);
|
||||
|
||||
return {
|
||||
url: body["resolved-url"] ?? meta.url,
|
||||
return {
|
||||
// Falls back to the requested URL when the body has no "resolved-url" entry.
url: body["resolved-url"] ?? meta.url,
|
||||
|
||||
html: body.body,
|
||||
error: response.status >= 300 ? response.statusText : undefined,
|
||||
statusCode: response.status,
|
||||
...(body.screenshot ? ({
|
||||
screenshot: `data:image/png;base64,${body.screenshot}`,
|
||||
}) : {}),
|
||||
};
|
||||
html: body.body,
|
||||
// A status of 300 or above is reported through `error` (as the status text) but the
// result is still returned rather than thrown.
error: response.status >= 300 ? response.statusText : undefined,
|
||||
statusCode: response.status,
|
||||
// The `screenshot` key is only added when body.screenshot is truthy.
...(body.screenshot
|
||||
? {
|
||||
screenshot: `data:image/png;base64,${body.screenshot}`
|
||||
}
|
||||
: {})
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user