fix(scrapeURL/sb): enforce timeout (FIR-980) (#1183)

* fix(scrapeURL/scrapingbee): enforce timeout

* fix(scrapeURL/sb): types

* fix the test (give it an explicit 10000 ms timeout)

* fixup: remove nix files
This commit is contained in:
Gergő Móricz
2025-02-16 15:55:03 +01:00
committed by GitHub
parent fd8b38902a
commit 1491b5b141
2 changed files with 22 additions and 19 deletions
+1 -1
View File
@@ -34,7 +34,7 @@ describe("Scrape tests", () => {
expect(response.body.data.markdown).toBe( expect(response.body.data.markdown).toBe(
"this is fake data coming from the mocking system!", "this is fake data coming from the mocking system!",
); );
}); }, 10000);
describe("Ad blocking (f-e dependant)", () => { describe("Ad blocking (f-e dependant)", () => {
it.concurrent("blocks ads by default", async () => { it.concurrent("blocks ads by default", async () => {
@@ -3,7 +3,7 @@ import { Meta } from "../..";
import { EngineScrapeResult } from ".."; import { EngineScrapeResult } from "..";
import { specialtyScrapeCheck } from "../utils/specialtyHandler"; import { specialtyScrapeCheck } from "../utils/specialtyHandler";
import { AxiosError, type AxiosResponse } from "axios"; import { AxiosError, type AxiosResponse } from "axios";
import { EngineError } from "../../error"; import { EngineError, TimeoutError } from "../../error";
const client = new ScrapingBeeClient(process.env.SCRAPING_BEE_API_KEY!); const client = new ScrapingBeeClient(process.env.SCRAPING_BEE_API_KEY!);
@@ -17,7 +17,8 @@ export function scrapeURLWithScrapingBee(
let response: AxiosResponse<any>; let response: AxiosResponse<any>;
const timeout = (timeToRun ?? 300000) + meta.options.waitFor; const timeout = (timeToRun ?? 300000) + meta.options.waitFor;
try { try {
response = await client.get({ response = await Promise.race<AxiosResponse<any>>([
client.get({
url: meta.url, url: meta.url,
params: { params: {
timeout, timeout,
@@ -33,7 +34,9 @@ export function scrapeURLWithScrapingBee(
headers: { headers: {
"ScrapingService-Request": "TRUE", // this is sent to the page, not to ScrapingBee - mogery "ScrapingService-Request": "TRUE", // this is sent to the page, not to ScrapingBee - mogery
}, },
}); }),
new Promise((_, reject) => setTimeout(() => reject(new TimeoutError("ScrapingBee timed out")), timeout + 5000)),
]);
} catch (error) { } catch (error) {
if (error instanceof AxiosError && error.response !== undefined) { if (error instanceof AxiosError && error.response !== undefined) {
response = error.response; response = error.response;