diff --git a/apps/api/src/__tests__/snips/scrape.test.ts b/apps/api/src/__tests__/snips/scrape.test.ts index 0c53edb8..41326fac 100644 --- a/apps/api/src/__tests__/snips/scrape.test.ts +++ b/apps/api/src/__tests__/snips/scrape.test.ts @@ -34,7 +34,7 @@ describe("Scrape tests", () => { expect(response.body.data.markdown).toBe( "this is fake data coming from the mocking system!", ); - }); + }, 10000); describe("Ad blocking (f-e dependant)", () => { it.concurrent("blocks ads by default", async () => { diff --git a/apps/api/src/scraper/scrapeURL/engines/scrapingbee/index.ts b/apps/api/src/scraper/scrapeURL/engines/scrapingbee/index.ts index 38c43878..39805a2f 100644 --- a/apps/api/src/scraper/scrapeURL/engines/scrapingbee/index.ts +++ b/apps/api/src/scraper/scrapeURL/engines/scrapingbee/index.ts @@ -3,7 +3,7 @@ import { Meta } from "../.."; import { EngineScrapeResult } from ".."; import { specialtyScrapeCheck } from "../utils/specialtyHandler"; import { AxiosError, type AxiosResponse } from "axios"; -import { EngineError } from "../../error"; +import { EngineError, TimeoutError } from "../../error"; const client = new ScrapingBeeClient(process.env.SCRAPING_BEE_API_KEY!); @@ -17,23 +17,26 @@ export function scrapeURLWithScrapingBee( let response: AxiosResponse; const timeout = (timeToRun ?? 300000) + meta.options.waitFor; try { - response = await client.get({ - url: meta.url, - params: { - timeout, - wait_browser: wait_browser, - wait: meta.options.waitFor, - transparent_status_code: true, - json_response: true, - screenshot: meta.options.formats.includes("screenshot"), - screenshot_full_page: meta.options.formats.includes( - "screenshot@fullPage", - ), - }, - headers: { - "ScrapingService-Request": "TRUE", // this is sent to the page, not to ScrapingBee - mogery - }, - }); + response = await Promise.race>([ + client.get({ + url: meta.url, + params: { + timeout, + wait_browser: wait_browser, + wait: meta.options.waitFor, + transparent_status_code: true, + json_response: true, + screenshot: meta.options.formats.includes("screenshot"), + screenshot_full_page: meta.options.formats.includes( + "screenshot@fullPage", + ), + }, + headers: { + "ScrapingService-Request": "TRUE", // this is sent to the page, not to ScrapingBee - mogery + }, + }), + new Promise((_, reject) => setTimeout(() => reject(new TimeoutError("ScrapingBee timed out")), timeout + 5000)), + ]); } catch (error) { if (error instanceof AxiosError && error.response !== undefined) { response = error.response;