added logger

This commit is contained in:
rafaelsideguide
2024-07-23 17:30:46 -03:00
parent f0b07b509b
commit 6208ecdbc0
25 changed files with 201 additions and 109 deletions
@@ -2,6 +2,7 @@ import axios from "axios";
import { logScrape } from "../../../services/logging/scrape_log";
import { fetchAndProcessPdf } from "../utils/pdfProcessor";
import { universalTimeout } from "../global";
import { Logger } from "../../../lib/logger";
/**
* Scrapes a URL with Axios
@@ -34,9 +35,7 @@ export async function scrapWithFetch(
});
if (response.status !== 200) {
console.error(
`[Axios] Error fetching url: ${url} with status: ${response.status}`
);
Logger.debug(`⛏️ Axios: Failed to fetch url: ${url} with status: ${response.status}`);
logParams.error_message = response.statusText;
logParams.response_code = response.status;
return {
@@ -63,10 +62,10 @@ export async function scrapWithFetch(
} catch (error) {
if (error.code === "ECONNABORTED") {
logParams.error_message = "Request timed out";
console.log(`[Axios] Request timed out for ${url}`);
Logger.debug(`⛏️ Axios: Request timed out for ${url}`);
} else {
logParams.error_message = error.message || error;
console.error(`[Axios] Error fetching url: ${url} -> ${error}`);
Logger.debug(`⛏️ Axios: Failed to fetch url: ${url} | Error: ${error}`);
}
return { content: "", pageStatusCode: null, pageError: logParams.error_message };
} finally {
@@ -4,6 +4,7 @@ import { logScrape } from "../../../services/logging/scrape_log";
import { generateRequestParams } from "../single_url";
import { fetchAndProcessPdf } from "../utils/pdfProcessor";
import { universalTimeout } from "../global";
import { Logger } from "../../../lib/logger";
/**
* Scrapes a URL with Fire-Engine
@@ -59,12 +60,10 @@ export async function scrapWithFireEngine({
let engine = engineParam; // do we want fireEngineOptions as first choice?
console.log(
`[Fire-Engine][${engine}] Scraping ${url} with wait: ${waitParam} and screenshot: ${screenshotParam} and method: ${fireEngineOptionsParam?.method ?? "null"}`
Logger.info(
`⛏️ Fire-Engine (${engine}): Scraping ${url} | params: { wait: ${waitParam}, screenshot: ${screenshotParam}, method: ${fireEngineOptionsParam?.method ?? "null"} }`
);
// console.log(fireEngineOptionsParam)
const response = await axios.post(
process.env.FIRE_ENGINE_BETA_URL + endpoint,
{
@@ -84,15 +83,15 @@ export async function scrapWithFireEngine({
);
if (response.status !== 200) {
console.error(
`[Fire-Engine][${engine}] Error fetching url: ${url} with status: ${response.status}`
Logger.debug(
`⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${response.status}`
);
logParams.error_message = response.data?.pageError;
logParams.response_code = response.data?.pageStatusCode;
if(response.data && response.data?.pageStatusCode !== 200) {
console.error(`[Fire-Engine][${engine}] Error fetching url: ${url} with status: ${response.status}`);
Logger.debug(`⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${response.status}`);
}
return {
@@ -130,10 +129,10 @@ export async function scrapWithFireEngine({
}
} catch (error) {
if (error.code === "ECONNABORTED") {
console.log(`[Fire-Engine] Request timed out for ${url}`);
Logger.debug(`⛏️ Fire-Engine: Request timed out for ${url}`);
logParams.error_message = "Request timed out";
} else {
console.error(`[Fire-Engine][c] Error fetching url: ${url} -> ${error}`);
Logger.debug(`⛏️ Fire-Engine: Failed to fetch url: ${url} | Error: ${error}`);
logParams.error_message = error.message || error;
}
return { html: "", screenshot: "", pageStatusCode: null, pageError: logParams.error_message };
@@ -3,6 +3,7 @@ import { logScrape } from "../../../services/logging/scrape_log";
import { generateRequestParams } from "../single_url";
import { fetchAndProcessPdf } from "../utils/pdfProcessor";
import { universalTimeout } from "../global";
import { Logger } from "../../../lib/logger";
/**
* Scrapes a URL with Playwright
@@ -51,8 +52,8 @@ export async function scrapWithPlaywright(
);
if (response.status !== 200) {
console.error(
`[Playwright] Error fetching url: ${url} with status: ${response.status}`
Logger.debug(
`⛏️ Playwright: Failed to fetch url: ${url} | status: ${response.status}, error: ${response.data?.pageError}`
);
logParams.error_message = response.data?.pageError;
logParams.response_code = response.data?.pageStatusCode;
@@ -86,8 +87,8 @@ export async function scrapWithPlaywright(
};
} catch (jsonError) {
logParams.error_message = jsonError.message || jsonError;
console.error(
`[Playwright] Error parsing JSON response for url: ${url} -> ${jsonError}`
Logger.debug(
`⛏️ Playwright: Error parsing JSON response for url: ${url} | Error: ${jsonError}`
);
return { content: "", pageStatusCode: null, pageError: logParams.error_message };
}
@@ -95,10 +96,10 @@ export async function scrapWithPlaywright(
} catch (error) {
if (error.code === "ECONNABORTED") {
logParams.error_message = "Request timed out";
console.log(`[Playwright] Request timed out for ${url}`);
Logger.debug(`⛏️ Playwright: Request timed out for ${url}`);
} else {
logParams.error_message = error.message || error;
console.error(`[Playwright] Error fetching url: ${url} -> ${error}`);
Logger.debug(`⛏️ Playwright: Failed to fetch url: ${url} | Error: ${error}`);
}
return { content: "", pageStatusCode: null, pageError: logParams.error_message };
} finally {
@@ -3,6 +3,7 @@ import { generateRequestParams } from "../single_url";
import { fetchAndProcessPdf } from "../utils/pdfProcessor";
import { universalTimeout } from "../global";
import { ScrapingBeeClient } from "scrapingbee";
import { Logger } from "../../../lib/logger";
/**
* Scrapes a URL with ScrapingBee
@@ -56,8 +57,8 @@ export async function scrapWithScrapingBee(
text = decoder.decode(response.data);
logParams.success = true;
} catch (decodeError) {
console.error(
`[ScrapingBee][c] Error decoding response data for url: ${url} -> ${decodeError}`
Logger.debug(
`⛏️ ScrapingBee: Error decoding response data for url: ${url} | Error: ${decodeError}`
);
logParams.error_message = decodeError.message || decodeError;
}
@@ -72,7 +73,7 @@ export async function scrapWithScrapingBee(
};
}
} catch (error) {
console.error(`[ScrapingBee][c] Error fetching url: ${url} -> ${error}`);
Logger.debug(`⛏️ ScrapingBee: Error fetching url: ${url} | Error: ${error}`);
logParams.error_message = error.message || error;
logParams.response_code = error.response?.status;
return {