Files
firecrawl/apps/api/src/services/logging/scrape_log.ts
T

53 lines
1.5 KiB
TypeScript
Raw Normal View History

2024-07-03 17:28:53 -03:00
import "dotenv/config";
import { ScrapeLog } from "../../types";
import { supabase_service } from "../supabase";
2024-07-03 20:18:11 -03:00
import { PageOptions } from "../../lib/entities";
2024-07-23 17:30:46 -03:00
import { Logger } from "../../lib/logger";
2024-07-03 17:28:53 -03:00
2024-07-03 20:18:11 -03:00
/**
 * Records the outcome of a single scrape attempt in the `scrape_logs`
 * Supabase table.
 *
 * This is best-effort: failures are reported through `Logger.error` and never
 * propagate to the caller.
 *
 * Side effect: when `pageOptions.headers` carries a non-empty Authorization
 * header (matched case-insensitively), `scrapeLog.html` on the object passed
 * in is overwritten with a redaction marker. The row written to the database
 * never contains the page HTML; a fixed placeholder is stored instead.
 *
 * @param scrapeLog - Details of the scrape attempt to record.
 * @param pageOptions - Options used for the scrape; only `headers` is read.
 * @returns A promise that resolves once the insert has been attempted.
 */
export async function logScrape(
  scrapeLog: ScrapeLog,
  pageOptions?: PageOptions
): Promise<void> {
  if (process.env.USE_DB_AUTHENTICATION === "false") {
    Logger.debug("Skipping logging scrape to Supabase");
    return;
  }

  try {
    // Only log jobs in production
    // if (process.env.ENV !== "production") {
    //   return;
    // }

    // Redact any pages that have an authorization header. HTTP field names
    // are case-insensitive, so "authorization" must be treated the same as
    // "Authorization".
    const headers = pageOptions?.headers;
    const hasAuthorizationHeader =
      headers != null &&
      Object.entries(headers).some(
        ([name, value]) =>
          name.toLowerCase() === "authorization" && Boolean(value)
      );
    if (hasAuthorizationHeader) {
      scrapeLog.html = "REDACTED DUE TO AUTHORIZATION HEADER";
    }

    const { error } = await supabase_service.from("scrape_logs").insert([
      {
        url: scrapeLog.url,
        scraper: scrapeLog.scraper,
        success: scrapeLog.success,
        response_code: scrapeLog.response_code,
        time_taken_seconds: scrapeLog.time_taken_seconds,
        proxy: scrapeLog.proxy,
        retried: scrapeLog.retried,
        error_message: scrapeLog.error_message,
        date_added: new Date().toISOString(),
        // The page HTML is intentionally not persisted.
        html: "Removed to save db space",
        ipv4_support: scrapeLog.ipv4_support,
        ipv6_support: scrapeLog.ipv6_support,
      },
    ]);

    if (error) {
      // NOTE(review): the message says "proxy" although this logs scrapes;
      // kept verbatim in case log-based alerting matches on it — confirm.
      Logger.error(`Error logging proxy:\n${describeLogError(error)}`);
    }
  } catch (error) {
    Logger.error(`Error logging proxy:\n${describeLogError(error)}`);
  }
}

/**
 * Serializes an arbitrary thrown or returned error value for logging.
 *
 * `JSON.stringify` on an `Error` instance yields `{}` because `message` and
 * `stack` are non-enumerable, so those fields are copied explicitly. Values
 * that cannot be serialized (e.g. circular structures) fall back to
 * `String(value)` instead of throwing.
 */
function describeLogError(err: unknown): string {
  try {
    if (err instanceof Error) {
      return JSON.stringify({
        ...err, // keep any own enumerable extras such as `code` or `details`
        name: err.name,
        message: err.message,
        stack: err.stack,
      });
    }
    return JSON.stringify(err);
  } catch {
    return String(err);
  }
}