Files
firecrawl/apps/api/src/lib/scrape-events.ts
T

110 lines
2.5 KiB
TypeScript
Raw Normal View History

2024-07-30 13:27:23 -04:00
import { Job } from "bullmq";
2024-07-24 14:31:25 +02:00
import { supabase_service as supabase } from "../services/supabase";
2024-11-07 20:57:33 +01:00
import { logger } from "./logger";
2024-09-04 15:57:57 -03:00
import { configDotenv } from "dotenv";
2024-11-07 20:57:33 +01:00
import { Engine } from "../scraper/scrapeURL/engines";
2024-09-04 15:57:57 -03:00
configDotenv();
2024-07-24 14:31:25 +02:00
/**
 * Event payload describing an error encountered while processing a job.
 *
 * One member of the {@link ScrapeEvent} union; `type` is the literal tag
 * that distinguishes it from the other event payloads.
 */
export type ScrapeErrorEvent = {
  /** Discriminant tag; always the literal `"error"`. */
  type: "error";
  /** Error message text. */
  message: string;
  /** Stack trace text, when one is available. */
  stack?: string;
};
2024-07-24 14:31:25 +02:00
/**
 * Event payload describing a single scrape attempt against a URL.
 *
 * One member of the {@link ScrapeEvent} union; `type` is the literal tag
 * that distinguishes it from the other event payloads.
 */
export type ScrapeScrapeEvent = {
  /** Discriminant tag; always the literal `"scrape"`. */
  type: "scrape";
  /** URL the attempt was made against. */
  url: string;
  /** Optional worker identifier. NOTE(review): its source for scrape events is not visible here — confirm with callers. */
  worker?: string;
  /** Scraping engine used for the attempt. */
  method: Engine;
  /**
   * Outcome of the attempt, or `null` when no outcome is recorded.
   * `ScrapeEvents.updateScrapeResult` overwrites this field on a stored event.
   */
  result: {
    /** Whether the attempt succeeded. */
    success: boolean;
    /** Response status code, when known. */
    response_code?: number;
    /** Response size, when known. NOTE(review): unit is not stated in this file — confirm. */
    response_size?: number;
    /** Failure detail, either as text or as a structured object. */
    error?: string | object;
    // proxy?: string,
    /** Duration of the attempt. NOTE(review): unit is not stated in this file — confirm. */
    time_taken: number;
  } | null;
};
2024-07-24 14:31:25 +02:00
/**
 * Event payload describing a queue state transition for a job.
 *
 * One member of the {@link ScrapeEvent} union; `type` is the literal tag
 * that distinguishes it from the other event payloads.
 */
export type ScrapeQueueEvent = {
  /** Discriminant tag; always the literal `"queue"`. */
  type: "queue";
  /**
   * Name of the queue transition being recorded.
   * NOTE(review): presumably mirrors the job queue's lifecycle event names — confirm.
   */
  event:
    | "waiting"
    | "active"
    | "completed"
    | "paused"
    | "resumed"
    | "removed"
    | "failed";
  /** Optional worker identifier; `ScrapeEvents.logJobEvent` fills it from `FLY_MACHINE_ID`. */
  worker?: string;
};
2024-07-24 14:31:25 +02:00
2024-12-11 19:46:11 -03:00
/**
 * Any event payload that `ScrapeEvents.insert` can store.
 *
 * The `type` field is the discriminant: `"error"`, `"scrape"` or `"queue"`.
 */
export type ScrapeEvent = ScrapeErrorEvent | ScrapeScrapeEvent | ScrapeQueueEvent;
2024-07-24 14:31:25 +02:00
/**
 * Best-effort writer for rows in the `scrape_events` table.
 *
 * None of the methods reject: failures are reported by returning `null` or
 * by logging. The Supabase client reports failures through the `error` field
 * of its response rather than by throwing, so every response is inspected
 * explicitly; the `try`/`catch` blocks only guard against unexpected
 * exceptions.
 */
export class ScrapeEvents {
  /**
   * Stores one event for a job.
   *
   * Nothing is written when `jobId` is the literal `"TEST"` or when the
   * `USE_DB_AUTHENTICATION` environment variable is not `"true"`.
   *
   * @param jobId - Identifier stored in the row's `job_id` column.
   * @param content - Event payload; its `type` is also stored in the `type` column.
   * @returns The `id` of the inserted row, or `null` when nothing was stored
   *   or the insert failed.
   */
  static async insert(jobId: string, content: ScrapeEvent) {
    if (jobId === "TEST") return null;

    const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === "true";
    if (!useDbAuthentication) return null;

    try {
      const { data, error } = await supabase
        .from("scrape_events")
        .insert({
          job_id: jobId,
          type: content.type,
          content,
          // created_at
        })
        .select()
        .single();

      // Insert failures are deliberately not logged here (logging was
      // previously disabled for this path); the caller only sees `null`.
      if (error || !data) return null;

      // The row shape is not typed in this file. Normalising a missing id to
      // `null` keeps `undefined` from slipping past the `updateScrapeResult`
      // guard.
      return (data as any).id ?? null;
    } catch {
      return null;
    }
  }

  /**
   * Replaces the `result` field inside the stored `content` of an event row,
   * leaving the other fields of `content` untouched.
   *
   * This is a read-then-write sequence, not an atomic update. Failures of
   * either step are logged and never thrown.
   *
   * @param logId - Row id as returned by {@link ScrapeEvents.insert}; nothing
   *   happens when it is `null` or `undefined`.
   * @param result - New value for `content.result`.
   */
  static async updateScrapeResult(
    logId: number | null,
    result: ScrapeScrapeEvent["result"],
  ) {
    if (logId == null) return;

    try {
      const { data: previousLog, error: fetchError } = await supabase
        .from("scrape_events")
        .select()
        .eq("id", logId)
        .single();

      if (fetchError || !previousLog) {
        const reason = fetchError?.message ?? `scrape event ${logId} not found`;
        logger.error(`Error updating scrape result: ${reason}`);
        return;
      }

      const { error: updateError } = await supabase
        .from("scrape_events")
        .update({
          content: {
            ...(previousLog as any).content,
            result,
          },
        })
        .eq("id", logId);

      if (updateError) {
        logger.error(`Error updating scrape result: ${updateError.message}`);
      }
    } catch (error) {
      logger.error(`Error updating scrape result: ${error}`);
    }
  }

  /**
   * Stores a `"queue"` event for a job, with the `FLY_MACHINE_ID` environment
   * variable as the worker.
   *
   * Calls `ScrapeEvents.insert` by class name rather than through `this`, so
   * the method keeps working when passed around detached as a callback.
   *
   * @param job - Either a job object with an `id`, or the job id itself.
   * @param event - Queue transition to record.
   */
  static async logJobEvent(job: Job | any, event: ScrapeQueueEvent["event"]) {
    try {
      // Accept both a job object and a bare id; a job without a usable id
      // falls back to the value itself, as before.
      const jobId = job?.id || job;
      if (typeof jobId !== "string" && typeof jobId !== "number") {
        logger.error(
          `Error logging job event: could not resolve a job id for "${event}" event`,
        );
        return;
      }

      await ScrapeEvents.insert(String(jobId), {
        type: "queue",
        event,
        worker: process.env.FLY_MACHINE_ID,
      });
    } catch (error) {
      logger.error(`Error logging job event: ${error}`);
    }
  }
}