Files
firecrawl/apps/api/src/services/logging/log_job.ts
T

80 lines
2.7 KiB
TypeScript
Raw Normal View History

2024-07-03 20:18:11 -03:00
import { ExtractorOptions } from "./../../lib/entities";
2024-04-20 13:53:11 -07:00
import { supabase_service } from "../supabase";
import { FirecrawlJob } from "../../types";
2024-05-02 15:30:22 -04:00
import { posthog } from "../posthog";
2024-04-20 13:53:11 -07:00
import "dotenv/config";
2024-07-23 17:30:46 -03:00
import { Logger } from "../../lib/logger";
2024-04-20 13:53:11 -07:00
/**
 * Extracts a loggable message from an arbitrary thrown value.
 *
 * Anything can be thrown in JavaScript (including `null`), so the value is
 * narrowed before `.message` is read.
 */
function describeLogError(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }
  if (typeof error === "object" && error !== null && "message" in error) {
    return String((error as { message: unknown }).message);
  }
  return String(error);
}

/**
 * Records a finished job.
 *
 * Does nothing unless the `USE_DB_AUTHENTICATION` environment variable is
 * exactly `"true"`. Otherwise it inserts one row into the `firecrawl_jobs`
 * table through `supabase_service` and, when `POSTHOG_API_KEY` is set and the
 * job has no `crawl_id`, captures a `job-logged` event through `posthog`.
 *
 * If the job's page options carry an Authorization header (matched
 * case-insensitively) with a non-empty value, the header value and the
 * documents are replaced by placeholders in everything that is stored or sent.
 * The `job` argument itself is never modified.
 *
 * Failures are reported through `Logger.error` and are not rethrown.
 *
 * @param job - The job to record.
 */
export async function logJob(job: FirecrawlJob): Promise<void> {
  try {
    const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === "true";
    if (!useDbAuthentication) {
      return;
    }

    // Redaction is applied to local copies so the caller's objects (which may
    // still be used after logging) keep their original content.
    let pageOptions = job.pageOptions;
    let docs = job.docs;

    // HTTP header names are case-insensitive, so "authorization",
    // "AUTHORIZATION", etc. must be redacted just like "Authorization".
    const headers = job.pageOptions && job.pageOptions.headers;
    const authHeaderNames = headers
      ? Object.keys(headers).filter(
          (name) =>
            name.toLowerCase() === "authorization" && Boolean(headers[name]),
        )
      : [];

    if (authHeaderNames.length > 0) {
      const redactedHeaders = { ...headers };
      for (const name of authHeaderNames) {
        redactedHeaders[name] = "REDACTED";
      }
      pageOptions = { ...job.pageOptions, headers: redactedHeaders };
      docs = [
        {
          content: "REDACTED DUE TO AUTHORIZATION HEADER",
          html: "REDACTED DUE TO AUTHORIZATION HEADER",
        },
      ];
    }

    // "preview" is not stored as a team id; it is recorded as null.
    const teamId = job.team_id === "preview" ? null : job.team_id;

    const { error } = await supabase_service.from("firecrawl_jobs").insert([
      {
        job_id: job.job_id ? job.job_id : null,
        success: job.success,
        message: job.message,
        num_docs: job.num_docs,
        docs: docs,
        time_taken: job.time_taken,
        team_id: teamId,
        mode: job.mode,
        url: job.url,
        crawler_options: job.crawlerOptions,
        page_options: pageOptions,
        origin: job.origin,
        extractor_options: job.extractor_options,
        num_tokens: job.num_tokens,
        retry: !!job.retry,
        crawl_id: job.crawl_id,
      },
    ]);

    // A failed insert is logged but does not prevent the analytics event.
    if (error) {
      Logger.error(`Error logging job: ${error.message}`);
    }

    // Jobs that belong to a crawl (crawl_id set) are not sent to PostHog.
    if (process.env.POSTHOG_API_KEY && !job.crawl_id) {
      const phLog = {
        distinctId: "from-api", //* To identify this on the group level, setting distinctid to a static string per posthog docs: https://posthog.com/docs/product-analytics/group-analytics#advanced-server-side-only-capturing-group-events-without-a-user
        ...(job.team_id !== "preview" && {
          groups: { team: job.team_id },
        }), //* Identifying event on this team
        event: "job-logged",
        properties: {
          success: job.success,
          message: job.message,
          num_docs: job.num_docs,
          time_taken: job.time_taken,
          team_id: teamId,
          mode: job.mode,
          url: job.url,
          crawler_options: job.crawlerOptions,
          page_options: pageOptions,
          origin: job.origin,
          extractor_options: job.extractor_options,
          num_tokens: job.num_tokens,
          retry: job.retry,
        },
      };
      posthog.capture(phLog);
    }
  } catch (error) {
    Logger.error(`Error logging job: ${describeLogError(error)}`);
  }
}