2024-07-03 20:18:11 -03:00
import { ExtractorOptions } from "./../../lib/entities" ;
2024-04-20 13:53:11 -07:00
import { supabase_service } from "../supabase" ;
import { FirecrawlJob } from "../../types" ;
2024-05-02 15:30:22 -04:00
import { posthog } from "../posthog" ;
2024-04-20 13:53:11 -07:00
import "dotenv/config" ;
2024-07-23 17:30:46 -03:00
import { Logger } from "../../lib/logger" ;
2024-04-20 13:53:11 -07:00
/**
 * Records a finished job in the `firecrawl_jobs` table and, when a PostHog API
 * key is configured and the job is not part of a crawl, emits a `job-logged`
 * analytics event.
 *
 * Does nothing unless the `USE_DB_AUTHENTICATION` environment variable is
 * exactly `"true"`.
 *
 * If the job's page options carry a non-empty Authorization header (matched
 * case-insensitively), the header value and the job's docs are replaced with
 * redaction placeholders before anything is stored. NOTE: this redaction is
 * applied to the `job` object passed in, so the caller observes it as well.
 *
 * Failures are logged through `Logger.error` and never rethrown.
 *
 * @param job - The job to log.
 * @returns A promise that resolves once logging has been attempted.
 */
export async function logJob(job: FirecrawlJob): Promise<void> {
  try {
    const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === "true";
    if (!useDbAuthentication) {
      return;
    }

    // Redact any pages that have an authorization header. Header names are
    // case-insensitive, so match every spelling ("authorization",
    // "AUTHORIZATION", ...) rather than only the canonical one.
    const headers = job.pageOptions && job.pageOptions.headers;
    if (headers) {
      const authorizationKeys = Object.keys(headers).filter(
        (key) => key.toLowerCase() === "authorization" && headers[key]
      );
      if (authorizationKeys.length > 0) {
        for (const key of authorizationKeys) {
          headers[key] = "REDACTED";
        }
        job.docs = [
          {
            content: "REDACTED DUE TO AUTHORIZATION HEADER",
            html: "REDACTED DUE TO AUTHORIZATION HEADER",
          },
        ];
      }
    }

    const { error } = await supabase_service.from("firecrawl_jobs").insert([
      {
        job_id: job.job_id ? job.job_id : null,
        success: job.success,
        message: job.message,
        num_docs: job.num_docs,
        docs: job.docs,
        time_taken: job.time_taken,
        // "preview" is a placeholder team, stored as null.
        team_id: job.team_id === "preview" ? null : job.team_id,
        mode: job.mode,
        url: job.url,
        crawler_options: job.crawlerOptions,
        page_options: job.pageOptions,
        origin: job.origin,
        extractor_options: job.extractor_options,
        num_tokens: job.num_tokens,
        retry: !!job.retry,
        crawl_id: job.crawl_id,
      },
    ]);

    // Jobs that belong to a crawl are not reported to PostHog individually.
    if (process.env.POSTHOG_API_KEY && !job.crawl_id) {
      const phLog = {
        distinctId: "from-api", //* To identify this on the group level, setting distinctid to a static string per posthog docs: https://posthog.com/docs/product-analytics/group-analytics#advanced-server-side-only-capturing-group-events-without-a-user
        ...(job.team_id !== "preview" && {
          groups: { team: job.team_id },
        }), //* Identifying event on this team
        event: "job-logged",
        properties: {
          success: job.success,
          message: job.message,
          num_docs: job.num_docs,
          time_taken: job.time_taken,
          team_id: job.team_id === "preview" ? null : job.team_id,
          mode: job.mode,
          url: job.url,
          crawler_options: job.crawlerOptions,
          page_options: job.pageOptions,
          origin: job.origin,
          extractor_options: job.extractor_options,
          num_tokens: job.num_tokens,
          retry: job.retry,
        },
      };
      posthog.capture(phLog);
    }

    // The insert error is reported after the analytics event so a database
    // failure does not suppress the PostHog capture.
    if (error) {
      Logger.error(` Error logging job: ${error.message} `);
    }
  } catch (error: unknown) {
    // Anything can be thrown; only Error instances are guaranteed a message.
    const message = error instanceof Error ? error.message : String(error);
    Logger.error(` Error logging job: ${message} `);
  }
}