Merge pull request #516 from kevinswiber/fix/use-db-auth-in-single-url-scraper

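Ensure USE_DB_AUTHENTICATION counts as enabled only when it is exactly 'true'; the single URL scraper now selects scrapers based on the Fire Engine and ScrapingBee settings instead.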
This commit is contained in:
Nicolas
2024-09-02 23:35:52 -03:00
committed by GitHub
10 changed files with 34 additions and 21 deletions
+2 -1
View File
@@ -25,7 +25,8 @@ export class Logger {
const color = Logger.colors[level]; const color = Logger.colors[level];
console[level.toLowerCase()](color, `[${new Date().toISOString()}]${level} - ${message}`); console[level.toLowerCase()](color, `[${new Date().toISOString()}]${level} - ${message}`);
// if (process.env.USE_DB_AUTH) { // const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
// if (useDbAuthentication) {
// save to supabase? another place? // save to supabase? another place?
// supabase.from('logs').insert({ level: level, message: message, timestamp: new Date().toISOString(), success: boolean }); // supabase.from('logs').insert({ level: level, message: message, timestamp: new Date().toISOString(), success: boolean });
// } // }
+2 -1
View File
@@ -36,7 +36,8 @@ export class ScrapeEvents {
static async insert(jobId: string, content: ScrapeEvent) { static async insert(jobId: string, content: ScrapeEvent) {
if (jobId === "TEST") return null; if (jobId === "TEST") return null;
if (process.env.USE_DB_AUTHENTICATION) { const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
if (useDbAuthentication) {
try { try {
const result = await supabase.from("scrape_events").insert({ const result = await supabase.from("scrape_events").insert({
job_id: jobId, job_id: jobId,
+2 -1
View File
@@ -7,7 +7,8 @@ export function withAuth<T extends AuthResponse, U extends any[]>(
originalFunction: (...args: U) => Promise<T> originalFunction: (...args: U) => Promise<T>
) { ) {
return async function (...args: U): Promise<T> { return async function (...args: U): Promise<T> {
if (process.env.USE_DB_AUTHENTICATION === "false") { const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
if (!useDbAuthentication) {
if (warningCount < 5) { if (warningCount < 5) {
Logger.warn("You're bypassing authentication"); Logger.warn("You're bypassing authentication");
warningCount++; warningCount++;
+2 -1
View File
@@ -144,7 +144,8 @@ export async function runWebScraper({
const saveJob = async (job: Job, result: any, token: string, mode: string) => { const saveJob = async (job: Job, result: any, token: string, mode: string) => {
try { try {
if (process.env.USE_DB_AUTHENTICATION === "true") { const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
if (useDbAuthentication) {
const { data, error } = await supabase_service const { data, error } = await supabase_service
.from("firecrawl_jobs") .from("firecrawl_jobs")
.update({ docs: result }) .update({ docs: result })
+14 -11
View File
@@ -23,12 +23,15 @@ import { clientSideError } from "../../strings";
dotenv.config(); dotenv.config();
const useScrapingBee = process.env.SCRAPING_BEE_API_KEY !== '' && process.env.SCRAPING_BEE_API_KEY !== undefined;
const useFireEngine = process.env.FIRE_ENGINE_BETA_URL !== '' && process.env.FIRE_ENGINE_BETA_URL !== undefined;
export const baseScrapers = [ export const baseScrapers = [
"fire-engine;chrome-cdp", useFireEngine ? "fire-engine;chrome-cdp" : undefined,
"fire-engine", useFireEngine ? "fire-engine" : undefined,
"scrapingBee", useScrapingBee ? "scrapingBee" : undefined,
process.env.USE_DB_AUTHENTICATION ? undefined : "playwright", useFireEngine ? undefined : "playwright",
"scrapingBeeLoad", useScrapingBee ? "scrapingBeeLoad" : undefined,
"fetch", "fetch",
].filter(Boolean); ].filter(Boolean);
@@ -85,18 +88,18 @@ function getScrapingFallbackOrder(
}); });
let defaultOrder = [ let defaultOrder = [
!process.env.USE_DB_AUTHENTICATION ? undefined : "fire-engine;chrome-cdp", useFireEngine ? "fire-engine;chrome-cdp" : undefined,
!process.env.USE_DB_AUTHENTICATION ? undefined : "fire-engine", useFireEngine ? "fire-engine" : undefined,
"scrapingBee", useScrapingBee ? "scrapingBee" : undefined,
process.env.USE_DB_AUTHENTICATION ? undefined : "playwright", useScrapingBee ? "scrapingBeeLoad" : undefined,
"scrapingBeeLoad", useFireEngine ? undefined : "playwright",
"fetch", "fetch",
].filter(Boolean); ].filter(Boolean);
if (isWaitPresent || isScreenshotPresent || isHeadersPresent) { if (isWaitPresent || isScreenshotPresent || isHeadersPresent) {
defaultOrder = [ defaultOrder = [
"fire-engine", "fire-engine",
process.env.USE_DB_AUTHENTICATION ? undefined : "playwright", useFireEngine ? undefined : "playwright",
...defaultOrder.filter( ...defaultOrder.filter(
(scraper) => scraper !== "fire-engine" && scraper !== "playwright" (scraper) => scraper !== "fire-engine" && scraper !== "playwright"
), ),
+2 -1
View File
@@ -3,7 +3,8 @@ import { Logger } from "../../../src/lib/logger";
import "dotenv/config"; import "dotenv/config";
export async function logCrawl(job_id: string, team_id: string) { export async function logCrawl(job_id: string, team_id: string) {
if (process.env.USE_DB_AUTHENTICATION === 'true') { const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
if (useDbAuthentication) {
try { try {
const { data, error } = await supabase_service const { data, error } = await supabase_service
.from("bulljobs_teams") .from("bulljobs_teams")
+2 -1
View File
@@ -7,7 +7,8 @@ import { Logger } from "../../lib/logger";
export async function logJob(job: FirecrawlJob) { export async function logJob(job: FirecrawlJob) {
try { try {
if (process.env.USE_DB_AUTHENTICATION === "false") { const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
if (!useDbAuthentication) {
return; return;
} }
+2 -1
View File
@@ -8,7 +8,8 @@ export async function logScrape(
scrapeLog: ScrapeLog, scrapeLog: ScrapeLog,
pageOptions?: PageOptions pageOptions?: PageOptions
) { ) {
if (process.env.USE_DB_AUTHENTICATION === "false") { const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
if (!useDbAuthentication) {
Logger.debug("Skipping logging scrape to Supabase"); Logger.debug("Skipping logging scrape to Supabase");
return; return;
} }
+2 -1
View File
@@ -8,8 +8,9 @@ class SupabaseService {
constructor() { constructor() {
const supabaseUrl = process.env.SUPABASE_URL; const supabaseUrl = process.env.SUPABASE_URL;
const supabaseServiceToken = process.env.SUPABASE_SERVICE_TOKEN; const supabaseServiceToken = process.env.SUPABASE_SERVICE_TOKEN;
const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
// Only initialize the Supabase client if both URL and Service Token are provided. // Only initialize the Supabase client if both URL and Service Token are provided.
if (process.env.USE_DB_AUTHENTICATION === "false") { if (!useDbAuthentication) {
// Warn the user that Authentication is disabled by setting the client to null // Warn the user that Authentication is disabled by setting the client to null
Logger.warn( Logger.warn(
"Authentication is disabled. Supabase client will not be initialized." "Authentication is disabled. Supabase client will not be initialized."
+4 -2
View File
@@ -9,7 +9,8 @@ class SupabaseService {
const supabaseUrl = process.env.SUPABASE_URL; const supabaseUrl = process.env.SUPABASE_URL;
const supabaseServiceToken = process.env.SUPABASE_SERVICE_TOKEN; const supabaseServiceToken = process.env.SUPABASE_SERVICE_TOKEN;
// Only initialize the Supabase client if both URL and Service Token are provided. // Only initialize the Supabase client if both URL and Service Token are provided.
if (process.env.USE_DB_AUTHENTICATION === "false") { const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
if (!useDbAuthentication) {
// Warn the user that Authentication is disabled by setting the client to null // Warn the user that Authentication is disabled by setting the client to null
console.warn( console.warn(
"Authentication is disabled. Supabase client will not be initialized." "Authentication is disabled. Supabase client will not be initialized."
@@ -36,7 +37,8 @@ export const supabase_service: SupabaseClient = new Proxy(
new SupabaseService(), new SupabaseService(),
{ {
get: function (target, prop, receiver) { get: function (target, prop, receiver) {
if (process.env.USE_DB_AUTHENTICATION === "false") { const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true';
if (!useDbAuthentication) {
console.debug( console.debug(
"Attempted to access Supabase client when it's not configured." "Attempted to access Supabase client when it's not configured."
); );