2024-08-15 23:30:33 +02:00
import express , { NextFunction , Request , Response } from "express" ;
2024-08-27 09:42:55 -03:00
import { crawlController } from "../controllers/v1/crawl" ;
2024-08-06 15:24:45 -03:00
// import { crawlStatusController } from "../../src/controllers/v1/crawl-status";
import { scrapeController } from "../../src/controllers/v1/scrape" ;
2024-08-27 09:42:55 -03:00
import { crawlStatusController } from "../controllers/v1/crawl-status" ;
import { mapController } from "../controllers/v1/map" ;
2024-12-11 19:46:11 -03:00
import {
ErrorResponse ,
RequestWithACUC ,
RequestWithAuth ,
RequestWithMaybeAuth
} from "../controllers/v1/types" ;
2024-08-15 23:30:33 +02:00
import { RateLimiterMode } from "../types" ;
2024-08-26 18:48:00 -03:00
import { authenticateUser } from "../controllers/auth" ;
2024-08-15 23:30:33 +02:00
import { createIdempotencyKey } from "../services/idempotency/create" ;
import { validateIdempotencyKey } from "../services/idempotency/validate" ;
import { checkTeamCredits } from "../services/billing/credit_billing" ;
2024-08-17 01:04:14 +02:00
import expressWs from "express-ws" ;
import { crawlStatusWSController } from "../controllers/v1/crawl-status-ws" ;
2024-08-19 13:28:54 -03:00
import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist" ;
2024-08-27 09:42:55 -03:00
import { crawlCancelController } from "../controllers/v1/crawl-cancel" ;
2024-11-07 20:57:33 +01:00
import { logger } from "../lib/logger" ;
2024-08-31 14:23:55 -03:00
import { scrapeStatusController } from "../controllers/v1/scrape-status" ;
2024-10-01 16:04:39 -03:00
import { concurrencyCheckController } from "../controllers/v1/concurrency-check" ;
2024-10-23 15:37:24 -03:00
import { batchScrapeController } from "../controllers/v1/batch-scrape" ;
2024-11-12 12:23:24 -05:00
import { extractController } from "../controllers/v1/extract" ;
2024-08-06 15:24:45 -03:00
// import { crawlPreviewController } from "../../src/controllers/v1/crawlPreview";
// import { crawlJobStatusPreviewController } from "../../src/controllers/v1/status";
// import { searchController } from "../../src/controllers/v1/search";
// import { crawlCancelController } from "../../src/controllers/v1/crawl-cancel";
// import { keyAuthController } from "../../src/controllers/v1/keyAuth";
// import { livenessController } from "../controllers/v1/liveness";
// import { readinessController } from "../controllers/v1/readiness";
2024-12-11 19:46:11 -03:00
/**
 * Builds an Express middleware that verifies the authenticated team has enough
 * credits before the request is handed to the next handler.
 *
 * @param minimum - Fixed number of credits the route requires. When omitted
 *   (or falsy) and the request has a body, the requirement is derived per
 *   request from `req.body.limit`, then `req.body.urls.length`, then `1`.
 * @returns A middleware that answers HTTP 402 when `checkTeamCredits` reports
 *   failure; otherwise it stores the remaining credits on `req.account` and
 *   calls `next()`. Any error raised while checking is forwarded to
 *   `next(err)`.
 */
function checkCreditsMiddleware(
  minimum?: number
): (req: RequestWithAuth, res: Response, next: NextFunction) => void {
  return (req, res, next) => {
    (async () => {
      // Resolve the requirement into a per-request local. Assigning to the
      // captured `minimum` parameter would make the first request's value
      // stick for every later request served by this middleware instance.
      let required = minimum;
      if (!required && req.body) {
        required =
          (req.body as any)?.limit ?? (req.body as any)?.urls?.length ?? 1;
      }

      const { success, remainingCredits, chunk } = await checkTeamCredits(
        req.acuc,
        req.auth.team_id,
        required ?? 1
      );

      // Keep the refreshed chunk returned by the credit check, if any.
      if (chunk) {
        req.acuc = chunk;
      }

      if (!success) {
        logger.error(
          `Insufficient credits: ${JSON.stringify({ team_id: req.auth.team_id, minimum: required, remainingCredits })}`
        );
        if (!res.headersSent) {
          res.status(402).json({
            success: false,
            error:
              "Insufficient credits to perform this request. For more credits, you can upgrade your plan at https://firecrawl.dev/pricing or try changing the request limit to a lower value."
          });
        }
        // Never continue down the chain for a request that failed the check,
        // even if a response was already started elsewhere.
        return;
      }

      req.account = { remainingCredits };
      next();
    })().catch((err) => next(err));
  };
}
2024-12-11 19:46:11 -03:00
/**
 * Builds an Express middleware that authenticates the caller through
 * `authenticateUser` using the given rate-limiter mode.
 *
 * On failure it replies with the status and error reported by the
 * authentication result (unless a response has already been started). On
 * success it populates `req.auth`, `req.acuc` and, when a chunk is present,
 * `req.account`, then calls `next()`.
 *
 * @param rateLimiterMode - Mode passed through to `authenticateUser`.
 * @returns The middleware function. Errors thrown during authentication are
 *   forwarded to `next(err)`.
 */
export function authMiddleware(
  rateLimiterMode: RateLimiterMode
): (req: RequestWithMaybeAuth, res: Response, next: NextFunction) => void {
  return (req, res, next) => {
    const authenticate = async (): Promise<void> => {
      const result = await authenticateUser(req, res, rateLimiterMode);

      if (!result.success) {
        // Only write the failure response if nothing has been sent yet.
        if (res.headersSent) {
          return;
        }
        res
          .status(result.status)
          .json({ success: false, error: result.error });
        return;
      }

      const { team_id, plan, chunk } = result;
      req.auth = { team_id, plan };
      // Normalise a null chunk to undefined on the request.
      req.acuc = chunk ?? undefined;
      if (chunk) {
        req.account = { remainingCredits: chunk.remaining_credits };
      }
      next();
    };

    authenticate().catch((err) => next(err));
  };
}
2024-12-11 19:46:11 -03:00
/**
 * Express middleware enforcing the `x-idempotency-key` header.
 *
 * Requests without the header pass straight through. When the header is
 * present the key is validated with `validateIdempotencyKey`; a rejected key
 * yields HTTP 409 and stops the chain, an accepted key is recorded with
 * `createIdempotencyKey` before `next()` is called.
 *
 * Errors raised by validation or key creation are forwarded to `next(err)`.
 */
function idempotencyMiddleware(
  req: Request,
  res: Response,
  next: NextFunction
) {
  (async () => {
    if (req.headers["x-idempotency-key"]) {
      const isIdempotencyValid = await validateIdempotencyKey(req);
      if (!isIdempotencyValid) {
        if (!res.headersSent) {
          res
            .status(409)
            .json({ success: false, error: "Idempotency key already used" });
        }
        // A rejected key must never be recorded again or reach the controller,
        // regardless of whether a response had already been started.
        return;
      }
      // Await the write so a failure surfaces through the catch below instead
      // of becoming an unhandled rejection.
      // NOTE(review): assumes createIdempotencyKey returns a promise — confirm.
      await createIdempotencyKey(req);
    }
    next();
  })().catch((err) => next(err));
}
2024-12-11 19:46:11 -03:00
/**
 * Express middleware that rejects requests whose `body.url` is a string
 * flagged by `isUrlBlocked`, answering HTTP 403. All other requests —
 * including those with no body or a non-string `url` — continue to `next()`.
 */
function blocklistMiddleware(req: Request, res: Response, next: NextFunction) {
  // `req.body` may be undefined when no body was parsed; optional chaining
  // keeps that case from throwing a TypeError.
  const url: unknown = req.body?.url;

  if (typeof url === "string" && isUrlBlocked(url)) {
    if (!res.headersSent) {
      res.status(403).json({
        success: false,
        error:
          "URL is blocked intentionally. Firecrawl currently does not support social media scraping due to policy restrictions."
      });
    }
    // A blocked URL must not reach the controller, even if a response was
    // already started.
    return;
  }

  next();
}
/**
 * Adapts a promise-returning controller into an Express handler.
 *
 * The controller is invoked with `(req, res)`; if the promise it returns
 * rejects, the rejection reason is passed to `next` so Express error handling
 * can take over.
 *
 * @param controller - Handler returning a promise.
 * @returns An Express-compatible `(req, res, next)` handler.
 */
export function wrap(
  controller: (req: Request, res: Response) => Promise<any>
): (req: Request, res: Response, next: NextFunction) => any {
  return function wrappedController(req, res, next) {
    const pending = controller(req, res);
    pending.then(undefined, (error) => {
      next(error);
    });
  };
}
2024-08-17 01:04:14 +02:00
// Apply express-ws to a throwaway app before creating the router.
// NOTE(review): presumably this is what makes `v1Router.ws(...)` available
// below (express-ws extends the Router prototype) — confirm against the
// express-ws documentation.
expressWs(express());

export const v1Router = express.Router();

// ---------------------------------------------------------------------------
// Job submission: authenticate, check credits, then apply the URL blocklist.
// Crawl and batch scrape additionally honour the idempotency header.
// ---------------------------------------------------------------------------
v1Router.post(
  "/scrape",
  authMiddleware(RateLimiterMode.Scrape),
  checkCreditsMiddleware(1),
  blocklistMiddleware,
  wrap(scrapeController)
);

v1Router.post(
  "/crawl",
  authMiddleware(RateLimiterMode.Crawl),
  checkCreditsMiddleware(),
  blocklistMiddleware,
  idempotencyMiddleware,
  wrap(crawlController)
);

v1Router.post(
  "/batch/scrape",
  authMiddleware(RateLimiterMode.Crawl),
  checkCreditsMiddleware(),
  blocklistMiddleware,
  idempotencyMiddleware,
  wrap(batchScrapeController)
);

v1Router.post(
  "/map",
  authMiddleware(RateLimiterMode.Map),
  checkCreditsMiddleware(1),
  blocklistMiddleware,
  wrap(mapController)
);

// ---------------------------------------------------------------------------
// Job status lookups.
// ---------------------------------------------------------------------------
v1Router.get(
  "/crawl/:jobId",
  authMiddleware(RateLimiterMode.CrawlStatus),
  wrap(crawlStatusController)
);

v1Router.get(
  "/batch/scrape/:jobId",
  authMiddleware(RateLimiterMode.CrawlStatus),
  // Batch scrape status reuses the crawl status controller, passing `true`
  // as its third argument.
  wrap((req: any, res): any => crawlStatusController(req, res, true))
);

// NOTE(review): this route is registered without authMiddleware — confirm
// that scrapeStatusController enforces access on its own.
v1Router.get("/scrape/:jobId", wrap(scrapeStatusController));

v1Router.get(
  "/concurrency-check",
  authMiddleware(RateLimiterMode.CrawlStatus),
  wrap(concurrencyCheckController)
);

// WebSocket variant of the crawl status endpoint.
v1Router.ws("/crawl/:jobId", crawlStatusWSController);

// ---------------------------------------------------------------------------
// Extraction.
// ---------------------------------------------------------------------------
v1Router.post(
  "/extract",
  authMiddleware(RateLimiterMode.Scrape),
  checkCreditsMiddleware(1),
  wrap(extractController)
);

// v1Router.post("/crawlWebsitePreview", crawlPreviewController);

// ---------------------------------------------------------------------------
// Crawl cancellation.
// NOTE(review): unlike the other controllers, crawlCancelController is
// registered without wrap() — confirm it handles its own errors.
// ---------------------------------------------------------------------------
v1Router.delete(
  "/crawl/:jobId",
  authMiddleware(RateLimiterMode.CrawlStatus),
  crawlCancelController
);
2024-08-16 23:48:50 +02:00
// v1Router.get("/checkJobStatus/:jobId", crawlJobStatusPreviewController);
2024-08-06 15:24:45 -03:00
// // Auth route for key based authentication
2024-08-16 23:48:50 +02:00
// v1Router.get("/keyAuth", keyAuthController);
2024-08-06 15:24:45 -03:00
// // Search routes
2024-08-16 23:48:50 +02:00
// v0Router.post("/search", searchController);
2024-08-06 15:24:45 -03:00
// Health/Probe routes
2024-08-16 23:48:50 +02:00
// v1Router.get("/health/liveness", livenessController);
// v1Router.get("/health/readiness", readinessController);