Files
firecrawl/apps/api/src/routes/v1.ts
T

259 lines
7.2 KiB
TypeScript
Raw Normal View History

import express, { NextFunction, Request, Response } from "express";
2024-08-27 09:42:55 -03:00
import { crawlController } from "../controllers/v1/crawl";
2024-08-06 15:24:45 -03:00
// import { crawlStatusController } from "../../src/controllers/v1/crawl-status";
import { scrapeController } from "../../src/controllers/v1/scrape";
2024-08-27 09:42:55 -03:00
import { crawlStatusController } from "../controllers/v1/crawl-status";
import { mapController } from "../controllers/v1/map";
2024-12-11 19:46:11 -03:00
import {
ErrorResponse,
RequestWithACUC,
RequestWithAuth,
2024-12-11 19:51:08 -03:00
RequestWithMaybeAuth,
2024-12-11 19:46:11 -03:00
} from "../controllers/v1/types";
import { RateLimiterMode } from "../types";
2024-08-26 18:48:00 -03:00
import { authenticateUser } from "../controllers/auth";
import { createIdempotencyKey } from "../services/idempotency/create";
import { validateIdempotencyKey } from "../services/idempotency/validate";
import { checkTeamCredits } from "../services/billing/credit_billing";
2024-08-17 01:04:14 +02:00
import expressWs from "express-ws";
import { crawlStatusWSController } from "../controllers/v1/crawl-status-ws";
2024-08-19 13:28:54 -03:00
import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist";
2024-08-27 09:42:55 -03:00
import { crawlCancelController } from "../controllers/v1/crawl-cancel";
2024-11-07 20:57:33 +01:00
import { logger } from "../lib/logger";
2024-08-31 14:23:55 -03:00
import { scrapeStatusController } from "../controllers/v1/scrape-status";
2024-10-01 16:04:39 -03:00
import { concurrencyCheckController } from "../controllers/v1/concurrency-check";
2024-10-23 15:37:24 -03:00
import { batchScrapeController } from "../controllers/v1/batch-scrape";
2024-11-12 12:23:24 -05:00
import { extractController } from "../controllers/v1/extract";
2025-01-03 20:44:27 -03:00
import { extractStatusController } from "../controllers/v1/extract-status";
2024-12-20 15:44:17 -03:00
import { creditUsageController } from "../controllers/v1/credit-usage";
2024-12-20 18:09:49 -03:00
import { BLOCKLISTED_URL_MESSAGE } from "../lib/strings";
2025-01-02 19:31:03 -03:00
import { searchController } from "../controllers/v1/search";
2025-01-17 17:12:04 +01:00
import { crawlErrorsController } from "../controllers/v1/crawl-errors";
2024-08-06 15:24:45 -03:00
2024-12-11 19:46:11 -03:00
/**
 * Builds an Express middleware that rejects a request with HTTP 402 when
 * `checkTeamCredits` reports that the team cannot cover the required credits.
 *
 * On success it stores the remaining credits on `req.account` and, when
 * `checkTeamCredits` returns a `chunk`, replaces `req.acuc` with it.
 *
 * @param minimum - Fixed number of credits to require. When omitted (or 0)
 *   and the request has a body, the requirement is derived per request from
 *   `req.body.limit`, then `req.body.urls.length`, falling back to 1.
 * @returns The middleware. Errors thrown while checking are forwarded to
 *   `next`.
 */
function checkCreditsMiddleware(
  minimum?: number,
): (req: RequestWithAuth, res: Response, next: NextFunction) => void {
  return (req, res, next) => {
    (async () => {
      // Resolve the requirement into a per-request local. Assigning the
      // derived value to the captured `minimum` parameter would pin the first
      // request's value for every later request handled by this middleware.
      let required = minimum;
      if (!required && req.body) {
        required =
          (req.body as any)?.limit ?? (req.body as any)?.urls?.length ?? 1;
      }

      const { success, remainingCredits, chunk } = await checkTeamCredits(
        req.acuc,
        req.auth.team_id,
        required ?? 1,
      );
      if (chunk) {
        req.acuc = chunk;
      }

      if (!success) {
        logger.error(
          `Insufficient credits: ${JSON.stringify({ team_id: req.auth.team_id, minimum: required, remainingCredits })}`,
        );
        if (!res.headersSent) {
          res.status(402).json({
            success: false,
            error:
              "Insufficient credits to perform this request. For more credits, you can upgrade your plan at https://firecrawl.dev/pricing or try changing the request limit to a lower value.",
          });
        }
        // Never hand an under-funded request to the next handler, even when
        // a response was already started elsewhere.
        return;
      }

      req.account = { remainingCredits };
      next();
    })().catch((err) => next(err));
  };
}
2024-12-11 19:46:11 -03:00
/**
 * Builds an Express middleware that authenticates the caller through
 * `authenticateUser` using the given rate limiter mode.
 *
 * On failure it answers with the status and error reported by
 * `authenticateUser` (unless a response was already started) and stops the
 * chain. On success it sets `req.auth`, `req.acuc` and, when a chunk is
 * present, `req.account`, then calls `next`.
 *
 * @param rateLimiterMode - Mode passed through to `authenticateUser`.
 * @returns The middleware. Errors thrown while authenticating are forwarded
 *   to `next`.
 */
export function authMiddleware(
  rateLimiterMode: RateLimiterMode,
): (req: RequestWithMaybeAuth, res: Response, next: NextFunction) => void {
  return function authenticate(req, res, next) {
    const run = async () => {
      const result = await authenticateUser(req, res, rateLimiterMode);

      if (!result.success) {
        // A response that is already underway cannot be replaced.
        if (res.headersSent) {
          return;
        }
        return res
          .status(result.status)
          .json({ success: false, error: result.error });
      }

      const { team_id, plan, chunk } = result;
      req.auth = { team_id, plan };
      req.acuc = chunk ?? undefined;
      if (chunk) {
        req.account = { remainingCredits: chunk.remaining_credits };
      }
      next();
    };

    run().catch((err) => next(err));
  };
}
2024-12-11 19:46:11 -03:00
/**
 * Express middleware enforcing the `x-idempotency-key` request header.
 *
 * Requests without the header pass straight through. When the header is
 * present the key is checked with `validateIdempotencyKey`; a rejected key is
 * answered with HTTP 409 and the chain stops. An accepted key is recorded via
 * `createIdempotencyKey` before the request continues.
 *
 * Errors thrown while validating or recording the key are forwarded to
 * `next`.
 */
function idempotencyMiddleware(
  req: Request,
  res: Response,
  next: NextFunction,
) {
  (async () => {
    if (req.headers["x-idempotency-key"]) {
      const isIdempotencyValid = await validateIdempotencyKey(req);
      if (!isIdempotencyValid) {
        if (!res.headersSent) {
          res
            .status(409)
            .json({ success: false, error: "Idempotency key already used" });
        }
        // A rejected key must neither be re-recorded nor reach the
        // controller, whether or not a response could still be written.
        return;
      }
      // Awaited so that a failure while recording the key is routed to
      // `next` instead of surfacing as an unhandled rejection (assumes
      // createIdempotencyKey returns a promise; awaiting a plain value is
      // harmless).
      await createIdempotencyKey(req);
    }
    next();
  })().catch((err) => next(err));
}
2024-12-11 19:46:11 -03:00
/**
 * Express middleware that refuses requests whose `body.url` is a string that
 * `isUrlBlocked` reports as blocked, answering with HTTP 403 and
 * `BLOCKLISTED_URL_MESSAGE`.
 *
 * Requests with no body, or whose `url` is not a string, pass through
 * unchanged.
 */
function blocklistMiddleware(req: Request, res: Response, next: NextFunction) {
  // Optional chaining: a request may arrive without a parsed body.
  const candidate: unknown = req.body?.url;

  if (typeof candidate === "string" && isUrlBlocked(candidate)) {
    if (!res.headersSent) {
      res.status(403).json({
        success: false,
        error: BLOCKLISTED_URL_MESSAGE,
      });
    }
    // A blocked URL never reaches the controller, even when a response was
    // already started elsewhere.
    return;
  }

  next();
}
/**
 * Adapts a promise-returning controller into an Express handler.
 *
 * The controller is invoked with the request and response; if the promise it
 * returns rejects, the rejection reason is passed to `next`.
 *
 * @param controller - Controller returning a promise.
 * @returns A handler that forwards rejections to `next` and itself returns
 *   nothing.
 */
export function wrap(
  controller: (req: Request, res: Response) => Promise<any>,
): (req: Request, res: Response, next: NextFunction) => any {
  return function wrappedController(req, res, next) {
    const pending = controller(req, res);
    pending.catch((err) => {
      next(err);
    });
  };
}
2024-08-17 01:04:14 +02:00
// Applied to a throwaway app: `v1Router.ws(...)` is used below, so this call
// is presumably what makes `.ws` available on routers (express-ws patches the
// Router prototype unless `leaveRouterUntouched` is set) — confirm against
// the express-ws documentation before changing.
expressWs(express());

/**
 * Router for the v1 HTTP and WebSocket API.
 *
 * Middleware order on each route matters: authentication runs first, then
 * (where present) the credit check, the URL blocklist and the idempotency
 * check, and finally the controller wrapped with `wrap` so that rejected
 * promises reach the Express error handler.
 */
export const v1Router = express.Router();

v1Router.post(
  "/scrape",
  authMiddleware(RateLimiterMode.Scrape),
  checkCreditsMiddleware(1),
  blocklistMiddleware,
  wrap(scrapeController),
);

v1Router.post(
  "/crawl",
  authMiddleware(RateLimiterMode.Crawl),
  checkCreditsMiddleware(),
  blocklistMiddleware,
  idempotencyMiddleware,
  wrap(crawlController),
);

v1Router.post(
  "/batch/scrape",
  authMiddleware(RateLimiterMode.Crawl),
  checkCreditsMiddleware(),
  blocklistMiddleware,
  idempotencyMiddleware,
  wrap(batchScrapeController),
);

v1Router.post(
  "/search",
  authMiddleware(RateLimiterMode.Search),
  checkCreditsMiddleware(),
  wrap(searchController),
);

v1Router.post(
  "/map",
  authMiddleware(RateLimiterMode.Map),
  checkCreditsMiddleware(1),
  blocklistMiddleware,
  wrap(mapController),
);

v1Router.get(
  "/crawl/:jobId",
  authMiddleware(RateLimiterMode.CrawlStatus),
  wrap(crawlStatusController),
);

v1Router.get(
  "/batch/scrape/:jobId",
  authMiddleware(RateLimiterMode.CrawlStatus),
  // Yes, it uses the same controller as the normal crawl status controller
  wrap((req: any, res): any => crawlStatusController(req, res, true)),
);

v1Router.get(
  "/crawl/:jobId/errors",
  authMiddleware(RateLimiterMode.CrawlStatus),
  wrap(crawlErrorsController),
);

v1Router.get(
  "/batch/scrape/:jobId/errors",
  authMiddleware(RateLimiterMode.CrawlStatus),
  wrap(crawlErrorsController),
);

v1Router.get(
  "/scrape/:jobId",
  authMiddleware(RateLimiterMode.CrawlStatus),
  wrap(scrapeStatusController),
);

v1Router.get(
  "/concurrency-check",
  authMiddleware(RateLimiterMode.CrawlStatus),
  wrap(concurrencyCheckController),
);

v1Router.ws("/crawl/:jobId", crawlStatusWSController);

v1Router.post(
  "/extract",
  authMiddleware(RateLimiterMode.Extract),
  checkCreditsMiddleware(1),
  wrap(extractController),
);

v1Router.get(
  "/extract/:jobId",
  authMiddleware(RateLimiterMode.ExtractStatus),
  wrap(extractStatusController),
);

// v1Router.post("/crawlWebsitePreview", crawlPreviewController);

v1Router.delete(
  "/crawl/:jobId",
  authMiddleware(RateLimiterMode.CrawlStatus),
  // Wrapped like every other controller so that a rejected promise is passed
  // to the Express error handler instead of going unhandled.
  wrap(crawlCancelController),
);

// v1Router.get("/checkJobStatus/:jobId", crawlJobStatusPreviewController);

// // Auth route for key based authentication
// v1Router.get("/keyAuth", keyAuthController);

// // Search routes
// v0Router.post("/search", searchController);

// Health/Probe routes
// v1Router.get("/health/liveness", livenessController);
// v1Router.get("/health/readiness", readinessController);

v1Router.get(
  "/team/credit-usage",
  authMiddleware(RateLimiterMode.CrawlStatus),
  wrap(creditUsageController),
);