Nick: from bulk to batch
This commit is contained in:
+6
-6
@@ -1,8 +1,8 @@
|
||||
import { Response } from "express";
|
||||
import { v4 as uuidv4 } from "uuid";
|
||||
import {
|
||||
BulkScrapeRequest,
|
||||
bulkScrapeRequestSchema,
|
||||
BatchScrapeRequest,
|
||||
batchScrapeRequestSchema,
|
||||
CrawlResponse,
|
||||
legacyScrapeOptions,
|
||||
RequestWithAuth,
|
||||
@@ -17,11 +17,11 @@ import { logCrawl } from "../../services/logging/crawl_log";
|
||||
import { getScrapeQueue } from "../../services/queue-service";
|
||||
import { getJobPriority } from "../../lib/job-priority";
|
||||
|
||||
export async function bulkScrapeController(
|
||||
req: RequestWithAuth<{}, CrawlResponse, BulkScrapeRequest>,
|
||||
export async function batchScrapeController(
|
||||
req: RequestWithAuth<{}, CrawlResponse, BatchScrapeRequest>,
|
||||
res: Response<CrawlResponse>
|
||||
) {
|
||||
req.body = bulkScrapeRequestSchema.parse(req.body);
|
||||
req.body = batchScrapeRequestSchema.parse(req.body);
|
||||
|
||||
const id = uuidv4();
|
||||
|
||||
@@ -92,7 +92,7 @@ export async function bulkScrapeController(
|
||||
return res.status(200).json({
|
||||
success: true,
|
||||
id,
|
||||
url: `${protocol}://${req.get("host")}/v1/bulk/scrape/${id}`,
|
||||
url: `${protocol}://${req.get("host")}/v1/batch/scrape/${id}`,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -44,7 +44,7 @@ export async function getJobs(ids: string[]) {
|
||||
return jobs;
|
||||
}
|
||||
|
||||
export async function crawlStatusController(req: RequestWithAuth<CrawlStatusParams, undefined, CrawlStatusResponse>, res: Response<CrawlStatusResponse>) {
|
||||
export async function crawlStatusController(req: RequestWithAuth<CrawlStatusParams, undefined, CrawlStatusResponse>, res: Response<CrawlStatusResponse>, isBatch = false) {
|
||||
const sc = await getCrawl(req.params.jobId);
|
||||
if (!sc) {
|
||||
return res.status(404).json({ success: false, error: "Job not found" });
|
||||
@@ -113,7 +113,7 @@ export async function crawlStatusController(req: RequestWithAuth<CrawlStatusPara
|
||||
const data = doneJobs.map(x => x.returnvalue);
|
||||
|
||||
const protocol = process.env.ENV === "local" ? req.protocol : "https";
|
||||
const nextURL = new URL(`${protocol}://${req.get("host")}/v1/crawl/${req.params.jobId}`);
|
||||
const nextURL = new URL(`${protocol}://${req.get("host")}/v1/${isBatch ? "batch/scrape" : "crawl"}/${req.params.jobId}`);
|
||||
|
||||
nextURL.searchParams.set("skip", (start + data.length).toString());
|
||||
|
||||
|
||||
@@ -144,7 +144,7 @@ export const scrapeRequestSchema = scrapeOptions.extend({
|
||||
|
||||
export type ScrapeRequest = z.infer<typeof scrapeRequestSchema>;
|
||||
|
||||
export const bulkScrapeRequestSchema = scrapeOptions.extend({
|
||||
export const batchScrapeRequestSchema = scrapeOptions.extend({
|
||||
urls: url.array(),
|
||||
origin: z.string().optional().default("api"),
|
||||
}).strict(strictMessage).refine(
|
||||
@@ -163,7 +163,7 @@ export const bulkScrapeRequestSchema = scrapeOptions.extend({
|
||||
return obj;
|
||||
});
|
||||
|
||||
export type BulkScrapeRequest = z.infer<typeof bulkScrapeRequestSchema>;
|
||||
export type BatchScrapeRequest = z.infer<typeof batchScrapeRequestSchema>;
|
||||
|
||||
const crawlerOptions = z.object({
|
||||
includePaths: z.string().array().default([]),
|
||||
|
||||
@@ -17,7 +17,7 @@ import { crawlCancelController } from "../controllers/v1/crawl-cancel";
|
||||
import { Logger } from "../lib/logger";
|
||||
import { scrapeStatusController } from "../controllers/v1/scrape-status";
|
||||
import { concurrencyCheckController } from "../controllers/v1/concurrency-check";
|
||||
import { bulkScrapeController } from "../controllers/v1/bulk-scrape";
|
||||
import { batchScrapeController } from "../controllers/v1/batch-scrape";
|
||||
// import { crawlPreviewController } from "../../src/controllers/v1/crawlPreview";
|
||||
// import { crawlJobStatusPreviewController } from "../../src/controllers/v1/status";
|
||||
// import { searchController } from "../../src/controllers/v1/search";
|
||||
@@ -124,12 +124,12 @@ v1Router.post(
|
||||
);
|
||||
|
||||
v1Router.post(
|
||||
"/bulk/scrape",
|
||||
"/batch/scrape",
|
||||
authMiddleware(RateLimiterMode.Crawl),
|
||||
checkCreditsMiddleware(),
|
||||
blocklistMiddleware,
|
||||
idempotencyMiddleware,
|
||||
wrap(bulkScrapeController)
|
||||
wrap(batchScrapeController)
|
||||
);
|
||||
|
||||
v1Router.post(
|
||||
@@ -147,9 +147,10 @@ v1Router.get(
|
||||
);
|
||||
|
||||
v1Router.get(
|
||||
"/bulk/scrape/:jobId",
|
||||
"/batch/scrape/:jobId",
|
||||
authMiddleware(RateLimiterMode.CrawlStatus),
|
||||
wrap(crawlStatusController)
|
||||
// Batch scrape status intentionally reuses the crawl status controller; passing isBatch=true makes its next URL point at /v1/batch/scrape instead of /v1/crawl
|
||||
wrap((req:any, res):any => crawlStatusController(req, res, true))
|
||||
);
|
||||
|
||||
v1Router.get(
|
||||
|
||||
Reference in New Issue
Block a user