v1 restructure

This commit is contained in:
Gergő Móricz
2024-08-15 21:51:59 +02:00
parent af08ab0b1a
commit 4165de1773
21 changed files with 211 additions and 133 deletions
@@ -1,6 +1,6 @@
import { crawlController } from '../crawl' import { crawlController } from '../v0/crawl'
import { Request, Response } from 'express'; import { Request, Response } from 'express';
import { authenticateUser } from '../auth'; // Ensure this import is correct import { authenticateUser } from '../v0/auth'; // Ensure this import is correct
import { createIdempotencyKey } from '../../services/idempotency/create'; import { createIdempotencyKey } from '../../services/idempotency/create';
import { validateIdempotencyKey } from '../../services/idempotency/validate'; import { validateIdempotencyKey } from '../../services/idempotency/validate';
import { v4 as uuidv4 } from 'uuid'; import { v4 as uuidv4 } from 'uuid';
@@ -1,9 +1,9 @@
import { Request, Response } from "express"; import { Request, Response } from "express";
import { Job } from "bullmq"; import { Job } from "bullmq";
import { Logger } from "../../lib/logger"; import { Logger } from "../../../lib/logger";
import { getScrapeQueue } from "../../services/queue-service"; import { getScrapeQueue } from "../../../services/queue-service";
import { checkAlerts } from "../../services/alerts"; import { checkAlerts } from "../../../services/alerts";
export async function cleanBefore24hCompleteJobsController( export async function cleanBefore24hCompleteJobsController(
req: Request, req: Request,
@@ -1,7 +1,7 @@
import { Request, Response } from "express"; import { Request, Response } from "express";
import Redis from "ioredis"; import Redis from "ioredis";
import { Logger } from "../../lib/logger"; import { Logger } from "../../../lib/logger";
import { redisRateLimitClient } from "../../services/rate-limiter"; import { redisRateLimitClient } from "../../../services/rate-limiter";
export async function redisHealthController(req: Request, res: Response) { export async function redisHealthController(req: Request, res: Response) {
const retryOperation = async (operation, retries = 3) => { const retryOperation = async (operation, retries = 3) => {
@@ -1,19 +1,19 @@
import { parseApi } from "../../src/lib/parseApi"; import { parseApi } from "../../../src/lib/parseApi";
import { getRateLimiter } from "../../src/services/rate-limiter"; import { getRateLimiter } from "../../../src/services/rate-limiter";
import { import {
AuthResponse, AuthResponse,
NotificationType, NotificationType,
RateLimiterMode, RateLimiterMode,
} from "../../src/types"; } from "../../../src/types";
import { supabase_service } from "../../src/services/supabase"; import { supabase_service } from "../../../src/services/supabase";
import { withAuth } from "../../src/lib/withAuth"; import { withAuth } from "../../../src/lib/withAuth";
import { RateLimiterRedis } from "rate-limiter-flexible"; import { RateLimiterRedis } from "rate-limiter-flexible";
import { setTraceAttributes } from "@hyperdx/node-opentelemetry"; import { setTraceAttributes } from "@hyperdx/node-opentelemetry";
import { sendNotification } from "../services/notification/email_notification"; import { sendNotification } from "../../services/notification/email_notification";
import { Logger } from "../lib/logger"; import { Logger } from "../../lib/logger";
import { redlock } from "../../src/services/redlock"; import { redlock } from "../../../src/services/redlock";
import { getValue } from "../../src/services/redis"; import { getValue } from "../../../src/services/redis";
import { setValue } from "../../src/services/redis"; import { setValue } from "../../../src/services/redis";
import { validate } from "uuid"; import { validate } from "uuid";
function normalizedApiIsUuid(potentialUuid: string): boolean { function normalizedApiIsUuid(potentialUuid: string): boolean {
@@ -1,9 +1,9 @@
import { Request, Response } from "express"; import { Request, Response } from "express";
import { authenticateUser } from "./auth"; import { authenticateUser } from "./auth";
import { RateLimiterMode } from "../../src/types"; import { RateLimiterMode } from "../../../src/types";
import { supabase_service } from "../../src/services/supabase"; import { supabase_service } from "../../../src/services/supabase";
import { Logger } from "../../src/lib/logger"; import { Logger } from "../../../src/lib/logger";
import { getCrawl, saveCrawl } from "../../src/lib/crawl-redis"; import { getCrawl, saveCrawl } from "../../../src/lib/crawl-redis";
export async function crawlCancelController(req: Request, res: Response) { export async function crawlCancelController(req: Request, res: Response) {
try { try {
@@ -1,10 +1,10 @@
import { Request, Response } from "express"; import { Request, Response } from "express";
import { authenticateUser } from "./auth"; import { authenticateUser } from "./auth";
import { RateLimiterMode } from "../../src/types"; import { RateLimiterMode } from "../../../src/types";
import { getScrapeQueue } from "../../src/services/queue-service"; import { getScrapeQueue } from "../../../src/services/queue-service";
import { Logger } from "../../src/lib/logger"; import { Logger } from "../../../src/lib/logger";
import { getCrawl, getCrawlJobs } from "../../src/lib/crawl-redis"; import { getCrawl, getCrawlJobs } from "../../../src/lib/crawl-redis";
import { supabaseGetJobById } from "../../src/lib/supabase-jobs"; import { supabaseGetJobById } from "../../../src/lib/supabase-jobs";
export async function crawlStatusController(req: Request, res: Response) { export async function crawlStatusController(req: Request, res: Response) {
try { try {
@@ -1,18 +1,18 @@
import { Request, Response } from "express"; import { Request, Response } from "express";
import { checkTeamCredits } from "../../src/services/billing/credit_billing"; import { checkTeamCredits } from "../../../src/services/billing/credit_billing";
import { authenticateUser } from "./auth"; import { authenticateUser } from "./auth";
import { RateLimiterMode } from "../../src/types"; import { RateLimiterMode } from "../../../src/types";
import { addScrapeJob } from "../../src/services/queue-jobs"; import { addScrapeJob } from "../../../src/services/queue-jobs";
import { isUrlBlocked } from "../../src/scraper/WebScraper/utils/blocklist"; import { isUrlBlocked } from "../../../src/scraper/WebScraper/utils/blocklist";
import { logCrawl } from "../../src/services/logging/crawl_log"; import { logCrawl } from "../../../src/services/logging/crawl_log";
import { validateIdempotencyKey } from "../../src/services/idempotency/validate"; import { validateIdempotencyKey } from "../../../src/services/idempotency/validate";
import { createIdempotencyKey } from "../../src/services/idempotency/create"; import { createIdempotencyKey } from "../../../src/services/idempotency/create";
import { defaultCrawlPageOptions, defaultCrawlerOptions, defaultOrigin } from "../../src/lib/default-values"; import { defaultCrawlPageOptions, defaultCrawlerOptions, defaultOrigin } from "../../../src/lib/default-values";
import { v4 as uuidv4 } from "uuid"; import { v4 as uuidv4 } from "uuid";
import { Logger } from "../../src/lib/logger"; import { Logger } from "../../../src/lib/logger";
import { addCrawlJob, addCrawlJobs, crawlToCrawler, lockURL, lockURLs, saveCrawl, StoredCrawl } from "../../src/lib/crawl-redis"; import { addCrawlJob, addCrawlJobs, crawlToCrawler, lockURL, lockURLs, saveCrawl, StoredCrawl } from "../../../src/lib/crawl-redis";
import { getScrapeQueue } from "../../src/services/queue-service"; import { getScrapeQueue } from "../../../src/services/queue-service";
import { checkAndUpdateURL } from "../../src/lib/validateUrl"; import { checkAndUpdateURL } from "../../../src/lib/validateUrl";
export async function crawlController(req: Request, res: Response) { export async function crawlController(req: Request, res: Response) {
try { try {
@@ -1,12 +1,12 @@
import { Request, Response } from "express"; import { Request, Response } from "express";
import { authenticateUser } from "./auth"; import { authenticateUser } from "./auth";
import { RateLimiterMode } from "../../src/types"; import { RateLimiterMode } from "../../../src/types";
import { isUrlBlocked } from "../../src/scraper/WebScraper/utils/blocklist"; import { isUrlBlocked } from "../../../src/scraper/WebScraper/utils/blocklist";
import { v4 as uuidv4 } from "uuid"; import { v4 as uuidv4 } from "uuid";
import { Logger } from "../../src/lib/logger"; import { Logger } from "../../../src/lib/logger";
import { addCrawlJob, crawlToCrawler, lockURL, saveCrawl, StoredCrawl } from "../../src/lib/crawl-redis"; import { addCrawlJob, crawlToCrawler, lockURL, saveCrawl, StoredCrawl } from "../../../src/lib/crawl-redis";
import { addScrapeJob } from "../../src/services/queue-jobs"; import { addScrapeJob } from "../../../src/services/queue-jobs";
import { checkAndUpdateURL } from "../../src/lib/validateUrl"; import { checkAndUpdateURL } from "../../../src/lib/validateUrl";
export async function crawlPreviewController(req: Request, res: Response) { export async function crawlPreviewController(req: Request, res: Response) {
try { try {
@@ -1,5 +1,5 @@
import { AuthResponse, RateLimiterMode } from "../types"; import { AuthResponse, RateLimiterMode } from "../../types";
import { Request, Response } from "express"; import { Request, Response } from "express";
import { authenticateUser } from "./auth"; import { authenticateUser } from "./auth";
@@ -1,17 +1,17 @@
import { ExtractorOptions, PageOptions } from './../lib/entities'; import { ExtractorOptions, PageOptions } from './../../lib/entities';
import { Request, Response } from "express"; import { Request, Response } from "express";
import { billTeam, checkTeamCredits } from "../services/billing/credit_billing"; import { billTeam, checkTeamCredits } from "../../services/billing/credit_billing";
import { authenticateUser } from "./auth"; import { authenticateUser } from "./auth";
import { RateLimiterMode } from "../types"; import { RateLimiterMode } from "../../types";
import { logJob } from "../services/logging/log_job"; import { logJob } from "../../services/logging/log_job";
import { Document } from "../lib/entities"; import { Document } from "../../lib/entities";
import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist"; // Import the isUrlBlocked function import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist"; // Import the isUrlBlocked function
import { numTokensFromString } from '../lib/LLM-extraction/helpers'; import { numTokensFromString } from '../../lib/LLM-extraction/helpers';
import { defaultPageOptions, defaultExtractorOptions, defaultTimeout, defaultOrigin } from '../lib/default-values'; import { defaultPageOptions, defaultExtractorOptions, defaultTimeout, defaultOrigin } from '../../lib/default-values';
import { addScrapeJob } from '../services/queue-jobs'; import { addScrapeJob } from '../../services/queue-jobs';
import { scrapeQueueEvents } from '../services/queue-service'; import { scrapeQueueEvents } from '../../services/queue-service';
import { v4 as uuidv4 } from "uuid"; import { v4 as uuidv4 } from "uuid";
import { Logger } from '../lib/logger'; import { Logger } from '../../lib/logger';
export async function scrapeHelper( export async function scrapeHelper(
jobId: string, jobId: string,
@@ -1,15 +1,15 @@
import { Request, Response } from "express"; import { Request, Response } from "express";
import { WebScraperDataProvider } from "../scraper/WebScraper"; import { WebScraperDataProvider } from "../../scraper/WebScraper";
import { billTeam, checkTeamCredits } from "../services/billing/credit_billing"; import { billTeam, checkTeamCredits } from "../../services/billing/credit_billing";
import { authenticateUser } from "./auth"; import { authenticateUser } from "./auth";
import { RateLimiterMode } from "../types"; import { RateLimiterMode } from "../../types";
import { logJob } from "../services/logging/log_job"; import { logJob } from "../../services/logging/log_job";
import { PageOptions, SearchOptions } from "../lib/entities"; import { PageOptions, SearchOptions } from "../../lib/entities";
import { search } from "../search"; import { search } from "../../search";
import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist"; import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist";
import { v4 as uuidv4 } from "uuid"; import { v4 as uuidv4 } from "uuid";
import { Logger } from "../lib/logger"; import { Logger } from "../../lib/logger";
import { getScrapeQueue, scrapeQueueEvents } from "../services/queue-service"; import { getScrapeQueue, scrapeQueueEvents } from "../../services/queue-service";
export async function searchHelper( export async function searchHelper(
jobId: string, jobId: string,
@@ -1,8 +1,8 @@
import { Request, Response } from "express"; import { Request, Response } from "express";
import { Logger } from "../../src/lib/logger"; import { Logger } from "../../../src/lib/logger";
import { getCrawl, getCrawlJobs } from "../../src/lib/crawl-redis"; import { getCrawl, getCrawlJobs } from "../../../src/lib/crawl-redis";
import { getScrapeQueue } from "../../src/services/queue-service"; import { getScrapeQueue } from "../../../src/services/queue-service";
import { supabaseGetJobById } from "../../src/lib/supabase-jobs"; import { supabaseGetJobById } from "../../../src/lib/supabase-jobs";
export async function crawlJobStatusPreviewController(req: Request, res: Response) { export async function crawlJobStatusPreviewController(req: Request, res: Response) {
try { try {
@@ -1,9 +1,6 @@
import { Request, Response } from "express"; import { Request, Response } from "express";
import { authenticateUser } from "./auth"; import { authenticateUser } from "./auth";
import { RateLimiterMode } from "../../../src/types"; import { RateLimiterMode } from "../../../src/types";
import { addWebScraperJob } from "../../../src/services/queue-jobs";
import { getWebScraperQueue } from "../../../src/services/queue-service";
import { supabaseGetJobById } from "../../../src/lib/supabase-jobs";
import { Logger } from "../../../src/lib/logger"; import { Logger } from "../../../src/lib/logger";
import { v4 as uuidv4 } from "uuid"; import { v4 as uuidv4 } from "uuid";
+16 -15
View File
@@ -1,20 +1,16 @@
import { Request, Response } from "express"; import { Request, Response } from "express";
import { WebScraperDataProvider } from "../../../src/scraper/WebScraper";
import { billTeam } from "../../../src/services/billing/credit_billing";
import { checkTeamCredits } from "../../../src/services/billing/credit_billing"; import { checkTeamCredits } from "../../../src/services/billing/credit_billing";
import { authenticateUser } from "./auth"; import { authenticateUser } from "./auth";
import { RateLimiterMode } from "../../../src/types"; import { RateLimiterMode } from "../../../src/types";
import { addWebScraperJob } from "../../../src/services/queue-jobs";
import { isUrlBlocked } from "../../../src/scraper/WebScraper/utils/blocklist"; import { isUrlBlocked } from "../../../src/scraper/WebScraper/utils/blocklist";
import { logCrawl } from "../../../src/services/logging/crawl_log";
import { validateIdempotencyKey } from "../../../src/services/idempotency/validate"; import { validateIdempotencyKey } from "../../../src/services/idempotency/validate";
import { createIdempotencyKey } from "../../../src/services/idempotency/create"; import { createIdempotencyKey } from "../../../src/services/idempotency/create";
import { defaultCrawlPageOptions, defaultCrawlerOptions, defaultOrigin } from "../../../src/lib/default-values";
import { v4 as uuidv4 } from "uuid"; import { v4 as uuidv4 } from "uuid";
import { Logger } from "../../../src/lib/logger"; import { Logger } from "../../../src/lib/logger";
import { checkAndUpdateURL } from "../../../src/lib/validateUrl"; import { checkAndUpdateURL } from "../../../src/lib/validateUrl";
import { CrawlRequest, CrawlResponse } from "./types";
export async function crawlController(req: Request, res: Response) { export async function crawlController(req: Request<{}, {}, CrawlRequest>, res: Response<CrawlResponse>) {
// expected req.body // expected req.body
// req.body = { // req.body = {
@@ -39,52 +35,57 @@ export async function crawlController(req: Request, res: Response) {
RateLimiterMode.Crawl RateLimiterMode.Crawl
); );
if (!success) { if (!success) {
return res.status(status).json({ error }); return res.status(status).json({ success: false, error });
} }
if (req.headers["x-idempotency-key"]) { if (req.headers["x-idempotency-key"]) {
const isIdempotencyValid = await validateIdempotencyKey(req); const isIdempotencyValid = await validateIdempotencyKey(req);
if (!isIdempotencyValid) { if (!isIdempotencyValid) {
return res.status(409).json({ error: "Idempotency key already used" }); return res.status(409).json({ success: false, error: "Idempotency key already used" });
} }
try { try {
createIdempotencyKey(req); createIdempotencyKey(req);
} catch (error) { } catch (error) {
Logger.error(error); Logger.error(error);
return res.status(500).json({ error: error.message }); return res.status(500).json({ success: false, error: error.message });
} }
} }
const { success: creditsCheckSuccess, message: creditsCheckMessage } = const { success: creditsCheckSuccess, message: creditsCheckMessage } =
await checkTeamCredits(team_id, 1); await checkTeamCredits(team_id, 1);
if (!creditsCheckSuccess) { if (!creditsCheckSuccess) {
return res.status(402).json({ error: "Insufficient credits" }); return res.status(402).json({ success: false, error: "Insufficient credits" });
} }
let url = req.body.url; let url = req.body.url;
if (!url) { if (!url) {
return res.status(400).json({ error: "Url is required" }); return res.status(400).json({ success: false, error: "Url is required" });
} }
if (isUrlBlocked(url)) { if (isUrlBlocked(url)) {
return res return res
.status(403) .status(403)
.json({ .json({
success: false,
error: error:
"Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.", "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.",
}); });
} }
try { try {
url = checkAndUpdateURL(url); url = checkAndUpdateURL(url).url;
} catch (error) { } catch (error) {
return res.status(400).json({ error: 'Invalid Url' }); return res.status(400).json({ success: false, error: 'Invalid Url' });
} }
// TODO: add job to queue // TODO: add job to queue
const id = uuidv4(); const id = uuidv4();
return res.status(200).json({ jobId: id, url: `${req.protocol}://${req.get('host')}/v1/crawl/${id}` }); return res.status(200).json({
success: true,
id,
url: `${req.protocol}://${req.get('host')}/v1/crawl/${id}`,
});
// const mode = req.body.mode ?? "crawl"; // const mode = req.body.mode ?? "crawl";
@@ -134,6 +135,6 @@ export async function crawlController(req: Request, res: Response) {
// res.json({ jobId: job.id }); // res.json({ jobId: job.id });
} catch (error) { } catch (error) {
Logger.error(error); Logger.error(error);
return res.status(500).json({ error: error.message }); return res.status(500).json({ success: false, error: error.message });
} }
} }
+10 -19
View File
@@ -1,29 +1,19 @@
import { Request, Response } from "express"; import { Request, Response } from "express";
import { WebScraperDataProvider } from "../../../src/scraper/WebScraper";
import { billTeam } from "../../../src/services/billing/credit_billing";
import { checkTeamCredits } from "../../../src/services/billing/credit_billing";
import { authenticateUser } from "./auth"; import { authenticateUser } from "./auth";
import { RateLimiterMode } from "../../../src/types"; import { RateLimiterMode } from "../../../src/types";
import { addWebScraperJob } from "../../../src/services/queue-jobs";
import { isUrlBlocked } from "../../../src/scraper/WebScraper/utils/blocklist"; import { isUrlBlocked } from "../../../src/scraper/WebScraper/utils/blocklist";
import { logCrawl } from "../../../src/services/logging/crawl_log";
import { validateIdempotencyKey } from "../../../src/services/idempotency/validate";
import { createIdempotencyKey } from "../../../src/services/idempotency/create";
import { defaultCrawlPageOptions, defaultCrawlerOptions, defaultOrigin } from "../../../src/lib/default-values";
import { v4 as uuidv4 } from "uuid";
import { Logger } from "../../../src/lib/logger"; import { Logger } from "../../../src/lib/logger";
import { checkAndUpdateURL } from "../../../src/lib/validateUrl"; import { checkAndUpdateURL } from "../../../src/lib/validateUrl";
import { MapRequest, MapResponse } from "./types";
export async function mapController(req: Request, res: Response) { export async function mapController(req: Request<{}, MapResponse, MapRequest>, res: Response<MapResponse>) {
// expected req.body // expected req.body
// req.body = { // req.body = {
// url: string // url: string
// ignoreSitemap: true?? // crawlerOptions:
// other crawler options?
// } // }
try { try {
const { success, team_id, error, status } = await authenticateUser( const { success, team_id, error, status } = await authenticateUser(
req, req,
@@ -31,7 +21,7 @@ export async function mapController(req: Request, res: Response) {
RateLimiterMode.Crawl RateLimiterMode.Crawl
); );
if (!success) { if (!success) {
return res.status(status).json({ error }); return res.status(status).json({ success: false, error });
} }
// if (req.headers["x-idempotency-key"]) { // if (req.headers["x-idempotency-key"]) {
@@ -55,25 +45,26 @@ export async function mapController(req: Request, res: Response) {
let url = req.body.url; let url = req.body.url;
if (!url) { if (!url) {
return res.status(400).json({ error: "Url is required" }); return res.status(400).json({ success: false, error: "Url is required" });
} }
if (isUrlBlocked(url)) { if (isUrlBlocked(url)) {
return res return res
.status(403) .status(403)
.json({ .json({
success: false,
error: error:
"Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.", "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.",
}); });
} }
try { try {
url = checkAndUpdateURL(url); url = checkAndUpdateURL(url).url;
} catch (error) { } catch (error) {
return res.status(400).json({ error: 'Invalid Url' }); return res.status(400).json({ success: false, error: 'Invalid Url' });
} }
return res.status(200).json({ urls: [ "test1", "test2" ] }); return res.status(200).json({ success: true, links: [ "test1", "test2" ] });
// const mode = req.body.mode ?? "crawl"; // const mode = req.body.mode ?? "crawl";
@@ -123,6 +114,6 @@ export async function mapController(req: Request, res: Response) {
// res.json({ jobId: job.id }); // res.json({ jobId: job.id });
} catch (error) { } catch (error) {
Logger.error(error); Logger.error(error);
return res.status(500).json({ error: error.message }); return res.status(500).json({ success: false, error: error.message });
} }
} }
+7 -15
View File
@@ -1,19 +1,12 @@
// import { ExtractorOptions, PageOptions } from './../../lib/entities';
import { Request, Response } from "express"; import { Request, Response } from "express";
// import { WebScraperDataProvider } from "../../scraper/WebScraper";
// import { billTeam, checkTeamCredits } from "../../services/billing/credit_billing";
import { authenticateUser } from "./auth"; import { authenticateUser } from "./auth";
import { RateLimiterMode } from "../../types"; import { RateLimiterMode } from "../../types";
// import { logJob } from "../../services/logging/log_job";
// import { Document } from "../../lib/entities";
import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist"; // Import the isUrlBlocked function import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist"; // Import the isUrlBlocked function
// import { numTokensFromString } from '../../lib/LLM-extraction/helpers';
// import { defaultPageOptions, defaultExtractorOptions, defaultTimeout, defaultOrigin } from '../../../src/lib/default-values';
// import { v4 as uuidv4 } from "uuid";
import { Logger } from '../../lib/logger'; import { Logger } from '../../lib/logger';
import { checkAndUpdateURL } from '../../lib/validateUrl'; import { checkAndUpdateURL } from '../../lib/validateUrl';
import { ScrapeRequest, ScrapeResponse } from "./types";
export async function scrapeController(req: Request, res: Response) { export async function scrapeController(req: Request<{}, ScrapeResponse, ScrapeRequest>, res: Response<ScrapeResponse>) {
let url = req.body.url; let url = req.body.url;
if (!url) { if (!url) {
return { success: false, error: "Url is required", returnCode: 400 }; return { success: false, error: "Url is required", returnCode: 400 };
@@ -24,7 +17,7 @@ export async function scrapeController(req: Request, res: Response) {
} }
try { try {
url = checkAndUpdateURL(url); url = checkAndUpdateURL(url).url;
} catch (error) { } catch (error) {
return { success: false, error: "Invalid URL", returnCode: 400 }; return { success: false, error: "Invalid URL", returnCode: 400 };
} }
@@ -53,20 +46,19 @@ export async function scrapeController(req: Request, res: Response) {
RateLimiterMode.Scrape RateLimiterMode.Scrape
); );
if (!success) { if (!success) {
return res.status(status).json({ error }); return res.status(status).json({ success: false, error });
} }
// check credits // check credits
const result = { const result: ScrapeResponse = {
success: true, success: true,
warning: "test", warning: "test",
data: { data: {
markdown: "test", markdown: "test",
content: "test",
html: "test", html: "test",
rawHtml: "test", rawHtml: "test",
linksOnPage: ["test1", "test2"], links: ["test1", "test2"],
screenshot: "test", screenshot: "test",
metadata: { metadata: {
title: "test", title: "test",
@@ -174,7 +166,7 @@ export async function scrapeController(req: Request, res: Response) {
// return res.status(result.returnCode).json(result); // return res.status(result.returnCode).json(result);
} catch (error) { } catch (error) {
Logger.error(error); Logger.error(error);
return res.status(500).json({ error: error.message }); return res.status(500).json({ success: false, error: error.message });
} }
} }
+97
View File
@@ -0,0 +1,97 @@
/**
 * Output representations that can be listed in `ScrapeRequest.formats`.
 */
export type Format =
  | "markdown"
  | "html"
  | "rawHtml"
  | "links"
  | "screenshot"
  | "screenshot@fullPage";
/**
 * Request body of the v1 scrape endpoint.
 *
 * Only `url` is mandatory; every other field is an optional tuning knob.
 */
export type ScrapeRequest = {
  /** Address of the page to scrape. */
  url: string;
  /** Output representations requested for the page. */
  formats?: Format[];
  /** String-to-string map of headers. */
  headers?: Record<string, string>;
  /** Tags to include. */
  includeTags?: string[];
  /** Tags to exclude. */
  excludeTags?: string[];
  onlyMainContent?: boolean;
  /** NOTE(review): unit is not stated in this file (presumably milliseconds) — confirm. */
  timeout?: number;
  /** NOTE(review): unit is not stated in this file (presumably milliseconds) — confirm. */
  waitFor?: number;
};
/**
 * Optional crawl-tuning settings shared by `CrawlRequest` and `MapRequest`.
 * Every field may be omitted.
 */
export type CrawlerOptions = {
  /** Paths to include. */
  includePaths?: string[];
  /** Paths to exclude. */
  excludePaths?: string[];
  maxDepth?: number;
  limit?: number;
  // TODO: this option name is flagged for a possible rename.
  allowBackwardLinks?: boolean;
  allowExternalLinks?: boolean;
  ignoreSitemap?: boolean;
};
/**
 * Request body of the v1 crawl endpoint.
 */
export type CrawlRequest = {
  /** Address the crawl starts from. */
  url: string;
  /** Optional crawl-tuning settings. */
  crawlerOptions?: CrawlerOptions;
  /**
   * Scrape settings for the crawl: a `ScrapeRequest` without its own `url`,
   * since the crawl's start address is given by the top-level `url`.
   *
   * `Omit` is required here: `Exclude<ScrapeRequest, "url">` only filters
   * union members and would leave `ScrapeRequest` (with its mandatory `url`)
   * untouched.
   */
  scrapeOptions?: Omit<ScrapeRequest, "url">;
};
/**
 * Request body of the v1 map endpoint.
 */
export type MapRequest = {
  /** Address of the site to map. */
  url: string;
  /** Optional crawl-tuning settings. */
  crawlerOptions?: CrawlerOptions;
};
/**
 * A scraped page as carried in the `data` field of a successful
 * `ScrapeResponse`.
 *
 * Each content representation is optional; `metadata` is always present,
 * although every field inside it is optional.
 */
export type Document = {
  // Content representations.
  markdown?: string;
  html?: string;
  rawHtml?: string;
  links?: string[];
  screenshot?: string;

  metadata: {
    // General page information.
    title?: string;
    description?: string;
    language?: string;
    keywords?: string;
    robots?: string;

    // `og*` fields (presumably Open Graph tags — confirm against the extractor).
    ogTitle?: string;
    ogDescription?: string;
    ogUrl?: string;
    ogImage?: string;
    ogAudio?: string;
    ogDeterminer?: string;
    ogLocale?: string;
    ogLocaleAlternate?: string[];
    ogSiteName?: string;
    ogVideo?: string;

    // `dc*` fields (presumably Dublin Core terms — confirm against the extractor).
    dcTermsCreated?: string;
    dcDateCreated?: string;
    dcDate?: string;
    dcTermsType?: string;
    dcType?: string;
    dcTermsAudience?: string;
    dcTermsSubject?: string;
    dcSubject?: string;
    dcDescription?: string;
    dcTermsKeywords?: string;

    // Article-level fields.
    modifiedTime?: string;
    publishedTime?: string;
    articleTag?: string;
    articleSection?: string;

    // Scrape bookkeeping.
    sourceURL?: string;
    statusCode?: number;
    error?: string;
  };
};
/**
 * Failure payload: `success` is the literal `false` and `error` carries the
 * message text.
 */
export type ErrorResponse = {
  /** Always `false`. */
  success: false;
  /** Error message. */
  error: string;
};
/**
 * Response body of the v1 scrape endpoint: either an `ErrorResponse` or a
 * success payload carrying the scraped `Document`.
 */
export type ScrapeResponse =
  | ErrorResponse
  | {
      success: true;
      /** Optional warning text accompanying a successful scrape. */
      warning?: string;
      /** The scraped page. */
      data: Document;
    };
/**
 * Response body of the v1 crawl endpoint: either an `ErrorResponse` or a
 * success payload identifying the crawl.
 */
export type CrawlResponse =
  | ErrorResponse
  | {
      success: true;
      /** Identifier of the crawl. */
      id: string;
      /** URL associated with the crawl (the v1 controller sends the `/v1/crawl/{id}` address). */
      url: string;
    };
/**
 * Response body of the v1 map endpoint: either an `ErrorResponse` or a
 * success payload listing links.
 */
export type MapResponse =
  | ErrorResponse
  | {
      success: true;
      /** The links returned for the mapped site. */
      links: string[];
    };
+2 -2
View File
@@ -1,10 +1,10 @@
import express from "express"; import express from "express";
import { redisHealthController } from "../controllers/admin/redis-health"; import { redisHealthController } from "../controllers/v0/admin/redis-health";
import { import {
checkQueuesController, checkQueuesController,
cleanBefore24hCompleteJobsController, cleanBefore24hCompleteJobsController,
queuesController, queuesController,
} from "../controllers/admin/queue"; } from "../controllers/v0/admin/queue";
export const adminRouter = express.Router(); export const adminRouter = express.Router();
+10 -10
View File
@@ -1,14 +1,14 @@
import express from "express"; import express from "express";
import { crawlController } from "../../src/controllers/crawl"; import { crawlController } from "../../src/controllers/v0/crawl";
import { crawlStatusController } from "../../src/controllers/crawl-status"; import { crawlStatusController } from "../../src/controllers/v0/crawl-status";
import { scrapeController } from "../../src/controllers/scrape"; import { scrapeController } from "../../src/controllers/v0/scrape";
import { crawlPreviewController } from "../../src/controllers/crawlPreview"; import { crawlPreviewController } from "../../src/controllers/v0/crawlPreview";
import { crawlJobStatusPreviewController } from "../../src/controllers/status"; import { crawlJobStatusPreviewController } from "../../src/controllers/v0/status";
import { searchController } from "../../src/controllers/search"; import { searchController } from "../../src/controllers/v0/search";
import { crawlCancelController } from "../../src/controllers/crawl-cancel"; import { crawlCancelController } from "../../src/controllers/v0/crawl-cancel";
import { keyAuthController } from "../../src/controllers/keyAuth"; import { keyAuthController } from "../../src/controllers/v0/keyAuth";
import { livenessController } from "../controllers/liveness"; import { livenessController } from "../controllers/v0/liveness";
import { readinessController } from "../controllers/readiness"; import { readinessController } from "../controllers/v0/readiness";
export const v0Router = express.Router(); export const v0Router = express.Router();