diff --git a/apps/api/src/controllers/v1/__tests__/crawl.test.ts.WIP b/apps/api/src/controllers/v1/__tests__/crawl.test.ts.WIP new file mode 100644 index 00000000..621c7436 --- /dev/null +++ b/apps/api/src/controllers/v1/__tests__/crawl.test.ts.WIP @@ -0,0 +1,47 @@ +import { crawlController } from '../crawl' +import { Request, Response } from 'express'; +import { authenticateUser } from '../auth'; // Ensure this import is correct +import { createIdempotencyKey } from '../../services/idempotency/create'; +import { validateIdempotencyKey } from '../../services/idempotency/validate'; +import { v4 as uuidv4 } from 'uuid'; + +jest.mock('../auth', () => ({ + authenticateUser: jest.fn().mockResolvedValue({ + success: true, + team_id: 'team123', + error: null, + status: 200 + }), + reduce: jest.fn() +})); +jest.mock('../../services/idempotency/validate'); + +describe('crawlController', () => { + it('should prevent duplicate requests using the same idempotency key', async () => { + const req = { + headers: { + 'x-idempotency-key': await uuidv4(), + 'Authorization': `Bearer ${process.env.TEST_API_KEY}` + }, + body: { + url: 'https://mendable.ai' + } + } as unknown as Request; + const res = { + status: jest.fn().mockReturnThis(), + json: jest.fn() + } as unknown as Response; + + // Mock the idempotency key validation to return false for the second call + (validateIdempotencyKey as jest.Mock).mockResolvedValueOnce(true).mockResolvedValueOnce(false); + + // First request should succeed + await crawlController(req, res); + expect(res.status).not.toHaveBeenCalledWith(409); + + // Second request with the same key should fail + await crawlController(req, res); + expect(res.status).toHaveBeenCalledWith(409); + expect(res.json).toHaveBeenCalledWith({ error: 'Idempotency key already used' }); + }); +}); \ No newline at end of file diff --git a/apps/api/src/controllers/v1/admin/queue.ts.WIP b/apps/api/src/controllers/v1/admin/queue.ts.WIP new file mode 100644 index 00000000..cb5f99ed --- /dev/null +++ b/apps/api/src/controllers/v1/admin/queue.ts.WIP @@ -0,0 +1,87 @@ +import { Request, Response } from "express"; + +import { Job } from "bull"; +import { Logger } from "../../lib/logger"; +import { getWebScraperQueue } from "../../services/queue-service"; +import { checkAlerts } from "../../services/alerts"; + +export async function cleanBefore24hCompleteJobsController( + req: Request, + res: Response +) { + Logger.info("🐂 Cleaning jobs older than 24h"); + try { + const webScraperQueue = getWebScraperQueue(); + const batchSize = 10; + const numberOfBatches = 9; // Adjust based on your needs + const completedJobsPromises: Promise[] = []; + for (let i = 0; i < numberOfBatches; i++) { + completedJobsPromises.push( + webScraperQueue.getJobs( + ["completed"], + i * batchSize, + i * batchSize + batchSize, + true + ) + ); + } + const completedJobs: Job[] = ( + await Promise.all(completedJobsPromises) + ).flat(); + const before24hJobs = + completedJobs.filter( + (job) => job.finishedOn < Date.now() - 24 * 60 * 60 * 1000 + ) || []; + + let count = 0; + + if (!before24hJobs) { + return res.status(200).send(`No jobs to remove.`); + } + + for (const job of before24hJobs) { + try { + await job.remove(); + count++; + } catch (jobError) { + Logger.error(`🐂 Failed to remove job with ID ${job.id}: ${jobError}`); + } + } + return res.status(200).send(`Removed ${count} completed jobs.`); + } catch (error) { + Logger.error(`🐂 Failed to clean last 24h complete jobs: ${error}`); + return res.status(500).send("Failed to clean jobs"); + } +} + + +export async function checkQueuesController(req: Request, res: Response) { + try { + await checkAlerts(); + return res.status(200).send("Alerts initialized"); + } catch (error) { + Logger.debug(`Failed to initialize alerts: ${error}`); + return res.status(500).send("Failed to initialize alerts"); + } + } + + // Use this as a "health check" that way we dont destroy the server +export async function queuesController(req: Request, res: Response) { + try { + const webScraperQueue = getWebScraperQueue(); + + const [webScraperActive] = await Promise.all([ + webScraperQueue.getActiveCount(), + ]); + + const noActiveJobs = webScraperActive === 0; + // 200 if no active jobs, 503 if there are active jobs + return res.status(noActiveJobs ? 200 : 500).json({ + webScraperActive, + noActiveJobs, + }); + } catch (error) { + Logger.error(error); + return res.status(500).json({ error: error.message }); + } + } \ No newline at end of file diff --git a/apps/api/src/controllers/v1/admin/redis-health.ts.WIP b/apps/api/src/controllers/v1/admin/redis-health.ts.WIP new file mode 100644 index 00000000..3b1e2518 --- /dev/null +++ b/apps/api/src/controllers/v1/admin/redis-health.ts.WIP @@ -0,0 +1,85 @@ +import { Request, Response } from "express"; +import Redis from "ioredis"; +import { Logger } from "../../lib/logger"; +import { redisRateLimitClient } from "../../services/rate-limiter"; + +export async function redisHealthController(req: Request, res: Response) { + const retryOperation = async (operation, retries = 3) => { + for (let attempt = 1; attempt <= retries; attempt++) { + try { + return await operation(); + } catch (error) { + if (attempt === retries) throw error; + Logger.warn(`Attempt ${attempt} failed: ${error.message}. Retrying...`); + await new Promise((resolve) => setTimeout(resolve, 2000)); // Wait 2 seconds before retrying + } + } + }; + + try { + const queueRedis = new Redis(process.env.REDIS_URL); + + const testKey = "test"; + const testValue = "test"; + + // Test queueRedis + let queueRedisHealth; + try { + await retryOperation(() => queueRedis.set(testKey, testValue)); + queueRedisHealth = await retryOperation(() => queueRedis.get(testKey)); + await retryOperation(() => queueRedis.del(testKey)); + } catch (error) { + Logger.error(`queueRedis health check failed: ${error}`); + queueRedisHealth = null; + } + + // Test redisRateLimitClient + let redisRateLimitHealth; + try { + await retryOperation(() => redisRateLimitClient.set(testKey, testValue)); + redisRateLimitHealth = await retryOperation(() => + redisRateLimitClient.get(testKey) + ); + await retryOperation(() => redisRateLimitClient.del(testKey)); + } catch (error) { + Logger.error(`redisRateLimitClient health check failed: ${error}`); + redisRateLimitHealth = null; + } + + const healthStatus = { + queueRedis: queueRedisHealth === testValue ? "healthy" : "unhealthy", + redisRateLimitClient: + redisRateLimitHealth === testValue ? "healthy" : "unhealthy", + }; + + if ( + healthStatus.queueRedis === "healthy" && + healthStatus.redisRateLimitClient === "healthy" + ) { + Logger.info("Both Redis instances are healthy"); + return res.status(200).json({ status: "healthy", details: healthStatus }); + } else { + Logger.info( + `Redis instances health check: ${JSON.stringify(healthStatus)}` + ); + // await sendSlackWebhook( + // `[REDIS DOWN] Redis instances health check: ${JSON.stringify( + // healthStatus + // )}`, + // true + // ); + return res + .status(500) + .json({ status: "unhealthy", details: healthStatus }); + } + } catch (error) { + Logger.error(`Redis health check failed: ${error}`); + // await sendSlackWebhook( + // `[REDIS DOWN] Redis instances health check: ${error.message}`, + // true + // ); + return res + .status(500) + .json({ status: "unhealthy", message: error.message }); + } +} diff --git a/apps/api/src/controllers/v1/auth.ts b/apps/api/src/controllers/v1/auth.ts new file mode 100644 index 00000000..bd45648d --- /dev/null +++ b/apps/api/src/controllers/v1/auth.ts @@ -0,0 +1,220 @@ +import { parseApi } from "../../../src/lib/parseApi"; +import { getRateLimiter, } from "../../../src/services/rate-limiter"; +import { AuthResponse, NotificationType, RateLimiterMode } from "../../../src/types"; +import { supabase_service } from "../../../src/services/supabase"; +import { withAuth } from "../../../src/lib/withAuth"; +import { RateLimiterRedis } from "rate-limiter-flexible"; +import { setTraceAttributes } from '@hyperdx/node-opentelemetry'; +import { sendNotification } from "../../../src/services/notification/email_notification"; +import { Logger } from "../../../src/lib/logger"; + +export async function authenticateUser(req, res, mode?: RateLimiterMode): Promise { + return withAuth(supaAuthenticateUser)(req, res, mode); +} +function setTrace(team_id: string, api_key: string) { + try { + setTraceAttributes({ + team_id, + api_key + }); + } catch (error) { + Logger.error(`Error setting trace attributes: ${error.message}`); + } + +} +export async function supaAuthenticateUser( + req, + res, + mode?: RateLimiterMode +): Promise<{ + success: boolean; + team_id?: string; + error?: string; + status?: number; + plan?: string; +}> { + const authHeader = req.headers.authorization; + if (!authHeader) { + return { success: false, error: "Unauthorized", status: 401 }; + } + const token = authHeader.split(" ")[1]; // Extract the token from "Bearer " + if (!token) { + return { + success: false, + error: "Unauthorized: Token missing", + status: 401, + }; + } + + const incomingIP = (req.headers["x-forwarded-for"] || + req.socket.remoteAddress) as string; + const iptoken = incomingIP + token; + + let rateLimiter: RateLimiterRedis; + let subscriptionData: { team_id: string, plan: string } | null = null; + let normalizedApi: string; + + let team_id: string; + + if (token == "this_is_just_a_preview_token") { + rateLimiter = getRateLimiter(RateLimiterMode.Preview, token); + team_id = "preview"; + } else { + normalizedApi = parseApi(token); + + const { data, error } = await supabase_service.rpc( + 'get_key_and_price_id_2', { api_key: normalizedApi } + ); + // get_key_and_price_id_2 rpc definition: + // create or replace function get_key_and_price_id_2(api_key uuid) + // returns table(key uuid, team_id uuid, price_id text) as $$ + // begin + // if api_key is null then + // return query + // select null::uuid as key, null::uuid as team_id, null::text as price_id; + // end if; + + // return query + // select ak.key, ak.team_id, s.price_id + // from api_keys ak + // left join subscriptions s on ak.team_id = s.team_id + // where ak.key = api_key; + // end; + // $$ language plpgsql; + + if (error) { + Logger.warn(`Error fetching key and price_id: ${error.message}`); + } else { + // console.log('Key and Price ID:', data); + } + + + + if (error || !data || data.length === 0) { + Logger.warn(`Error fetching api key: ${error.message} or data is empty`); + return { + success: false, + error: "Unauthorized: Invalid token", + status: 401, + }; + } + const internal_team_id = data[0].team_id; + team_id = internal_team_id; + + const plan = getPlanByPriceId(data[0].price_id); + // HyperDX Logging + setTrace(team_id, normalizedApi); + subscriptionData = { + team_id: team_id, + plan: plan + } + switch (mode) { + case RateLimiterMode.Crawl: + rateLimiter = getRateLimiter(RateLimiterMode.Crawl, token, subscriptionData.plan); + break; + case RateLimiterMode.Scrape: + rateLimiter = getRateLimiter(RateLimiterMode.Scrape, token, subscriptionData.plan); + break; + case RateLimiterMode.Search: + rateLimiter = getRateLimiter(RateLimiterMode.Search, token, subscriptionData.plan); + break; + case RateLimiterMode.CrawlStatus: + rateLimiter = getRateLimiter(RateLimiterMode.CrawlStatus, token); + break; + + case RateLimiterMode.Preview: + rateLimiter = getRateLimiter(RateLimiterMode.Preview, token); + break; + default: + rateLimiter = getRateLimiter(RateLimiterMode.Crawl, token); + break; + // case RateLimiterMode.Search: + // rateLimiter = await searchRateLimiter(RateLimiterMode.Search, token); + // break; + } + } + + const team_endpoint_token = token === "this_is_just_a_preview_token" ? iptoken : team_id; + + try { + await rateLimiter.consume(team_endpoint_token); + } catch (rateLimiterRes) { + Logger.error(`Rate limit exceeded: ${rateLimiterRes}`); + const secs = Math.round(rateLimiterRes.msBeforeNext / 1000) || 1; + const retryDate = new Date(Date.now() + rateLimiterRes.msBeforeNext); + + // We can only send a rate limit email every 7 days, send notification already has the date in between checking + const startDate = new Date(); + const endDate = new Date(); + endDate.setDate(endDate.getDate() + 7); + // await sendNotification(team_id, NotificationType.RATE_LIMIT_REACHED, startDate.toISOString(), endDate.toISOString()); + return { + success: false, + error: `Rate limit exceeded. Consumed points: ${rateLimiterRes.consumedPoints}, Remaining points: ${rateLimiterRes.remainingPoints}. Upgrade your plan at https://firecrawl.dev/pricing for increased rate limits or please retry after ${secs}s, resets at ${retryDate}`, + status: 429, + }; + } + + if ( + token === "this_is_just_a_preview_token" && + (mode === RateLimiterMode.Scrape || mode === RateLimiterMode.Preview || mode === RateLimiterMode.Search) + ) { + return { success: true, team_id: "preview" }; + // check the origin of the request and make sure its from firecrawl.dev + // const origin = req.headers.origin; + // if (origin && origin.includes("firecrawl.dev")){ + // return { success: true, team_id: "preview" }; + // } + // if(process.env.ENV !== "production") { + // return { success: true, team_id: "preview" }; + // } + + // return { success: false, error: "Unauthorized: Invalid token", status: 401 }; + } + + // make sure api key is valid, based on the api_keys table in supabase + if (!subscriptionData) { + normalizedApi = parseApi(token); + + const { data, error } = await supabase_service + .from("api_keys") + .select("*") + .eq("key", normalizedApi); + + + + if (error || !data || data.length === 0) { + Logger.warn(`Error fetching api key: ${error.message} or data is empty`); + return { + success: false, + error: "Unauthorized: Invalid token", + status: 401, + }; + } + + subscriptionData = data[0]; + } + + return { success: true, team_id: subscriptionData.team_id, plan: subscriptionData.plan ?? ""}; +} +function getPlanByPriceId(price_id: string) { + switch (price_id) { + case process.env.STRIPE_PRICE_ID_STARTER: + return 'starter'; + case process.env.STRIPE_PRICE_ID_STANDARD: + return 'standard'; + case process.env.STRIPE_PRICE_ID_SCALE: + return 'scale'; + case process.env.STRIPE_PRICE_ID_HOBBY: + case process.env.STRIPE_PRICE_ID_HOBBY_YEARLY: + return 'hobby'; + case process.env.STRIPE_PRICE_ID_STANDARD_NEW: + case process.env.STRIPE_PRICE_ID_STANDARD_NEW_YEARLY: + return 'standardnew'; + case process.env.STRIPE_PRICE_ID_GROWTH: + case process.env.STRIPE_PRICE_ID_GROWTH_YEARLY: + return 'growth'; + default: + return 'free'; + } +} \ No newline at end of file diff --git a/apps/api/src/controllers/v1/crawl-cancel.ts.WIP b/apps/api/src/controllers/v1/crawl-cancel.ts.WIP new file mode 100644 index 00000000..d0c109ec --- /dev/null +++ b/apps/api/src/controllers/v1/crawl-cancel.ts.WIP @@ -0,0 +1,71 @@ +import { Request, Response } from "express"; +import { authenticateUser } from "./auth"; +import { RateLimiterMode } from "../../src/types"; +import { addWebScraperJob } from "../../src/services/queue-jobs"; +import { getWebScraperQueue } from "../../src/services/queue-service"; +import { supabase_service } from "../../src/services/supabase"; +import { billTeam } from "../../src/services/billing/credit_billing"; +import { Logger } from "../../src/lib/logger"; + +export async function crawlCancelController(req: Request, res: Response) { + try { + const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true'; + + const { success, team_id, error, status } = await authenticateUser( + req, + res, + RateLimiterMode.CrawlStatus + ); + if (!success) { + return res.status(status).json({ error }); + } + const job = await getWebScraperQueue().getJob(req.params.jobId); + if (!job) { + return res.status(404).json({ error: "Job not found" }); + } + + // check if the job belongs to the team + if (useDbAuthentication) { + const { data, error: supaError } = await supabase_service + .from("bulljobs_teams") + .select("*") + .eq("job_id", req.params.jobId) + .eq("team_id", team_id); + if (supaError) { + return res.status(500).json({ error: supaError.message }); + } + + if (data.length === 0) { + return res.status(403).json({ error: "Unauthorized" }); + } + } + + const jobState = await job.getState(); + const { partialDocs } = await job.progress(); + + if (partialDocs && partialDocs.length > 0 && jobState === "active") { + Logger.info("Billing team for partial docs..."); + // Note: the credits that we will bill them here might be lower than the actual + // due to promises that are not yet resolved + await billTeam(team_id, partialDocs.length); + } + + try { + await getWebScraperQueue().client.del(job.lockKey()); + await job.takeLock(); + await job.discard(); + await job.moveToFailed(Error("Job cancelled by user"), true); + } catch (error) { + Logger.error(error); + } + + const newJobState = await job.getState(); + + res.json({ + status: "cancelled" + }); + } catch (error) { + Logger.error(error); + return res.status(500).json({ error: error.message }); + } +} diff --git a/apps/api/src/controllers/v1/crawl-status.ts b/apps/api/src/controllers/v1/crawl-status.ts new file mode 100644 index 00000000..da2bc11e --- /dev/null +++ b/apps/api/src/controllers/v1/crawl-status.ts @@ -0,0 +1,89 @@ +import { Request, Response } from "express"; +import { authenticateUser } from "./auth"; +import { RateLimiterMode } from "../../../src/types"; +import { addWebScraperJob } from "../../../src/services/queue-jobs"; +import { getWebScraperQueue } from "../../../src/services/queue-service"; +import { supabaseGetJobById } from "../../../src/lib/supabase-jobs"; +import { Logger } from "../../../src/lib/logger"; +import { v4 as uuidv4 } from "uuid"; + +export async function crawlStatusController(req: Request, res: Response) { + // TODO: validate req.params.jobId + + try { + const { success, team_id, error, status } = await authenticateUser( + req, + res, + RateLimiterMode.CrawlStatus + ); + if (!success) { + return res.status(status).json({ error }); + } + + // const job = await getWebScraperQueue().getJob(req.params.jobId); + // if (!job) { + // return res.status(404).json({ error: "Job not found" }); + // } + + // const { current, current_url, total, current_step, partialDocs } = await job.progress(); + + // let data = job.returnvalue; + // if (process.env.USE_DB_AUTHENTICATION === "true") { + // const supabaseData = await supabaseGetJobById(req.params.jobId); + + // if (supabaseData) { + // data = supabaseData.docs; + // } + // } + + // const jobStatus = await job.getState(); + + // mock: + const id = uuidv4(); + const result = { + totalCount: 100, + creditsUsed: 2, + expiresAt: new Date(Date.now() + 24 * 60 * 60 * 1000).getTime(), + status: "scraping", // scraping, completed, failed + next: `${req.protocol}://${req.get("host")}/v1/crawl/${id}`, + data: [{ + markdown: "test", + content: "test", + html: "test", + rawHtml: "test", + linksOnPage: ["test1", "test2"], + screenshot: "test", + metadata: { + title: "test", + description: "test", + language: "test", + sourceURL: "test", + statusCode: 200, + error: "test" + } + }, + { + markdown: "test", + content: "test", + html: "test", + rawHtml: "test", + linksOnPage: ["test1", "test2"], + screenshot: "test", + metadata: { + title: "test", + description: "test", + language: "test", + sourceURL: "test", + statusCode: 200, + error: "test" + } + }] + } + + res.status(200).json(result); + } catch (error) { + Logger.error(error); + return res.status(500).json({ error: error.message }); + } +} + diff --git a/apps/api/src/controllers/v1/crawl.ts b/apps/api/src/controllers/v1/crawl.ts new file mode 100644 index 00000000..a00ad7ca --- /dev/null +++ b/apps/api/src/controllers/v1/crawl.ts @@ -0,0 +1,139 @@ +import { Request, Response } from "express"; +import { WebScraperDataProvider } from "../../../src/scraper/WebScraper"; +import { billTeam } from "../../../src/services/billing/credit_billing"; +import { checkTeamCredits } from "../../../src/services/billing/credit_billing"; +import { authenticateUser } from "./auth"; +import { RateLimiterMode } from "../../../src/types"; +import { addWebScraperJob } from "../../../src/services/queue-jobs"; +import { isUrlBlocked } from "../../../src/scraper/WebScraper/utils/blocklist"; +import { logCrawl } from "../../../src/services/logging/crawl_log"; +import { validateIdempotencyKey } from "../../../src/services/idempotency/validate"; +import { createIdempotencyKey } from "../../../src/services/idempotency/create"; +import { defaultCrawlPageOptions, defaultCrawlerOptions, defaultOrigin } from "../../../src/lib/default-values"; +import { v4 as uuidv4 } from "uuid"; +import { Logger } from "../../../src/lib/logger"; +import { checkAndUpdateURL } from "../../../src/lib/validateUrl"; + +export async function crawlController(req: Request, res: Response) { + // expected req.body + + // req.body = { + // url: string + // crawlerOptions: { + // includePaths: string[] + // excludePaths: string[] + // maxDepth: number + // limit: number + // allowBackwardLinks: boolean >> TODO: CHANGE THIS NAME??? + // allowExternalLinks: boolean + // ignoreSitemap: number + // } + // scrapeOptions: Exclude + // } + + + try { + const { success, team_id, error, status } = await authenticateUser( + req, + res, + RateLimiterMode.Crawl + ); + if (!success) { + return res.status(status).json({ error }); + } + + if (req.headers["x-idempotency-key"]) { + const isIdempotencyValid = await validateIdempotencyKey(req); + if (!isIdempotencyValid) { + return res.status(409).json({ error: "Idempotency key already used" }); + } + try { + createIdempotencyKey(req); + } catch (error) { + Logger.error(error); + return res.status(500).json({ error: error.message }); + } + } + + const { success: creditsCheckSuccess, message: creditsCheckMessage } = + await checkTeamCredits(team_id, 1); + if (!creditsCheckSuccess) { + return res.status(402).json({ error: "Insufficient credits" }); + } + + let url = req.body.url; + if (!url) { + return res.status(400).json({ error: "Url is required" }); + } + + if (isUrlBlocked(url)) { + return res + .status(403) + .json({ + error: + "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.", + }); + } + + try { + url = checkAndUpdateURL(url); + } catch (error) { + return res.status(400).json({ error: 'Invalid Url' }); + } + + // TODO: add job to queue + + const id = uuidv4(); + return res.status(200).json({ jobId: id, url: `${req.protocol}://${req.get('host')}/v1/crawl/${id}` }); + + // const mode = req.body.mode ?? "crawl"; + + // const crawlerOptions = { ...defaultCrawlerOptions, ...req.body.crawlerOptions }; + // const pageOptions = { ...defaultCrawlPageOptions, ...req.body.pageOptions }; + + // if (mode === "single_urls" && !url.includes(",")) { // NOTE: do we need this? + // try { + // const a = new WebScraperDataProvider(); + // await a.setOptions({ + // jobId: uuidv4(), + // mode: "single_urls", + // urls: [url], + // crawlerOptions: { ...crawlerOptions, returnOnlyUrls: true }, + // pageOptions: pageOptions, + // }); + + // const docs = await a.getDocuments(false, (progress) => { + // job.progress({ + // current: progress.current, + // total: progress.total, + // current_step: "SCRAPING", + // current_url: progress.currentDocumentUrl, + // }); + // }); + // return res.json({ + // success: true, + // documents: docs, + // }); + // } catch (error) { + // Logger.error(error); + // return res.status(500).json({ error: error.message }); + // } + // } + + // const job = await addWebScraperJob({ + // url: url, + // mode: mode ?? "crawl", // fix for single urls not working + // crawlerOptions: crawlerOptions, + // team_id: team_id, + // pageOptions: pageOptions, + // origin: req.body.origin ?? defaultOrigin, + // }); + + // await logCrawl(job.id.toString(), team_id); + + // res.json({ jobId: job.id }); + } catch (error) { + Logger.error(error); + return res.status(500).json({ error: error.message }); + } +} diff --git a/apps/api/src/controllers/v1/crawlPreview.ts.WIP b/apps/api/src/controllers/v1/crawlPreview.ts.WIP new file mode 100644 index 00000000..7c5c804d --- /dev/null +++ b/apps/api/src/controllers/v1/crawlPreview.ts.WIP @@ -0,0 +1,46 @@ +import { Request, Response } from "express"; +import { authenticateUser } from "./auth"; +import { RateLimiterMode } from "../../src/types"; +import { addWebScraperJob } from "../../src/services/queue-jobs"; +import { isUrlBlocked } from "../../src/scraper/WebScraper/utils/blocklist"; +import { Logger } from "../../src/lib/logger"; + +export async function crawlPreviewController(req: Request, res: Response) { + try { + const { success, team_id, error, status } = await authenticateUser( + req, + res, + RateLimiterMode.Preview + ); + if (!success) { + return res.status(status).json({ error }); + } + // authenticate on supabase + const url = req.body.url; + if (!url) { + return res.status(400).json({ error: "Url is required" }); + } + + if (isUrlBlocked(url)) { + return res.status(403).json({ error: "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it." }); + } + + const mode = req.body.mode ?? "crawl"; + const crawlerOptions = req.body.crawlerOptions ?? {}; + const pageOptions = req.body.pageOptions ?? { onlyMainContent: false, includeHtml: false, removeTags: [] }; + + const job = await addWebScraperJob({ + url: url, + mode: mode ?? "crawl", // fix for single urls not working + crawlerOptions: { ...crawlerOptions, limit: 5, maxCrawledLinks: 5 }, + team_id: "preview", + pageOptions: pageOptions, + origin: "website-preview", + }); + + res.json({ jobId: job.id }); + } catch (error) { + Logger.error(error); + return res.status(500).json({ error: error.message }); + } +} diff --git a/apps/api/src/controllers/v1/keyAuth.ts.WIP b/apps/api/src/controllers/v1/keyAuth.ts.WIP new file mode 100644 index 00000000..351edd18 --- /dev/null +++ b/apps/api/src/controllers/v1/keyAuth.ts.WIP @@ -0,0 +1,24 @@ + +import { AuthResponse, RateLimiterMode } from "../types"; + +import { Request, Response } from "express"; +import { authenticateUser } from "./auth"; + + +export const keyAuthController = async (req: Request, res: Response) => { + try { + // make sure to authenticate user first, Bearer + const { success, team_id, error, status } = await authenticateUser( + req, + res + ); + if (!success) { + return res.status(status).json({ error }); + } + // if success, return success: true + return res.status(200).json({ success: true }); + } catch (error) { + return res.status(500).json({ error: error.message }); + } +}; + diff --git a/apps/api/src/controllers/v1/liveness.ts.WIP b/apps/api/src/controllers/v1/liveness.ts.WIP new file mode 100644 index 00000000..8ff1a96f --- /dev/null +++ b/apps/api/src/controllers/v1/liveness.ts.WIP @@ -0,0 +1,6 @@ +import { Request, Response } from "express"; + +export async function livenessController(req: Request, res: Response) { + //TODO: add checks if the application is live and healthy like checking the redis connection + res.status(200).json({ status: "ok" }); +} diff --git a/apps/api/src/controllers/v1/map.ts b/apps/api/src/controllers/v1/map.ts new file mode 100644 index 00000000..391b8a10 --- /dev/null +++ b/apps/api/src/controllers/v1/map.ts @@ -0,0 +1,128 @@ +import { Request, Response } from "express"; +import { WebScraperDataProvider } from "../../../src/scraper/WebScraper"; +import { billTeam } from "../../../src/services/billing/credit_billing"; +import { checkTeamCredits } from "../../../src/services/billing/credit_billing"; +import { authenticateUser } from "./auth"; +import { RateLimiterMode } from "../../../src/types"; +import { addWebScraperJob } from "../../../src/services/queue-jobs"; +import { isUrlBlocked } from "../../../src/scraper/WebScraper/utils/blocklist"; +import { logCrawl } from "../../../src/services/logging/crawl_log"; +import { validateIdempotencyKey } from "../../../src/services/idempotency/validate"; +import { createIdempotencyKey } from "../../../src/services/idempotency/create"; +import { defaultCrawlPageOptions, defaultCrawlerOptions, defaultOrigin } from "../../../src/lib/default-values"; +import { v4 as uuidv4 } from "uuid"; +import { Logger } from "../../../src/lib/logger"; +import { checkAndUpdateURL } from "../../../src/lib/validateUrl"; + +export async function mapController(req: Request, res: Response) { + // expected req.body + + // req.body = { + // url: string + // ignoreSitemap: true?? + // other crawler options? + // } + + + try { + const { success, team_id, error, status } = await authenticateUser( + req, + res, + RateLimiterMode.Crawl + ); + if (!success) { + return res.status(status).json({ error }); + } + + // if (req.headers["x-idempotency-key"]) { + // const isIdempotencyValid = await validateIdempotencyKey(req); + // if (!isIdempotencyValid) { + // return res.status(409).json({ error: "Idempotency key already used" }); + // } + // try { + // createIdempotencyKey(req); + // } catch (error) { + // Logger.error(error); + // return res.status(500).json({ error: error.message }); + // } + // } + + // const { success: creditsCheckSuccess, message: creditsCheckMessage } = + // await checkTeamCredits(team_id, 1); + // if (!creditsCheckSuccess) { + // return res.status(402).json({ error: "Insufficient credits" }); + // } + + let url = req.body.url; + if (!url) { + return res.status(400).json({ error: "Url is required" }); + } + + if (isUrlBlocked(url)) { + return res + .status(403) + .json({ + error: + "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.", + }); + } + + try { + url = checkAndUpdateURL(url); + } catch (error) { + return res.status(400).json({ error: 'Invalid Url' }); + } + + return res.status(200).json({ urls: [ "test1", "test2" ] }); + + // const mode = req.body.mode ?? "crawl"; + + // const crawlerOptions = { ...defaultCrawlerOptions, ...req.body.crawlerOptions }; + // const pageOptions = { ...defaultCrawlPageOptions, ...req.body.pageOptions }; + + // if (mode === "single_urls" && !url.includes(",")) { // NOTE: do we need this? + // try { + // const a = new WebScraperDataProvider(); + // await a.setOptions({ + // jobId: uuidv4(), + // mode: "single_urls", + // urls: [url], + // crawlerOptions: { ...crawlerOptions, returnOnlyUrls: true }, + // pageOptions: pageOptions, + // }); + + // const docs = await a.getDocuments(false, (progress) => { + // job.progress({ + // current: progress.current, + // total: progress.total, + // current_step: "SCRAPING", + // current_url: progress.currentDocumentUrl, + // }); + // }); + // return res.json({ + // success: true, + // documents: docs, + // }); + // } catch (error) { + // Logger.error(error); + // return res.status(500).json({ error: error.message }); + // } + // } + + // const job = await addWebScraperJob({ + // url: url, + // mode: mode ?? "crawl", // fix for single urls not working + // crawlerOptions: crawlerOptions, + // team_id: team_id, + // pageOptions: pageOptions, + // origin: req.body.origin ?? defaultOrigin, + // }); + + // await logCrawl(job.id.toString(), team_id); + + // res.json({ jobId: job.id }); + } catch (error) { + Logger.error(error); + return res.status(500).json({ error: error.message }); + } +} diff --git a/apps/api/src/controllers/v1/readiness.ts.WIP b/apps/api/src/controllers/v1/readiness.ts.WIP new file mode 100644 index 00000000..cdb1f02c --- /dev/null +++ b/apps/api/src/controllers/v1/readiness.ts.WIP @@ -0,0 +1,6 @@ +import { Request, Response } from "express"; + +export async function readinessController(req: Request, res: Response) { + // TODO: add checks when the application is ready to serve traffic + res.status(200).json({ status: "ok" }); +} diff --git a/apps/api/src/controllers/v1/scrape.ts b/apps/api/src/controllers/v1/scrape.ts new file mode 100644 index 00000000..bf529ad2 --- /dev/null +++ b/apps/api/src/controllers/v1/scrape.ts @@ -0,0 +1,253 @@ +// import { ExtractorOptions, PageOptions } from './../../lib/entities'; +import { Request, Response } from "express"; +// import { WebScraperDataProvider } from "../../scraper/WebScraper"; +// import { billTeam, checkTeamCredits } from "../../services/billing/credit_billing"; +import { authenticateUser } from "./auth"; +import { RateLimiterMode } from "../../types"; +// import { logJob } from "../../services/logging/log_job"; +// import { Document } from "../../lib/entities"; +import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist"; // Import the isUrlBlocked function +// import { numTokensFromString } from '../../lib/LLM-extraction/helpers'; +// import { defaultPageOptions, defaultExtractorOptions, defaultTimeout, defaultOrigin } from '../../../src/lib/default-values'; +// import { v4 as uuidv4 } from "uuid"; +import { Logger } from '../../lib/logger'; +import { checkAndUpdateURL } from '../../lib/validateUrl'; + +export async function scrapeController(req: Request, res: Response) { + let url = req.body.url; + if (!url) { + return { success: false, error: "Url is required", returnCode: 400 }; + } + + if (isUrlBlocked(url)) { + return { success: false, error: "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.", returnCode: 403 }; + } + + try { + url = checkAndUpdateURL(url); + } catch (error) { + return { success: false, error: "Invalid URL", returnCode: 400 }; + } + + // TODO: check req.body + // mockup req.body + // req.body = { + // url: "test", + // headers: { + // "x-key": "test" + // }, + // formats: ["markdown", "html", "rawHtml", "content", "linksOnPage", "screenshot", "fullPageScreenshot"], + // includeTags: ["test"], + // excludeTags: ["test"], + // onlyMainContent: false, + // timeout: 30000, + // waitFor: number + // } + + try { + let earlyReturn = false; + // make sure to authenticate user first, Bearer + const { success, team_id, error, status, plan } = await authenticateUser( + req, + res, + RateLimiterMode.Scrape + ); + if (!success) { + return res.status(status).json({ error }); + } + + // check credits + + const result = { + success: true, + warning: "test", + data: { + markdown: "test", + content: "test", + html: "test", + rawHtml: "test", + linksOnPage: ["test1", "test2"], + screenshot: "test", + metadata: { + title: "test", + description: "test", + language: "test", + sourceURL: "test", + statusCode: 200, + error: "test" + } + } + } + + return res.status(200).json(result); + + // const crawlerOptions = req.body.crawlerOptions ?? {}; + // const pageOptions = { ...defaultPageOptions, ...req.body.pageOptions }; + // const extractorOptions = { ...defaultExtractorOptions, ...req.body.extractorOptions }; + // const origin = req.body.origin ?? defaultOrigin; + // let timeout = req.body.timeout ?? defaultTimeout; + + // if (extractorOptions.mode.includes("llm-extraction")) { + // pageOptions.onlyMainContent = true; + // timeout = req.body.timeout ?? 90000; + // } + + // const checkCredits = async () => { + // try { + // const { success: creditsCheckSuccess, message: creditsCheckMessage } = await checkTeamCredits(team_id, 1); + // if (!creditsCheckSuccess) { + // earlyReturn = true; + // return res.status(402).json({ error: "Insufficient credits" }); + // } + // } catch (error) { + // Logger.error(error); + // earlyReturn = true; + // return res.status(500).json({ error: "Error checking team credits. Please contact hello@firecrawl.com for help." }); + // } + // }; + + + // await checkCredits(); + + // const jobId = uuidv4(); + + // const startTime = new Date().getTime(); + // const result = await scrapeHelper( + // jobId, + // req, + // team_id, + // crawlerOptions, + // pageOptions, + // extractorOptions, + // timeout, + // plan + // ); + // const endTime = new Date().getTime(); + // const timeTakenInSeconds = (endTime - startTime) / 1000; + // const numTokens = (result.data && result.data.markdown) ? numTokensFromString(result.data.markdown, "gpt-3.5-turbo") : 0; + + // if (result.success) { + // let creditsToBeBilled = 1; // Assuming 1 credit per document + // const creditsPerLLMExtract = 50; + + // if (extractorOptions.mode.includes("llm-extraction")) { + // // creditsToBeBilled = creditsToBeBilled + (creditsPerLLMExtract * filteredDocs.length); + // creditsToBeBilled += creditsPerLLMExtract; + // } + + // let startTimeBilling = new Date().getTime(); + + // if (earlyReturn) { + // // Don't bill if we're early returning + // return; + // } + // const billingResult = await billTeam( + // team_id, + // creditsToBeBilled + // ); + // if (!billingResult.success) { + // return res.status(402).json({ + // success: false, + // error: "Failed to bill team. Insufficient credits or subscription not found.", + // }); + // } + // } + + // logJob({ + // job_id: jobId, + // success: result.success, + // message: result.error, + // num_docs: 1, + // docs: [result.data], + // time_taken: timeTakenInSeconds, + // team_id: team_id, + // mode: "scrape", + // url: req.body.url, + // crawlerOptions: crawlerOptions, + // pageOptions: pageOptions, + // origin: origin, + // extractor_options: extractorOptions, + // num_tokens: numTokens, + // }); + + + // return res.status(result.returnCode).json(result); + } catch (error) { + Logger.error(error); + return res.status(500).json({ error: error.message }); + } +} + + +// export async function scrapeHelper( +// jobId: string, +// req: Request, +// team_id: string, +// crawlerOptions: any, +// pageOptions: PageOptions, +// extractorOptions: ExtractorOptions, +// timeout: number, +// plan?: string +// ): Promise<{ +// success: boolean; +// error?: string; +// data?: Document; +// returnCode: number; +// }> { + + // const url = req.body.url; + // if (!url) { + // return { success: false, error: "Url is required", returnCode: 400 }; + // } + + // if (isUrlBlocked(url)) { + // return { success: false, error: "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.", returnCode: 403 }; + // } + + // const a = new WebScraperDataProvider(); + // await a.setOptions({ + // jobId, + // mode: "single_urls", + // urls: [url], + // crawlerOptions: { + // ...crawlerOptions, + // }, + // pageOptions: pageOptions, + // extractorOptions: extractorOptions, + // }); + + // const timeoutPromise = new Promise<{ success: boolean; error?: string; returnCode: number }>((_, reject) => + // setTimeout(() => reject({ success: false, error: "Request timed out. Increase the timeout by passing `timeout` param to the request.", returnCode: 408 }), timeout) + // ); + + // const docsPromise = a.getDocuments(false); + + // let docs; + // try { + // docs = await Promise.race([docsPromise, timeoutPromise]); + // } catch (error) { + // return error; + // } + + // // make sure doc.content is not empty + // let filteredDocs = docs.filter( + // (doc: { content?: string }) => doc.content && doc.content.trim().length > 0 + // ); + // if (filteredDocs.length === 0) { + // return { success: true, error: "No page found", returnCode: 200, data: docs[0] }; + // } + + + // // Remove rawHtml if pageOptions.rawHtml is false and extractorOptions.mode is llm-extraction-from-raw-html + // if (!pageOptions.includeRawHtml && extractorOptions.mode == "llm-extraction-from-raw-html") { + // filteredDocs.forEach(doc => { + // delete doc.rawHtml; + // }); + // } + + // return { + // success: true, + // data: filteredDocs[0], + // returnCode: 200, + // }; +// } \ No newline at end of file diff --git a/apps/api/src/controllers/v1/search.ts.WIP b/apps/api/src/controllers/v1/search.ts.WIP new file mode 100644 index 00000000..dfd9b8b9 --- /dev/null +++ b/apps/api/src/controllers/v1/search.ts.WIP @@ -0,0 +1,197 @@ +import { Request, Response } from "express"; +import { WebScraperDataProvider } from "../scraper/WebScraper"; +import { billTeam, checkTeamCredits } from "../services/billing/credit_billing"; +import { authenticateUser } from "./auth"; +import { RateLimiterMode } from "../types"; +import { logJob } from "../services/logging/log_job"; +import { PageOptions, SearchOptions } from "../lib/entities"; +import { search } from "../search"; +import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist"; +import { v4 as uuidv4 } from "uuid"; +import { Logger } from "../lib/logger"; + +export async function searchHelper( + jobId: string, + req: Request, + team_id: string, + crawlerOptions: any, + pageOptions: PageOptions, + searchOptions: SearchOptions, +): Promise<{ + success: boolean; + error?: string; + data?: any; + returnCode: number; +}> { + const query = req.body.query; + const advanced = false; + if (!query) { + return { success: false, error: "Query is required", returnCode: 400 }; + } + + const tbs = searchOptions.tbs ?? null; + const filter = searchOptions.filter ?? null; + const num_results = searchOptions.limit ?? 7; + const num_results_buffer = Math.floor(num_results * 1.5); + + let res = await search({ + query: query, + advanced: advanced, + num_results: num_results_buffer, + tbs: tbs, + filter: filter, + lang: searchOptions.lang ?? "en", + country: searchOptions.country ?? "us", + location: searchOptions.location, + }); + + let justSearch = pageOptions.fetchPageContent === false; + + + if (justSearch) { + const billingResult = await billTeam( + team_id, + res.length + ); + if (!billingResult.success) { + return { + success: false, + error: + "Failed to bill team. Insufficient credits or subscription not found.", + returnCode: 402, + }; + } + return { success: true, data: res, returnCode: 200 }; + } + + res = res.filter((r) => !isUrlBlocked(r.url)); + if (res.length > num_results) { + res = res.slice(0, num_results); + } + + if (res.length === 0) { + return { success: true, error: "No search results found", returnCode: 200 }; + } + + // filter out social media links + + + const a = new WebScraperDataProvider(); + await a.setOptions({ + jobId, + mode: "single_urls", + urls: res.map((r) => r.url).slice(0, searchOptions.limit ?? 7), + crawlerOptions: { + ...crawlerOptions, + }, + pageOptions: { + ...pageOptions, + onlyMainContent: pageOptions?.onlyMainContent ?? true, + fetchPageContent: pageOptions?.fetchPageContent ?? true, + includeHtml: pageOptions?.includeHtml ?? false, + removeTags: pageOptions?.removeTags ?? [], + fallback: false, + }, + }); + + const docs = await a.getDocuments(false); + + if (docs.length === 0) { + return { success: true, error: "No search results found", returnCode: 200 }; + } + + // make sure doc.content is not empty + const filteredDocs = docs.filter( + (doc: { content?: string }) => doc.content && doc.content.trim().length > 0 + ); + + if (filteredDocs.length === 0) { + return { success: true, error: "No page found", returnCode: 200, data: docs }; + } + + const billingResult = await billTeam( + team_id, + filteredDocs.length + ); + if (!billingResult.success) { + return { + success: false, + error: + "Failed to bill team. Insufficient credits or subscription not found.", + returnCode: 402, + }; + } + + return { + success: true, + data: filteredDocs, + returnCode: 200, + }; +} + +export async function searchController(req: Request, res: Response) { + try { + // make sure to authenticate user first, Bearer + const { success, team_id, error, status } = await authenticateUser( + req, + res, + RateLimiterMode.Search + ); + if (!success) { + return res.status(status).json({ error }); + } + const crawlerOptions = req.body.crawlerOptions ?? {}; + const pageOptions = req.body.pageOptions ?? { + includeHtml: false, + onlyMainContent: true, + fetchPageContent: true, + removeTags: [], + fallback: false, + }; + const origin = req.body.origin ?? "api"; + + const searchOptions = req.body.searchOptions ?? { limit: 7 }; + + const jobId = uuidv4(); + + try { + const { success: creditsCheckSuccess, message: creditsCheckMessage } = + await checkTeamCredits(team_id, 1); + if (!creditsCheckSuccess) { + return res.status(402).json({ error: "Insufficient credits" }); + } + } catch (error) { + Logger.error(error); + return res.status(500).json({ error: "Internal server error" }); + } + const startTime = new Date().getTime(); + const result = await searchHelper( + jobId, + req, + team_id, + crawlerOptions, + pageOptions, + searchOptions, + ); + const endTime = new Date().getTime(); + const timeTakenInSeconds = (endTime - startTime) / 1000; + logJob({ + job_id: jobId, + success: result.success, + message: result.error, + num_docs: result.data ? result.data.length : 0, + docs: result.data, + time_taken: timeTakenInSeconds, + team_id: team_id, + mode: "search", + url: req.body.query, + crawlerOptions: crawlerOptions, + pageOptions: pageOptions, + origin: origin, + }); + return res.status(result.returnCode).json(result); + } catch (error) { + Logger.error(error); + return res.status(500).json({ error: error.message }); + } +} diff --git a/apps/api/src/controllers/v1/status.ts.WIP b/apps/api/src/controllers/v1/status.ts.WIP new file mode 100644 index 00000000..3d7fccbb --- /dev/null +++ b/apps/api/src/controllers/v1/status.ts.WIP @@ -0,0 +1,42 @@ +import { Request, Response } from "express"; +import { getWebScraperQueue } from "../../src/services/queue-service"; +import { supabaseGetJobById } from "../../src/lib/supabase-jobs"; +import { Logger } from "../../src/lib/logger"; + +export async function crawlJobStatusPreviewController(req: Request, res: Response) { + try { + const job = await getWebScraperQueue().getJob(req.params.jobId); + if (!job) { + return res.status(404).json({ error: "Job not found" }); + } + + const { current, current_url, total, current_step, partialDocs } = await job.progress(); + let data = job.returnvalue; + if (process.env.USE_DB_AUTHENTICATION === "true") { + const supabaseData = await supabaseGetJobById(req.params.jobId); + + if (supabaseData) { + data = supabaseData.docs; + } + } + + let jobStatus = await job.getState(); + if (jobStatus === 'waiting' || jobStatus === 'stuck') { + jobStatus = 'active'; + } + + res.json({ + status: jobStatus, + // progress: job.progress(), + current, + current_url, + current_step, + total, + data: data ? data : null, + partial_data: jobStatus == 'completed' ? [] : partialDocs, + }); + } catch (error) { + Logger.error(error); + return res.status(500).json({ error: error.message }); + } +} diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts index 6550a2fd..b7e7af04 100644 --- a/apps/api/src/index.ts +++ b/apps/api/src/index.ts @@ -13,6 +13,7 @@ import { ScrapeEvents } from "./lib/scrape-events"; import http from 'node:http'; import https from 'node:https'; import CacheableLookup from 'cacheable-lookup'; +import { v1Router } from "./routes/v1"; const { createBullBoard } = require("@bull-board/api"); const { BullAdapter } = require("@bull-board/api/bullAdapter"); @@ -78,6 +79,7 @@ if (cluster.isMaster) { // register router app.use(v0Router); + app.use(v1Router); app.use(adminRouter); const DEFAULT_PORT = process.env.PORT ?? 3002; diff --git a/apps/api/src/lib/validateUrl.ts b/apps/api/src/lib/validateUrl.ts new file mode 100644 index 00000000..b5f30434 --- /dev/null +++ b/apps/api/src/lib/validateUrl.ts @@ -0,0 +1,38 @@ + +const protocolIncluded = (url: string) => { + // if :// not in the start of the url assume http (maybe https?) + // regex checks if :// appears before any . + return(/^([^.:]+:\/\/)/.test(url)); +} + +const getURLobj = (s: string) => { + // URL fails if we dont include the protocol ie google.com + let error = false; + let urlObj = {}; + try { + urlObj = new URL(s); + } catch (err) { + error = true; + } + return { error, urlObj }; +}; + +export const checkAndUpdateURL = (url: string) => { + + if (!protocolIncluded(url)) { + url = `http://${url}`; + } + + const { error, urlObj } = getURLobj(url); + if (error) { + throw new Error("Invalid URL"); + } + + const typedUrlObj = urlObj as URL; + + if(typedUrlObj.protocol !== "http:" && typedUrlObj.protocol !== "https:") { + throw new Error("Invalid URL"); + } + + return { urlObj: typedUrlObj, url: url }; +} diff --git a/apps/api/src/routes/v1.ts b/apps/api/src/routes/v1.ts new file mode 100644 index 00000000..5099fee9 --- /dev/null +++ b/apps/api/src/routes/v1.ts @@ -0,0 +1,34 @@ +import express from "express"; +import { crawlController } from "../../src/controllers/v1/crawl"; +// import { crawlStatusController } from "../../src/controllers/v1/crawl-status"; +import { scrapeController } from "../../src/controllers/v1/scrape"; +import { crawlStatusController } from "../../src/controllers/v1/crawl-status"; +import { mapController } from "../../src/controllers/v1/map"; +// import { crawlPreviewController } from "../../src/controllers/v1/crawlPreview"; +// import { crawlJobStatusPreviewController } from "../../src/controllers/v1/status"; +// import { searchController } from "../../src/controllers/v1/search"; +// import { crawlCancelController } from "../../src/controllers/v1/crawl-cancel"; +// import { keyAuthController } from "../../src/controllers/v1/keyAuth"; +// import { livenessController } from "../controllers/v1/liveness"; +// import { readinessController } from "../controllers/v1/readiness"; + +export const v1Router = express.Router(); + +v1Router.post("/v1/scrape", scrapeController); +v1Router.post("/v1/crawl", crawlController); +v1Router.get("/v1/crawl/:jobId", crawlStatusController); +// v1Router.post("/v1/crawlWebsitePreview", crawlPreviewController); +// v1Router.delete("/v1/crawl/cancel/:jobId", crawlCancelController); +// v1Router.get("/v1/checkJobStatus/:jobId", crawlJobStatusPreviewController); + +// // Auth route for key based authentication +// v1Router.get("/v1/keyAuth", keyAuthController); + +// // Search routes +// v0Router.post("/v1/search", searchController); + +// Health/Probe routes +// v1Router.get("/v1/health/liveness", livenessController); +// v1Router.get("/v1/health/readiness", readinessController); + +v1Router.post("/v1/map", mapController); \ No newline at end of file