[Feat] Added blocklist for social media URLs

This commit is contained in:
rafaelsideguide
2024-04-23 18:50:35 -03:00
parent b7f6b9be13
commit 849c0b6ebf
6 changed files with 101 additions and 0 deletions
+6
View File
@@ -2,6 +2,7 @@ import { Request, Response } from "express";
import { authenticateUser } from "./auth";
import { RateLimiterMode } from "../../src/types";
import { addWebScraperJob } from "../../src/services/queue-jobs";
import { isUrlBlocked } from "../../src/scraper/WebScraper/utils/blocklist";
export async function crawlPreviewController(req: Request, res: Response) {
try {
@@ -18,6 +19,11 @@ export async function crawlPreviewController(req: Request, res: Response) {
if (!url) {
return res.status(400).json({ error: "Url is required" });
}
if (isUrlBlocked(url)) {
return res.status(403).json({ error: "URL is blocked due to policy restrictions" });
}
const mode = req.body.mode ?? "crawl";
const crawlerOptions = req.body.crawlerOptions ?? {};
const pageOptions = req.body.pageOptions ?? { onlyMainContent: false };