This commit is contained in:
rafaelsideguide
2024-09-10 09:09:43 -03:00
6 changed files with 38 additions and 27 deletions
+8 -1
View File
@@ -30,7 +30,14 @@ export const url = z.preprocess(
"URL must have a valid top-level domain or be a valid path" "URL must have a valid top-level domain or be a valid path"
) )
.refine( .refine(
(x) => checkUrl(x as string), (x) => {
try {
checkUrl(x as string)
return true;
} catch (_) {
return false;
}
},
"Invalid URL" "Invalid URL"
) )
.refine( .refine(
+4
View File
@@ -201,6 +201,10 @@ if (cluster.isMaster) {
Sentry.setupExpressErrorHandler(app); Sentry.setupExpressErrorHandler(app);
app.use((err: unknown, req: Request<{}, ErrorResponse, undefined>, res: ResponseWithSentry<ErrorResponse>, next: NextFunction) => { app.use((err: unknown, req: Request<{}, ErrorResponse, undefined>, res: ResponseWithSentry<ErrorResponse>, next: NextFunction) => {
if (err instanceof SyntaxError && 'status' in err && err.status === 400 && 'body' in err) {
return res.status(400).json({ success: false, error: 'Bad request, malformed JSON' });
}
const id = res.sentry ?? uuidv4(); const id = res.sentry ?? uuidv4();
let verbose = JSON.stringify(err); let verbose = JSON.stringify(err);
if (verbose === "{}") { if (verbose === "{}") {
+1 -1
View File
@@ -83,7 +83,7 @@ function idempotencyMiddleware(req: Request, res: Response, next: NextFunction)
} }
function blocklistMiddleware(req: Request, res: Response, next: NextFunction) { function blocklistMiddleware(req: Request, res: Response, next: NextFunction) {
if (req.body.url && isUrlBlocked(req.body.url)) { if (typeof req.body.url === "string" && isUrlBlocked(req.body.url)) {
if (!res.headersSent) { if (!res.headersSent) {
return res.status(403).json({ success: false, error: "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions." }); return res.status(403).json({ success: false, error: "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions." });
} }
@@ -55,7 +55,7 @@ export async function scrapWithFireEngine({
try { try {
const reqParams = await generateRequestParams(url); const reqParams = await generateRequestParams(url);
let waitParam = reqParams["params"]?.wait ?? waitFor; let waitParam = reqParams["params"]?.wait ?? waitFor;
let engineParam = reqParams["params"]?.engine ?? reqParams["params"]?.fireEngineOptions?.engine ?? fireEngineOptions?.engine ?? "playwright"; let engineParam = reqParams["params"]?.engine ?? reqParams["params"]?.fireEngineOptions?.engine ?? fireEngineOptions?.engine ?? "chrome-cdp";
let screenshotParam = reqParams["params"]?.screenshot ?? screenshot; let screenshotParam = reqParams["params"]?.screenshot ?? screenshot;
let fullPageScreenshotParam = reqParams["params"]?.fullPageScreenshot ?? fullPageScreenshot; let fullPageScreenshotParam = reqParams["params"]?.fullPageScreenshot ?? fullPageScreenshot;
let fireEngineOptionsParam : FireEngineOptions = reqParams["params"]?.fireEngineOptions ?? fireEngineOptions; let fireEngineOptionsParam : FireEngineOptions = reqParams["params"]?.fireEngineOptions ?? fireEngineOptions;
@@ -96,15 +96,15 @@ function getScrapingFallbackOrder(
"fetch", "fetch",
].filter(Boolean); ].filter(Boolean);
if (isWaitPresent || isScreenshotPresent || isHeadersPresent) { // if (isWaitPresent || isScreenshotPresent || isHeadersPresent) {
defaultOrder = [ // defaultOrder = [
"fire-engine", // "fire-engine",
useFireEngine ? undefined : "playwright", // useFireEngine ? undefined : "playwright",
...defaultOrder.filter( // ...defaultOrder.filter(
(scraper) => scraper !== "fire-engine" && scraper !== "playwright" // (scraper) => scraper !== "fire-engine" && scraper !== "playwright"
), // ),
].filter(Boolean); // ].filter(Boolean);
} // }
const filteredDefaultOrder = defaultOrder.filter( const filteredDefaultOrder = defaultOrder.filter(
(scraper: (typeof baseScrapers)[number]) => (scraper: (typeof baseScrapers)[number]) =>
+8 -8
View File
@@ -6,7 +6,7 @@ const RATE_LIMITS = {
crawl: { crawl: {
default: 3, default: 3,
free: 2, free: 2,
starter: 3, starter: 10,
standard: 5, standard: 5,
standardOld: 40, standardOld: 40,
scale: 50, scale: 50,
@@ -19,9 +19,9 @@ const RATE_LIMITS = {
scrape: { scrape: {
default: 20, default: 20,
free: 10, free: 10,
starter: 20, starter: 100,
standard: 100, standard: 100,
standardOld: 40, standardOld: 100,
scale: 500, scale: 500,
hobby: 20, hobby: 20,
standardNew: 100, standardNew: 100,
@@ -32,8 +32,8 @@ const RATE_LIMITS = {
search: { search: {
default: 20, default: 20,
free: 5, free: 5,
starter: 20, starter: 50,
standard: 40, standard: 50,
standardOld: 40, standardOld: 40,
scale: 500, scale: 500,
hobby: 10, hobby: 10,
@@ -45,9 +45,9 @@ const RATE_LIMITS = {
map:{ map:{
default: 20, default: 20,
free: 5, free: 5,
starter: 20, starter: 50,
standard: 40, standard: 50,
standardOld: 40, standardOld: 50,
scale: 500, scale: 500,
hobby: 10, hobby: 10,
standardNew: 50, standardNew: 50,