Merge branch 'main' of https://github.com/mendableai/firecrawl
This commit is contained in:
@@ -30,7 +30,14 @@ export const url = z.preprocess(
|
|||||||
"URL must have a valid top-level domain or be a valid path"
|
"URL must have a valid top-level domain or be a valid path"
|
||||||
)
|
)
|
||||||
.refine(
|
.refine(
|
||||||
(x) => checkUrl(x as string),
|
(x) => {
|
||||||
|
try {
|
||||||
|
checkUrl(x as string)
|
||||||
|
return true;
|
||||||
|
} catch (_) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
},
|
||||||
"Invalid URL"
|
"Invalid URL"
|
||||||
)
|
)
|
||||||
.refine(
|
.refine(
|
||||||
|
|||||||
+11
-7
@@ -201,16 +201,20 @@ if (cluster.isMaster) {
|
|||||||
Sentry.setupExpressErrorHandler(app);
|
Sentry.setupExpressErrorHandler(app);
|
||||||
|
|
||||||
app.use((err: unknown, req: Request<{}, ErrorResponse, undefined>, res: ResponseWithSentry<ErrorResponse>, next: NextFunction) => {
|
app.use((err: unknown, req: Request<{}, ErrorResponse, undefined>, res: ResponseWithSentry<ErrorResponse>, next: NextFunction) => {
|
||||||
|
if (err instanceof SyntaxError && 'status' in err && err.status === 400 && 'body' in err) {
|
||||||
|
return res.status(400).json({ success: false, error: 'Bad request, malformed JSON' });
|
||||||
|
}
|
||||||
|
|
||||||
const id = res.sentry ?? uuidv4();
|
const id = res.sentry ?? uuidv4();
|
||||||
let verbose = JSON.stringify(err);
|
let verbose = JSON.stringify(err);
|
||||||
if (verbose === "{}") {
|
if (verbose === "{}") {
|
||||||
if (err instanceof Error) {
|
if (err instanceof Error) {
|
||||||
verbose = JSON.stringify({
|
verbose = JSON.stringify({
|
||||||
message: err.message,
|
message: err.message,
|
||||||
name: err.name,
|
name: err.name,
|
||||||
stack: err.stack,
|
stack: err.stack,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Logger.error("Error occurred in request! (" + req.path + ") -- ID " + id + " -- " + verbose);
|
Logger.error("Error occurred in request! (" + req.path + ") -- ID " + id + " -- " + verbose);
|
||||||
|
|||||||
@@ -83,7 +83,7 @@ function idempotencyMiddleware(req: Request, res: Response, next: NextFunction)
|
|||||||
}
|
}
|
||||||
|
|
||||||
function blocklistMiddleware(req: Request, res: Response, next: NextFunction) {
|
function blocklistMiddleware(req: Request, res: Response, next: NextFunction) {
|
||||||
if (req.body.url && isUrlBlocked(req.body.url)) {
|
if (typeof req.body.url === "string" && isUrlBlocked(req.body.url)) {
|
||||||
if (!res.headersSent) {
|
if (!res.headersSent) {
|
||||||
return res.status(403).json({ success: false, error: "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions." });
|
return res.status(403).json({ success: false, error: "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions." });
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -55,7 +55,7 @@ export async function scrapWithFireEngine({
|
|||||||
try {
|
try {
|
||||||
const reqParams = await generateRequestParams(url);
|
const reqParams = await generateRequestParams(url);
|
||||||
let waitParam = reqParams["params"]?.wait ?? waitFor;
|
let waitParam = reqParams["params"]?.wait ?? waitFor;
|
||||||
let engineParam = reqParams["params"]?.engine ?? reqParams["params"]?.fireEngineOptions?.engine ?? fireEngineOptions?.engine ?? "playwright";
|
let engineParam = reqParams["params"]?.engine ?? reqParams["params"]?.fireEngineOptions?.engine ?? fireEngineOptions?.engine ?? "chrome-cdp";
|
||||||
let screenshotParam = reqParams["params"]?.screenshot ?? screenshot;
|
let screenshotParam = reqParams["params"]?.screenshot ?? screenshot;
|
||||||
let fullPageScreenshotParam = reqParams["params"]?.fullPageScreenshot ?? fullPageScreenshot;
|
let fullPageScreenshotParam = reqParams["params"]?.fullPageScreenshot ?? fullPageScreenshot;
|
||||||
let fireEngineOptionsParam : FireEngineOptions = reqParams["params"]?.fireEngineOptions ?? fireEngineOptions;
|
let fireEngineOptionsParam : FireEngineOptions = reqParams["params"]?.fireEngineOptions ?? fireEngineOptions;
|
||||||
|
|||||||
@@ -96,15 +96,15 @@ function getScrapingFallbackOrder(
|
|||||||
"fetch",
|
"fetch",
|
||||||
].filter(Boolean);
|
].filter(Boolean);
|
||||||
|
|
||||||
if (isWaitPresent || isScreenshotPresent || isHeadersPresent) {
|
// if (isWaitPresent || isScreenshotPresent || isHeadersPresent) {
|
||||||
defaultOrder = [
|
// defaultOrder = [
|
||||||
"fire-engine",
|
// "fire-engine",
|
||||||
useFireEngine ? undefined : "playwright",
|
// useFireEngine ? undefined : "playwright",
|
||||||
...defaultOrder.filter(
|
// ...defaultOrder.filter(
|
||||||
(scraper) => scraper !== "fire-engine" && scraper !== "playwright"
|
// (scraper) => scraper !== "fire-engine" && scraper !== "playwright"
|
||||||
),
|
// ),
|
||||||
].filter(Boolean);
|
// ].filter(Boolean);
|
||||||
}
|
// }
|
||||||
|
|
||||||
const filteredDefaultOrder = defaultOrder.filter(
|
const filteredDefaultOrder = defaultOrder.filter(
|
||||||
(scraper: (typeof baseScrapers)[number]) =>
|
(scraper: (typeof baseScrapers)[number]) =>
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ const RATE_LIMITS = {
|
|||||||
crawl: {
|
crawl: {
|
||||||
default: 3,
|
default: 3,
|
||||||
free: 2,
|
free: 2,
|
||||||
starter: 3,
|
starter: 10,
|
||||||
standard: 5,
|
standard: 5,
|
||||||
standardOld: 40,
|
standardOld: 40,
|
||||||
scale: 50,
|
scale: 50,
|
||||||
@@ -19,9 +19,9 @@ const RATE_LIMITS = {
|
|||||||
scrape: {
|
scrape: {
|
||||||
default: 20,
|
default: 20,
|
||||||
free: 10,
|
free: 10,
|
||||||
starter: 20,
|
starter: 100,
|
||||||
standard: 100,
|
standard: 100,
|
||||||
standardOld: 40,
|
standardOld: 100,
|
||||||
scale: 500,
|
scale: 500,
|
||||||
hobby: 20,
|
hobby: 20,
|
||||||
standardNew: 100,
|
standardNew: 100,
|
||||||
@@ -32,8 +32,8 @@ const RATE_LIMITS = {
|
|||||||
search: {
|
search: {
|
||||||
default: 20,
|
default: 20,
|
||||||
free: 5,
|
free: 5,
|
||||||
starter: 20,
|
starter: 50,
|
||||||
standard: 40,
|
standard: 50,
|
||||||
standardOld: 40,
|
standardOld: 40,
|
||||||
scale: 500,
|
scale: 500,
|
||||||
hobby: 10,
|
hobby: 10,
|
||||||
@@ -45,9 +45,9 @@ const RATE_LIMITS = {
|
|||||||
map:{
|
map:{
|
||||||
default: 20,
|
default: 20,
|
||||||
free: 5,
|
free: 5,
|
||||||
starter: 20,
|
starter: 50,
|
||||||
standard: 40,
|
standard: 50,
|
||||||
standardOld: 40,
|
standardOld: 50,
|
||||||
scale: 500,
|
scale: 500,
|
||||||
hobby: 10,
|
hobby: 10,
|
||||||
standardNew: 50,
|
standardNew: 50,
|
||||||
|
|||||||
Reference in New Issue
Block a user