feat(concurrency-limit): set limit based on plan
This commit is contained in:
@@ -171,7 +171,8 @@ export async function crawlController(req: Request, res: Response) {
|
|||||||
url,
|
url,
|
||||||
mode: "single_urls",
|
mode: "single_urls",
|
||||||
crawlerOptions: crawlerOptions,
|
crawlerOptions: crawlerOptions,
|
||||||
team_id: team_id,
|
team_id,
|
||||||
|
plan,
|
||||||
pageOptions: pageOptions,
|
pageOptions: pageOptions,
|
||||||
origin: req.body.origin ?? defaultOrigin,
|
origin: req.body.origin ?? defaultOrigin,
|
||||||
crawl_id: id,
|
crawl_id: id,
|
||||||
@@ -211,7 +212,8 @@ export async function crawlController(req: Request, res: Response) {
|
|||||||
url,
|
url,
|
||||||
mode: "single_urls",
|
mode: "single_urls",
|
||||||
crawlerOptions: crawlerOptions,
|
crawlerOptions: crawlerOptions,
|
||||||
team_id: team_id,
|
team_id,
|
||||||
|
plan,
|
||||||
pageOptions: pageOptions,
|
pageOptions: pageOptions,
|
||||||
origin: req.body.origin ?? defaultOrigin,
|
origin: req.body.origin ?? defaultOrigin,
|
||||||
crawl_id: id,
|
crawl_id: id,
|
||||||
|
|||||||
@@ -107,7 +107,8 @@ export async function crawlPreviewController(req: Request, res: Response) {
|
|||||||
url,
|
url,
|
||||||
mode: "single_urls",
|
mode: "single_urls",
|
||||||
crawlerOptions: crawlerOptions,
|
crawlerOptions: crawlerOptions,
|
||||||
team_id: team_id,
|
team_id,
|
||||||
|
plan,
|
||||||
pageOptions: pageOptions,
|
pageOptions: pageOptions,
|
||||||
origin: "website-preview",
|
origin: "website-preview",
|
||||||
crawl_id: id,
|
crawl_id: id,
|
||||||
@@ -121,7 +122,8 @@ export async function crawlPreviewController(req: Request, res: Response) {
|
|||||||
url,
|
url,
|
||||||
mode: "single_urls",
|
mode: "single_urls",
|
||||||
crawlerOptions: crawlerOptions,
|
crawlerOptions: crawlerOptions,
|
||||||
team_id: team_id,
|
team_id,
|
||||||
|
plan,
|
||||||
pageOptions: pageOptions,
|
pageOptions: pageOptions,
|
||||||
origin: "website-preview",
|
origin: "website-preview",
|
||||||
crawl_id: id,
|
crawl_id: id,
|
||||||
|
|||||||
@@ -60,6 +60,7 @@ export async function scrapeHelper(
|
|||||||
mode: "single_urls",
|
mode: "single_urls",
|
||||||
crawlerOptions,
|
crawlerOptions,
|
||||||
team_id,
|
team_id,
|
||||||
|
plan,
|
||||||
pageOptions,
|
pageOptions,
|
||||||
extractorOptions,
|
extractorOptions,
|
||||||
origin: req.body.origin ?? defaultOrigin,
|
origin: req.body.origin ?? defaultOrigin,
|
||||||
|
|||||||
@@ -106,6 +106,7 @@ export async function crawlController(
|
|||||||
url,
|
url,
|
||||||
mode: "single_urls",
|
mode: "single_urls",
|
||||||
team_id: req.auth.team_id,
|
team_id: req.auth.team_id,
|
||||||
|
plan: req.auth.plan,
|
||||||
crawlerOptions,
|
crawlerOptions,
|
||||||
pageOptions,
|
pageOptions,
|
||||||
origin: "api",
|
origin: "api",
|
||||||
@@ -138,6 +139,7 @@ export async function crawlController(
|
|||||||
mode: "single_urls",
|
mode: "single_urls",
|
||||||
crawlerOptions: crawlerOptions,
|
crawlerOptions: crawlerOptions,
|
||||||
team_id: req.auth.team_id,
|
team_id: req.auth.team_id,
|
||||||
|
plan: req.auth.plan,
|
||||||
pageOptions: pageOptions,
|
pageOptions: pageOptions,
|
||||||
origin: "api",
|
origin: "api",
|
||||||
crawl_id: id,
|
crawl_id: id,
|
||||||
|
|||||||
@@ -44,6 +44,7 @@ export async function scrapeController(
|
|||||||
mode: "single_urls",
|
mode: "single_urls",
|
||||||
crawlerOptions: {},
|
crawlerOptions: {},
|
||||||
team_id: req.auth.team_id,
|
team_id: req.auth.team_id,
|
||||||
|
plan: req.auth.plan,
|
||||||
pageOptions,
|
pageOptions,
|
||||||
extractorOptions,
|
extractorOptions,
|
||||||
origin: req.body.origin,
|
origin: req.body.origin,
|
||||||
|
|||||||
@@ -34,9 +34,10 @@ import {
|
|||||||
deleteJobPriority,
|
deleteJobPriority,
|
||||||
getJobPriority,
|
getJobPriority,
|
||||||
} from "../../src/lib/job-priority";
|
} from "../../src/lib/job-priority";
|
||||||
import { PlanType } from "../types";
|
import { PlanType, RateLimiterMode } from "../types";
|
||||||
import { getJobs } from "../../src/controllers/v1/crawl-status";
|
import { getJobs } from "../../src/controllers/v1/crawl-status";
|
||||||
import { configDotenv } from "dotenv";
|
import { configDotenv } from "dotenv";
|
||||||
|
import { getRateLimiterPoints } from "./rate-limiter";
|
||||||
configDotenv();
|
configDotenv();
|
||||||
|
|
||||||
if (process.env.ENV === "production") {
|
if (process.env.ENV === "production") {
|
||||||
@@ -131,9 +132,9 @@ const workerFun = async (
|
|||||||
if (job) {
|
if (job) {
|
||||||
const concurrencyLimiterKey = "concurrency-limiter:" + job.data?.team_id;
|
const concurrencyLimiterKey = "concurrency-limiter:" + job.data?.team_id;
|
||||||
|
|
||||||
if (job.data && job.data.team_id) {
|
if (job.data && job.data.team_id && job.data.plan) {
|
||||||
const concurrencyLimiterThrottledKey = "concurrency-limiter:" + job.data.team_id + ":throttled";
|
const concurrencyLimiterThrottledKey = "concurrency-limiter:" + job.data.team_id + ":throttled";
|
||||||
const concurrencyLimit = 10; // TODO: determine based on price id
|
const concurrencyLimit = getRateLimiterPoints(RateLimiterMode.Scrape, undefined, job.data.plan);
|
||||||
const now = Date.now();
|
const now = Date.now();
|
||||||
const stalledJobTimeoutMs = 2 * 60 * 1000;
|
const stalledJobTimeoutMs = 2 * 60 * 1000;
|
||||||
const throttledJobTimeoutMs = 10 * 60 * 1000;
|
const throttledJobTimeoutMs = 10 * 60 * 1000;
|
||||||
@@ -382,6 +383,7 @@ async function processJob(job: Job, token: string) {
|
|||||||
mode: "single_urls",
|
mode: "single_urls",
|
||||||
crawlerOptions: sc.crawlerOptions,
|
crawlerOptions: sc.crawlerOptions,
|
||||||
team_id: sc.team_id,
|
team_id: sc.team_id,
|
||||||
|
plan: job.data.plan,
|
||||||
pageOptions: sc.pageOptions,
|
pageOptions: sc.pageOptions,
|
||||||
origin: job.data.origin,
|
origin: job.data.origin,
|
||||||
crawl_id: job.data.crawl_id,
|
crawl_id: job.data.crawl_id,
|
||||||
|
|||||||
@@ -123,14 +123,18 @@ const testSuiteTokens = ["a01ccae", "6254cf9", "0f96e673", "23befa1b", "69141c4"
|
|||||||
|
|
||||||
const manual = ["69be9e74-7624-4990-b20d-08e0acc70cf6"];
|
const manual = ["69be9e74-7624-4990-b20d-08e0acc70cf6"];
|
||||||
|
|
||||||
export function getRateLimiter(
|
/**
 * Normalizes a plan identifier into the key used for rate-limit lookups and
 * rate-limiter key prefixes.
 *
 * Every hyphen is removed (e.g. "standard-new" becomes "standardnew"). A
 * missing or empty plan maps to "default".
 *
 * @param plan - Plan identifier; may be omitted.
 * @returns The hyphen-free plan key, or "default" when no plan is given.
 */
function makePlanKey(plan?: string) {
  // A string pattern passed to replace() only removes the first match, so a
  // global regex is used to strip all hyphens.
  return plan ? plan.replace(/-/g, "") : "default";
}
|
||||||
|
|
||||||
|
export function getRateLimiterPoints(
|
||||||
mode: RateLimiterMode,
|
mode: RateLimiterMode,
|
||||||
token: string,
|
token?: string,
|
||||||
plan?: string,
|
plan?: string,
|
||||||
teamId?: string
|
teamId?: string
|
||||||
) {
|
) {
|
||||||
|
|
||||||
if (testSuiteTokens.some(testToken => token.includes(testToken))) {
|
if (token && testSuiteTokens.some(testToken => token.includes(testToken))) {
|
||||||
return testSuiteRateLimiter;
|
return testSuiteRateLimiter;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -146,9 +150,17 @@ export function getRateLimiter(
|
|||||||
|
|
||||||
if (!rateLimitConfig) return serverRateLimiter;
|
if (!rateLimitConfig) return serverRateLimiter;
|
||||||
|
|
||||||
const planKey = plan ? plan.replace("-", "") : "default"; // "default"
|
|
||||||
const points =
|
const points =
|
||||||
rateLimitConfig[planKey] || rateLimitConfig.default || rateLimitConfig; // 5
|
rateLimitConfig[makePlanKey(plan)] || rateLimitConfig.default || rateLimitConfig; // 5
|
||||||
|
|
||||||
return createRateLimiter(`${mode}-${planKey}`, points);
|
return points;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the rate limiter to apply for a request.
 *
 * Delegates to getRateLimiterPoints. When that call yields a numeric point
 * budget, a limiter is created under the key `${mode}-${planKey}`. When it
 * yields something else, that value is returned unchanged: the visible parts
 * of getRateLimiterPoints return ready-made limiters (testSuiteRateLimiter
 * for test-suite tokens, serverRateLimiter when the mode has no config), and
 * those must not be passed to createRateLimiter as if they were points.
 *
 * NOTE(review): getRateLimiterPoints can also fall back to the whole
 * rateLimitConfig value when neither the plan key nor `default` resolves;
 * confirm that path is unreachable in practice, or make it return a number.
 *
 * @param mode - Rate limiter mode; also used as the limiter key prefix.
 * @param token - Optional API token, forwarded to getRateLimiterPoints.
 * @param plan - Optional plan identifier used to select the point budget.
 * @param teamId - Optional team id, forwarded to getRateLimiterPoints.
 * @returns A limiter for the mode/plan pair, or the special-case limiter
 *   selected by getRateLimiterPoints.
 */
export function getRateLimiter(
  mode: RateLimiterMode,
  token?: string,
  plan?: string,
  teamId?: string
) {
  const pointsOrLimiter = getRateLimiterPoints(mode, token, plan, teamId);

  // Special cases come back as an existing limiter, not a number; return
  // them directly.
  if (typeof pointsOrLimiter !== "number") {
    return pointsOrLimiter;
  }

  return createRateLimiter(`${mode}-${makePlanKey(plan)}`, pointsOrLimiter);
}
|
||||||
|
|||||||
@@ -28,6 +28,7 @@ export interface WebScraperOptions {
|
|||||||
pageOptions: any;
|
pageOptions: any;
|
||||||
extractorOptions?: any;
|
extractorOptions?: any;
|
||||||
team_id: string;
|
team_id: string;
|
||||||
|
plan: string;
|
||||||
origin?: string;
|
origin?: string;
|
||||||
crawl_id?: string;
|
crawl_id?: string;
|
||||||
sitemapped?: boolean;
|
sitemapped?: boolean;
|
||||||
|
|||||||
Reference in New Issue
Block a user