// File: firecrawl/apps/api/src/lib/cache.ts
// 71 lines · 1.7 KiB · TypeScript
// (blame) 2024-11-14 19:47:12 +01:00
import IORedis from "ioredis";
import { ScrapeOptions } from "../controllers/v1/types";
import { InternalOptions } from "../scraper/scrapeURL";
import { logger as _logger } from "./logger";
// (blame) 2024-12-11 19:46:11 -03:00
// Module-scoped logger; every record emitted from this file is tagged with
// `module: "cache"`.
const logger = _logger.child({ module: "cache" });

/**
 * Redis client used for the scrape cache.
 *
 * Created only when the `CACHE_REDIS_URL` environment variable is set to a
 * non-empty value; otherwise this is `null`, and the helpers in this module
 * treat that as "caching disabled".
 */
export const cacheRedis = process.env.CACHE_REDIS_URL
  ? new IORedis(process.env.CACHE_REDIS_URL, {
      // NOTE(review): per the ioredis docs, `null` removes the per-command
      // retry cap, so commands wait for the connection to come back instead
      // of being rejected after a fixed number of retries — confirm this is
      // the intended behaviour for the cache.
      maxRetriesPerRequest: null,
    })
  : null;
/**
 * Builds the Redis key under which a scrape of `url` is cached.
 *
 * @param url - The URL being scraped; embedded verbatim in the key.
 * @param scrapeOptions - Scrape options; `fastMode` and a non-empty `actions`
 *   list disable caching, and `waitFor` becomes part of the key.
 * @param internalOptions - Internal options; `v0CrawlOnlyUrls`, `atsv`, and a
 *   `forceEngine` that includes `"cache"` disable caching.
 * @returns A key of the form `cache:<url>:waitFor:<waitFor>`, or `null` when
 *   no cache Redis is configured or the options disqualify caching.
 */
export function cacheKey(
  url: string,
  scrapeOptions: ScrapeOptions,
  internalOptions: InternalOptions,
): string | null {
  // No Redis connection configured: caching is disabled entirely.
  if (!cacheRedis) return null;

  const hasActions = (scrapeOptions.actions?.length ?? 0) > 0;

  // These options disqualify a request from being cached.
  if (
    internalOptions.v0CrawlOnlyUrls ||
    internalOptions.forceEngine?.includes("cache") ||
    scrapeOptions.fastMode ||
    internalOptions.atsv ||
    hasActions
  ) {
    return null;
  }

  return `cache:${url}:waitFor:${scrapeOptions.waitFor}`;
}
/**
 * Shape of a value stored in the scrape cache. Entries are serialized with
 * `JSON.stringify` when saved and restored with `JSON.parse` when read.
 */
export type CacheEntry = {
  /** URL associated with the cached document. */
  url: string;
  /** Cached HTML body. */
  html: string;
  /** Status code recorded alongside the HTML. */
  statusCode: number;
  /** Optional error message recorded alongside the entry. */
  error?: string;
};
/** Lifetime of a cached entry, in seconds (4 hours), passed to Redis `EX`. */
const CACHE_TTL_SECONDS = 4 * 60 * 60;

/** Entries whose HTML is shorter than this many characters are not cached. */
const MIN_CACHEABLE_HTML_LENGTH = 100;

/**
 * Stores `entry` in the cache under `key` with a 4-hour expiry.
 *
 * This is best-effort: it does nothing when no cache Redis is configured,
 * skips (with a warning) entries whose HTML is missing or shorter than
 * `MIN_CACHEABLE_HTML_LENGTH`, and logs rather than throws when the Redis
 * write fails.
 *
 * @param key - Redis key to write.
 * @param entry - Entry to serialize as JSON and store.
 */
export async function saveEntryToCache(
  key: string,
  entry: CacheEntry,
): Promise<void> {
  if (!cacheRedis) return;

  if (!entry.html || entry.html.length < MIN_CACHEABLE_HTML_LENGTH) {
    logger.warn("Skipping cache save for short HTML", {
      key,
      htmlLength: entry.html?.length,
    });
    return;
  }

  try {
    await cacheRedis.set(key, JSON.stringify(entry), "EX", CACHE_TTL_SECONDS);
  } catch (error) {
    // A failed cache write must not fail the caller; just record it.
    logger.warn("Failed to save to cache", { key, error });
  }
}
// (blame) 2024-12-11 19:46:11 -03:00
/**
 * Runtime check that a parsed cache payload has the required `CacheEntry`
 * fields (`url`, `html`, `statusCode`) with the expected primitive types.
 */
function isCacheEntry(value: unknown): value is CacheEntry {
  if (typeof value !== "object" || value === null) return false;
  const candidate = value as Record<string, unknown>;
  return (
    typeof candidate.url === "string" &&
    typeof candidate.html === "string" &&
    typeof candidate.statusCode === "number"
  );
}

/**
 * Reads and deserializes the cache entry stored under `key`.
 *
 * @param key - Redis key to read.
 * @returns The cached entry, or `null` when no cache Redis is configured, the
 *   key is absent, the stored value is not a well-formed entry, or the read
 *   fails (failures are logged, never thrown).
 */
export async function getEntryFromCache(
  key: string,
): Promise<CacheEntry | null> {
  if (!cacheRedis) return null;

  try {
    const raw = await cacheRedis.get(key);
    // Cache miss: nothing stored under this key.
    if (raw == null) return null;

    // JSON.parse yields an arbitrary value; validate before trusting it.
    const parsed: unknown = JSON.parse(raw);
    if (!isCacheEntry(parsed)) {
      logger.warn("Discarding malformed cache entry", { key });
      return null;
    }
    return parsed;
  } catch (error) {
    logger.warn("Failed to get from cache", { key, error });
    return null;
  }
}