Fix LLMs.txt cache bug with subdomains and add bypass option (#1557)

* Fix LLMs.txt cache bug with subdomains and add bypass option (#1519)
  Co-Authored-By: hello@sideguide.dev <hello+firecrawl@sideguide.dev>
* Nick:
* Update LLMs.txt test file to use helper functions and concurrent tests
  Co-Authored-By: hello@sideguide.dev <hello+firecrawl@sideguide.dev>
* Remove LLMs.txt test file as requested
  Co-Authored-By: hello@sideguide.dev <hello+firecrawl@sideguide.dev>
* Change parameter name to 'cache' and keep 7-day expiration
  Co-Authored-By: hello@sideguide.dev <hello+firecrawl@sideguide.dev>
* Update generate-llmstxt-supabase.ts
* Update JS and Python SDKs to include cache parameter
  Co-Authored-By: hello@sideguide.dev <hello+firecrawl@sideguide.dev>
* Fix LLMs.txt cache implementation to use normalizeUrl and exact matching
  Co-Authored-By: hello@sideguide.dev <hello+firecrawl@sideguide.dev>
* Revert "Fix LLMs.txt cache implementation to use normalizeUrl and exact matching"
  This reverts commit d05b9964677b7b2384453329d2ac99d841467053.
* Nick:

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: hello@sideguide.dev <hello+firecrawl@sideguide.dev>
Co-authored-by: Nicolas <nicolascamara29@gmail.com>
2025-05-16 16:29:09 -03:00
parent ab30c8e4ac
commit 7ccbbec488
12 changed files with 47 additions and 9 deletions
@@ -19,6 +19,7 @@ interface GenerateLLMsTextServiceOptions {
  url: string;
  maxUrls: number;
  showFullText: boolean;
+  cache?: boolean;
  subId?: string;
 }

@@ -63,7 +64,7 @@ function limitLlmsTxtEntries(llmstxt: string, maxEntries: number): string {
 export async function performGenerateLlmsTxt(
  options: GenerateLLMsTextServiceOptions,
 ) {
-  const { generationId, teamId, url, maxUrls = 100, showFullText, subId } =
+  const { generationId, teamId, url, maxUrls = 100, showFullText, cache = true, subId } =
    options;
  const startTime = Date.now();
  const logger = _logger.child({
@@ -79,8 +80,8 @@ export async function performGenerateLlmsTxt(
    // Enforce max URL limit
    const effectiveMaxUrls = Math.min(maxUrls, 5000);

-    // Check cache first
-    const cachedResult = await getLlmsTextFromCache(url, effectiveMaxUrls);
+    // Check cache first, unless cache is set to false
+    const cachedResult = cache ? await getLlmsTextFromCache(url, effectiveMaxUrls) : null;
    if (cachedResult) {
      logger.info("Found cached LLMs text", { url });