[feat] improved the scraping of Google Drive PDFs

2024-06-04 17:47:28 -03:00
parent a547f9a78e
commit b5045d1661
2 changed files with 18 additions and 8 deletions
@@ -331,8 +331,13 @@ export async function scrapSingleUrl(
    // Check for custom scraping conditions
    const customScraperResult = await handleCustomScraping(text, url);

-    if(customScraperResult){
-      customScrapedContent  = await scrapWithFireEngine(customScraperResult.url, customScraperResult.wait_after_load)
+    if (customScraperResult){
+      switch (customScraperResult.scraper) {
+        case "fire-engine":
+          customScrapedContent  = await scrapWithFireEngine(customScraperResult.url, customScraperResult.wait_after_load)
+        case "pdf":
+          customScrapedContent  = { html: await fetchAndProcessPdf(customScraperResult.url), screenshot }
+      }
    }

    if (customScrapedContent) {