Update extract.ts

This commit is contained in:
Nicolas
2024-11-14 15:03:06 -05:00
parent 1b5f6a0959
commit 796cd0746d
+10 -4
View File
@@ -42,7 +42,8 @@ export async function extractController(
let docs: Document[] = []; let docs: Document[] = [];
const earlyReturn = false; const earlyReturn = false;
for (const url of req.body.urls) { // Process all URLs in parallel
const urlPromises = req.body.urls.map(async (url) => {
if (url.includes('/*')) { if (url.includes('/*')) {
// Handle glob pattern URLs // Handle glob pattern URLs
const baseUrl = url.replace('/*', ''); const baseUrl = url.replace('/*', '');
@@ -84,15 +85,20 @@ export async function extractController(
.slice(0, MAX_RANKING_LIMIT); .slice(0, MAX_RANKING_LIMIT);
} }
links.push(...mappedLinks); return mappedLinks;
} else { } else {
// Handle direct URLs without glob pattern // Handle direct URLs without glob pattern
if (!isUrlBlocked(url)) { if (!isUrlBlocked(url)) {
links.push(url); return [url];
} }
return [];
} }
} });
// Wait for all URL processing to complete and flatten results
const processedUrls = await Promise.all(urlPromises);
links.push(...processedUrls.flat());
// Scrape all links in parallel // Scrape all links in parallel
const scrapePromises = links.map(async (url) => { const scrapePromises = links.map(async (url) => {