search port

This commit is contained in:
Gergő Móricz
2024-08-15 20:10:43 +02:00
parent 86326f34e9
commit fc08ff450d
+20 -30
View File
@@ -9,6 +9,7 @@ import { search } from "../search";
 import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist";
 import { v4 as uuidv4 } from "uuid";
 import { Logger } from "../lib/logger";
+import { getScrapeQueue, scrapeQueueEvents } from "../services/queue-service";
 export async function searchHelper(
 jobId: string,
@@ -75,26 +76,28 @@ export async function searchHelper(
 // filter out social media links
-const a = new WebScraperDataProvider();
-await a.setOptions({
-jobId,
+const jobDatas = res.map(x => {
+const url = x.url;
+const uuid = uuidv4();
+return {
+name: uuid,
+data: {
+url,
 mode: "single_urls",
-urls: res.map((r) => r.url).slice(0, Math.min(searchOptions.limit ?? 5, 5)),
-crawlerOptions: {
-...crawlerOptions,
+crawlerOptions: crawlerOptions,
+team_id: team_id,
+pageOptions: pageOptions,
 },
-pageOptions: {
-...pageOptions,
-onlyMainContent: pageOptions?.onlyMainContent ?? true,
-fetchPageContent: pageOptions?.fetchPageContent ?? true,
-includeHtml: pageOptions?.includeHtml ?? false,
-removeTags: pageOptions?.removeTags ?? [],
-fallback: false,
-},
-});
-const docs = await a.getDocuments(false);
+opts: {
+jobId: uuid,
+priority: 10,
+}
+};
+})
+const jobs = await getScrapeQueue().addBulk(jobDatas);
+const docs = (await Promise.all(jobs.map(x => x.waitUntilFinished(scrapeQueueEvents, 60000)))).map(x => x[0]);
 if (docs.length === 0) {
 return { success: true, error: "No search results found", returnCode: 200 };
@@ -109,19 +112,6 @@ export async function searchHelper(
 return { success: true, error: "No page found", returnCode: 200, data: docs };
 }
-const billingResult = await billTeam(
-team_id,
-filteredDocs.length
-);
-if (!billingResult.success) {
-return {
-success: false,
-error:
-"Failed to bill team. Insufficient credits or subscription not found.",
-returnCode: 402,
-};
-}
 return {
 success: true,
 data: filteredDocs,