Nick: streaming callback experimental
This commit is contained in:
@@ -3,12 +3,15 @@ import { logger as _logger } from "../logger";
|
|||||||
|
|
||||||
export enum ExtractStep {
|
export enum ExtractStep {
|
||||||
INITIAL = "initial",
|
INITIAL = "initial",
|
||||||
|
MAP = "map",
|
||||||
|
MAP_RERANK = "map-rerank",
|
||||||
MULTI_ENTITY = "multi-entity",
|
MULTI_ENTITY = "multi-entity",
|
||||||
MULTI_ENTITY_SCRAPE = "multi-entity-scrape",
|
MULTI_ENTITY_SCRAPE = "multi-entity-scrape",
|
||||||
MULTI_ENTITY_EXTRACT = "multi-entity-extract",
|
MULTI_ENTITY_EXTRACT = "multi-entity-extract",
|
||||||
SCRAPE = "scrape",
|
SCRAPE = "scrape",
|
||||||
MAP = "map",
|
|
||||||
EXTRACT = "extract",
|
EXTRACT = "extract",
|
||||||
|
|
||||||
COMPLETE = "complete",
|
COMPLETE = "complete",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -170,6 +170,8 @@ export async function performExtraction(
|
|||||||
],
|
],
|
||||||
});
|
});
|
||||||
|
|
||||||
|
let startMap = Date.now();
|
||||||
|
let aggMapLinks: string[] = [];
|
||||||
// Process URLs
|
// Process URLs
|
||||||
const urlPromises = request.urls.map((url) =>
|
const urlPromises = request.urls.map((url) =>
|
||||||
processUrl(
|
processUrl(
|
||||||
@@ -185,8 +187,19 @@ export async function performExtraction(
|
|||||||
schema: request.schema,
|
schema: request.schema,
|
||||||
},
|
},
|
||||||
urlTraces,
|
urlTraces,
|
||||||
),
|
(links: string[]) => {
|
||||||
);
|
aggMapLinks.push(...links);
|
||||||
|
updateExtract(extractId, {
|
||||||
|
steps: [
|
||||||
|
{
|
||||||
|
step: ExtractStep.MAP,
|
||||||
|
startedAt: startMap,
|
||||||
|
finishedAt: Date.now(),
|
||||||
|
discoveredLinks: aggMapLinks,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
});
|
||||||
|
}));
|
||||||
|
|
||||||
const processedUrls = await Promise.all(urlPromises);
|
const processedUrls = await Promise.all(urlPromises);
|
||||||
const links = processedUrls.flat().filter((url) => url);
|
const links = processedUrls.flat().filter((url) => url);
|
||||||
@@ -205,8 +218,8 @@ export async function performExtraction(
|
|||||||
status: "processing",
|
status: "processing",
|
||||||
steps: [
|
steps: [
|
||||||
{
|
{
|
||||||
step: ExtractStep.MAP,
|
step: ExtractStep.MAP_RERANK,
|
||||||
startedAt: Date.now(),
|
startedAt: startMap,
|
||||||
finishedAt: Date.now(),
|
finishedAt: Date.now(),
|
||||||
discoveredLinks: links,
|
discoveredLinks: links,
|
||||||
},
|
},
|
||||||
@@ -221,6 +234,7 @@ export async function performExtraction(
|
|||||||
// if so, it splits the results into 2 types of completions:
|
// if so, it splits the results into 2 types of completions:
|
||||||
// 1. the first one is a completion that will extract the array of items
|
// 1. the first one is a completion that will extract the array of items
|
||||||
// 2. the second one is multiple completions that will extract the items from the array
|
// 2. the second one is multiple completions that will extract the items from the array
|
||||||
|
let startAnalyze = Date.now();
|
||||||
const { isMultiEntity, multiEntityKeys, reasoning, keyIndicators } =
|
const { isMultiEntity, multiEntityKeys, reasoning, keyIndicators } =
|
||||||
await analyzeSchemaAndPrompt(links, request.schema, request.prompt ?? "");
|
await analyzeSchemaAndPrompt(links, request.schema, request.prompt ?? "");
|
||||||
|
|
||||||
@@ -239,7 +253,7 @@ export async function performExtraction(
|
|||||||
steps: [
|
steps: [
|
||||||
{
|
{
|
||||||
step: ExtractStep.MULTI_ENTITY,
|
step: ExtractStep.MULTI_ENTITY,
|
||||||
startedAt: Date.now(),
|
startedAt: startAnalyze,
|
||||||
finishedAt: Date.now(),
|
finishedAt: Date.now(),
|
||||||
discoveredLinks: [],
|
discoveredLinks: [],
|
||||||
},
|
},
|
||||||
@@ -254,12 +268,14 @@ export async function performExtraction(
|
|||||||
steps: [
|
steps: [
|
||||||
{
|
{
|
||||||
step: ExtractStep.MULTI_ENTITY_SCRAPE,
|
step: ExtractStep.MULTI_ENTITY_SCRAPE,
|
||||||
startedAt: Date.now(),
|
startedAt: startAnalyze,
|
||||||
finishedAt: Date.now(),
|
finishedAt: Date.now(),
|
||||||
discoveredLinks: links,
|
discoveredLinks: links,
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
});
|
});
|
||||||
|
|
||||||
|
let startScrape = Date.now();
|
||||||
const scrapePromises = links.map((url) => {
|
const scrapePromises = links.map((url) => {
|
||||||
if (!docsMap.has(url)) {
|
if (!docsMap.has(url)) {
|
||||||
return scrapeDocument(
|
return scrapeDocument(
|
||||||
@@ -280,6 +296,20 @@ export async function performExtraction(
|
|||||||
(doc): doc is Document => doc !== null,
|
(doc): doc is Document => doc !== null,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let endScrape = Date.now();
|
||||||
|
|
||||||
|
await updateExtract(extractId, {
|
||||||
|
status: "processing",
|
||||||
|
steps: [
|
||||||
|
{
|
||||||
|
step: ExtractStep.MULTI_ENTITY_SCRAPE,
|
||||||
|
startedAt: startScrape,
|
||||||
|
finishedAt: endScrape,
|
||||||
|
discoveredLinks: links,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
});
|
||||||
|
|
||||||
for (const doc of multyEntityDocs) {
|
for (const doc of multyEntityDocs) {
|
||||||
if (doc?.metadata?.url) {
|
if (doc?.metadata?.url) {
|
||||||
docsMap.set(doc.metadata.url, doc);
|
docsMap.set(doc.metadata.url, doc);
|
||||||
@@ -352,7 +382,7 @@ export async function performExtraction(
|
|||||||
steps: [
|
steps: [
|
||||||
{
|
{
|
||||||
step: ExtractStep.MULTI_ENTITY_EXTRACT,
|
step: ExtractStep.MULTI_ENTITY_EXTRACT,
|
||||||
startedAt: Date.now(),
|
startedAt: startScrape,
|
||||||
finishedAt: Date.now(),
|
finishedAt: Date.now(),
|
||||||
discoveredLinks: [doc.metadata.url || doc.metadata.sourceURL || ""],
|
discoveredLinks: [doc.metadata.url || doc.metadata.sourceURL || ""],
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ interface ProcessUrlOptions {
|
|||||||
export async function processUrl(
|
export async function processUrl(
|
||||||
options: ProcessUrlOptions,
|
options: ProcessUrlOptions,
|
||||||
urlTraces: URLTrace[],
|
urlTraces: URLTrace[],
|
||||||
|
updateExtractCallback: (links: string[]) => void,
|
||||||
): Promise<string[]> {
|
): Promise<string[]> {
|
||||||
const trace: URLTrace = {
|
const trace: URLTrace = {
|
||||||
url: options.url,
|
url: options.url,
|
||||||
@@ -160,6 +161,8 @@ export async function processUrl(
|
|||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
|
updateExtractCallback(mappedLinks.map((x) => x.url));
|
||||||
|
|
||||||
|
|
||||||
// Perform reranking using either prompt or schema
|
// Perform reranking using either prompt or schema
|
||||||
let searchQuery = "";
|
let searchQuery = "";
|
||||||
|
|||||||
Reference in New Issue
Block a user