import { areMergeable } from "./merge-null-val-objs";
import { transformArrayToObject } from "./transform-array-to-obj";

/**
 * One source's extraction output paired with the URL it came from.
 *
 * `transformed` is a plain array when the extraction produced array output,
 * otherwise it is the keyed object returned by `transformArrayToObject`.
 */
interface TransformedResult {
  transformed: { [key: string]: any[] } | any[];
  url: string;
}

// NOTE(review): the `SourceTracker` class header, its field declarations and
// its constructor are outside the visible hunks. The members below belong
// inside that class and rely on `this.transformedResults`,
// `this.preDedupeSourceMap` and the module-level `logger`.

  /**
   * Transform raw extraction results into a format that preserves source information.
   *
   * The first result decides the mode: if its `extract` is an array, every
   * extract is stored as-is; otherwise each extract is passed through
   * `transformArrayToObject` individually.
   *
   * @param extractionResults raw extracts, each tagged with its source URL
   * @param schema schema forwarded to `transformArrayToObject` (object mode only)
   * @param withTransform when true, return the combined extracts; when false,
   *   return the per-source results stored on the tracker
   * @returns the combined extracts (flattened one level in array mode, merged
   *   by `transformArrayToObject` in object mode) or the per-source results
   */
  transformResults(
    extractionResults: { extract: any; url: string }[],
    schema: any,
    withTransform: boolean = true,
  ) {
    const isArrayOutput = Array.isArray(extractionResults[0]?.extract);

    this.transformedResults = extractionResults.map(({ extract, url }) => ({
      transformed: isArrayOutput
        ? extract
        : transformArrayToObject(schema, [extract]),
      url,
    }));

    if (!withTransform) {
      return this.transformedResults;
    }

    if (isArrayOutput) {
      // Concatenate every source's array into a single flat list.
      return extractionResults.flatMap(({ extract }) => extract);
    }

    // Combine all extracts and transform them together.
    return transformArrayToObject(
      schema,
      extractionResults.map(({ extract }) => extract),
    );
  }

  /**
   * Track sources for each item before deduplication.
   *
   * For every item, stores under `JSON.stringify(item)` the URLs of all
   * sources whose transformed output contains a structurally identical item.
   * Accepts either a flat array of items or an object of keyed item arrays;
   * object entries whose value is not an array are skipped.
   *
   * Failures are logged and swallowed so tracking never breaks extraction.
   */
  trackPreDeduplicationSources(multiEntityResult: { [key: string]: any[] } | any[]) {
    try {
      if (Array.isArray(multiEntityResult)) {
        this.recordPreDedupeSources(multiEntityResult);
        return;
      }

      for (const [key, items] of Object.entries(multiEntityResult)) {
        // A non-array field must not abort tracking for the remaining keys.
        if (Array.isArray(items)) {
          this.recordPreDedupeSources(items, key);
        }
      }
    } catch (error) {
      logger.error(`Failed to track pre-deduplication sources`, { error });
    }
  }

  /**
   * Map sources to final deduplicated/merged items.
   *
   * @param multiEntityResult final items, as a flat array or keyed arrays
   * @param multiEntityKeys keys to inspect when `multiEntityResult` is an
   *   object; not read for array results
   * @returns a map from `[index]` (array mode) or `key[index]` (object mode)
   *   to the URLs of every source holding an item `areMergeable` with that
   *   final item; `{}` if anything throws
   */
  mapSourcesToFinalItems(
    multiEntityResult: { [key: string]: any[] } | any[],
    multiEntityKeys: string[],
  ): Record<string, string[]> {
    try {
      const sources: Record<string, string[]> = {};

      if (Array.isArray(multiEntityResult)) {
        multiEntityResult.forEach((item: any, finalIndex: number) => {
          sources[`[${finalIndex}]`] = this.findMergeableSources(item);
        });
        return sources;
      }

      for (const key of multiEntityKeys) {
        const items = multiEntityResult[key];
        if (!Array.isArray(items)) {
          continue;
        }
        items.forEach((item: any, finalIndex: number) => {
          sources[`${key}[${finalIndex}]`] = this.findMergeableSources(item, key);
        });
      }

      return sources;
    } catch (error) {
      logger.error(`Failed to map sources to final items`, { error });
      return {};
    }
  }

  /**
   * Items one source contributed: the whole array when `key` is omitted
   * (array mode), otherwise the array stored under `key` (object mode).
   * Returns an empty list when the stored shape does not match the mode.
   */
  private sourceItems(result: TransformedResult, key?: string): any[] {
    const { transformed } = result;
    if (key === undefined) {
      return Array.isArray(transformed) ? transformed : [];
    }
    // `transformed` originates from an `any` extract, so it can be null at runtime.
    if (transformed == null || Array.isArray(transformed)) {
      return [];
    }
    const items = transformed[key];
    return Array.isArray(items) ? items : [];
  }

  /** Record, for each item, which sources contain an identical serialised item. */
  private recordPreDedupeSources(items: any[], key?: string): void {
    // Serialise each source's items once rather than once per final item.
    const serializedBySource = this.transformedResults.map(result => ({
      url: result.url,
      serialized: new Set(
        this.sourceItems(result, key).map((sourceItem: any) => JSON.stringify(sourceItem)),
      ),
    }));

    items.forEach((item: any) => {
      const itemKey = JSON.stringify(item);
      const matchingSources = serializedBySource
        .filter(source => source.serialized.has(itemKey))
        .map(source => source.url);
      this.preDedupeSourceMap.set(itemKey, matchingSources);
    });
  }

  /** Unique URLs of sources holding at least one item mergeable with `item`. */
  private findMergeableSources(item: any, key?: string): string[] {
    const urls = new Set<string>();
    for (const result of this.transformedResults) {
      // Stops at the first match per source; assumes `areMergeable` is a
      // side-effect-free predicate — TODO confirm.
      const hasMatch = this.sourceItems(result, key).some((originalItem: any) =>
        areMergeable(item, originalItem),
      );
      if (hasMatch) {
        urls.add(result.url);
      }
    }
    return Array.from(urls);
  }