Update source-tracker.ts
This commit is contained in:
@@ -3,7 +3,7 @@ import { areMergeable } from "./merge-null-val-objs";
|
|||||||
import { transformArrayToObject } from "./transform-array-to-obj";
|
import { transformArrayToObject } from "./transform-array-to-obj";
|
||||||
|
|
||||||
interface TransformedResult {
|
interface TransformedResult {
|
||||||
transformed: { [key: string]: any[] };
|
transformed: { [key: string]: any[] } | any[];
|
||||||
url: string;
|
url: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -23,56 +23,70 @@ export class SourceTracker {
|
|||||||
* Transform raw extraction results into a format that preserves source information
|
* Transform raw extraction results into a format that preserves source information
|
||||||
*/
|
*/
|
||||||
transformResults(extractionResults: { extract: any; url: string }[], schema: any, withTransform: boolean = true) {
|
transformResults(extractionResults: { extract: any; url: string }[], schema: any, withTransform: boolean = true) {
|
||||||
// First transform each result individually
|
// Handle array outputs
|
||||||
|
if (Array.isArray(extractionResults[0]?.extract)) {
|
||||||
|
this.transformedResults = extractionResults.map(result => ({
|
||||||
|
transformed: result.extract,
|
||||||
|
url: result.url
|
||||||
|
}));
|
||||||
|
|
||||||
|
if (withTransform) {
|
||||||
|
// Combine all extracts to match original behavior
|
||||||
|
const combinedExtracts = extractionResults.map(r => r.extract).flat();
|
||||||
|
return combinedExtracts;
|
||||||
|
}
|
||||||
|
return this.transformedResults;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle object outputs (original behavior)
|
||||||
this.transformedResults = extractionResults.map(result => ({
|
this.transformedResults = extractionResults.map(result => ({
|
||||||
transformed: transformArrayToObject(schema, [result.extract]),
|
transformed: transformArrayToObject(schema, [result.extract]),
|
||||||
url: result.url
|
url: result.url
|
||||||
}));
|
}));
|
||||||
|
|
||||||
if (withTransform) {
|
if (withTransform) {
|
||||||
// Then combine all extracts and transform them together to match original behavior
|
// Then combine all extracts and transform them together to match original behavior
|
||||||
const combinedExtracts = extractionResults.map(r => r.extract);
|
const combinedExtracts = extractionResults.map(r => r.extract);
|
||||||
return transformArrayToObject(schema, combinedExtracts);
|
return transformArrayToObject(schema, combinedExtracts);
|
||||||
}
|
}
|
||||||
return this.transformedResults;
|
return this.transformedResults;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Merge all transformed results into one object - this is now only used internally
|
|
||||||
*/
|
|
||||||
private mergeTransformedResults() {
|
|
||||||
return this.transformedResults.reduce((acc, curr) => {
|
|
||||||
Object.keys(curr.transformed).forEach(key => {
|
|
||||||
const value = curr.transformed[key];
|
|
||||||
if (!acc[key]) {
|
|
||||||
acc[key] = Array.isArray(value) ? [...value] : value;
|
|
||||||
} else if (Array.isArray(acc[key]) && Array.isArray(value)) {
|
|
||||||
acc[key].push(...value);
|
|
||||||
} else if (typeof acc[key] === 'object' && typeof value === 'object') {
|
|
||||||
acc[key] = { ...acc[key], ...value };
|
|
||||||
}
|
|
||||||
});
|
|
||||||
return acc;
|
|
||||||
}, {} as { [key: string]: any[] });
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Track sources for each item before deduplication
|
* Track sources for each item before deduplication
|
||||||
*/
|
*/
|
||||||
trackPreDeduplicationSources(multiEntityResult: { [key: string]: any[] }) {
|
trackPreDeduplicationSources(multiEntityResult: { [key: string]: any[] } | any[]) {
|
||||||
try {
|
try {
|
||||||
Object.keys(multiEntityResult).forEach(key => {
|
if (Array.isArray(multiEntityResult)) {
|
||||||
multiEntityResult[key].forEach((item: any) => {
|
// Handle array outputs
|
||||||
const itemKey = JSON.stringify(item);
|
multiEntityResult.forEach((item: any) => {
|
||||||
const matchingSources = this.transformedResults
|
const itemKey = JSON.stringify(item);
|
||||||
.filter(result =>
|
const matchingSources = this.transformedResults
|
||||||
result.transformed[key]?.some((resultItem: any) =>
|
.filter(result =>
|
||||||
JSON.stringify(resultItem) === itemKey
|
Array.isArray(result.transformed) &&
|
||||||
|
result.transformed.some((resultItem: any) =>
|
||||||
|
JSON.stringify(resultItem) === itemKey
|
||||||
|
)
|
||||||
)
|
)
|
||||||
)
|
.map(result => result.url);
|
||||||
.map(result => result.url);
|
this.preDedupeSourceMap.set(itemKey, matchingSources);
|
||||||
this.preDedupeSourceMap.set(itemKey, matchingSources);
|
});
|
||||||
});
|
} else {
|
||||||
});
|
// Handle object outputs (original behavior)
|
||||||
|
Object.keys(multiEntityResult).forEach(key => {
|
||||||
|
multiEntityResult[key].forEach((item: any) => {
|
||||||
|
const itemKey = JSON.stringify(item);
|
||||||
|
const matchingSources = this.transformedResults
|
||||||
|
.filter(result =>
|
||||||
|
result.transformed[key]?.some((resultItem: any) =>
|
||||||
|
JSON.stringify(resultItem) === itemKey
|
||||||
|
)
|
||||||
|
)
|
||||||
|
.map(result => result.url);
|
||||||
|
this.preDedupeSourceMap.set(itemKey, matchingSources);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error(`Failed to track pre-deduplication sources`, { error });
|
logger.error(`Failed to track pre-deduplication sources`, { error });
|
||||||
}
|
}
|
||||||
@@ -82,35 +96,56 @@ export class SourceTracker {
|
|||||||
* Map sources to final deduplicated/merged items
|
* Map sources to final deduplicated/merged items
|
||||||
*/
|
*/
|
||||||
mapSourcesToFinalItems(
|
mapSourcesToFinalItems(
|
||||||
multiEntityResult: { [key: string]: any[] },
|
multiEntityResult: { [key: string]: any[] } | any[],
|
||||||
multiEntityKeys: string[]
|
multiEntityKeys: string[]
|
||||||
): Record<string, string[]> {
|
): Record<string, string[]> {
|
||||||
try {
|
try {
|
||||||
const sources: Record<string, string[]> = {};
|
const sources: Record<string, string[]> = {};
|
||||||
|
|
||||||
multiEntityKeys.forEach(key => {
|
if (Array.isArray(multiEntityResult)) {
|
||||||
if (multiEntityResult[key] && Array.isArray(multiEntityResult[key])) {
|
// Handle array outputs
|
||||||
multiEntityResult[key].forEach((item: any, finalIndex: number) => {
|
multiEntityResult.forEach((item: any, finalIndex: number) => {
|
||||||
const sourceKey = `${key}[${finalIndex}]`;
|
const sourceKey = `[${finalIndex}]`;
|
||||||
const itemSources = new Set<string>();
|
const itemSources = new Set<string>();
|
||||||
|
|
||||||
this.transformedResults.forEach(result => {
|
this.transformedResults.forEach(result => {
|
||||||
result.transformed[key]?.forEach((originalItem: any) => {
|
if (Array.isArray(result.transformed)) {
|
||||||
if (areMergeable(item, originalItem)) {
|
result.transformed.forEach((originalItem: any) => {
|
||||||
itemSources.add(result.url);
|
if (areMergeable(item, originalItem)) {
|
||||||
}
|
itemSources.add(result.url);
|
||||||
});
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
sources[sourceKey] = Array.from(itemSources);
|
sources[sourceKey] = Array.from(itemSources);
|
||||||
});
|
});
|
||||||
}
|
} else {
|
||||||
});
|
// Handle object outputs (original behavior)
|
||||||
|
multiEntityKeys.forEach(key => {
|
||||||
|
if (multiEntityResult[key] && Array.isArray(multiEntityResult[key])) {
|
||||||
|
multiEntityResult[key].forEach((item: any, finalIndex: number) => {
|
||||||
|
const sourceKey = `${key}[${finalIndex}]`;
|
||||||
|
const itemSources = new Set<string>();
|
||||||
|
|
||||||
return sources;
|
this.transformedResults.forEach(result => {
|
||||||
} catch (error) {
|
result.transformed[key]?.forEach((originalItem: any) => {
|
||||||
logger.error(`Failed to map sources to final items`, { error });
|
if (areMergeable(item, originalItem)) {
|
||||||
return {};
|
itemSources.add(result.url);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
sources[sourceKey] = Array.from(itemSources);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
return sources;
|
||||||
|
} catch (error) {
|
||||||
|
logger.error(`Failed to map sources to final items`, { error });
|
||||||
|
return {};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
Reference in New Issue
Block a user