8d467c8ca7
* feat: use strictNullChecking * feat: switch logger to Winston * feat(scrapeURL): first batch * fix(scrapeURL): error swallow * fix(scrapeURL): add timeout to EngineResultsTracker * fix(scrapeURL): report unexpected error to sentry * chore: remove unused modules * feat(transfomers/coerce): warn when a format's response is missing * feat(scrapeURL): feature flag priorities, engine quality sorting, PDF and DOCX support * (add note) * feat(scrapeURL): wip readme * feat(scrapeURL): LLM extract * feat(scrapeURL): better warnings * fix(scrapeURL/engines/fire-engine;playwright): fix screenshot * feat(scrapeURL): add forceEngine internal option * feat(scrapeURL/engines): scrapingbee * feat(scrapeURL/transformars): uploadScreenshot * feat(scrapeURL): more intense tests * bunch of stuff * get rid of WebScraper (mostly) * adapt batch scrape * add staging deploy workflow * fix yaml * fix logger issues * fix v1 test schema * feat(scrapeURL/fire-engine/chrome-cdp): remove wait inserts on actions * scrapeURL: v0 backwards compat * logger fixes * feat(scrapeurl): v0 returnOnlyUrls support * fix(scrapeURL/v0): URL leniency * fix(batch-scrape): ts non-nullable * fix(scrapeURL/fire-engine/chromecdp): fix wait action * fix(logger): remove error debug key * feat(requests.http): use dotenv expression * fix(scrapeURL/extractMetadata): extract custom metadata * fix crawl option conversion * feat(scrapeURL): Add retry logic to robustFetch * fix(scrapeURL): crawl stuff * fix(scrapeURL): LLM extract * fix(scrapeURL/v0): search fix * fix(tests/v0): grant larger response size to v0 crawl status * feat(scrapeURL): basic fetch engine * feat(scrapeURL): playwright engine * feat(scrapeURL): add url-specific parameters * Update readme and examples * added e2e tests for most parameters. Still a few actions, location and iframes to be done. * fixed type * Nick: * Update scrape.ts * Update index.ts * added actions and base64 check * Nick: skipTls feature flag? 
* 403 * todo * todo * fixes * yeet headers from url specific params * add warning when final engine has feature deficit * expose engine results tracker for ScrapeEvents implementation * ingest scrape events * fixed some tests * comment * Update index.test.ts * fixed rawHtml * Update index.test.ts * update comments * move geolocation to global f-e option, fix removeBase64Images * Nick: * trim url-specific params * Update index.ts --------- Co-authored-by: Eric Ciarla <ericciarla@yahoo.com> Co-authored-by: rafaelmmiller <8574157+rafaelmmiller@users.noreply.github.com> Co-authored-by: Nicolas <nicolascamara29@gmail.com>
82 lines
2.1 KiB
TypeScript
82 lines
2.1 KiB
TypeScript
import * as winston from "winston";
|
|
|
|
import { configDotenv } from "dotenv";
|
|
import Transport from "winston-transport";
|
|
// Load .env before anything in this module reads process.env (the logger
// below consults LOGGING_LEVEL, ENV and SENTRY_ENVIRONMENT at import time).
configDotenv();
|
|
|
|
/**
 * Human-readable single-line formatter.
 *
 * Output shape: `<timestamp> <level> [<module>:<method>]: <message> <details>`
 * where `<details>` is the JSON-serialised `info.metadata` for error/warn
 * entries and an empty string otherwise (the space before it is always
 * emitted).
 *
 * Reads `info.metadata`, so it has to run after a formatter that populates
 * that field (the Console transport in this file places
 * `winston.format.metadata` ahead of it).
 */
const logFormat = winston.format.printf((info) => {
  const { timestamp, level, message, metadata } = info;

  // Substring match rather than equality: this formatter is chained after
  // colorize(), so `level` presumably carries colour escape codes around the
  // level name.
  const wantsDetails = ["error", "warn"].some((tag) => level.includes(tag));

  const details = wantsDetails
    ? JSON.stringify(metadata, (_key, value) =>
        value instanceof Error
          ? {
              // Own enumerable fields first, then the standard Error fields,
              // which JSON.stringify would otherwise skip.
              ...value,
              name: value.name,
              message: value.message,
              stack: value.stack,
              cause: value.cause,
            }
          : value,
      )
    : "";

  const origin = `${metadata.module ?? ""}:${metadata.method ?? ""}`;

  return `${timestamp} ${level} [${origin}]: ${message} ${details}`;
});
|
|
|
|
// Pretty (colourised, single-line) console output is used everywhere except
// production, and also in production when SENTRY_ENVIRONMENT is "dev".
const humanReadableConsole =
  process.env.ENV !== "production" || process.env.SENTRY_ENVIRONMENT === "dev";

/**
 * Application-wide Winston logger.
 *
 * - Level: `LOGGING_LEVEL` (lower-cased), falling back to `"debug"` when unset.
 * - Logger-level format: JSON, with `Error` values expanded so that `name`,
 *   `message`, `stack` and `cause` survive serialisation.
 * - Transport: a single Console transport that stamps a timestamp, gathers
 *   every field other than `message`/`level`/`timestamp` under `metadata`,
 *   and, when pretty output is enabled, colourises and applies `logFormat`.
 */
export const logger = winston.createLogger({
  level: process.env.LOGGING_LEVEL?.toLowerCase() ?? "debug",
  format: winston.format.json({
    replacer: (_key, value) =>
      value instanceof Error
        ? {
            ...value,
            name: value.name,
            message: value.message,
            stack: value.stack,
            cause: value.cause,
          }
        : value,
  }),
  transports: [
    new winston.transports.Console({
      format: winston.format.combine(
        winston.format.timestamp({ format: "YYYY-MM-DD HH:mm:ss" }),
        winston.format.metadata({ fillExcept: ["message", "level", "timestamp"] }),
        ...(humanReadableConsole ? [winston.format.colorize(), logFormat] : []),
      ),
    }),
  ],
});
|
|
|
|
/**
 * Constructor options for {@link ArrayTransport}: the standard transport
 * stream options plus the destination array and an optional scrape filter.
 */
export type ArrayTransportOptions = Transport.TransportStreamOptions & {
  /** Array that accepted log entries are appended to. */
  array: any[];
  /** When set, only entries whose `scrapeId` equals this value are kept. */
  scrapeId?: string;
};
|
|
|
|
/**
 * Winston transport that collects log entries into a caller-supplied array.
 *
 * If a `scrapeId` is configured, entries whose `scrapeId` field differs from
 * it are dropped; otherwise every entry is collected.
 */
export class ArrayTransport extends Transport {
  /** Destination array; owned by the caller and mutated in place. */
  private readonly array: any[];
  /** Optional filter: only entries with this `scrapeId` are collected. */
  private readonly scrapeId?: string;

  /**
   * @param opts Transport stream options plus the target `array` and an
   *   optional `scrapeId` filter.
   */
  constructor(opts: ArrayTransportOptions) {
    super(opts);
    this.array = opts.array;
    this.scrapeId = opts.scrapeId;
  }

  /**
   * Handles one log entry.
   *
   * @param info Log entry object; its `scrapeId` field (if any) is compared
   *   against the configured filter.
   * @param next Callback invoked once the entry has been handled.
   */
  log(info: any, next: () => void) {
    // "logged" is emitted for every entry, including ones filtered out below.
    setImmediate(() => {
      this.emit("logged", info);
    });

    // With a filter configured, skip entries belonging to a different scrape.
    if (this.scrapeId !== undefined && info.scrapeId !== this.scrapeId) {
      return next();
    }

    this.array.push(info);

    next();
  }
}