Nick: formatting done

This commit is contained in:
Nicolas
2025-01-22 18:47:44 -03:00
parent 994e1eb502
commit 498558d358
53 changed files with 10672 additions and 10329 deletions
+52 -21
View File
@@ -24,55 +24,79 @@ export async function getLinksFromSitemap(
try {
if (mode === "fire-engine" && useFireEngine) {
const fetchResponse = await scrapeURL(
"sitemap",
"sitemap",
sitemapUrl,
scrapeOptions.parse({ formats: ["rawHtml"] }),
{ forceEngine: "fetch" },
);
if (fetchResponse.success && (fetchResponse.document.metadata.statusCode >= 200 && fetchResponse.document.metadata.statusCode < 300)) {
if (
fetchResponse.success &&
fetchResponse.document.metadata.statusCode >= 200 &&
fetchResponse.document.metadata.statusCode < 300
) {
content = fetchResponse.document.rawHtml!;
} else {
logger.debug(
"Failed to scrape sitemap via fetch, falling back to TLSClient...",
{ error: fetchResponse.success ? fetchResponse.document : fetchResponse.error },
{
error: fetchResponse.success
? fetchResponse.document
: fetchResponse.error,
},
);
const tlsResponse = await scrapeURL(
"sitemap",
"sitemap",
sitemapUrl,
scrapeOptions.parse({ formats: ["rawHtml"] }),
{ forceEngine: "fire-engine;tlsclient", v0DisableJsDom: true },
);
if (tlsResponse.success && (tlsResponse.document.metadata.statusCode >= 200 && tlsResponse.document.metadata.statusCode < 300)) {
if (
tlsResponse.success &&
tlsResponse.document.metadata.statusCode >= 200 &&
tlsResponse.document.metadata.statusCode < 300
) {
content = tlsResponse.document.rawHtml!;
} else {
logger.error(`Request failed for ${sitemapUrl}, ran out of engines!`, {
method: "getLinksFromSitemap",
mode,
sitemapUrl,
error: tlsResponse.success ? tlsResponse.document : tlsResponse.error,
});
logger.error(
`Request failed for ${sitemapUrl}, ran out of engines!`,
{
method: "getLinksFromSitemap",
mode,
sitemapUrl,
error: tlsResponse.success
? tlsResponse.document
: tlsResponse.error,
},
);
return 0;
}
}
} else {
const fetchResponse = await scrapeURL(
"sitemap",
"sitemap",
sitemapUrl,
scrapeOptions.parse({ formats: ["rawHtml"] }),
{ forceEngine: "fetch" },
);
if (fetchResponse.success && (fetchResponse.document.metadata.statusCode >= 200 && fetchResponse.document.metadata.statusCode < 300)) {
if (
fetchResponse.success &&
fetchResponse.document.metadata.statusCode >= 200 &&
fetchResponse.document.metadata.statusCode < 300
) {
content = fetchResponse.document.rawHtml!;
} else {
logger.error(`Request failed for ${sitemapUrl}, ran out of engines!`, {
method: "getLinksFromSitemap",
mode,
sitemapUrl,
});
logger.error(
`Request failed for ${sitemapUrl}, ran out of engines!`,
{
method: "getLinksFromSitemap",
mode,
sitemapUrl,
},
);
return 0;
}
}
@@ -165,13 +189,20 @@ export const fetchSitemapData = async (
const sitemapUrl = url.endsWith("/sitemap.xml") ? url : `${url}/sitemap.xml`;
try {
const fetchResponse = await scrapeURL(
"sitemap",
"sitemap",
sitemapUrl,
scrapeOptions.parse({ formats: ["rawHtml"], timeout: timeout || axiosTimeout }),
scrapeOptions.parse({
formats: ["rawHtml"],
timeout: timeout || axiosTimeout,
}),
{ forceEngine: "fetch" },
);
if (fetchResponse.success && (fetchResponse.document.metadata.statusCode >= 200 && fetchResponse.document.metadata.statusCode < 300)) {
if (
fetchResponse.success &&
fetchResponse.document.metadata.statusCode >= 200 &&
fetchResponse.document.metadata.statusCode < 300
) {
const xml = fetchResponse.document.rawHtml!;
const parsedXml = await parseStringPromise(xml);