Nick: formatting done
This commit is contained in:
@@ -24,55 +24,79 @@ export async function getLinksFromSitemap(
|
||||
try {
|
||||
if (mode === "fire-engine" && useFireEngine) {
|
||||
const fetchResponse = await scrapeURL(
|
||||
"sitemap",
|
||||
"sitemap",
|
||||
sitemapUrl,
|
||||
scrapeOptions.parse({ formats: ["rawHtml"] }),
|
||||
{ forceEngine: "fetch" },
|
||||
);
|
||||
|
||||
if (fetchResponse.success && (fetchResponse.document.metadata.statusCode >= 200 && fetchResponse.document.metadata.statusCode < 300)) {
|
||||
if (
|
||||
fetchResponse.success &&
|
||||
fetchResponse.document.metadata.statusCode >= 200 &&
|
||||
fetchResponse.document.metadata.statusCode < 300
|
||||
) {
|
||||
content = fetchResponse.document.rawHtml!;
|
||||
} else {
|
||||
logger.debug(
|
||||
"Failed to scrape sitemap via fetch, falling back to TLSClient...",
|
||||
{ error: fetchResponse.success ? fetchResponse.document : fetchResponse.error },
|
||||
{
|
||||
error: fetchResponse.success
|
||||
? fetchResponse.document
|
||||
: fetchResponse.error,
|
||||
},
|
||||
);
|
||||
|
||||
const tlsResponse = await scrapeURL(
|
||||
"sitemap",
|
||||
"sitemap",
|
||||
sitemapUrl,
|
||||
scrapeOptions.parse({ formats: ["rawHtml"] }),
|
||||
{ forceEngine: "fire-engine;tlsclient", v0DisableJsDom: true },
|
||||
);
|
||||
|
||||
if (tlsResponse.success && (tlsResponse.document.metadata.statusCode >= 200 && tlsResponse.document.metadata.statusCode < 300)) {
|
||||
if (
|
||||
tlsResponse.success &&
|
||||
tlsResponse.document.metadata.statusCode >= 200 &&
|
||||
tlsResponse.document.metadata.statusCode < 300
|
||||
) {
|
||||
content = tlsResponse.document.rawHtml!;
|
||||
} else {
|
||||
logger.error(`Request failed for ${sitemapUrl}, ran out of engines!`, {
|
||||
method: "getLinksFromSitemap",
|
||||
mode,
|
||||
sitemapUrl,
|
||||
error: tlsResponse.success ? tlsResponse.document : tlsResponse.error,
|
||||
});
|
||||
logger.error(
|
||||
`Request failed for ${sitemapUrl}, ran out of engines!`,
|
||||
{
|
||||
method: "getLinksFromSitemap",
|
||||
mode,
|
||||
sitemapUrl,
|
||||
error: tlsResponse.success
|
||||
? tlsResponse.document
|
||||
: tlsResponse.error,
|
||||
},
|
||||
);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
const fetchResponse = await scrapeURL(
|
||||
"sitemap",
|
||||
"sitemap",
|
||||
sitemapUrl,
|
||||
scrapeOptions.parse({ formats: ["rawHtml"] }),
|
||||
{ forceEngine: "fetch" },
|
||||
);
|
||||
|
||||
if (fetchResponse.success && (fetchResponse.document.metadata.statusCode >= 200 && fetchResponse.document.metadata.statusCode < 300)) {
|
||||
if (
|
||||
fetchResponse.success &&
|
||||
fetchResponse.document.metadata.statusCode >= 200 &&
|
||||
fetchResponse.document.metadata.statusCode < 300
|
||||
) {
|
||||
content = fetchResponse.document.rawHtml!;
|
||||
} else {
|
||||
logger.error(`Request failed for ${sitemapUrl}, ran out of engines!`, {
|
||||
method: "getLinksFromSitemap",
|
||||
mode,
|
||||
sitemapUrl,
|
||||
});
|
||||
logger.error(
|
||||
`Request failed for ${sitemapUrl}, ran out of engines!`,
|
||||
{
|
||||
method: "getLinksFromSitemap",
|
||||
mode,
|
||||
sitemapUrl,
|
||||
},
|
||||
);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
@@ -165,13 +189,20 @@ export const fetchSitemapData = async (
|
||||
const sitemapUrl = url.endsWith("/sitemap.xml") ? url : `${url}/sitemap.xml`;
|
||||
try {
|
||||
const fetchResponse = await scrapeURL(
|
||||
"sitemap",
|
||||
"sitemap",
|
||||
sitemapUrl,
|
||||
scrapeOptions.parse({ formats: ["rawHtml"], timeout: timeout || axiosTimeout }),
|
||||
scrapeOptions.parse({
|
||||
formats: ["rawHtml"],
|
||||
timeout: timeout || axiosTimeout,
|
||||
}),
|
||||
{ forceEngine: "fetch" },
|
||||
);
|
||||
|
||||
if (fetchResponse.success && (fetchResponse.document.metadata.statusCode >= 200 && fetchResponse.document.metadata.statusCode < 300)) {
|
||||
if (
|
||||
fetchResponse.success &&
|
||||
fetchResponse.document.metadata.statusCode >= 200 &&
|
||||
fetchResponse.document.metadata.statusCode < 300
|
||||
) {
|
||||
const xml = fetchResponse.document.rawHtml!;
|
||||
const parsedXml = await parseStringPromise(xml);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user