2025-01-13 22:30:15 -03:00
|
|
|
import { logger } from "../../lib/logger";
|
|
|
|
|
import { normalizeUrlOnlyHostname } from "../../lib/canonical-url";
|
|
|
|
|
import { supabase_service } from "../../services/supabase";
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Query the sitemap index for a given URL
|
|
|
|
|
* @param url The URL to query
|
|
|
|
|
* @returns A list of URLs found in the sitemap index aggregated from all sitemaps
|
|
|
|
|
*/
|
|
|
|
|
import { withAuth } from "../../lib/withAuth";
|
|
|
|
|
|
|
|
|
|
/**
 * Fetch every URL stored in the sitemap index for the origin of `url`.
 *
 * The input is reduced with `normalizeUrlOnlyHostname`, and all `crawl_maps`
 * rows whose `origin_url` equals that value are read. The `urls` value of
 * each row is concatenated (flattened one level) into a single list.
 *
 * The query is tried up to three times, back to back. Every failure is
 * logged together with the attempt number; once the last attempt fails an
 * empty list is returned instead of throwing.
 *
 * @param url The URL whose origin should be looked up
 * @returns The combined URLs of all matching rows, or `[]` if every attempt failed
 */
async function querySitemapIndexFunction(url: string) {
  const maxAttempts = 3;
  const originUrl = normalizeUrlOnlyHostname(url);

  let attempt = 0;
  while (attempt < maxAttempts) {
    attempt += 1;

    try {
      const { data: rows, error: queryError } = await supabase_service
        .from("crawl_maps")
        .select("urls")
        .eq("origin_url", originUrl);

      // Route query-level errors through the same catch as thrown ones so
      // they are logged and retried identically.
      if (queryError) {
        throw queryError;
      }

      return rows.flatMap((row) => row.urls);
    } catch (err) {
      logger.error("(sitemap-index) Error querying the index", {
        error: err,
        attempt,
      });

      // Out of retries: degrade to an empty result.
      if (attempt === maxAttempts) {
        return [];
      }
    }
  }

  // Not reachable in practice; keeps every code path returning a list.
  return [];
}
|
|
|
|
|
|
|
|
|
|
// Exported entry point: `querySitemapIndexFunction` passed through `withAuth`
// with an empty array as the second argument.
// NOTE(review): presumably `[]` is the value `withAuth` hands back when the
// wrapped call is not performed — confirm against `withAuth`'s definition.
export const querySitemapIndex = withAuth(querySitemapIndexFunction, []);
|