Nick: formatting + error handling
This commit is contained in:
@@ -77,51 +77,71 @@ export function extractMetadata(soup: CheerioAPI, url: string): Metadata {
|
|||||||
title = soup("title").text() || null;
|
title = soup("title").text() || null;
|
||||||
description = soup('meta[name="description"]').attr("content") || null;
|
description = soup('meta[name="description"]').attr("content") || null;
|
||||||
|
|
||||||
language = soup('html').attr('lang') || null;
|
language = soup("html").attr("lang") || null;
|
||||||
|
|
||||||
keywords = soup('meta[name="keywords"]').attr("content") || null;
|
keywords = soup('meta[name="keywords"]').attr("content") || null;
|
||||||
robots = soup('meta[name="robots"]').attr("content") || null;
|
robots = soup('meta[name="robots"]').attr("content") || null;
|
||||||
ogTitle = soup('meta[property="og:title"]').attr("content") || null;
|
ogTitle = soup('meta[property="og:title"]').attr("content") || null;
|
||||||
ogDescription = soup('meta[property="og:description"]').attr("content") || null;
|
ogDescription =
|
||||||
|
soup('meta[property="og:description"]').attr("content") || null;
|
||||||
ogUrl = soup('meta[property="og:url"]').attr("content") || null;
|
ogUrl = soup('meta[property="og:url"]').attr("content") || null;
|
||||||
ogImage = soup('meta[property="og:image"]').attr("content") || null;
|
ogImage = soup('meta[property="og:image"]').attr("content") || null;
|
||||||
ogAudio = soup('meta[property="og:audio"]').attr("content") || null;
|
ogAudio = soup('meta[property="og:audio"]').attr("content") || null;
|
||||||
ogDeterminer = soup('meta[property="og:determiner"]').attr("content") || null;
|
ogDeterminer =
|
||||||
|
soup('meta[property="og:determiner"]').attr("content") || null;
|
||||||
ogLocale = soup('meta[property="og:locale"]').attr("content") || null;
|
ogLocale = soup('meta[property="og:locale"]').attr("content") || null;
|
||||||
ogLocaleAlternate = soup('meta[property="og:locale:alternate"]').map((i, el) => soup(el).attr("content")).get() || null;
|
ogLocaleAlternate =
|
||||||
|
soup('meta[property="og:locale:alternate"]')
|
||||||
|
.map((i, el) => soup(el).attr("content"))
|
||||||
|
.get() || null;
|
||||||
ogSiteName = soup('meta[property="og:site_name"]').attr("content") || null;
|
ogSiteName = soup('meta[property="og:site_name"]').attr("content") || null;
|
||||||
ogVideo = soup('meta[property="og:video"]').attr("content") || null;
|
ogVideo = soup('meta[property="og:video"]').attr("content") || null;
|
||||||
articleSection = soup('meta[name="article:section"]').attr("content") || null;
|
articleSection =
|
||||||
|
soup('meta[name="article:section"]').attr("content") || null;
|
||||||
articleTag = soup('meta[name="article:tag"]').attr("content") || null;
|
articleTag = soup('meta[name="article:tag"]').attr("content") || null;
|
||||||
publishedTime = soup('meta[property="article:published_time"]').attr("content") || null;
|
publishedTime =
|
||||||
modifiedTime = soup('meta[property="article:modified_time"]').attr("content") || null;
|
soup('meta[property="article:published_time"]').attr("content") || null;
|
||||||
dctermsKeywords = soup('meta[name="dcterms.keywords"]').attr("content") || null;
|
modifiedTime =
|
||||||
|
soup('meta[property="article:modified_time"]').attr("content") || null;
|
||||||
|
dctermsKeywords =
|
||||||
|
soup('meta[name="dcterms.keywords"]').attr("content") || null;
|
||||||
dcDescription = soup('meta[name="dc.description"]').attr("content") || null;
|
dcDescription = soup('meta[name="dc.description"]').attr("content") || null;
|
||||||
dcSubject = soup('meta[name="dc.subject"]').attr("content") || null;
|
dcSubject = soup('meta[name="dc.subject"]').attr("content") || null;
|
||||||
dctermsSubject = soup('meta[name="dcterms.subject"]').attr("content") || null;
|
dctermsSubject =
|
||||||
dctermsAudience = soup('meta[name="dcterms.audience"]').attr("content") || null;
|
soup('meta[name="dcterms.subject"]').attr("content") || null;
|
||||||
|
dctermsAudience =
|
||||||
|
soup('meta[name="dcterms.audience"]').attr("content") || null;
|
||||||
dcType = soup('meta[name="dc.type"]').attr("content") || null;
|
dcType = soup('meta[name="dc.type"]').attr("content") || null;
|
||||||
dctermsType = soup('meta[name="dcterms.type"]').attr("content") || null;
|
dctermsType = soup('meta[name="dcterms.type"]').attr("content") || null;
|
||||||
dcDate = soup('meta[name="dc.date"]').attr("content") || null;
|
dcDate = soup('meta[name="dc.date"]').attr("content") || null;
|
||||||
dcDateCreated = soup('meta[name="dc.date.created"]').attr("content") || null;
|
dcDateCreated =
|
||||||
dctermsCreated = soup('meta[name="dcterms.created"]').attr("content") || null;
|
soup('meta[name="dc.date.created"]').attr("content") || null;
|
||||||
|
dctermsCreated =
|
||||||
|
soup('meta[name="dcterms.created"]').attr("content") || null;
|
||||||
|
|
||||||
// Extract all meta tags for custom metadata
|
try {
|
||||||
soup("meta").each((i, elem) => {
|
// Extract all meta tags for custom metadata
|
||||||
const name = soup(elem).attr("name") || soup(elem).attr("property");
|
soup("meta").each((i, elem) => {
|
||||||
const content = soup(elem).attr("content");
|
try {
|
||||||
|
const name = soup(elem).attr("name") || soup(elem).attr("property");
|
||||||
|
const content = soup(elem).attr("content");
|
||||||
|
|
||||||
if (name && content) {
|
if (name && content) {
|
||||||
if (customMetadata[name] === undefined) {
|
if (customMetadata[name] === undefined) {
|
||||||
customMetadata[name] = content;
|
customMetadata[name] = content;
|
||||||
} else if (Array.isArray(customMetadata[name])) {
|
} else if (Array.isArray(customMetadata[name])) {
|
||||||
(customMetadata[name] as string[]).push(content);
|
(customMetadata[name] as string[]).push(content);
|
||||||
} else {
|
} else {
|
||||||
customMetadata[name] = [customMetadata[name] as string, content];
|
customMetadata[name] = [customMetadata[name] as string, content];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
Logger.error(`Error extracting custom metadata (in): ${error}`);
|
||||||
}
|
}
|
||||||
}
|
});
|
||||||
});
|
} catch (error) {
|
||||||
|
Logger.error(`Error extracting custom metadata: ${error}`);
|
||||||
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
Logger.error(`Error extracting metadata: ${error}`);
|
Logger.error(`Error extracting metadata: ${error}`);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user