Added pageOptions.removeTags

This commit is contained in:
rafaelsideguide
2024-06-13 10:51:05 -03:00
parent bad0f57134
commit 676d6e8ab5
8 changed files with 84 additions and 4 deletions
@@ -304,6 +304,19 @@ export async function scrapSingleUrl(
const removeUnwantedElements = (html: string, pageOptions: PageOptions) => {
const soup = cheerio.load(html);
soup("script, style, iframe, noscript, meta, head").remove();
if (pageOptions.removeTags) {
if (typeof pageOptions.removeTags === 'string') {
pageOptions.removeTags.split(',').forEach((tag) => {
soup(tag.trim()).remove();
});
} else if (Array.isArray(pageOptions.removeTags)) {
pageOptions.removeTags.forEach((tag) => {
soup(tag).remove();
});
}
}
if (pageOptions.onlyMainContent) {
// remove any other tags that are not in the main content
excludeNonMainTags.forEach((tag) => {