fix(scrapeURL): includeTags/excludeTags
This commit is contained in:
@@ -56,7 +56,7 @@ export const removeUnwantedElements = (
|
|||||||
) => {
|
) => {
|
||||||
const soup = load(html);
|
const soup = load(html);
|
||||||
|
|
||||||
if (scrapeOptions.includeTags && scrapeOptions.includeTags.length > 0) {
|
if (scrapeOptions.includeTags && scrapeOptions.includeTags.filter(x => x.trim().length !== 0).length > 0) {
|
||||||
// Create a new root element to hold the tags to keep
|
// Create a new root element to hold the tags to keep
|
||||||
const newRoot = load("<div></div>")("div");
|
const newRoot = load("<div></div>")("div");
|
||||||
scrapeOptions.includeTags.forEach((tag) => {
|
scrapeOptions.includeTags.forEach((tag) => {
|
||||||
@@ -69,7 +69,7 @@ export const removeUnwantedElements = (
|
|||||||
|
|
||||||
soup("script, style, noscript, meta, head").remove();
|
soup("script, style, noscript, meta, head").remove();
|
||||||
|
|
||||||
if (scrapeOptions.excludeTags && scrapeOptions.excludeTags.length > 0) {
|
if (scrapeOptions.excludeTags && scrapeOptions.excludeTags.filter(x => x.trim().length !== 0).length > 0) {
|
||||||
scrapeOptions.excludeTags.forEach((tag) => {
|
scrapeOptions.excludeTags.forEach((tag) => {
|
||||||
let elementsToRemove: Cheerio<AnyNode>;
|
let elementsToRemove: Cheerio<AnyNode>;
|
||||||
if (tag.startsWith("*") && tag.endsWith("*")) {
|
if (tag.startsWith("*") && tag.endsWith("*")) {
|
||||||
|
|||||||
Reference in New Issue
Block a user