83 lines
1.7 KiB
TypeScript
83 lines
1.7 KiB
TypeScript
import { logger } from "../../../lib/logger";
|
|
|
|
/**
 * Domains that `isUrlBlocked` rejects.
 *
 * Entries are bare registrable domains (no scheme, no path, lowercase).
 * Despite the name, the list is not limited to social media: the trailing
 * entries include airline and other sites. The reason for those additions is
 * not recorded in this file.
 */
const socialMediaBlocklist = [
  // Social networks and media-sharing sites
  "facebook.com",
  "x.com",
  "twitter.com",
  "instagram.com",
  "linkedin.com",
  "snapchat.com",
  "tiktok.com",
  "reddit.com",
  "tumblr.com",
  "flickr.com",
  // Messaging services
  "whatsapp.com",
  "wechat.com",
  "telegram.org",
  // Remaining entries
  "researchhub.com",
  "youtube.com",
  "corterix.com",
  "southwest.com",
  "ryanair.com",
];
|
|
|
|
/**
 * Keywords that exempt a URL from the blocklist.
 *
 * `isUrlBlocked` tests each entry case-insensitively against the whole URL
 * (scheme, host, path and query alike), with a word boundary required on both
 * sides. A single match anywhere in the URL is enough to let it through.
 */
const allowedKeywords = [
  // Generic page names (legal, help, company-info style pages)
  "pulse",
  "privacy",
  "terms",
  "policy",
  "user-agreement",
  "legal",
  "help",
  "policies",
  "support",
  "contact",
  "about",
  "careers",
  "blog",
  "press",
  "conditions",
  "tos",
  // Specific hosts/paths on otherwise blocked domains. The leading "://"
  // ties the match to the position right after the URL scheme.
  "://library.tiktok.com",
  "://ads.tiktok.com",
  "://tiktok.com/business",
  "://developers.facebook.com",
];
|
|
|
|
/** Escapes RegExp metacharacters so `text` is matched literally. */
function escapeRegExp(text: string): string {
  return text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * Reports whether a URL points at a blocklisted site.
 *
 * The check runs in two stages:
 *  1. If the URL contains any entry of `allowedKeywords` as a whole word
 *     (case-insensitive, matched literally), it is never blocked.
 *  2. Otherwise the URL is parsed and its hostname is compared against
 *     `socialMediaBlocklist`. A hostname is blocked when a listed domain
 *     appears in it as a complete, dot-delimited run of labels, e.g.
 *     `facebook.com`, `www.facebook.com` or `m.facebook.com.`.
 *
 * @param url - Absolute URL, or a bare host/path without a scheme (in which
 *   case `https://` is assumed). Surrounding whitespace is ignored.
 * @returns `true` when the hostname is blocklisted and no allowed keyword
 *   applies; `false` otherwise, including when the URL cannot be parsed
 *   (the parse failure is logged through `logger.error`).
 */
export function isUrlBlocked(url: string): boolean {
  const trimmedUrl = url.trim();
  const lowerCaseUrl = trimmedUrl.toLowerCase();

  // Stage 1: an allowed keyword anywhere in the URL short-circuits the check.
  // Keywords are escaped so that "." and similar characters match literally.
  const isAllowlisted = allowedKeywords.some((keyword) =>
    new RegExp(`\\b${escapeRegExp(keyword)}\\b`, "i").test(lowerCaseUrl),
  );
  if (isAllowlisted) {
    return false;
  }

  // Schemes are case-insensitive; a case-sensitive test here would turn
  // "HTTPS://host" into "https://HTTPS://host" and hide the real hostname.
  const candidate = /^https?:\/\//i.test(trimmedUrl)
    ? trimmedUrl
    : `https://${trimmedUrl}`;

  let hostname: string;
  try {
    hostname = new URL(candidate).hostname.toLowerCase();
  } catch {
    // An unparsable URL is treated as not blocked; the failure is only logged.
    logger.error(`Error parsing the following URL: ${candidate}`);
    return false;
  }

  // Stage 2: wrapping both sides in dots reduces "domain bounded by a dot or
  // by the start/end of the hostname" to a plain substring test, with no
  // regex escaping involved.
  const dottedHostname = `.${hostname}.`;
  return socialMediaBlocklist.some((domain) =>
    dottedHostname.includes(`.${domain.toLowerCase()}.`),
  );
}
|