fix(scraper): improve charset detection regex to accurately parse meta tags (#1265)
This commit is contained in:
@@ -74,7 +74,7 @@ export async function scrapeURLWithFetch(
|
|||||||
|
|
||||||
const buf = Buffer.from(await x.arrayBuffer());
|
const buf = Buffer.from(await x.arrayBuffer());
|
||||||
let text = buf.toString("utf8");
|
let text = buf.toString("utf8");
|
||||||
const charset = (text.match(/charset=["']?(.+?)["']?>/) ?? [])[1]
|
const charset = (text.match(/<meta\b[^>]*charset\s*=\s*["']?([^"'\s\/>]+)/i) ?? [])[1]
|
||||||
try {
|
try {
|
||||||
if (charset) {
|
if (charset) {
|
||||||
text = new TextDecoder(charset.trim()).decode(buf);
|
text = new TextDecoder(charset.trim()).decode(buf);
|
||||||
|
|||||||
Reference in New Issue
Block a user