Nick: fixed prettier

This commit is contained in:
Nicolas
2024-12-11 19:46:11 -03:00
parent e5fe9e1534
commit 00335e2ba9
134 changed files with 9565 additions and 7108 deletions
+137 -101
View File
@@ -1,114 +1,150 @@
import axios from 'axios';
import * as cheerio from 'cheerio';
import * as querystring from 'querystring';
import { SearchResult } from '../../src/lib/entities';
import { logger } from '../../src/lib/logger';
import axios from "axios";
import * as cheerio from "cheerio";
import * as querystring from "querystring";
import { SearchResult } from "../../src/lib/entities";
import { logger } from "../../src/lib/logger";
/**
 * Pool of desktop browser User-Agent strings. One entry is picked at random
 * for each outgoing search request.
 */
const _useragent_list = [
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0",
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
  "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.62",
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0"
];
/**
 * Picks one User-Agent string uniformly at random from `_useragent_list`.
 *
 * @returns A User-Agent header value.
 */
function get_useragent(): string {
  return _useragent_list[Math.floor(Math.random() * _useragent_list.length)];
}
/**
 * Sends a single GET request to the Google search endpoint.
 *
 * @param term - Search query, sent as `q`.
 * @param results - Number of results to ask for, sent as `num`.
 * @param lang - Interface language, sent as `hl`.
 * @param country - Country code, sent as `gl`.
 * @param start - Result offset, sent as `start`.
 * @param proxies - Passed through unchanged as the axios `proxy` option.
 * @param timeout - Passed through unchanged as the axios `timeout` option.
 * @param tbs - Optional `tbs` query parameter; omitted when empty.
 * @param filter - Optional `filter` query parameter; omitted when empty.
 * @returns The axios response of the search request.
 * @throws Error with message "Google Search: Too many requests, try again later."
 *   when the server answers with HTTP 429; any other failure is rethrown as-is.
 */
async function _req(
  term: string,
  results: number,
  lang: string,
  country: string,
  start: number,
  proxies: any,
  timeout: number,
  tbs: string | undefined = undefined,
  filter: string | undefined = undefined
) {
  // Typed as an open record so the optional keys below can be added without
  // relying on implicit-any indexing.
  const params: Record<string, string | number> = {
    q: term,
    num: results, // Number of results to return
    hl: lang,
    gl: country,
    start: start
  };
  if (tbs) {
    params.tbs = tbs;
  }
  if (filter) {
    params.filter = filter;
  }

  try {
    // `return await` keeps a rejected request inside this try/catch.
    return await axios.get("https://www.google.com/search", {
      headers: {
        "User-Agent": get_useragent()
      },
      params: params,
      proxy: proxies,
      timeout: timeout
    });
  } catch (error: unknown) {
    // Translate rate limiting into a dedicated, readable error.
    if (axios.isAxiosError(error) && error.response?.status === 429) {
      throw new Error("Google Search: Too many requests, try again later.");
    }
    throw error;
  }
}
/**
 * Converts a proxy URL string into the object shape axios expects for its
 * `proxy` option (`protocol`, `host`, `port`, optional `auth`).
 * A value without a scheme is treated as an HTTP proxy.
 */
function toAxiosProxy(proxy: string): {
  protocol: string;
  host: string;
  port: number;
  auth?: { username: string; password: string };
} {
  const url = new URL(proxy.includes("://") ? proxy : `http://${proxy}`);
  const protocol = url.protocol.replace(/:$/, "");
  const defaultPort = protocol === "https" ? 443 : 80;
  const config: {
    protocol: string;
    host: string;
    port: number;
    auth?: { username: string; password: string };
  } = {
    protocol,
    host: url.hostname,
    port: url.port ? Number(url.port) : defaultPort
  };
  if (url.username) {
    config.auth = {
      username: decodeURIComponent(url.username),
      password: decodeURIComponent(url.password)
    };
  }
  return config;
}

/**
 * Scrapes Google web search result pages and returns the parsed results.
 *
 * Pages are requested through `_req` until `num_results` entries have been
 * accounted for, the attempt budget is exhausted, or rate limiting is hit.
 *
 * @param term - Search query.
 * @param advanced - Currently unused by this function; kept for callers.
 * @param num_results - Target number of results.
 * @param tbs - Optional `tbs` query parameter forwarded to `_req`.
 * @param filter - Optional `filter` query parameter forwarded to `_req`.
 * @param lang - Interface language forwarded to `_req`.
 * @param country - Country code forwarded to `_req`.
 * @param proxy - Optional proxy URL; converted to an axios proxy config.
 * @param sleep_interval - Pause between page requests, in seconds.
 * @param timeout - Request timeout forwarded to `_req`.
 * @returns The results collected so far (possibly empty or partial).
 * @throws Rethrows any request or parsing error other than rate limiting.
 */
export async function googleSearch(
  term: string,
  advanced = false,
  num_results = 7,
  tbs = undefined as string | undefined,
  filter = undefined as string | undefined,
  lang = "en",
  country = "us",
  proxy = undefined as string | undefined,
  sleep_interval = 0,
  timeout = 5000
): Promise<SearchResult[]> {
  const proxies = proxy ? toAxiosProxy(proxy) : null;

  // TODO: knowledge graph, answer box, etc.

  let start = 0;
  const results: SearchResult[] = [];
  let attempts = 0;
  const maxAttempts = 20; // Upper bound on consecutive unproductive pages

  while (start < num_results && attempts < maxAttempts) {
    try {
      const resp = await _req(
        term,
        num_results - start,
        lang,
        country,
        start,
        proxies,
        timeout,
        tbs,
        filter
      );
      const $ = cheerio.load(resp.data);
      const result_block = $("div.g");

      let added = 0;
      result_block.each((_, element) => {
        const block = $(element);
        const link = block.find("a").attr("href");
        const description = block
          .find("div[style='-webkit-line-clamp:2']")
          .text();
        // A result is only usable when it has both a target and a snippet.
        if (link && description) {
          results.push(
            new SearchResult(link, block.find("h3").text(), description)
          );
          added += 1;
        }
      });
      start += added;

      if (result_block.length === 0) {
        // Empty page: nudge the offset forward before retrying.
        start += 1;
      }
      if (added === 0) {
        // Count every page that produced nothing, including pages whose
        // blocks could not be parsed; otherwise the loop could never end.
        attempts += 1;
      } else {
        attempts = 0; // Reset attempts if we have results
      }

      await new Promise((resolve) =>
        setTimeout(resolve, sleep_interval * 1000)
      );
    } catch (error: unknown) {
      // Matches the rate-limit error raised by `_req`; keep what we have.
      if (
        error instanceof Error &&
        error.message.includes("Too many requests")
      ) {
        logger.warn("Too many requests, breaking the loop");
        break;
      }
      throw error;
    }

    // The first page had blocks but nothing usable: give up immediately.
    if (start === 0) {
      return results;
    }
  }

  if (attempts >= maxAttempts) {
    logger.warn("Max attempts reached, breaking the loop");
  }
  return results;
}