feat: scrape event logging to DB

This commit is contained in:
Gergo Moricz
2024-07-24 14:31:25 +02:00
parent 6208ecdbc0
commit 7cd9bf92e3
12 changed files with 118 additions and 7 deletions
@@ -42,6 +42,7 @@ describe('WebCrawler', () => {
crawler = new WebCrawler({
+jobId: "TEST",
initialUrl: initialUrl,
includes: [],
excludes: [],
@@ -76,6 +77,7 @@ describe('WebCrawler', () => {
crawler = new WebCrawler({
+jobId: "TEST",
initialUrl: initialUrl,
includes: [],
excludes: [],
@@ -104,6 +106,7 @@ describe('WebCrawler', () => {
crawler = new WebCrawler({
+jobId: "TEST",
initialUrl: initialUrl,
includes: [],
excludes: [],
@@ -133,6 +136,7 @@ describe('WebCrawler', () => {
crawler = new WebCrawler({
+jobId: "TEST",
initialUrl: initialUrl,
includes: [],
excludes: [],
@@ -161,6 +165,7 @@ describe('WebCrawler', () => {
// Setup the crawler with the specific test case options
const crawler = new WebCrawler({
+jobId: "TEST",
initialUrl: initialUrl,
includes: [],
excludes: [],
@@ -194,6 +199,7 @@ describe('WebCrawler', () => {
const limit = 2; // Set a limit for the number of links
crawler = new WebCrawler({
+jobId: "TEST",
initialUrl: initialUrl,
includes: [],
excludes: [],
@@ -15,8 +15,8 @@ describe('scrapSingleUrl', () => {
const pageOptionsWithHtml: PageOptions = { includeHtml: true };
const pageOptionsWithoutHtml: PageOptions = { includeHtml: false };
-const resultWithHtml = await scrapSingleUrl(url, pageOptionsWithHtml);
-const resultWithoutHtml = await scrapSingleUrl(url, pageOptionsWithoutHtml);
+const resultWithHtml = await scrapSingleUrl("TEST", url, pageOptionsWithHtml);
+const resultWithoutHtml = await scrapSingleUrl("TEST", url, pageOptionsWithoutHtml);
expect(resultWithHtml.html).toBeDefined();
expect(resultWithoutHtml.html).toBeUndefined();
@@ -27,7 +27,7 @@ it('should return a list of links on the mendable.ai page', async () => {
const url = 'https://mendable.ai';
const pageOptions: PageOptions = { includeHtml: true };
-const result = await scrapSingleUrl(url, pageOptions);
+const result = await scrapSingleUrl("TEST", url, pageOptions);
// Check if the result contains a list of links
expect(result.linksOnPage).toBeDefined();