transcribed from e2e to unit tests for many cases

This commit is contained in:
rafaelsideguide
2024-06-17 17:09:44 -03:00
parent a20d002a6b
commit b2bd562bb2
9 changed files with 1635 additions and 707 deletions
@@ -7,7 +7,7 @@ import { getAdjustedMaxDepth } from '../utils/maxDepthUtils';
jest.mock('axios');
jest.mock('robots-parser');
describe('WebCrawler maxDepth and filterLinks', () => {
describe('WebCrawler', () => {
let crawler: WebCrawler;
const mockAxios = axios as jest.Mocked<typeof axios>;
const mockRobotsParser = robotsParser as jest.MockedFunction<typeof robotsParser>;
@@ -156,8 +156,37 @@ describe('WebCrawler maxDepth and filterLinks', () => {
]);
});
// Add more tests to cover other scenarios, such as checking includes and excludes
it('should handle allowBackwardCrawling option correctly', async () => {
const initialUrl = 'https://mendable.ai/blog';
// Setup the crawler with the specific test case options
const crawler = new WebCrawler({
initialUrl: initialUrl,
includes: [],
excludes: [],
limit: 100,
maxCrawledDepth: 3, // Example depth
allowBackwardCrawling: true
});
// Mock the sitemap fetching function to simulate backward crawling
crawler['tryFetchSitemapLinks'] = jest.fn().mockResolvedValue([
initialUrl,
'https://mendable.ai', // backward link
initialUrl + '/page1',
initialUrl + '/page1/page2'
]);
const results = await crawler.start();
expect(results).toEqual([
{ url: initialUrl, html: '' },
{ url: 'https://mendable.ai', html: '' }, // Expect the backward link to be included
{ url: initialUrl + '/page1', html: '' },
{ url: initialUrl + '/page1/page2', html: '' }
]);
// Check that the backward link is included if allowBackwardCrawling is true
expect(results.some(r => r.url === 'https://mendable.ai')).toBe(true);
});
});
@@ -0,0 +1,24 @@
jest.mock('../single_url', () => {
const originalModule = jest.requireActual('../single_url');
originalModule.fetchHtmlContent = jest.fn().mockResolvedValue('<html><head><title>Test</title></head><body><h1>Roast</h1></body></html>');
return originalModule;
});
import { scrapSingleUrl } from '../single_url';
import { PageOptions } from '../../../lib/entities';
describe('scrapSingleUrl', () => {
it('should handle includeHtml option correctly', async () => {
const url = 'https://roastmywebsite.ai';
const pageOptionsWithHtml: PageOptions = { includeHtml: true };
const pageOptionsWithoutHtml: PageOptions = { includeHtml: false };
const resultWithHtml = await scrapSingleUrl(url, pageOptionsWithHtml);
const resultWithoutHtml = await scrapSingleUrl(url, pageOptionsWithoutHtml);
expect(resultWithHtml.html).toBeDefined();
expect(resultWithoutHtml.html).toBeUndefined();
}, 10000);
});