transcribed from e2e to unit tests for many cases
This commit is contained in:
@@ -7,7 +7,7 @@ import { getAdjustedMaxDepth } from '../utils/maxDepthUtils';
|
||||
jest.mock('axios');
|
||||
jest.mock('robots-parser');
|
||||
|
||||
describe('WebCrawler maxDepth and filterLinks', () => {
|
||||
describe('WebCrawler', () => {
|
||||
let crawler: WebCrawler;
|
||||
const mockAxios = axios as jest.Mocked<typeof axios>;
|
||||
const mockRobotsParser = robotsParser as jest.MockedFunction<typeof robotsParser>;
|
||||
@@ -156,8 +156,37 @@ describe('WebCrawler maxDepth and filterLinks', () => {
|
||||
]);
|
||||
});
|
||||
|
||||
|
||||
|
||||
// Add more tests to cover other scenarios, such as checking includes and excludes
|
||||
it('should handle allowBackwardCrawling option correctly', async () => {
|
||||
const initialUrl = 'https://mendable.ai/blog';
|
||||
|
||||
// Setup the crawler with the specific test case options
|
||||
const crawler = new WebCrawler({
|
||||
initialUrl: initialUrl,
|
||||
includes: [],
|
||||
excludes: [],
|
||||
limit: 100,
|
||||
maxCrawledDepth: 3, // Example depth
|
||||
allowBackwardCrawling: true
|
||||
});
|
||||
|
||||
// Mock the sitemap fetching function to simulate backward crawling
|
||||
crawler['tryFetchSitemapLinks'] = jest.fn().mockResolvedValue([
|
||||
initialUrl,
|
||||
'https://mendable.ai', // backward link
|
||||
initialUrl + '/page1',
|
||||
initialUrl + '/page1/page2'
|
||||
]);
|
||||
|
||||
const results = await crawler.start();
|
||||
expect(results).toEqual([
|
||||
{ url: initialUrl, html: '' },
|
||||
{ url: 'https://mendable.ai', html: '' }, // Expect the backward link to be included
|
||||
{ url: initialUrl + '/page1', html: '' },
|
||||
{ url: initialUrl + '/page1/page2', html: '' }
|
||||
]);
|
||||
|
||||
// Check that the backward link is included if allowBackwardCrawling is true
|
||||
expect(results.some(r => r.url === 'https://mendable.ai')).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
jest.mock('../single_url', () => {
|
||||
const originalModule = jest.requireActual('../single_url');
|
||||
originalModule.fetchHtmlContent = jest.fn().mockResolvedValue('<html><head><title>Test</title></head><body><h1>Roast</h1></body></html>');
|
||||
|
||||
return originalModule;
|
||||
});
|
||||
|
||||
import { scrapSingleUrl } from '../single_url';
|
||||
import { PageOptions } from '../../../lib/entities';
|
||||
|
||||
describe('scrapSingleUrl', () => {
|
||||
it('should handle includeHtml option correctly', async () => {
|
||||
const url = 'https://roastmywebsite.ai';
|
||||
const pageOptionsWithHtml: PageOptions = { includeHtml: true };
|
||||
const pageOptionsWithoutHtml: PageOptions = { includeHtml: false };
|
||||
|
||||
const resultWithHtml = await scrapSingleUrl(url, pageOptionsWithHtml);
|
||||
const resultWithoutHtml = await scrapSingleUrl(url, pageOptionsWithoutHtml);
|
||||
|
||||
expect(resultWithHtml.html).toBeDefined();
|
||||
expect(resultWithoutHtml.html).toBeUndefined();
|
||||
}, 10000);
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user