Ported many end-to-end test cases to unit tests

2024-06-17 17:09:44 -03:00
parent a20d002a6b
commit b2bd562bb2
9 changed files with 1635 additions and 707 deletions
@@ -7,7 +7,7 @@ import { getAdjustedMaxDepth } from '../utils/maxDepthUtils';
 jest.mock('axios');
 jest.mock('robots-parser');

-describe('WebCrawler maxDepth and filterLinks', () => {
+describe('WebCrawler', () => {
  let crawler: WebCrawler;
  const mockAxios = axios as jest.Mocked<typeof axios>;
  const mockRobotsParser = robotsParser as jest.MockedFunction<typeof robotsParser>;
@@ -156,8 +156,37 @@ describe('WebCrawler maxDepth and filterLinks', () => {
    ]);   
  });

-
-
-  // Add more tests to cover other scenarios, such as checking includes and excludes
+  it('should handle allowBackwardCrawling option correctly', async () => {
+    const initialUrl = 'https://mendable.ai/blog';
+  
+    // Build a crawler just for this test, with allowBackwardCrawling enabled. NOTE(review): this const shadows the suite-level `crawler` variable.
+    const crawler = new WebCrawler({
+      initialUrl: initialUrl,
+      includes: [],
+      excludes: [],
+      limit: 100,
+      maxCrawledDepth: 3, // Example depth; the deepest stubbed link is two path segments below initialUrl
+      allowBackwardCrawling: true
+    });
+  
+    // Stub tryFetchSitemapLinks (reached via bracket access) so it resolves to a fixed link list that includes one URL above the /blog path
+    crawler['tryFetchSitemapLinks'] = jest.fn().mockResolvedValue([
+      initialUrl,
+      'https://mendable.ai', // backward link: the site root, i.e. the parent of /blog
+      initialUrl + '/page1',
+      initialUrl + '/page1/page2'
+    ]);
+  
+    const results = await crawler.start();
+    expect(results).toEqual([
+      { url: initialUrl, html: '' },
+      { url: 'https://mendable.ai', html: '' }, // Expect the backward link to be included rather than filtered out
+      { url: initialUrl + '/page1', html: '' },
+      { url: initialUrl + '/page1/page2', html: '' }
+    ]);
+  
+    // Redundant with the toEqual above, but spells out the intent: with allowBackwardCrawling set to true the backward link must be present
+    expect(results.some(r => r.url === 'https://mendable.ai')).toBe(true);
+  });
 });

@@ -0,0 +1,24 @@
+jest.mock('../single_url', () => { // Module factory: reuse the real ../single_url module, replacing only fetchHtmlContent
+  const originalModule = jest.requireActual('../single_url'); // load the unmocked implementation
+  originalModule.fetchHtmlContent = jest.fn().mockResolvedValue('<html><head><title>Test</title></head><body><h1>Roast</h1></body></html>'); // stub resolves to a fixed HTML page
+
+  return originalModule; // NOTE(review): if scrapSingleUrl calls fetchHtmlContent through a module-internal binding, this stub is bypassed - confirm against single_url
+});
+
+import { scrapSingleUrl } from '../single_url';
+import { PageOptions } from '../../../lib/entities';
+
+describe('scrapSingleUrl', () => { // Tests for the includeHtml page option of scrapSingleUrl
+  it('should handle includeHtml option correctly', async () => {
+    const url = 'https://roastmywebsite.ai';
+    const pageOptionsWithHtml: PageOptions = { includeHtml: true };
+    const pageOptionsWithoutHtml: PageOptions = { includeHtml: false };
+
+    const resultWithHtml = await scrapSingleUrl(url, pageOptionsWithHtml); // same URL scraped twice, once per includeHtml setting
+    const resultWithoutHtml = await scrapSingleUrl(url, pageOptionsWithoutHtml);
+
+    expect(resultWithHtml.html).toBeDefined(); // html field is expected only when includeHtml is true
+    expect(resultWithoutHtml.html).toBeUndefined(); // and must be absent when includeHtml is false
+  }, 10000); // third argument to it(): per-test timeout in milliseconds (10 s)
+});
+