Nick: fixed Prettier formatting

2024-12-11 19:46:11 -03:00
parent e5fe9e1534
commit 00335e2ba9
134 changed files with 9565 additions and 7108 deletions
@@ -1,27 +1,29 @@
 // crawler.test.ts
-import { WebCrawler } from '../crawler';
-import axios from 'axios';
-import robotsParser from 'robots-parser';
+import { WebCrawler } from "../crawler";
+import axios from "axios";
+import robotsParser from "robots-parser";

-jest.mock('axios');
-jest.mock('robots-parser');
+jest.mock("axios");
+jest.mock("robots-parser");

-describe('WebCrawler', () => {
+describe("WebCrawler", () => {
  let crawler: WebCrawler;
  const mockAxios = axios as jest.Mocked<typeof axios>;
-  const mockRobotsParser = robotsParser as jest.MockedFunction<typeof robotsParser>;
+  const mockRobotsParser = robotsParser as jest.MockedFunction<
+    typeof robotsParser
+  >;

  let maxCrawledDepth: number;

  beforeEach(() => {
    // Setup default mocks
    mockAxios.get.mockImplementation((url) => {
-      if (url.includes('robots.txt')) {
-        return Promise.resolve({ data: 'User-agent: *\nAllow: /' });
-      } else if (url.includes('sitemap.xml')) {
-        return Promise.resolve({ data: 'sitemap content' }); // You would normally parse this to URLs
+      if (url.includes("robots.txt")) {
+        return Promise.resolve({ data: "User-agent: *\nAllow: /" });
+      } else if (url.includes("sitemap.xml")) {
+        return Promise.resolve({ data: "sitemap content" }); // You would normally parse this to URLs
      }
-      return Promise.resolve({ data: '<html></html>' });
+      return Promise.resolve({ data: "<html></html>" });
    });

    mockRobotsParser.mockReturnValue({
@@ -30,42 +32,45 @@ describe('WebCrawler', () => {
      getMatchingLineNumber: jest.fn().mockReturnValue(0),
      getCrawlDelay: jest.fn().mockReturnValue(0),
      getSitemaps: jest.fn().mockReturnValue([]),
-      getPreferredHost: jest.fn().mockReturnValue('example.com')
+      getPreferredHost: jest.fn().mockReturnValue("example.com")
    });
  });

-  it('should respect the limit parameter by not returning more links than specified', async () => {
-    const initialUrl = 'http://example.com';
-    const limit = 2;  // Set a limit for the number of links
+  it("should respect the limit parameter by not returning more links than specified", async () => {
+    const initialUrl = "http://example.com";
+    const limit = 2; // Set a limit for the number of links

    crawler = new WebCrawler({
      jobId: "TEST",
      initialUrl: initialUrl,
      includes: [],
      excludes: [],
-      limit: limit,  // Apply the limit
+      limit: limit, // Apply the limit
      maxCrawledDepth: 10
    });

    // Mock sitemap fetching function to return more links than the limit
-    crawler['tryFetchSitemapLinks'] = jest.fn().mockResolvedValue([
-      initialUrl,
-      initialUrl + '/page1',
-      initialUrl + '/page2',
-      initialUrl + '/page3'
-    ]);
+    crawler["tryFetchSitemapLinks"] = jest
+      .fn()
+      .mockResolvedValue([
+        initialUrl,
+        initialUrl + "/page1",
+        initialUrl + "/page2",
+        initialUrl + "/page3"
+      ]);

-    const filteredLinks = crawler['filterLinks'](
-      [initialUrl, initialUrl + '/page1', initialUrl + '/page2', initialUrl + '/page3'],
+    const filteredLinks = crawler["filterLinks"](
+      [
+        initialUrl,
+        initialUrl + "/page1",
+        initialUrl + "/page2",
+        initialUrl + "/page3"
+      ],
      limit,
      10
    );

-    expect(filteredLinks.length).toBe(limit);  // Check if the number of results respects the limit
-    expect(filteredLinks).toEqual([
-      initialUrl,
-      initialUrl + '/page1'
-    ]);
+    expect(filteredLinks.length).toBe(limit); // Check if the number of results respects the limit
+    expect(filteredLinks).toEqual([initialUrl, initialUrl + "/page1"]);
  });
 });
-