Nick: fixed prettier
This commit is contained in:
@@ -1,27 +1,29 @@
|
||||
// crawler.test.ts
|
||||
import { WebCrawler } from '../crawler';
|
||||
import axios from 'axios';
|
||||
import robotsParser from 'robots-parser';
|
||||
import { WebCrawler } from "../crawler";
|
||||
import axios from "axios";
|
||||
import robotsParser from "robots-parser";
|
||||
|
||||
jest.mock('axios');
|
||||
jest.mock('robots-parser');
|
||||
jest.mock("axios");
|
||||
jest.mock("robots-parser");
|
||||
|
||||
describe('WebCrawler', () => {
|
||||
describe("WebCrawler", () => {
|
||||
let crawler: WebCrawler;
|
||||
const mockAxios = axios as jest.Mocked<typeof axios>;
|
||||
const mockRobotsParser = robotsParser as jest.MockedFunction<typeof robotsParser>;
|
||||
const mockRobotsParser = robotsParser as jest.MockedFunction<
|
||||
typeof robotsParser
|
||||
>;
|
||||
|
||||
let maxCrawledDepth: number;
|
||||
|
||||
beforeEach(() => {
|
||||
// Setup default mocks
|
||||
mockAxios.get.mockImplementation((url) => {
|
||||
if (url.includes('robots.txt')) {
|
||||
return Promise.resolve({ data: 'User-agent: *\nAllow: /' });
|
||||
} else if (url.includes('sitemap.xml')) {
|
||||
return Promise.resolve({ data: 'sitemap content' }); // You would normally parse this to URLs
|
||||
if (url.includes("robots.txt")) {
|
||||
return Promise.resolve({ data: "User-agent: *\nAllow: /" });
|
||||
} else if (url.includes("sitemap.xml")) {
|
||||
return Promise.resolve({ data: "sitemap content" }); // You would normally parse this to URLs
|
||||
}
|
||||
return Promise.resolve({ data: '<html></html>' });
|
||||
return Promise.resolve({ data: "<html></html>" });
|
||||
});
|
||||
|
||||
mockRobotsParser.mockReturnValue({
|
||||
@@ -30,42 +32,45 @@ describe('WebCrawler', () => {
|
||||
getMatchingLineNumber: jest.fn().mockReturnValue(0),
|
||||
getCrawlDelay: jest.fn().mockReturnValue(0),
|
||||
getSitemaps: jest.fn().mockReturnValue([]),
|
||||
getPreferredHost: jest.fn().mockReturnValue('example.com')
|
||||
getPreferredHost: jest.fn().mockReturnValue("example.com")
|
||||
});
|
||||
});
|
||||
|
||||
it('should respect the limit parameter by not returning more links than specified', async () => {
|
||||
const initialUrl = 'http://example.com';
|
||||
const limit = 2; // Set a limit for the number of links
|
||||
it("should respect the limit parameter by not returning more links than specified", async () => {
|
||||
const initialUrl = "http://example.com";
|
||||
const limit = 2; // Set a limit for the number of links
|
||||
|
||||
crawler = new WebCrawler({
|
||||
jobId: "TEST",
|
||||
initialUrl: initialUrl,
|
||||
includes: [],
|
||||
excludes: [],
|
||||
limit: limit, // Apply the limit
|
||||
limit: limit, // Apply the limit
|
||||
maxCrawledDepth: 10
|
||||
});
|
||||
|
||||
// Mock sitemap fetching function to return more links than the limit
|
||||
crawler['tryFetchSitemapLinks'] = jest.fn().mockResolvedValue([
|
||||
initialUrl,
|
||||
initialUrl + '/page1',
|
||||
initialUrl + '/page2',
|
||||
initialUrl + '/page3'
|
||||
]);
|
||||
crawler["tryFetchSitemapLinks"] = jest
|
||||
.fn()
|
||||
.mockResolvedValue([
|
||||
initialUrl,
|
||||
initialUrl + "/page1",
|
||||
initialUrl + "/page2",
|
||||
initialUrl + "/page3"
|
||||
]);
|
||||
|
||||
const filteredLinks = crawler['filterLinks'](
|
||||
[initialUrl, initialUrl + '/page1', initialUrl + '/page2', initialUrl + '/page3'],
|
||||
const filteredLinks = crawler["filterLinks"](
|
||||
[
|
||||
initialUrl,
|
||||
initialUrl + "/page1",
|
||||
initialUrl + "/page2",
|
||||
initialUrl + "/page3"
|
||||
],
|
||||
limit,
|
||||
10
|
||||
);
|
||||
|
||||
expect(filteredLinks.length).toBe(limit); // Check if the number of results respects the limit
|
||||
expect(filteredLinks).toEqual([
|
||||
initialUrl,
|
||||
initialUrl + '/page1'
|
||||
]);
|
||||
expect(filteredLinks.length).toBe(limit); // Check if the number of results respects the limit
|
||||
expect(filteredLinks).toEqual([initialUrl, initialUrl + "/page1"]);
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user