Files
firecrawl/apps/api/src/lib/canonical-url.test.ts
T

92 lines
2.8 KiB
TypeScript
Raw Normal View History

2025-01-03 23:55:05 -03:00
import { normalizeUrl, normalizeUrlOnlyHostname } from './canonical-url';
describe('normalizeUrlOnlyHostname', () => {
it('should remove protocol and www from URL', () => {
const url = 'https://www.example.com';
const expected = 'example.com';
expect(normalizeUrlOnlyHostname(url)).toBe(expected);
});
it('should remove only protocol if www is not present', () => {
const url = 'https://example.com';
const expected = 'example.com';
expect(normalizeUrlOnlyHostname(url)).toBe(expected);
});
it('should handle URLs without protocol', () => {
const url = 'www.example.com';
const expected = 'example.com';
expect(normalizeUrlOnlyHostname(url)).toBe(expected);
});
it('should handle URLs without protocol and www', () => {
const url = 'example.com';
const expected = 'example.com';
expect(normalizeUrlOnlyHostname(url)).toBe(expected);
});
it('should handle URLs with paths', () => {
const url = 'https://www.example.com/path/to/resource';
const expected = 'example.com';
expect(normalizeUrlOnlyHostname(url)).toBe(expected);
});
it('should handle invalid URLs gracefully', () => {
const url = 'not a valid url';
const expected = 'not a valid url';
expect(normalizeUrlOnlyHostname(url)).toBe(expected);
});
});
2025-01-03 23:16:33 -03:00
describe('normalizeUrl', () => {
it('should remove protocol and www from URL', () => {
const url = 'https://www.example.com';
const expected = 'example.com';
expect(normalizeUrl(url)).toBe(expected);
});
it('should remove only protocol if www is not present', () => {
const url = 'https://example.com';
const expected = 'example.com';
expect(normalizeUrl(url)).toBe(expected);
});
it('should handle URLs without protocol', () => {
const url = 'www.example.com';
const expected = 'example.com';
expect(normalizeUrl(url)).toBe(expected);
});
it('should handle URLs without protocol and www', () => {
const url = 'example.com';
const expected = 'example.com';
expect(normalizeUrl(url)).toBe(expected);
});
it('should handle URLs with paths', () => {
const url = 'https://www.example.com/path/to/resource';
2025-01-03 23:55:05 -03:00
const expected = 'example.com/path/to/resource';
expect(normalizeUrl(url)).toBe(expected);
});
it('should handle URLs with trailing slash', () => {
const url = 'https://www.example.com/';
2025-01-03 23:16:33 -03:00
const expected = 'example.com';
expect(normalizeUrl(url)).toBe(expected);
});
2025-01-03 23:55:05 -03:00
it('should handle URLs with trailing slash and path', () => {
const url = 'https://www.example.com/path/';
const expected = 'example.com/path';
expect(normalizeUrl(url)).toBe(expected);
});
2025-01-03 23:16:33 -03:00
it('should handle invalid URLs gracefully', () => {
const url = 'not a valid url';
const expected = 'not a valid url';
expect(normalizeUrl(url)).toBe(expected);
});
});