// firecrawl/apps/api/src/lib/canonical-url.test.ts
import { normalizeUrl, normalizeUrlOnlyHostname } from "./canonical-url";
/**
 * Test suite for `normalizeUrlOnlyHostname`.
 *
 * Every case below expects the input to be reduced to its bare hostname:
 * the protocol, a leading `www.` and any path are expected to be removed.
 * Input that is not a URL is expected to come back unchanged.
 */
describe("normalizeUrlOnlyHostname", () => {
  it("should remove protocol and www from URL", () => {
    const url = "https://www.example.com";
    const expected = "example.com";

    expect(normalizeUrlOnlyHostname(url)).toBe(expected);
  });

  it("should remove only protocol if www is not present", () => {
    const url = "https://example.com";
    const expected = "example.com";

    expect(normalizeUrlOnlyHostname(url)).toBe(expected);
  });

  it("should handle URLs without protocol", () => {
    const url = "www.example.com";
    const expected = "example.com";

    expect(normalizeUrlOnlyHostname(url)).toBe(expected);
  });

  it("should handle URLs without protocol and www", () => {
    const url = "example.com";
    const expected = "example.com";

    expect(normalizeUrlOnlyHostname(url)).toBe(expected);
  });

  it("should handle URLs with paths", () => {
    const url = "https://www.example.com/path/to/resource";
    // Hostname-only variant: the path is expected to be discarded.
    const expected = "example.com";

    expect(normalizeUrlOnlyHostname(url)).toBe(expected);
  });

  it("should handle invalid URLs gracefully", () => {
    const url = "not a valid url";
    // Unparseable input is expected back verbatim rather than as a thrown error.
    const expected = "not a valid url";

    expect(normalizeUrlOnlyHostname(url)).toBe(expected);
  });
});
/**
 * Test suite for `normalizeUrl`.
 *
 * The cases below expect the protocol and a leading `www.` to be removed
 * while the path is kept, minus any trailing slash. Input that is not a URL
 * is expected to come back unchanged.
 */
describe("normalizeUrl", () => {
  it("should remove protocol and www from URL", () => {
    const url = "https://www.example.com";
    const expected = "example.com";

    expect(normalizeUrl(url)).toBe(expected);
  });

  it("should remove only protocol if www is not present", () => {
    const url = "https://example.com";
    const expected = "example.com";

    expect(normalizeUrl(url)).toBe(expected);
  });

  it("should handle URLs without protocol", () => {
    const url = "www.example.com";
    const expected = "example.com";

    expect(normalizeUrl(url)).toBe(expected);
  });

  it("should handle URLs without protocol and www", () => {
    const url = "example.com";
    const expected = "example.com";

    expect(normalizeUrl(url)).toBe(expected);
  });

  it("should handle URLs with paths", () => {
    const url = "https://www.example.com/path/to/resource";
    // Unlike the hostname-only variant, the path is expected to be preserved.
    const expected = "example.com/path/to/resource";

    expect(normalizeUrl(url)).toBe(expected);
  });

  it("should handle URLs with trailing slash", () => {
    const url = "https://www.example.com/";
    // A bare root slash is expected to be dropped entirely.
    const expected = "example.com";

    expect(normalizeUrl(url)).toBe(expected);
  });

  it("should handle URLs with trailing slash and path", () => {
    const url = "https://www.example.com/path/";
    // Only the trailing slash is expected to go; the path itself stays.
    const expected = "example.com/path";

    expect(normalizeUrl(url)).toBe(expected);
  });

  it("should handle invalid URLs gracefully", () => {
    const url = "not a valid url";
    // Unparseable input is expected back verbatim rather than as a thrown error.
    const expected = "not a valid url";

    expect(normalizeUrl(url)).toBe(expected);
  });
});