Files
firecrawl/apps/api/src/lib/__tests__/html-to-markdown.test.ts
T

51 lines
1.7 KiB
TypeScript
Raw Normal View History

2024-12-11 19:46:11 -03:00
import { parseMarkdown } from "../html-to-markdown";
2024-09-02 14:15:56 -03:00
2024-12-11 19:46:11 -03:00
/**
 * Test suite for `parseMarkdown`, the asynchronous HTML-to-Markdown converter
 * imported from `../html-to-markdown`.
 *
 * Every case awaits the returned promise through `.resolves`, so a rejected
 * promise fails the test instead of being silently ignored.
 *
 * Scenarios covered:
 *  - a single paragraph,
 *  - nested block/inline elements (paragraph, bold text, unordered list),
 *  - empty-string input,
 *  - `null` input,
 *  - several kinds of malformed HTML (unclosed or mismatched tags).
 */
describe("parseMarkdown", () => {
  it("should correctly convert simple HTML to Markdown", async () => {
    const html = "<p>Hello, world!</p>";
    const expectedMarkdown = "Hello, world!";

    await expect(parseMarkdown(html)).resolves.toBe(expectedMarkdown);
  });

  it("should convert complex HTML with nested elements to Markdown", async () => {
    const html =
      "<div><p>Hello <strong>bold</strong> world!</p><ul><li>List item</li></ul></div>";
    // The paragraph and the list are expected to be separated by one blank line.
    const expectedMarkdown = "Hello **bold** world!\n\n- List item";

    await expect(parseMarkdown(html)).resolves.toBe(expectedMarkdown);
  });

  it("should return empty string when input is empty", async () => {
    const html = "";
    const expectedMarkdown = "";

    await expect(parseMarkdown(html)).resolves.toBe(expectedMarkdown);
  });

  it("should handle null input gracefully", async () => {
    // A null document is expected to yield an empty string, not a rejection.
    const html = null;
    const expectedMarkdown = "";

    await expect(parseMarkdown(html)).resolves.toBe(expectedMarkdown);
  });

  it("should handle various types of invalid HTML gracefully", async () => {
    // Each fixture pairs a malformed snippet with the Markdown the converter
    // is expected to recover from it.
    const invalidHtmls = [
      // Paragraph and document never closed.
      { html: "<html><p>Unclosed tag", expected: "Unclosed tag" },
      // Neither the <span> nor the <div> is closed.
      {
        html: "<div><span>Missing closing div",
        expected: "Missing closing div",
      },
      // Stray </em> with no matching opening tag inside <strong>.
      {
        html: "<p><strong>Wrong nesting</em></strong></p>",
        expected: "**Wrong nesting**",
      },
      // Anchor left open; the link is still expected to be emitted.
      {
        html: '<a href="http://example.com">Link without closing tag',
        expected: "[Link without closing tag](http://example.com)",
      },
    ];

    // Fixtures are checked one after another; the first mismatch fails the test.
    for (const { html, expected } of invalidHtmls) {
      await expect(parseMarkdown(html)).resolves.toBe(expected);
    }
  });
});