Files
firecrawl/apps/api/src/scraper/scrapeURL/engines/docx/index.ts
T

16 lines
473 B
TypeScript
Raw Normal View History

2024-11-07 20:57:33 +01:00
import { Meta } from "../..";
import { EngineScrapeResult } from "..";
import { downloadFile } from "../utils/downloadFile";
2024-11-07 20:57:33 +01:00
import { unlink } from "fs/promises";
import mammoth from "mammoth";
/**
 * Scrapes a DOCX document by downloading it and converting it to HTML.
 *
 * The document at `meta.url` is fetched with `downloadFile`, which yields the
 * HTTP response and the path of the downloaded copy. That copy is handed to
 * mammoth's `convertToHtml`, and the generated markup is returned as `html`.
 * The downloaded copy is deleted before this function settles.
 *
 * @param meta - Scrape context; only `meta.id` and `meta.url` are read here.
 * @returns The response URL, the HTTP status code, and the converted HTML.
 * @throws Propagates any rejection from `downloadFile` or
 *   `mammoth.convertToHtml`.
 */
export async function scrapeDOCX(meta: Meta): Promise<EngineScrapeResult> {
  const { response, tempFilePath } = await downloadFile(meta.id, meta.url);

  try {
    const conversion = await mammoth.convertToHtml({ path: tempFilePath });

    return {
      url: response.url,
      statusCode: response.status,
      html: conversion.value,
    };
  } finally {
    // The downloaded copy is not part of the result, so remove it whether or
    // not the conversion succeeded. Cleanup is best-effort: a failed unlink
    // must not replace the scrape result or mask the conversion error.
    // NOTE(review): assumes nothing else reads tempFilePath afterwards —
    // confirm against downloadFile and its other callers.
    await unlink(tempFilePath).catch(() => undefined);
  }
}