feat(crawl): includes/excludes fixes (FIR-1300) (#1303)
* feat(crawl): includes/excludes fixes pt. 1
* fix(snips): billing tests
* drop tha logs
* fix(ci): add replica url
* feat(crawl): drop initial scrape if it's not included
* feat(ci): more verbose logging
* fix crawl path in test
* fix(ci): wait for api
* fix(snips/scrape/ad): test for more pixels
* feat(js-sdk/crawl): add regexOnFullURL
This commit is contained in:
@@ -7,4 +7,50 @@ describe("Crawl tests", () => {
|
||||
limit: 10,
|
||||
});
|
||||
}, 120000);
|
||||
|
||||
// Starts a crawl at the pricing page with an include pattern of "^/pricing$"
// and checks that exactly one page completes and that it is the pricing page.
// NOTE(review): presumably includePaths is matched against the URL path here
// (no regexOnFullURL flag is passed) — confirm against the crawl API.
it.concurrent("filters URLs properly", async () => {
  const pricingUrl = "https://firecrawl.dev/pricing";

  const result = await crawl({
    url: pricingUrl,
    includePaths: ["^/pricing$"],
    limit: 10,
  });

  expect(result.success).toBe(true);

  // The expectation above already fails the test on an unsuccessful crawl;
  // this guard just keeps the checks below from touching a failed response.
  if (!result.success) {
    return;
  }

  expect(result.completed).toBe(1);

  const [firstPage] = result.data;
  expect(firstPage.metadata.sourceURL).toBe(pricingUrl);
}, 120000);
|
||||
|
||||
// Same scenario as the path-based filter test, but the include pattern is a
// regex written against the complete URL and regexOnFullURL is switched on.
// Expects exactly one completed page: the pricing page itself.
it.concurrent("filters URLs properly when using regexOnFullURL", async () => {
  const pricingUrl = "https://firecrawl.dev/pricing";

  const result = await crawl({
    url: pricingUrl,
    includePaths: ["^https://(www\\.)?firecrawl\\.dev/pricing$"],
    regexOnFullURL: true,
    limit: 10,
  });

  expect(result.success).toBe(true);

  // The expectation above already fails the test on an unsuccessful crawl;
  // this guard just keeps the checks below from touching a failed response.
  if (!result.success) {
    return;
  }

  expect(result.completed).toBe(1);

  const [firstPage] = result.data;
  expect(firstPage.metadata.sourceURL).toBe(pricingUrl);
}, 120000);
|
||||
|
||||
// Starts a crawl at the site root with includePaths set to "^/blog" (a pattern
// the root URL itself does not satisfy) and with the sitemap ignored. Checks
// that more than one page comes back and that every returned page's URL is
// under /blog on firecrawl.dev (with or without the "www." prefix).
it.concurrent("discovers URLs properly when origin is not included", async () => {
  const blogUrlPattern = /^https:\/\/(www\.)?firecrawl\.dev\/blog/;

  const result = await crawl({
    url: "https://firecrawl.dev",
    includePaths: ["^/blog"],
    ignoreSitemap: true,
    limit: 10,
  });

  expect(result.success).toBe(true);

  // The expectation above already fails the test on an unsuccessful crawl;
  // this guard just keeps the checks below from touching a failed response.
  if (!result.success) {
    return;
  }

  expect(result.data.length).toBeGreaterThan(1);

  // Prefer metadata.url and fall back to metadata.sourceURL when it is nullish.
  const pageUrls = result.data.map(
    ({ metadata }) => metadata.url ?? metadata.sourceURL,
  );
  for (const pageUrl of pageUrls) {
    expect(pageUrl).toMatch(blogUrlPattern);
  }
}, 120000);
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user