This commit is contained in:
Eric Ciarla
2024-08-26 15:16:50 -04:00
parent 5606fe5870
commit 0566e54d85
25 changed files with 5198 additions and 0 deletions
@@ -0,0 +1,211 @@
{
"info": {
"title": "Firecrawl API",
"version": "v0"
},
"openapi": "3.0.0",
"paths": {
"/v0/crawl": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"crawlerOptions": {
"description": "Crawling options.",
"properties": {
"excludes": {
"description": "URL patterns to exclude.",
"items": {
"type": "string"
},
"type": "array"
},
"includes": {
"description": "URL patterns to include.",
"items": {
"type": "string"
},
"type": "array"
},
"limit": {
"description": "Maximum pages to crawl.",
"type": "integer"
},
"maxDepth": {
"description": "Maximum crawl depth.",
"type": "integer"
},
"mode": {
"description": "Crawling mode.",
"enum": [
"default",
"fast"
],
"type": "string"
},
"returnOnlyUrls": {
"description": "Return only URLs.",
"type": "boolean"
}
},
"type": "object"
},
"pageOptions": {
"description": "Page scraping options.",
"properties": {
"includeHtml": {
"description": "Include HTML content.",
"type": "boolean"
},
"includeRawHtml": {
"description": "Include raw HTML content.",
"type": "boolean"
},
"onlyMainContent": {
"description": "Only main content.",
"type": "boolean"
},
"screenshot": {
"description": "Include page screenshot.",
"type": "boolean"
},
"waitFor": {
"description": "Wait time in milliseconds.",
"type": "integer"
}
},
"type": "object"
},
"url": {
"description": "Base URL to crawl.",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"jobId": {
"description": "Crawl job ID.",
"type": "string"
}
},
"type": "object"
}
}
},
"description": "Crawl job initiated."
}
},
"summary": "Crawl multiple pages."
}
},
"/v0/crawl/status/{jobId}": {
"get": {
"parameters": [
{
"description": "Crawl job ID.",
"in": "path",
"name": "jobId",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"description": "Crawl job status."
}
},
"summary": "Check crawl job status."
}
},
"/v0/scrape": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"extractorOptions": {
"description": "Data extraction options.",
"properties": {
"extractionPrompt": {
"description": "Prompt for data extraction.",
"type": "string"
},
"extractionSchema": {
"description": "Schema for data extraction.",
"type": "object"
},
"mode": {
"description": "Extraction mode.",
"enum": [
"llm-extraction",
"llm-extraction-from-raw-html"
],
"type": "string"
}
},
"type": "object"
},
"pageOptions": {
"description": "Page scraping options.",
"properties": {
"includeHtml": {
"description": "Include HTML content.",
"type": "boolean"
},
"includeRawHtml": {
"description": "Include raw HTML content.",
"type": "boolean"
},
"onlyMainContent": {
"description": "Only main content.",
"type": "boolean"
},
"screenshot": {
"description": "Include page screenshot.",
"type": "boolean"
},
"waitFor": {
"description": "Wait time in milliseconds.",
"type": "integer"
}
},
"type": "object"
},
"timeout": {
"description": "Timeout in milliseconds.",
"type": "integer"
},
"url": {
"description": "URL to scrape.",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"description": "Successful scraping."
}
},
"summary": "Scrape a single page."
}
}
}
}
@@ -0,0 +1,165 @@
{
"components": {
"securitySchemes": {
"Bearer": {
"bearerFormat": "JWT",
"scheme": "bearer",
"type": "http"
}
}
},
"info": {
"title": "Firecrawl API",
"version": "v0"
},
"openapi": "3.0.0",
"paths": {
"/crawl": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"crawlerOptions": {
"properties": {
"allowBackwardCrawling": {
"description": "Allow backward crawling",
"type": "boolean"
},
"allowExternalContentLinks": {
"description": "Allow external links",
"type": "boolean"
},
"excludes": {
"description": "URL patterns to exclude",
"items": {
"type": "string"
},
"type": "array"
},
"generateImgAltText": {
"description": "Generate alt text for images",
"type": "boolean"
},
"ignoreSitemap": {
"description": "Ignore website sitemap",
"type": "boolean"
},
"includes": {
"description": "URL patterns to include",
"items": {
"type": "string"
},
"type": "array"
},
"limit": {
"description": "Maximum pages to crawl",
"type": "integer"
},
"maxDepth": {
"description": "Maximum crawl depth",
"type": "integer"
},
"mode": {
"description": "Crawling mode",
"enum": [
"default",
"fast"
],
"type": "string"
},
"returnOnlyUrls": {
"description": "Return only crawled URLs",
"type": "boolean"
}
},
"type": "object"
},
"pageOptions": {
"properties": {
"fullPageScreenshot": {
"description": "Include full page screenshot",
"type": "boolean"
},
"headers": {
"description": "Headers for requests",
"type": "object"
},
"includeHtml": {
"description": "Include HTML content",
"type": "boolean"
},
"includeRawHtml": {
"description": "Include raw HTML content",
"type": "boolean"
},
"onlyIncludeTags": {
"description": "Include only specific tags",
"items": {
"type": "string"
},
"type": "array"
},
"onlyMainContent": {
"description": "Return only main content",
"type": "boolean"
},
"removeTags": {
"description": "Remove specific tags",
"items": {
"type": "string"
},
"type": "array"
},
"replaceAllPathsWithAbsolutePaths": {
"description": "Use absolute paths",
"type": "boolean"
},
"screenshot": {
"description": "Include page screenshot",
"type": "boolean"
},
"waitFor": {
"description": "Wait for page load (ms)",
"type": "integer"
}
},
"type": "object"
},
"url": {
"description": "Base URL to crawl",
"type": "string"
}
},
"required": [
"url"
],
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"jobId": {
"description": "Job ID of the crawl",
"type": "string"
}
},
"type": "object"
}
}
},
"description": "Crawl request successful"
}
},
"security": [
{
"Bearer": []
}
],
"summary": "Crawl a website"
}
}
}
}
@@ -0,0 +1,93 @@
{
"info": {
"title": "Firecrawl API",
"version": "1.0.0"
},
"openapi": "3.0.0",
"paths": {
"/check_crawl_status": {
"post": {
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"current": {
"type": "integer"
},
"data": {
"items": {
"properties": {
"content": {
"type": "string"
},
"markdown": {
"type": "string"
},
"metadata": {
"properties": {
"description": {
"type": "string"
},
"language": {
"type": "string"
},
"sourceURL": {
"type": "string"
},
"title": {
"type": "string"
}
},
"type": "object"
},
"provider": {
"type": "string"
}
},
"type": "object"
},
"type": "array"
},
"status": {
"type": "string"
},
"total": {
"type": "integer"
}
},
"type": "object"
}
}
},
"description": "Crawl job status"
}
},
"summary": "Check crawl job status"
}
},
"/crawl": {
"post": {
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"jobId": {
"type": "string"
}
},
"type": "object"
}
}
},
"description": "Job ID"
}
},
"summary": "Crawl URL and subpages"
}
}
}
}
@@ -0,0 +1,131 @@
{
"info": {
"title": "Firecrawl API",
"version": "v0"
},
"openapi": "3.0.0",
"paths": {
"/scrape": {
"post": {
"requestBody": {
"content": {
"application/json": {
"example": {
"extractorOptions": {
"extractionPrompt": "Based on the information on the page, extract the information from the schema. ",
"extractionSchema": {
"properties": {
"company_mission": {
"type": "string"
},
"is_in_yc": {
"type": "boolean"
},
"is_open_source": {
"type": "boolean"
},
"supports_sso": {
"type": "boolean"
}
},
"required": [
"company_mission",
"supports_sso",
"is_open_source",
"is_in_yc"
],
"type": "object"
},
"mode": "llm-extraction"
},
"url": "https://docs.firecrawl.dev/"
},
"schema": {
"properties": {
"extractorOptions": {
"properties": {
"extractionPrompt": {
"description": "Prompt for extraction",
"type": "string"
},
"extractionSchema": {
"description": "Schema for data extraction",
"type": "object"
},
"mode": {
"description": "Extraction mode",
"type": "string"
}
},
"type": "object"
},
"url": {
"description": "URL to scrape",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"example": {
"data": {
"content": "Raw Content",
"llm_extraction": {
"company_mission": "Train a secure AI on your technical resources that answers customer and employee questions so your team doesn't have to",
"is_in_yc": true,
"is_open_source": false,
"supports_sso": true
},
"metadata": {
"description": "Mendable allows you to easily build AI chat applications. Ingest, customize, then deploy with one line of code anywhere you want. Brought to you by SideGuide",
"ogDescription": "Mendable allows you to easily build AI chat applications. Ingest, customize, then deploy with one line of code anywhere you want. Brought to you by SideGuide",
"ogImage": "https://docs.firecrawl.dev/mendable_new_og1.png",
"ogLocaleAlternate": [],
"ogSiteName": "Mendable",
"ogTitle": "Mendable",
"ogUrl": "https://docs.firecrawl.dev/",
"robots": "follow, index",
"sourceURL": "https://docs.firecrawl.dev/",
"title": "Mendable"
}
},
"success": true
},
"schema": {
"properties": {
"data": {
"properties": {
"content": {
"type": "string"
},
"llm_extraction": {
"type": "object"
},
"metadata": {
"type": "object"
}
},
"type": "object"
},
"success": {
"type": "boolean"
}
},
"type": "object"
}
}
},
"description": "Successful scrape"
}
},
"summary": "Extract data from pages."
}
}
}
}
@@ -0,0 +1,87 @@
{
"info": {
"title": "Firecrawl API",
"version": "v0"
},
"openapi": "3.0.0",
"paths": {
"/search": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"pageOptions": {
"properties": {
"fetchPageContent": {
"type": "boolean"
}
},
"type": "object"
},
"query": {
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"data": {
"items": {
"properties": {
"markdown": {
"type": "string"
},
"metadata": {
"properties": {
"description": {
"type": "string"
},
"language": {
"type": "string"
},
"sourceURL": {
"type": "string"
},
"title": {
"type": "string"
}
},
"type": "object"
},
"provider": {
"type": "string"
},
"url": {
"type": "string"
}
},
"type": "object"
},
"type": "array"
},
"success": {
"type": "boolean"
}
},
"type": "object"
}
}
},
"description": "Successful search and scrape."
}
},
"summary": "Search web, scrape, return markdown."
}
}
}
}
@@ -0,0 +1,83 @@
{
"info": {
"title": "Firecrawl API",
"version": "1.0.0"
},
"openapi": "3.0.0",
"paths": {
"/crawl": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"url": {
"description": "Website URL to crawl.",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"items": {
"properties": {
"markdown": {
"description": "Markdown content.",
"type": "string"
}
},
"type": "object"
},
"type": "array"
}
}
},
"description": "Website crawled successfully."
}
},
"summary": "Crawl a website."
}
},
"/scrape": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"url": {
"description": "Page URL to scrape.",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"text/plain": {
"schema": {
"description": "Scraped content.",
"type": "string"
}
}
},
"description": "Page scraped successfully."
}
},
"summary": "Scrape a single page."
}
}
}
}
@@ -0,0 +1,200 @@
{
"info": {
"title": "Firecrawl API",
"version": "1.0.0"
},
"openapi": "3.0.0",
"paths": {
"/crawl": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"crawler_options": {
"properties": {
"exclude": {
"description": "URL patterns to exclude",
"items": {
"type": "string"
},
"type": "array"
},
"generateImgAltText": {
"description": "Generate alt text for images",
"type": "boolean"
},
"includes": {
"description": "URL patterns to include",
"items": {
"type": "string"
},
"type": "array"
},
"limit": {
"description": "Max pages to crawl",
"type": "integer"
},
"maxDepth": {
"description": "Maximum crawl depth",
"type": "integer"
},
"mode": {
"description": "Crawling mode",
"type": "string"
},
"returnOnlyUrls": {
"description": "Return only URLs",
"type": "boolean"
},
"timeout": {
"description": "Timeout in milliseconds",
"type": "integer"
}
},
"type": "object"
},
"page_options": {
"properties": {
"includeHtml": {
"description": "Include HTML content",
"type": "boolean"
},
"onlyMainContent": {
"description": "Only main content",
"type": "boolean"
}
},
"type": "object"
},
"url": {
"description": "Base URL to crawl",
"type": "string"
}
},
"required": [
"url"
],
"type": "object"
}
}
}
},
"responses": {
"200": {
"description": "Crawl successful."
}
},
"summary": "Crawl a website."
}
},
"/scrape": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"extractor_options": {
"properties": {
"extractionPrompt": {
"description": "Prompt for extraction",
"type": "string"
},
"extractionSchema": {
"description": "Schema for extraction",
"type": "object"
},
"mode": {
"description": "Extraction mode",
"type": "string"
}
},
"type": "object"
},
"page_options": {
"properties": {
"includeHtml": {
"description": "Include HTML content",
"type": "boolean"
},
"onlyMainContent": {
"description": "Only main content",
"type": "boolean"
}
},
"type": "object"
},
"timeout": {
"description": "Timeout in milliseconds",
"type": "integer"
},
"url": {
"description": "URL to scrape",
"type": "string"
}
},
"required": [
"url"
],
"type": "object"
}
}
}
},
"responses": {
"200": {
"description": "Scrape successful."
}
},
"summary": "Scrape a website."
}
},
"/search": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"page_options": {
"properties": {
"fetchPageContent": {
"description": "Fetch full content",
"type": "boolean"
},
"includeHtml": {
"description": "Include HTML content",
"type": "boolean"
},
"onlyMainContent": {
"description": "Only main content",
"type": "boolean"
}
},
"type": "object"
},
"query": {
"description": "Search query string",
"type": "string"
},
"search_options": {
"properties": {
"limit": {
"description": "Max results",
"type": "integer"
}
},
"type": "object"
}
},
"required": [
"query"
],
"type": "object"
}
}
}
},
"responses": {
"200": {
"description": "Search successful."
}
},
"summary": "Search Firecrawl index."
}
}
}
}
@@ -0,0 +1,54 @@
{
"components": {
"securitySchemes": {
"Bearer": {
"bearerFormat": "Bearer <token>",
"scheme": "bearer",
"type": "http"
}
}
},
"info": {
"title": "Firecrawl API",
"version": "v0"
},
"openapi": "3.0.0",
"paths": {
"/crawl/cancel/{jobId}": {
"delete": {
"parameters": [
{
"description": "ID of crawl job",
"in": "path",
"name": "jobId",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"status": {
"type": "string"
}
},
"type": "object"
}
}
},
"description": "Returns cancelled."
}
},
"security": [
{
"Bearer": []
}
],
"summary": "Cancel crawl job"
}
}
}
}
@@ -0,0 +1,166 @@
{
"info": {
"title": "Firecrawl API",
"version": "1.0.0"
},
"openapi": "3.0.0",
"paths": {
"/check-crawl-status/{jobId}": {
"get": {
"parameters": [
{
"in": "path",
"name": "jobId",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"current": {
"description": "Current progress",
"type": "integer"
},
"data": {
"items": {
"properties": {
"content": {
"description": "Raw content",
"type": "string"
},
"markdown": {
"description": "Markdown content",
"type": "string"
},
"metadata": {
"description": "Page metadata",
"type": "object"
},
"provider": {
"description": "Data provider",
"type": "string"
}
},
"type": "object"
},
"type": "array"
},
"status": {
"description": "Job status",
"type": "string"
},
"total": {
"description": "Total pages",
"type": "integer"
}
},
"type": "object"
}
}
},
"description": "Crawl job status."
}
},
"summary": "Check crawl job status."
}
},
"/crawl": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"crawlerOptions": {
"description": "Crawler options",
"type": "object"
},
"url": {
"description": "URL to crawl",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"jobId": {
"description": "Job ID",
"type": "string"
}
},
"type": "object"
}
}
},
"description": "Crawl job submitted."
}
},
"summary": "Crawl a URL."
}
},
"/scrape": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"extractorOptions": {
"description": "Extractor options",
"type": "object"
},
"pageOptions": {
"description": "Page options",
"type": "object"
},
"url": {
"description": "URL to scrape",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"data": {
"description": "Scraped data",
"type": "object"
},
"success": {
"description": "Success flag",
"type": "boolean"
}
},
"type": "object"
}
}
},
"description": "Scraped data."
}
},
"summary": "Scrape a single URL."
}
}
}
}
@@ -0,0 +1,229 @@
{
"info": {
"title": "Firecrawl API",
"version": "v0"
},
"openapi": "3.0.0",
"paths": {
"/v0/crawl": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"crawlerOptions": {
"properties": {
"excludes": {
"description": "Paths to exclude",
"items": {
"type": "string"
},
"type": "array"
},
"includes": {
"description": "Paths to include",
"items": {
"type": "string"
},
"type": "array"
},
"limit": {
"description": "Maximum pages to crawl",
"type": "integer"
},
"maxDepth": {
"description": "Maximum crawl depth",
"type": "integer"
},
"returnOnlyUrls": {
"description": "Only return URLs",
"type": "boolean"
}
},
"type": "object"
},
"pageOptions": {
"properties": {
"onlyMainContent": {
"description": "Extract main content",
"type": "boolean"
}
},
"type": "object"
},
"url": {
"description": "URL to crawl",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"jobId": {
"description": "Job ID",
"type": "string"
}
},
"type": "object"
}
}
},
"description": "Crawl job created"
}
},
"summary": "Crawl a website"
}
},
"/v0/crawl/status/{jobId}": {
"get": {
"parameters": [
{
"in": "path",
"name": "jobId",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"current": {
"type": "integer"
},
"data": {
"items": {
"properties": {
"url": {
"type": "string"
}
},
"type": "object"
},
"type": "array"
},
"status": {
"description": "Job status",
"type": "string"
},
"total": {
"type": "integer"
}
},
"type": "object"
}
}
},
"description": "Crawl job status"
}
},
"summary": "Get crawl job status"
}
},
"/v0/scrape": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"pageOptions": {
"properties": {
"onlyMainContent": {
"description": "Extract main content",
"type": "boolean"
}
},
"type": "object"
},
"url": {
"description": "URL to scrape",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"data": {
"properties": {
"content": {
"type": "string"
},
"html": {
"type": "string"
},
"llm_extraction": {
"type": "object"
},
"markdown": {
"type": "string"
},
"metadata": {
"properties": {
"description": {
"type": "string"
},
"language": {
"type": "string"
},
"pageError": {
"type": "string"
},
"pageStatusCode": {
"type": "integer"
},
"sourceURL": {
"type": "string"
},
"title": {
"type": "string"
}
},
"type": "object"
},
"rawHtml": {
"type": "string"
},
"warning": {
"type": "string"
}
},
"type": "object"
},
"success": {
"type": "boolean"
}
},
"type": "object"
}
}
},
"description": "Scrape results"
}
},
"summary": "Scrape a webpage"
}
}
}
}
@@ -0,0 +1,115 @@
{
"info": {
"title": "Firecrawl API",
"version": "1.0.0"
},
"openapi": "3.0.0",
"paths": {
"/scrape": {
"post": {
"requestBody": {
"content": {
"application/json": {
"example": {
"extractorOptions": {
"extractionPrompt": "Extract company info.",
"extractionSchema": {
"properties": {
"company_description": {
"type": "string"
},
"company_industry": {
"type": "string"
},
"who_they_serve": {
"type": "string"
}
},
"required": [
"company_description",
"company_industry",
"who_they_serve"
],
"type": "object"
},
"mode": "llm-extraction"
},
"pageOptions": {
"onlyMainContent": true
},
"url": "https://example.com"
},
"schema": {
"properties": {
"extractorOptions": {
"properties": {
"extractionPrompt": {
"description": "Prompt for LLM extraction.",
"type": "string"
},
"extractionSchema": {
"description": "JSON Schema describing the data to extract.",
"properties": {
"properties": {
"additionalProperties": {
"type": "object"
},
"description": "Field name to JSON Schema for that field.",
"type": "object"
},
"required": {
"description": "Names of fields that must be present.",
"items": {
"type": "string"
},
"type": "array"
},
"type": {
"description": "JSON Schema type of the extracted value.",
"type": "string"
}
},
"type": "object"
},
"mode": {
"description": "Extraction mode.",
"type": "string"
}
},
"type": "object"
},
"pageOptions": {
"properties": {
"onlyMainContent": {
"type": "boolean"
}
},
"type": "object"
},
"url": {
"description": "URL to scrape.",
"type": "string"
}
},
"required": [
"url"
],
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"type": "object"
}
}
},
"description": "Successful scrape."
}
},
"summary": "Scrape data from URL."
}
}
}
}
@@ -0,0 +1,185 @@
{
"components": {
"securitySchemes": {
"bearerAuth": {
"scheme": "bearer",
"type": "http"
}
}
},
"info": {
"title": "Firecrawl API",
"version": "v0"
},
"openapi": "3.0.0",
"paths": {
"/v0/scrape": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"extractorOptions": {
"description": "Options for extraction",
"properties": {
"extractionPrompt": {
"description": "Prompt for LLM extraction",
"type": "string"
},
"extractionSchema": {
"description": "Schema for LLM extraction",
"type": "object"
},
"mode": {
"description": "Extraction mode",
"enum": [
"markdown",
"llm-extraction",
"llm-extraction-from-raw-html",
"llm-extraction-from-markdown"
],
"type": "string"
}
},
"type": "object"
},
"pageOptions": {
"properties": {
"fullPageScreenshot": {
"description": "Include full page screenshot",
"type": "boolean"
},
"headers": {
"description": "Headers for request",
"type": "object"
},
"includeHtml": {
"description": "Include HTML content",
"type": "boolean"
},
"includeRawHtml": {
"description": "Include raw HTML content",
"type": "boolean"
},
"onlyIncludeTags": {
"description": "Include only these tags",
"items": {
"type": "string"
},
"type": "array"
},
"onlyMainContent": {
"description": "Only return main content",
"type": "boolean"
},
"removeTags": {
"description": "Remove these tags",
"items": {
"type": "string"
},
"type": "array"
},
"replaceAllPathsWithAbsolutePaths": {
"description": "Replace relative paths",
"type": "boolean"
},
"screenshot": {
"description": "Include screenshot",
"type": "boolean"
},
"waitFor": {
"description": "Wait time in ms",
"type": "integer"
}
},
"type": "object"
},
"timeout": {
"description": "Timeout in ms",
"type": "integer"
},
"url": {
"description": "URL to scrape",
"type": "string"
}
},
"type": "object"
}
}
},
"required": true
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"data": {
"properties": {
"content": {
"type": "string"
},
"html": {
"type": "string"
},
"llm_extraction": {
"type": "object"
},
"markdown": {
"type": "string"
},
"metadata": {
"properties": {
"description": {
"type": "string"
},
"language": {
"type": "string"
},
"pageError": {
"type": "string"
},
"pageStatusCode": {
"type": "integer"
},
"sourceURL": {
"type": "string"
},
"title": {
"type": "string"
}
},
"type": "object"
},
"rawHtml": {
"type": "string"
},
"warning": {
"type": "string"
}
},
"type": "object"
},
"success": {
"type": "boolean"
}
},
"type": "object"
}
}
},
"description": "Successful scrape"
}
},
"security": [
{
"bearerAuth": []
}
],
"summary": "Scrape a webpage"
}
}
}
}
@@ -0,0 +1,212 @@
{
"info": {
"title": "Firecrawl API",
"version": "1.0.0"
},
"openapi": "3.0.0",
"paths": {
"/crawl": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"crawlerOptions": {
"description": "Crawl job options",
"properties": {
"excludes": {
"description": "Pages to exclude",
"items": {
"type": "string"
},
"type": "array"
},
"includes": {
"description": "Pages to include",
"items": {
"type": "string"
},
"type": "array"
},
"limit": {
"description": "Max pages to crawl",
"type": "integer"
}
},
"type": "object"
},
"pageOptions": {
"description": "Page scraping options",
"properties": {
"onlyMainContent": {
"description": "Only scrape main content",
"type": "boolean"
}
},
"type": "object"
},
"url": {
"description": "URL to crawl",
"type": "string"
}
},
"required": [
"url"
],
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"description": "Crawl job result",
"type": "object"
}
}
},
"description": "Crawl job result"
}
},
"summary": "Crawl a website"
}
},
"/crawl/{jobId}/cancel": {
"post": {
"parameters": [
{
"description": "Crawl job ID",
"in": "path",
"name": "jobId",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"description": "Cancellation status",
"type": "object"
}
}
},
"description": "Cancellation status"
}
},
"summary": "Cancel crawl job"
}
},
"/crawl/{jobId}/status": {
"get": {
"parameters": [
{
"description": "Crawl job ID",
"in": "path",
"name": "jobId",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"description": "Crawl status",
"type": "object"
}
}
},
"description": "Crawl status"
}
},
"summary": "Check crawl status"
}
},
"/scrape": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"extractorOptions": {
"description": "LLM extraction options",
"properties": {
"extractionSchema": {
"description": "JSON schema for extraction",
"type": "object"
}
},
"type": "object"
},
"url": {
"description": "URL to scrape",
"type": "string"
}
},
"required": [
"url"
],
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"description": "Scraped data",
"type": "object"
}
}
},
"description": "Scraped data"
}
},
"summary": "Scrape a single URL"
}
},
"/search": {
"get": {
"parameters": [
{
"description": "Search query",
"in": "query",
"name": "query",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"description": "Search results",
"type": "object"
}
}
},
"description": "Search results"
}
},
"summary": "Search and scrape"
}
}
}
}
@@ -0,0 +1,199 @@
{
"info": {
"title": "Firecrawl API",
"version": "1.0.0"
},
"openapi": "3.0.0",
"paths": {
"/crawl": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"crawlerOptions": {
"properties": {
"excludes": {
"description": "Paths to exclude",
"items": {
"type": "string"
},
"type": "array"
},
"includes": {
"description": "Paths to include",
"items": {
"type": "string"
},
"type": "array"
},
"limit": {
"description": "Maximum pages to crawl",
"type": "integer"
}
},
"type": "object"
},
"pageOptions": {
"properties": {
"onlyMainContent": {
"description": "Extract only main content",
"type": "boolean"
}
},
"type": "object"
},
"url": {
"description": "Starting URL for crawl",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"jobId": {
"description": "Unique job identifier",
"type": "string"
}
},
"type": "object"
}
}
},
"description": "Crawl job started"
}
},
"summary": "Crawl a website"
}
},
"/crawl/{jobId}/status": {
"get": {
"parameters": [
{
"in": "path",
"name": "jobId",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"status": {
"description": "Current job status",
"type": "string"
}
},
"type": "object"
}
}
},
"description": "Crawl job status"
}
},
"summary": "Check crawl status"
}
},
"/scrape": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"extractorOptions": {
"properties": {
"extractionSchema": {
"description": "Zod schema for extraction",
"type": "object"
}
},
"type": "object"
},
"url": {
"description": "URL to scrape",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"data": {
"description": "Extracted data",
"type": "object"
}
},
"type": "object"
}
}
},
"description": "Scraped data"
}
},
"summary": "Scrape a single URL"
}
},
"/search": {
"get": {
"parameters": [
{
"in": "query",
"name": "query",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"items": {
"properties": {
"content": {
"description": "Page content (optional)",
"type": "string"
},
"url": {
"description": "Result URL",
"type": "string"
}
},
"type": "object"
},
"type": "array"
}
}
},
"description": "Search results"
}
},
"summary": "Search for a query"
}
}
}
}
@@ -0,0 +1,202 @@
{
"info": {
"title": "Firecrawl API",
"version": "1.0.0"
},
"openapi": "3.0.0",
"paths": {
"/crawl": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"crawlerOptions": {
"description": "Options for crawling",
"properties": {
"excludes": {
"description": "URLs to exclude",
"items": {
"type": "string"
},
"type": "array"
},
"includes": {
"description": "URLs to include",
"items": {
"type": "string"
},
"type": "array"
},
"limit": {
"description": "Maximum pages to crawl",
"type": "integer"
}
},
"type": "object"
},
"pageOptions": {
"description": "Options for page content",
"properties": {
"onlyMainContent": {
"description": "Extract only main content",
"type": "boolean"
}
},
"type": "object"
},
"url": {
"description": "URL to crawl",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"jobId": {
"description": "Unique crawl job ID",
"type": "string"
}
},
"type": "object"
}
}
},
"description": "Crawl job started."
}
},
"summary": "Crawl a website."
}
},
"/crawl/{jobId}": {
"get": {
"parameters": [
{
"in": "path",
"name": "jobId",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"status": {
"description": "Current job status",
"type": "string"
}
},
"type": "object"
}
}
},
"description": "Crawl job status."
}
},
"summary": "Check crawl job status."
}
},
"/scrape": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"extractorOptions": {
"description": "Options for data extraction",
"properties": {
"extractionSchema": {
"description": "Pydantic schema",
"type": "object"
},
"mode": {
"description": "Extraction mode",
"type": "string"
}
},
"type": "object"
},
"pageOptions": {
"description": "Options for page content",
"properties": {
"onlyMainContent": {
"description": "Extract only main content",
"type": "boolean"
}
},
"type": "object"
},
"url": {
"description": "URL to scrape",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"type": "object"
}
}
},
"description": "Scraped data."
}
},
"summary": "Scrape a single URL."
}
},
"/search": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"query": {
"description": "Search query",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"type": "object"
}
}
},
"description": "Search results."
}
},
"summary": "Search the web."
}
}
}
}
@@ -0,0 +1,201 @@
{
"info": {
"title": "Firecrawl API",
"version": "0.1"
},
"openapi": "3.0.0",
"paths": {
"/crawl": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"crawlerOptions": {
"description": "Crawl job options",
"properties": {
"excludes": {
"description": "URLs to exclude",
"items": {
"type": "string"
},
"type": "array"
},
"includes": {
"description": "URLs to include",
"items": {
"type": "string"
},
"type": "array"
},
"limit": {
"description": "Maximum pages to crawl",
"type": "integer"
}
},
"type": "object"
},
"pageOptions": {
"description": "Page scraping options",
"properties": {
"onlyMainContent": {
"description": "Only scrape main content",
"type": "boolean"
}
},
"type": "object"
},
"url": {
"description": "URL to crawl",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"type": "object"
}
}
},
"description": "Crawl job started"
}
},
"summary": "Crawl a website."
}
},
"/crawl/{job_id}/cancel": {
"post": {
"parameters": [
{
"description": "Crawl job ID",
"in": "path",
"name": "job_id",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"type": "object"
}
}
},
"description": "Cancellation status"
}
},
"summary": "Cancel crawl job."
}
},
"/crawl/{job_id}/status": {
"get": {
"parameters": [
{
"description": "Crawl job ID",
"in": "path",
"name": "job_id",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"type": "object"
}
}
},
"description": "Crawl status"
}
},
"summary": "Check crawl status."
}
},
"/scrape": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"extractorOptions": {
"description": "LLM extraction options",
"properties": {
"extractionSchema": {
"description": "JSON schema for extraction",
"type": "object"
}
},
"type": "object"
},
"url": {
"description": "URL to scrape",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"type": "object"
}
}
},
"description": "Scraped data"
}
},
"summary": "Scrape a single URL."
}
},
"/search": {
"get": {
"parameters": [
{
"description": "Search query",
"in": "query",
"name": "query",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"type": "object"
}
}
},
"description": "Search results"
}
},
"summary": "Search and scrape results."
}
}
}
}
@@ -0,0 +1,245 @@
{
"info": {
"title": "Firecrawl API",
"version": "1.0.0"
},
"openapi": "3.0.0",
"paths": {
"/check-crawl-status": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"jobId": {
"description": "Crawl job ID",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"current": {
"description": "Current page count",
"type": "integer"
},
"data": {
"description": "Crawl data",
"items": {
"properties": {
"content": {
"description": "Raw content",
"type": "string"
},
"markdown": {
"description": "Markdown content",
"type": "string"
},
"metadata": {
"description": "Page metadata",
"properties": {
"description": {
"description": "Page description",
"type": "string"
},
"language": {
"description": "Page language",
"type": "string"
},
"sourceURL": {
"description": "Page URL",
"type": "string"
},
"title": {
"description": "Page title",
"type": "string"
}
},
"type": "object"
},
"provider": {
"description": "Content provider",
"type": "string"
}
},
"type": "object"
},
"type": "array"
},
"status": {
"description": "Crawl status",
"type": "string"
},
"total": {
"description": "Total page count",
"type": "integer"
}
},
"type": "object"
}
}
},
"description": "Crawl job status."
}
},
"summary": "Check crawl job status."
}
},
"/crawl": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"crawlerOptions": {
"description": "Crawler options",
"properties": {
"excludes": {
"description": "URLs to exclude",
"items": {
"type": "string"
},
"type": "array"
}
},
"type": "object"
},
"url": {
"description": "URL to crawl",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"jobId": {
"description": "Job ID",
"type": "string"
}
},
"type": "object"
}
}
},
"description": "Crawl job submitted."
}
},
"summary": "Crawl a URL."
}
},
"/scrape-url": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"extractorOptions": {
"description": "Extractor options",
"properties": {
"extractionSchema": {
"description": "Extraction schema",
"type": "string"
},
"mode": {
"description": "Extraction mode",
"type": "string"
}
},
"type": "object"
},
"pageOptions": {
"description": "Page options",
"properties": {
"onlyMainContent": {
"description": "Only main content",
"type": "boolean"
}
},
"type": "object"
},
"url": {
"description": "URL to scrape",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"data": {
"description": "Scraped data",
"properties": {
"content": {
"description": "Raw content",
"type": "string"
},
"html": {
"description": "HTML content",
"type": "string"
},
"llm_extraction": {
"description": "LLM extraction results",
"type": "object"
},
"markdown": {
"description": "Markdown content",
"type": "string"
},
"metadata": {
"description": "Page metadata",
"type": "object"
},
"rawHtml": {
"description": "Raw HTML content",
"type": "string"
},
"warning": {
"description": "Warning message",
"type": "string"
}
},
"type": "object"
},
"success": {
"description": "Request success",
"type": "boolean"
}
},
"type": "object"
}
}
},
"description": "Scraped data."
}
},
"summary": "Scrape a single URL."
}
}
}
}
@@ -0,0 +1,129 @@
{
"components": {
"securitySchemes": {
"Bearer": {
"scheme": "bearer",
"type": "http"
}
}
},
"info": {
"title": "Firecrawl API",
"version": "v0"
},
"openapi": "3.0.0",
"paths": {
"/search": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"pageOptions": {
"properties": {
"fetchPageContent": {
"description": "Fetch content of each page.",
"type": "boolean"
},
"includeHtml": {
"description": "Include HTML content.",
"type": "boolean"
},
"includeRawHtml": {
"description": "Include raw HTML content.",
"type": "boolean"
},
"onlyMainContent": {
"description": "Only return main content.",
"type": "boolean"
}
},
"type": "object"
},
"query": {
"description": "The query to search for",
"type": "string"
},
"searchOptions": {
"properties": {
"limit": {
"description": "Maximum number of results.",
"type": "integer"
}
},
"type": "object"
}
},
"type": "object"
}
}
},
"required": true
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"data": {
"items": {
"properties": {
"content": {
"type": "string"
},
"markdown": {
"type": "string"
},
"metadata": {
"properties": {
"description": {
"type": "string"
},
"language": {
"type": "string"
},
"sourceURL": {
"type": "string"
},
"title": {
"type": "string"
}
},
"type": "object"
},
"url": {
"type": "string"
}
},
"type": "object"
},
"type": "array"
},
"success": {
"type": "boolean"
}
},
"type": "object"
}
}
},
"description": "Successful search."
}
},
"security": [
{
"Bearer": []
}
],
"summary": "Search the web."
}
}
},
"servers": [
{
"url": "https://api.firecrawl.dev/v0"
}
]
}
@@ -0,0 +1,186 @@
{
"info": {
"title": "Firecrawl API",
"version": "v0"
},
"openapi": "3.0.0",
"paths": {
"/crawl/status/{jobId}": {
"get": {
"parameters": [
{
"description": "ID of crawl job",
"in": "path",
"name": "jobId",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"current": {
"description": "Current page number",
"type": "integer"
},
"data": {
"description": "Data from the job",
"items": {
"properties": {
"content": {
"type": "string"
},
"html": {
"description": "HTML content",
"nullable": true,
"type": "string"
},
"index": {
"description": "Page number crawled",
"type": "integer"
},
"markdown": {
"type": "string"
},
"metadata": {
"properties": {
"description": {
"type": "string"
},
"language": {
"nullable": true,
"type": "string"
},
"pageError": {
"description": "Error message of page",
"nullable": true,
"type": "string"
},
"pageStatusCode": {
"description": "Status code of page",
"type": "integer"
},
"sourceURL": {
"type": "string"
},
"title": {
"type": "string"
}
},
"additionalProperties": {
"type": "string"
},
"type": "object"
},
"rawHtml": {
"description": "Raw HTML content",
"nullable": true,
"type": "string"
}
},
"type": "object"
},
"type": "array"
},
"partial_data": {
"description": "Partial documents (streaming)",
"items": {
"properties": {
"content": {
"type": "string"
},
"html": {
"description": "HTML content",
"nullable": true,
"type": "string"
},
"index": {
"description": "Page number crawled",
"type": "integer"
},
"markdown": {
"type": "string"
},
"metadata": {
"properties": {
"description": {
"type": "string"
},
"language": {
"nullable": true,
"type": "string"
},
"pageError": {
"description": "Error message of page",
"nullable": true,
"type": "string"
},
"pageStatusCode": {
"description": "Status code of page",
"type": "integer"
},
"sourceURL": {
"type": "string"
},
"title": {
"type": "string"
}
},
"additionalProperties": {
"type": "string"
},
"type": "object"
},
"rawHtml": {
"description": "Raw HTML content",
"nullable": true,
"type": "string"
}
},
"type": "object"
},
"type": "array"
},
"status": {
"description": "Status of the job",
"type": "string"
},
"total": {
"description": "Total number of pages",
"type": "integer"
}
},
"type": "object"
}
}
},
"description": "Successful operation"
}
},
"security": [
{
"Authorization": []
}
],
"summary": "Get crawl job status"
}
}
},
"components": {
"securitySchemes": {
"Authorization": {
"bearerFormat": "Bearer <token>",
"scheme": "bearer",
"type": "http"
}
}
},
"servers": [
{
"url": "https://api.firecrawl.dev/v0"
}
]
}
@@ -0,0 +1,86 @@
{
"info": {
"title": "Firecrawl API",
"version": "v0"
},
"openapi": "3.0.0",
"paths": {
"/v0/search": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"pageOptions": {
"properties": {
"fetchPageContent": {
"description": "Fetch page content",
"type": "boolean"
}
},
"type": "object"
},
"query": {
"description": "Search term",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"data": {
"items": {
"properties": {
"markdown": {
"type": "string"
},
"metadata": {
"properties": {
"description": {
"type": "string"
},
"language": {
"type": "string"
},
"sourceURL": {
"type": "string"
},
"title": {
"type": "string"
}
},
"type": "object"
},
"url": {
"type": "string"
}
},
"type": "object"
},
"type": "array"
},
"success": {
"type": "boolean"
}
},
"type": "object"
}
}
},
"description": "Successful search"
}
},
"summary": "Search and extract content"
}
}
}
}
@@ -0,0 +1,59 @@
{
"info": {
"title": "Firecrawl API",
"version": "v0"
},
"openapi": "3.0.0",
"paths": {
"/test": {
"get": {
"description": "Returns a test message.",
"responses": {
"200": {
"content": {
"text/plain": {
"schema": {
"example": "Hello, world!",
"type": "string"
}
}
},
"description": "Successful operation"
}
},
"summary": "Test endpoint"
}
},
"/v0/crawl": {
"post": {
"description": "Processes crawl job for URL.",
"requestBody": {
"content": {
"application/json": {
"example": {
"url": "https://docs.firecrawl.dev"
},
"schema": {
"properties": {
"url": {
"description": "Website URL",
"type": "string"
}
},
"type": "object"
}
}
},
"description": "URL to crawl",
"required": true
},
"responses": {
"200": {
"description": "Crawl initiated."
}
},
"summary": "Crawl a given URL."
}
}
}
}
@@ -0,0 +1,738 @@
{
"components": {
"schemas": {},
"securitySchemes": {
"Bearer": {
"scheme": "bearer",
"type": "http"
}
}
},
"info": {
"title": "Firecrawl API",
"version": "1.0.0"
},
"openapi": "3.0.0",
"paths": {
"/check_crawl_status": {
"post": {
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"current": {
"type": "integer"
},
"data": {
"items": {
"properties": {
"content": {
"type": "string"
},
"markdown": {
"type": "string"
},
"metadata": {
"properties": {
"description": {
"type": "string"
},
"language": {
"type": "string"
},
"sourceURL": {
"type": "string"
},
"title": {
"type": "string"
}
},
"type": "object"
},
"provider": {
"type": "string"
}
},
"type": "object"
},
"type": "array"
},
"status": {
"type": "string"
},
"total": {
"type": "integer"
}
},
"type": "object"
}
}
},
"description": "Crawl job status"
}
},
"summary": "Check crawl job status"
}
},
"/crawl": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"crawlerOptions": {
"properties": {
"allowBackwardCrawling": {
"description": "Allow backward crawling",
"type": "boolean"
},
"allowExternalContentLinks": {
"description": "Allow external links",
"type": "boolean"
},
"excludes": {
"description": "URL patterns to exclude",
"items": {
"type": "string"
},
"type": "array"
},
"generateImgAltText": {
"description": "Generate alt text for images",
"type": "boolean"
},
"ignoreSitemap": {
"description": "Ignore website sitemap",
"type": "boolean"
},
"includes": {
"description": "URL patterns to include",
"items": {
"type": "string"
},
"type": "array"
},
"limit": {
"description": "Maximum pages to crawl",
"type": "integer"
},
"maxDepth": {
"description": "Maximum crawl depth",
"type": "integer"
},
"mode": {
"description": "Crawling mode",
"enum": [
"default",
"fast"
],
"type": "string"
},
"returnOnlyUrls": {
"description": "Return only crawled URLs",
"type": "boolean"
}
},
"type": "object"
},
"pageOptions": {
"properties": {
"fullPageScreenshot": {
"description": "Include full page screenshot",
"type": "boolean"
},
"headers": {
"description": "Headers for requests",
"type": "object"
},
"includeHtml": {
"description": "Include HTML content",
"type": "boolean"
},
"includeRawHtml": {
"description": "Include raw HTML content",
"type": "boolean"
},
"onlyIncludeTags": {
"description": "Include only specific tags",
"items": {
"type": "string"
},
"type": "array"
},
"onlyMainContent": {
"description": "Return only main content",
"type": "boolean"
},
"removeTags": {
"description": "Remove specific tags",
"items": {
"type": "string"
},
"type": "array"
},
"replaceAllPathsWithAbsolutePaths": {
"description": "Use absolute paths",
"type": "boolean"
},
"screenshot": {
"description": "Include page screenshot",
"type": "boolean"
},
"waitFor": {
"description": "Wait for page load (ms)",
"type": "integer"
}
},
"type": "object"
},
"url": {
"description": "Base URL to crawl",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"jobId": {
"description": "Job ID of the crawl",
"type": "string"
}
},
"type": "object"
}
}
},
"description": "Crawl request successful"
}
},
"security": [
{
"Bearer": []
}
],
"summary": "Crawl a website"
}
},
"/crawl/cancel/{jobId}": {
"delete": {
"parameters": [
{
"description": "ID of crawl job",
"in": "path",
"name": "jobId",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"status": {
"type": "string"
}
},
"type": "object"
}
}
},
"description": "Returns cancelled."
}
},
"security": [
{
"Bearer": []
}
],
"summary": "Cancel crawl job"
}
},
"/search": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"pageOptions": {
"properties": {
"fetchPageContent": {
"description": "Fetch content of each page.",
"type": "boolean"
},
"includeHtml": {
"description": "Include HTML content.",
"type": "boolean"
},
"includeRawHtml": {
"description": "Include raw HTML content.",
"type": "boolean"
},
"onlyMainContent": {
"description": "Only return main content.",
"type": "boolean"
}
},
"type": "object"
},
"query": {
"description": "The query to search for",
"type": "string"
},
"searchOptions": {
"properties": {
"limit": {
"description": "Maximum number of results.",
"type": "integer"
}
},
"type": "object"
}
},
"type": "object"
}
}
},
"required": true
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"data": {
"items": {
"properties": {
"content": {
"type": "string"
},
"markdown": {
"type": "string"
},
"metadata": {
"properties": {
"description": {
"type": "string"
},
"language": {
"type": "string"
},
"sourceURL": {
"type": "string"
},
"title": {
"type": "string"
}
},
"type": "object"
},
"url": {
"type": "string"
}
},
"type": "object"
},
"type": "array"
},
"success": {
"type": "boolean"
}
},
"type": "object"
}
}
},
"description": "Successful search."
}
},
"security": [
{
"Bearer": []
}
],
"summary": "Search the web."
}
},
"/v0/crawl": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"crawlerOptions": {
"description": "Crawling options.",
"properties": {
"excludes": {
"description": "URL patterns to exclude.",
"items": {
"type": "string"
},
"type": "array"
},
"includes": {
"description": "URL patterns to include.",
"items": {
"type": "string"
},
"type": "array"
},
"limit": {
"description": "Maximum pages to crawl.",
"type": "integer"
},
"maxDepth": {
"description": "Maximum crawl depth.",
"type": "integer"
},
"mode": {
"description": "Crawling mode.",
"enum": [
"default",
"fast"
],
"type": "string"
},
"returnOnlyUrls": {
"description": "Return only URLs.",
"type": "boolean"
}
},
"type": "object"
},
"pageOptions": {
"description": "Page scraping options.",
"properties": {
"includeHtml": {
"description": "Include HTML content.",
"type": "boolean"
},
"includeRawHtml": {
"description": "Include raw HTML content.",
"type": "boolean"
},
"onlyMainContent": {
"description": "Only main content.",
"type": "boolean"
},
"screenshot": {
"description": "Include page screenshot.",
"type": "boolean"
},
"waitFor": {
"description": "Wait time in milliseconds.",
"type": "integer"
}
},
"type": "object"
},
"url": {
"description": "Base URL to crawl.",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"jobId": {
"description": "Crawl job ID.",
"type": "string"
}
},
"type": "object"
}
}
},
"description": "Crawl job initiated."
}
},
"summary": "Crawl multiple pages."
}
},
"/v0/crawl/status/{jobId}": {
"get": {
"parameters": [
{
"description": "Crawl job ID.",
"in": "path",
"name": "jobId",
"required": true,
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"description": "Crawl job status."
}
},
"summary": "Check crawl job status."
}
},
"/v0/scrape": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"extractorOptions": {
"description": "Options for extraction",
"properties": {
"extractionPrompt": {
"description": "Prompt for LLM extraction",
"type": "string"
},
"extractionSchema": {
"description": "Schema for LLM extraction",
"type": "object"
},
"mode": {
"description": "Extraction mode",
"enum": [
"markdown",
"llm-extraction",
"llm-extraction-from-raw-html",
"llm-extraction-from-markdown"
],
"type": "string"
}
},
"type": "object"
},
"pageOptions": {
"properties": {
"fullPageScreenshot": {
"description": "Include full page screenshot",
"type": "boolean"
},
"headers": {
"description": "Headers for request",
"type": "object"
},
"includeHtml": {
"description": "Include HTML content",
"type": "boolean"
},
"includeRawHtml": {
"description": "Include raw HTML content",
"type": "boolean"
},
"onlyIncludeTags": {
"description": "Include only these tags",
"items": {
"type": "string"
},
"type": "array"
},
"onlyMainContent": {
"description": "Only return main content",
"type": "boolean"
},
"removeTags": {
"description": "Remove these tags",
"items": {
"type": "string"
},
"type": "array"
},
"replaceAllPathsWithAbsolutePaths": {
"description": "Replace relative paths",
"type": "boolean"
},
"screenshot": {
"description": "Include screenshot",
"type": "boolean"
},
"waitFor": {
"description": "Wait time in ms",
"type": "integer"
}
},
"type": "object"
},
"timeout": {
"description": "Timeout in ms",
"type": "integer"
},
"url": {
"description": "URL to scrape",
"type": "string"
}
},
"type": "object"
}
}
},
"required": true
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"data": {
"properties": {
"content": {
"type": "string"
},
"html": {
"type": "string"
},
"llm_extraction": {
"type": "object"
},
"markdown": {
"type": "string"
},
"metadata": {
"properties": {
"description": {
"type": "string"
},
"language": {
"type": "string"
},
"pageError": {
"type": "string"
},
"pageStatusCode": {
"type": "integer"
},
"sourceURL": {
"type": "string"
},
"title": {
"type": "string"
}
},
"type": "object"
},
"rawHtml": {
"type": "string"
},
"warning": {
"type": "string"
}
},
"type": "object"
},
"success": {
"type": "boolean"
}
},
"type": "object"
}
}
},
"description": "Successful scrape"
}
},
"security": [
{
"Bearer": []
}
],
"summary": "Scrape a webpage"
}
},
"/v0/search": {
"post": {
"requestBody": {
"content": {
"application/json": {
"schema": {
"properties": {
"pageOptions": {
"properties": {
"fetchPageContent": {
"description": "Fetch page content",
"type": "boolean"
}
},
"type": "object"
},
"query": {
"description": "Search term",
"type": "string"
}
},
"type": "object"
}
}
}
},
"responses": {
"200": {
"content": {
"application/json": {
"schema": {
"properties": {
"data": {
"items": {
"properties": {
"markdown": {
"type": "string"
},
"metadata": {
"properties": {
"description": {
"type": "string"
},
"language": {
"type": "string"
},
"sourceURL": {
"type": "string"
},
"title": {
"type": "string"
}
},
"type": "object"
},
"url": {
"type": "string"
}
},
"type": "object"
},
"type": "array"
},
"success": {
"type": "boolean"
}
},
"type": "object"
}
}
},
"description": "Successful search"
}
},
"summary": "Search and extract content"
}
}
}
}