diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index bb47b47f..bbc1e098 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -1,7 +1,7 @@ --- name: Bug report about: Create a report to help us improve -title: "[BUG]" +title: "[Bug] " labels: bug assignees: '' diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index b01699b7..6760afa8 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -1,7 +1,7 @@ --- name: Feature request about: Suggest an idea for this project -title: "[Feat]" +title: "[Feat] " labels: '' assignees: '' diff --git a/.github/ISSUE_TEMPLATE/self_host_issue.md b/.github/ISSUE_TEMPLATE/self_host_issue.md new file mode 100644 index 00000000..73a0ef9d --- /dev/null +++ b/.github/ISSUE_TEMPLATE/self_host_issue.md @@ -0,0 +1,40 @@ +--- +name: Self-host issue +about: Report an issue with self-hosting Firecrawl +title: "[Self-Host] " +labels: self-host +assignees: '' + +--- + +**Describe the Issue** +Provide a clear and concise description of the self-hosting issue you're experiencing. + +**To Reproduce** +Steps to reproduce the issue: +1. Configure the environment or settings with '...' +2. Run the command '...' +3. Observe the error or unexpected output at '...' +4. Log output/error message + +**Expected Behavior** +A clear and concise description of what you expected to happen when self-hosting. + +**Screenshots** +If applicable, add screenshots or copies of the command line output to help explain the self-hosting issue. + +**Environment (please complete the following information):** +- OS: [e.g. macOS, Linux, Windows] +- Firecrawl Version: [e.g. 1.2.3] +- Node.js Version: [e.g. 14.x] +- Docker Version (if applicable): [e.g. 20.10.14] +- Database Type and Version: [e.g. PostgreSQL 13.4] + +**Logs** +If applicable, include detailed logs to help understand the self-hosting problem. + +**Configuration** +Provide relevant parts of your configuration files (with sensitive information redacted). + +**Additional Context** +Add any other context about the self-hosting issue here, such as specific infrastructure details, network setup, or any modifications made to the original Firecrawl setup. diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b2e42e4a..8a9a74cc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,7 +28,8 @@ env: HYPERDX_API_KEY: ${{ secrets.HYPERDX_API_KEY }} HDX_NODE_BETA_MODE: 1 FIRE_ENGINE_BETA_URL: ${{ secrets.FIRE_ENGINE_BETA_URL }} - + USE_DB_AUTHENTICATION: ${{ secrets.USE_DB_AUTHENTICATION }} + ENV: ${{ secrets.ENV }} jobs: pre-deploy: diff --git a/.github/workflows/fly-direct.yml b/.github/workflows/fly-direct.yml index 8ec675fa..3ee460e6 100644 --- a/.github/workflows/fly-direct.yml +++ b/.github/workflows/fly-direct.yml @@ -1,7 +1,7 @@ name: Fly Deploy Direct on: schedule: - - cron: '0 */2 * * *' + - cron: '0 * * * *' env: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} @@ -22,7 +22,13 @@ env: SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }} SUPABASE_URL: ${{ secrets.SUPABASE_URL }} TEST_API_KEY: ${{ secrets.TEST_API_KEY }} + PYPI_USERNAME: ${{ secrets.PYPI_USERNAME }} + PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + NPM_TOKEN: ${{ secrets.NPM_TOKEN }} + CRATES_IO_TOKEN: ${{ secrets.CRATES_IO_TOKEN }} SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }} + USE_DB_AUTHENTICATION: ${{ secrets.USE_DB_AUTHENTICATION }} + ENV: ${{ secrets.ENV }} jobs: deploy: diff --git a/.github/workflows/fly.yml b/.github/workflows/fly.yml index 9209309f..ed8dade1 100644 --- a/.github/workflows/fly.yml +++ b/.github/workflows/fly.yml @@ -29,9 +29,10 @@ env: CRATES_IO_TOKEN: ${{ secrets.CRATES_IO_TOKEN }} SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }} USE_DB_AUTHENTICATION: ${{ secrets.USE_DB_AUTHENTICATION }} + ENV: ${{ secrets.ENV }} jobs: - pre-deploy-e2e-tests: + pre-deploy: name: Pre-deploy checks runs-on: ubuntu-latest services: @@ -58,197 +59,15 @@ jobs: run: npm run workers & working-directory: ./apps/api id: start_workers - - name: Wait for the application to be ready - run: | - sleep 10 - name: Run E2E tests run: | npm run test:prod working-directory: ./apps/api - pre-deploy-test-suite: - name: Test Suite - needs: pre-deploy-e2e-tests - runs-on: ubuntu-latest - services: - redis: - image: redis - ports: - - 6379:6379 - steps: - - uses: actions/checkout@v3 - - name: Set up Node.js - uses: actions/setup-node@v3 - with: - node-version: "20" - - name: Install pnpm - run: npm install -g pnpm - - name: Install dependencies - run: pnpm install - working-directory: ./apps/api - - name: Start the application - run: npm start & - working-directory: ./apps/api - id: start_app - - name: Start workers - run: npm run workers & - working-directory: ./apps/api - id: start_workers - - name: Install dependencies - run: pnpm install - working-directory: ./apps/test-suite - - name: Run E2E tests - run: | - npm run test:suite - working-directory: ./apps/test-suite - - python-sdk-tests: - name: Python SDK Tests - needs: pre-deploy-e2e-tests - runs-on: ubuntu-latest - services: - redis: - image: redis - ports: - - 6379:6379 - steps: - - uses: actions/checkout@v3 - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.x' - - name: Install pnpm - run: npm install -g pnpm - - name: Install dependencies - run: pnpm install - working-directory: ./apps/api - - name: Start the application - run: npm start & - working-directory: ./apps/api - id: start_app - - name: Start workers - run: npm run workers & - working-directory: ./apps/api - id: start_workers - - name: Install Python dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - working-directory: ./apps/python-sdk - - name: Run E2E tests for Python SDK - run: | - pytest firecrawl/__tests__/v1/e2e_withAuth/test.py - working-directory: ./apps/python-sdk - - js-sdk-tests: - name: JavaScript SDK Tests - needs: pre-deploy-e2e-tests - runs-on: ubuntu-latest - services: - redis: - image: redis - ports: - - 6379:6379 - steps: - - uses: actions/checkout@v3 - - name: Set up Node.js - uses: actions/setup-node@v3 - with: - node-version: "20" - - name: Install pnpm - run: npm install -g pnpm - - name: Install dependencies - run: pnpm install - working-directory: ./apps/api - - name: Start the application - run: npm start & - working-directory: ./apps/api - id: start_app - - name: Start workers - run: npm run workers & - working-directory: ./apps/api - id: start_workers - - name: Install dependencies for JavaScript SDK - run: pnpm install - working-directory: ./apps/js-sdk/firecrawl - - name: Run E2E tests for JavaScript SDK - run: npm run test - working-directory: ./apps/js-sdk/firecrawl - - go-sdk-tests: - name: Go SDK Tests - needs: pre-deploy-e2e-tests - runs-on: ubuntu-latest - services: - redis: - image: redis - ports: - - 6379:6379 - steps: - - uses: actions/checkout@v3 - - name: Set up Go - uses: actions/setup-go@v5 - with: - go-version-file: "go.mod" - - name: Install pnpm - run: npm install -g pnpm - - name: Install dependencies - run: pnpm install - working-directory: ./apps/api - - name: Start the application - run: npm start & - working-directory: ./apps/api - id: start_app - - name: Start workers - run: npm run workers & - working-directory: ./apps/api - id: start_workers - - name: Install dependencies for Go SDK - run: go mod tidy - working-directory: ./apps/go-sdk - - name: Run tests for Go SDK - run: go test -v ./... -timeout 180s - working-directory: ./apps/go-sdk/firecrawl - - rust-sdk-tests: - name: Rust SDK Tests - needs: pre-deploy-e2e-tests - runs-on: ubuntu-latest - services: - redis: - image: redis - ports: - - 6379:6379 - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - name: Install pnpm - run: npm install -g pnpm - - name: Install dependencies for API - run: pnpm install - working-directory: ./apps/api - - name: Start the application - run: npm start & - working-directory: ./apps/api - id: start_app - - name: Start workers - run: npm run workers & - working-directory: ./apps/api - id: start_workers - - name: Set up Rust - uses: actions/setup-rust@v1 - with: - rust-version: stable - - name: Try the lib build - working-directory: ./apps/rust-sdk - run: cargo build - - name: Run E2E tests for Rust SDK - run: cargo test --test e2e_with_auth - deploy: name: Deploy app + needs: pre-deploy runs-on: ubuntu-latest - needs: [pre-deploy-test-suite, python-sdk-tests, js-sdk-tests, rust-sdk-tests] steps: - uses: actions/checkout@v3 - uses: superfly/flyctl-actions/setup-flyctl@master @@ -259,119 +78,4 @@ jobs: BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }} SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }} - build-and-publish-python-sdk: - name: Build and publish Python SDK - runs-on: ubuntu-latest - needs: deploy - - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.x' - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install setuptools wheel twine build requests packaging - - - name: Run version check script - id: version_check_script - run: | - PYTHON_SDK_VERSION_INCREMENTED=$(python .github/scripts/check_version_has_incremented.py python ./apps/python-sdk/firecrawl firecrawl-py) - echo "PYTHON_SDK_VERSION_INCREMENTED=$PYTHON_SDK_VERSION_INCREMENTED" >> $GITHUB_ENV - - - name: Build the package - if: ${{ env.PYTHON_SDK_VERSION_INCREMENTED == 'true' }} - run: | - python -m build - working-directory: ./apps/python-sdk - - - name: Publish to PyPI - if: ${{ env.PYTHON_SDK_VERSION_INCREMENTED == 'true' }} - env: - TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} - TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} - run: | - twine upload dist/* - working-directory: ./apps/python-sdk - - build-and-publish-js-sdk: - name: Build and publish JavaScript SDK - runs-on: ubuntu-latest - needs: deploy - - steps: - - uses: actions/checkout@v3 - - name: Set up Node.js - uses: actions/setup-node@v3 - with: - node-version: '20' - registry-url: 'https://registry.npmjs.org/' - scope: '@mendable' - always-auth: true - - - name: Install pnpm - run: npm install -g pnpm - - - name: Install python for running version check script - run: | - python -m pip install --upgrade pip - pip install setuptools wheel requests packaging - - - name: Install dependencies for JavaScript SDK - run: pnpm install - working-directory: ./apps/js-sdk/firecrawl - - - name: Run version check script - id: version_check_script - run: | - VERSION_INCREMENTED=$(python .github/scripts/check_version_has_incremented.py js ./apps/js-sdk/firecrawl @mendable/firecrawl-js) - echo "VERSION_INCREMENTED=$VERSION_INCREMENTED" >> $GITHUB_ENV - - - name: Build and publish to npm - if: ${{ env.VERSION_INCREMENTED == 'true' }} - env: - NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} - run: | - npm run build-and-publish - working-directory: ./apps/js-sdk/firecrawl - build-and-publish-rust-sdk: - name: Build and publish Rust SDK - runs-on: ubuntu-latest - needs: deploy - - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - - name: Set up Rust - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - default: true - profile: minimal - - - name: Install dependencies - run: cargo build --release - - - name: Run version check script - id: version_check_script - run: | - VERSION_INCREMENTED=$(cargo search --limit 1 my_crate_name | grep my_crate_name) - echo "VERSION_INCREMENTED=$VERSION_INCREMENTED" >> $GITHUB_ENV - - - name: Build the package - if: ${{ env.VERSION_INCREMENTED == 'true' }} - run: cargo package - working-directory: ./apps/rust-sdk - - - name: Publish to crates.io - if: ${{ env.VERSION_INCREMENTED == 'true' }} - env: - CARGO_REG_TOKEN: ${{ secrets.CRATES_IO_TOKEN }} - run: cargo publish - working-directory: ./apps/rust-sdk \ No newline at end of file + \ No newline at end of file diff --git a/.gitignore b/.gitignore index 9eb551a9..42be56cf 100644 --- a/.gitignore +++ b/.gitignore @@ -19,5 +19,10 @@ apps/test-suite/load-test-results/test-run-report.json apps/playwright-service-ts/node_modules/ apps/playwright-service-ts/package-lock.json + +/examples/o1_web_crawler/venv *.pyc .rdb + +apps/js-sdk/firecrawl/dist + diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d0145a6b..2a843aa8 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -103,7 +103,7 @@ This should return the response Hello, world! If youโ€™d like to test the crawl endpoint, you can run this ```curl -curl -X POST http://localhost:3002/v0/crawl \ +curl -X POST http://localhost:3002/v1/crawl \ -H 'Content-Type: application/json' \ -d '{ "url": "https://mendable.ai" diff --git a/README.md b/README.md index 8a82c2ee..ee79b9f0 100644 --- a/README.md +++ b/README.md @@ -34,9 +34,9 @@ # ๐Ÿ”ฅ Firecrawl -Crawl and convert any website into LLM-ready markdown or structured data. Built by [Mendable.ai](https://mendable.ai?ref=gfirecrawl) and the Firecrawl community. Includes powerful scraping, crawling and data extraction capabilities. +Empower your AI apps with clean data from any website. Featuring advanced scraping, crawling, and data extraction capabilities. -_This repository is in its early development stages. We are still merging custom modules in the mono repo. It's not completely yet ready for full self-host deployment, but you can already run it locally._ +_This repository is in development, and weโ€™re still integrating custom modules into the mono repo. It's not fully ready for self-hosted deployment yet, but you can run it locally._ ## What is Firecrawl? @@ -52,9 +52,12 @@ _Pst. hey, you, join our stargazers :)_ We provide an easy to use API with our hosted version. You can find the playground and documentation [here](https://firecrawl.dev/playground). You can also self host the backend if you'd like. -- [x] [API](https://firecrawl.dev/playground) -- [x] [Python SDK](https://github.com/mendableai/firecrawl/tree/main/apps/python-sdk) -- [x] [Node SDK](https://github.com/mendableai/firecrawl/tree/main/apps/js-sdk) +Check out the following resources to get started: +- [x] [API](https://docs.firecrawl.dev/api-reference/introduction) +- [x] [Python SDK](https://docs.firecrawl.dev/sdks/python) +- [x] [Node SDK](https://docs.firecrawl.dev/sdks/node) +- [x] [Go SDK](https://docs.firecrawl.dev/sdks/go) +- [x] [Rust SDK](https://docs.firecrawl.dev/sdks/rust) - [x] [Langchain Integration ๐Ÿฆœ๐Ÿ”—](https://python.langchain.com/docs/integrations/document_loaders/firecrawl/) - [x] [Langchain JS Integration ๐Ÿฆœ๐Ÿ”—](https://js.langchain.com/docs/integrations/document_loaders/web_loaders/firecrawl) - [x] [Llama Index Integration ๐Ÿฆ™](https://docs.llamaindex.ai/en/latest/examples/data_connectors/WebPageDemo/#using-firecrawl-reader) @@ -62,8 +65,12 @@ We provide an easy to use API with our hosted version. You can find the playgrou - [x] [Langflow Integration](https://docs.langflow.org/) - [x] [Crew.ai Integration](https://docs.crewai.com/) - [x] [Flowise AI Integration](https://docs.flowiseai.com/integrations/langchain/document-loaders/firecrawl) +- [x] [Composio Integration](https://composio.dev/tools/firecrawl/all) - [x] [PraisonAI Integration](https://docs.praison.ai/firecrawl/) - [x] [Zapier Integration](https://zapier.com/apps/firecrawl/integrations) +- [x] [Cargo Integration](https://docs.getcargo.io/integration/firecrawl) +- [x] [Pipedream Integration](https://pipedream.com/apps/firecrawl/) +- [x] [Pabbly Integration](https://www.pabbly.com/connect/integrations/firecrawl/) - [ ] Want an SDK or Integration? Let us know by opening an issue. To run locally, refer to guide [here](https://github.com/mendableai/firecrawl/blob/main/SELF_HOST.md). @@ -402,15 +409,12 @@ class TopArticlesSchema(BaseModel): top: List[ArticleSchema] = Field(..., max_items=5, description="Top 5 stories") data = app.scrape_url('https://news.ycombinator.com', { - 'extractorOptions': { - 'extractionSchema': TopArticlesSchema.model_json_schema(), - 'mode': 'llm-extraction' - }, - 'pageOptions':{ - 'onlyMainContent': True + 'formats': ['extract'], + 'extract': { + 'schema': TopArticlesSchema.model_json_schema() } }) -print(data["llm_extraction"]) +print(data["extract"]) ``` ## Using the Node SDK @@ -490,6 +494,17 @@ const scrapeResult = await app.scrapeUrl("https://news.ycombinator.com", { console.log(scrapeResult.data["llm_extraction"]); ``` +## Open Source vs Cloud Offering + +Firecrawl is open source available under the AGPL-3.0 license. + +To deliver the best possible product, we offer a hosted version of Firecrawl alongside our open-source offering. The cloud solution allows us to continuously innovate and maintain a high-quality, sustainable service for all users. + +Firecrawl Cloud is available at [firecrawl.dev](https://firecrawl.dev) and offers a range of features that are not available in the open source version: + +![Open Source vs Cloud Offering](https://raw.githubusercontent.com/mendableai/firecrawl/main/img/open-source-cloud.png) + + ## Contributing We love contributions! Please read our [contributing guide](CONTRIBUTING.md) before submitting a pull request. diff --git a/SELF_HOST.md b/SELF_HOST.md index f631cf18..2fa87776 100644 --- a/SELF_HOST.md +++ b/SELF_HOST.md @@ -106,7 +106,7 @@ You should be able to see the Bull Queue Manager UI on `http://localhost:3002/ad If youโ€™d like to test the crawl endpoint, you can run this: ```bash - curl -X POST http://localhost:3002/v0/crawl \ + curl -X POST http://localhost:3002/v1/crawl \ -H 'Content-Type: application/json' \ -d '{ "url": "https://mendable.ai" diff --git a/apps/api/Dockerfile b/apps/api/Dockerfile index 3ffede0d..527a6dc7 100644 --- a/apps/api/Dockerfile +++ b/apps/api/Dockerfile @@ -17,8 +17,15 @@ RUN pnpm install RUN --mount=type=secret,id=SENTRY_AUTH_TOKEN \ bash -c 'export SENTRY_AUTH_TOKEN="$(cat /run/secrets/SENTRY_AUTH_TOKEN)"; if [ -z $SENTRY_AUTH_TOKEN ]; then pnpm run build:nosentry; else pnpm run build; fi' -# Install packages needed for deployment +# Install Go +FROM golang:1.19 AS go-base +COPY src/lib/go-html-to-md /app/src/lib/go-html-to-md +# Install Go dependencies and build parser lib +RUN cd /app/src/lib/go-html-to-md && \ + go mod tidy && \ + go build -o html-to-markdown.so -buildmode=c-shared html-to-markdown.go && \ + chmod +x html-to-markdown.so FROM base RUN apt-get update -qq && \ @@ -26,10 +33,8 @@ RUN apt-get update -qq && \ rm -rf /var/lib/apt/lists /var/cache/apt/archives COPY --from=prod-deps /app/node_modules /app/node_modules COPY --from=build /app /app - - - +COPY --from=go-base /app/src/lib/go-html-to-md/html-to-markdown.so /app/dist/src/lib/go-html-to-md/html-to-markdown.so # Start the server by default, this can be overwritten at runtime EXPOSE 8080 -ENV PUPPETEER_EXECUTABLE_PATH="/usr/bin/chromium" +ENV PUPPETEER_EXECUTABLE_PATH="/usr/bin/chromium" \ No newline at end of file diff --git a/apps/api/package.json b/apps/api/package.json index bac13e79..dc26b34b 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -86,6 +86,7 @@ "joplin-turndown-plugin-gfm": "^1.0.12", "json-schema-to-zod": "^2.3.0", "keyword-extractor": "^0.0.28", + "koffi": "^2.9.0", "langchain": "^0.2.8", "languagedetect": "^2.0.0", "logsnag": "^1.0.0", diff --git a/apps/api/pnpm-lock.yaml b/apps/api/pnpm-lock.yaml index 2762a84c..b8f876a8 100644 --- a/apps/api/pnpm-lock.yaml +++ b/apps/api/pnpm-lock.yaml @@ -122,6 +122,9 @@ importers: keyword-extractor: specifier: ^0.0.28 version: 0.0.28 + koffi: + specifier: ^2.9.0 + version: 2.9.0 langchain: specifier: ^0.2.8 version: 0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0) @@ -3170,6 +3173,9 @@ packages: resolution: {integrity: sha512-eTIzlVOSUR+JxdDFepEYcBMtZ9Qqdef+rnzWdRZuMbOywu5tO2w2N7rqjoANZ5k9vywhL6Br1VRjUIgTQx4E8w==} engines: {node: '>=6'} + koffi@2.9.0: + resolution: {integrity: sha512-KCsuJ2gM58n6bNdR2Z7gqsh/3TchxxQFbVgax2/UvAjRTgwNSYAJDx9E3jrkBP4jEDHWRCfE47Y2OG+/fiSvEw==} + langchain@0.2.8: resolution: {integrity: sha512-kb2IOMA71xH8e6EXFg0l4S+QSMC/c796pj1+7mPBkR91HHwoyHZhFRrBaZv4tV+Td+Ba91J2uEDBmySklZLpNQ==} engines: {node: '>=18'} @@ -8492,6 +8498,8 @@ snapshots: kleur@3.0.3: {} + koffi@2.9.0: {} + langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0): dependencies: '@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)) diff --git a/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts b/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts index 880d34a1..8aabf748 100644 --- a/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts +++ b/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts @@ -1,11 +1,11 @@ import request from "supertest"; -import dotenv from "dotenv"; +import { configDotenv } from "dotenv"; import { ScrapeRequest, ScrapeResponseRequestTest, } from "../../controllers/v1/types"; -dotenv.config(); +configDotenv(); const TEST_URL = "http://127.0.0.1:3002"; describe("E2E Tests for v1 API Routes", () => { @@ -22,6 +22,13 @@ describe("E2E Tests for v1 API Routes", () => { const response: ScrapeResponseRequestTest = await request(TEST_URL).get( "/is-production" ); + + console.log('process.env.USE_DB_AUTHENTICATION', process.env.USE_DB_AUTHENTICATION); + console.log('?', process.env.USE_DB_AUTHENTICATION === 'true'); + const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true'; + console.log('!!useDbAuthentication', !!useDbAuthentication); + console.log('!useDbAuthentication', !useDbAuthentication); + expect(response.statusCode).toBe(200); expect(response.body).toHaveProperty("isProduction"); }); diff --git a/apps/api/src/controllers/v0/crawl-cancel.ts b/apps/api/src/controllers/v0/crawl-cancel.ts index bf1c2d0a..efcd454a 100644 --- a/apps/api/src/controllers/v0/crawl-cancel.ts +++ b/apps/api/src/controllers/v0/crawl-cancel.ts @@ -5,6 +5,8 @@ import { supabase_service } from "../../../src/services/supabase"; import { Logger } from "../../../src/lib/logger"; import { getCrawl, saveCrawl } from "../../../src/lib/crawl-redis"; import * as Sentry from "@sentry/node"; +import { configDotenv } from "dotenv"; +configDotenv(); export async function crawlCancelController(req: Request, res: Response) { try { diff --git a/apps/api/src/controllers/v0/crawl-status.ts b/apps/api/src/controllers/v0/crawl-status.ts index b0649cd0..41491f86 100644 --- a/apps/api/src/controllers/v0/crawl-status.ts +++ b/apps/api/src/controllers/v0/crawl-status.ts @@ -4,14 +4,16 @@ import { RateLimiterMode } from "../../../src/types"; import { getScrapeQueue } from "../../../src/services/queue-service"; import { Logger } from "../../../src/lib/logger"; import { getCrawl, getCrawlJobs } from "../../../src/lib/crawl-redis"; -import { supabaseGetJobsById } from "../../../src/lib/supabase-jobs"; +import { supabaseGetJobsByCrawlId } from "../../../src/lib/supabase-jobs"; import * as Sentry from "@sentry/node"; +import { configDotenv } from "dotenv"; +configDotenv(); -export async function getJobs(ids: string[]) { +export async function getJobs(crawlId: string, ids: string[]) { const jobs = (await Promise.all(ids.map(x => getScrapeQueue().getJob(x)))).filter(x => x); if (process.env.USE_DB_AUTHENTICATION === "true") { - const supabaseData = await supabaseGetJobsById(ids); + const supabaseData = await supabaseGetJobsByCrawlId(crawlId); supabaseData.forEach(x => { const job = jobs.find(y => y.id === x.job_id); @@ -50,7 +52,7 @@ export async function crawlStatusController(req: Request, res: Response) { const jobIDs = await getCrawlJobs(req.params.jobId); - const jobs = (await getJobs(jobIDs)).sort((a, b) => a.timestamp - b.timestamp); + const jobs = (await getJobs(req.params.jobId, jobIDs)).sort((a, b) => a.timestamp - b.timestamp); const jobStatuses = await Promise.all(jobs.map(x => x.getState())); const jobStatus = sc.cancelled ? "failed" : jobStatuses.every(x => x === "completed") ? "completed" : jobStatuses.some(x => x === "failed") ? "failed" : "active"; diff --git a/apps/api/src/controllers/v0/scrape.ts b/apps/api/src/controllers/v0/scrape.ts index bc91da18..c46ebc62 100644 --- a/apps/api/src/controllers/v0/scrape.ts +++ b/apps/api/src/controllers/v0/scrape.ts @@ -39,7 +39,7 @@ export async function scrapeHelper( returnCode: number; }> { const url = req.body.url; - if (!url) { + if (typeof url !== "string") { return { success: false, error: "Url is required", returnCode: 400 }; } @@ -229,7 +229,7 @@ export async function scrapeController(req: Request, res: Response) { if (result.success) { let creditsToBeBilled = 1; - const creditsPerLLMExtract = 49; + const creditsPerLLMExtract = 4; if (extractorOptions.mode.includes("llm-extraction")) { // creditsToBeBilled = creditsToBeBilled + (creditsPerLLMExtract * filteredDocs.length); diff --git a/apps/api/src/controllers/v0/status.ts b/apps/api/src/controllers/v0/status.ts index 34ebb3c6..bf8d2834 100644 --- a/apps/api/src/controllers/v0/status.ts +++ b/apps/api/src/controllers/v0/status.ts @@ -22,7 +22,7 @@ export async function crawlJobStatusPreviewController(req: Request, res: Respons // } // } - const jobs = (await getJobs(jobIDs)).sort((a, b) => a.timestamp - b.timestamp); + const jobs = (await getJobs(req.params.jobId, jobIDs)).sort((a, b) => a.timestamp - b.timestamp); const jobStatuses = await Promise.all(jobs.map(x => x.getState())); const jobStatus = sc.cancelled ? "failed" : jobStatuses.every(x => x === "completed") ? "completed" : jobStatuses.some(x => x === "failed") ? "failed" : "active"; diff --git a/apps/api/src/controllers/v1/crawl-cancel.ts b/apps/api/src/controllers/v1/crawl-cancel.ts index 06a5b26e..21fc7cf9 100644 --- a/apps/api/src/controllers/v1/crawl-cancel.ts +++ b/apps/api/src/controllers/v1/crawl-cancel.ts @@ -5,6 +5,8 @@ import { supabase_service } from "../../services/supabase"; import { Logger } from "../../lib/logger"; import { getCrawl, saveCrawl } from "../../lib/crawl-redis"; import * as Sentry from "@sentry/node"; +import { configDotenv } from "dotenv"; +configDotenv(); export async function crawlCancelController(req: Request, res: Response) { try { diff --git a/apps/api/src/controllers/v1/crawl-status-ws.ts b/apps/api/src/controllers/v1/crawl-status-ws.ts index 8d823096..16a67682 100644 --- a/apps/api/src/controllers/v1/crawl-status-ws.ts +++ b/apps/api/src/controllers/v1/crawl-status-ws.ts @@ -103,6 +103,7 @@ async function crawlStatusWS(ws: WebSocket, req: RequestWithAuth x.returnvalue); - const nextURL = new URL(`${req.protocol}://${req.get("host")}/v1/crawl/${req.params.jobId}`); + const protocol = process.env.ENV === "local" ? req.protocol : "https"; + const nextURL = new URL(`${protocol}://${req.get("host")}/v1/crawl/${req.params.jobId}`); nextURL.searchParams.set("skip", (start + data.length).toString()); @@ -111,6 +114,7 @@ export async function crawlStatusController(req: RequestWithAuth, @@ -30,8 +37,7 @@ export async function mapController( req.body = mapRequestSchema.parse(req.body); - - const limit : number = req.body.limit ?? 5000; + const limit: number = req.body.limit ?? MAX_MAP_LIMIT; const id = uuidv4(); let links: string[] = [req.body.url]; @@ -47,24 +53,61 @@ export async function mapController( const crawler = crawlToCrawler(id, sc); - const sitemap = req.body.ignoreSitemap ? null : await crawler.tryGetSitemap(); - - if (sitemap !== null) { - sitemap.map((x) => { - links.push(x.url); - }); - } - let urlWithoutWww = req.body.url.replace("www.", ""); let mapUrl = req.body.search ? `"${req.body.search}" site:${urlWithoutWww}` : `site:${req.body.url}`; - // www. seems to exclude subdomains in some cases - const mapResults = await fireEngineMap(mapUrl, { - // limit to 50 results (beta) - numResults: Math.min(limit, 50), - }); + + const resultsPerPage = 100; + const maxPages = Math.ceil(Math.min(MAX_FIRE_ENGINE_RESULTS, limit) / resultsPerPage); + + const cacheKey = `fireEngineMap:${mapUrl}`; + const cachedResult = await redis.get(cacheKey); + + let allResults: any[]; + let pagePromises: Promise[]; + + if (cachedResult) { + allResults = JSON.parse(cachedResult); + } else { + const fetchPage = async (page: number) => { + return fireEngineMap(mapUrl, { + numResults: resultsPerPage, + page: page, + }); + }; + + pagePromises = Array.from({ length: maxPages }, (_, i) => fetchPage(i + 1)); + allResults = await Promise.all(pagePromises); + + await redis.set(cacheKey, JSON.stringify(allResults), "EX", 24 * 60 * 60); // Cache for 24 hours + } + + // Parallelize sitemap fetch with serper search + const [sitemap, ...searchResults] = await Promise.all([ + req.body.ignoreSitemap ? null : crawler.tryGetSitemap(), + ...(cachedResult ? [] : pagePromises), + ]); + + if (!cachedResult) { + allResults = searchResults; + } + + if (sitemap !== null) { + sitemap.forEach((x) => { + links.push(x.url); + }); + } + + let mapResults = allResults + .flat() + .filter((result) => result !== null && result !== undefined); + + const minumumCutoff = Math.min(MAX_MAP_LIMIT, limit); + if (mapResults.length > minumumCutoff) { + mapResults = mapResults.slice(0, minumumCutoff); + } if (mapResults.length > 0) { if (req.body.search) { @@ -84,11 +127,19 @@ export async function mapController( // Perform cosine similarity between the search query and the list of links if (req.body.search) { const searchQuery = req.body.search.toLowerCase(); - + links = performCosineSimilarity(links, searchQuery); } - links = links.map((x) => checkAndUpdateURLForMap(x).url.trim()); + links = links + .map((x) => { + try { + return checkAndUpdateURLForMap(x).url.trim(); + } catch (_) { + return null; + } + }) + .filter((x) => x !== null); // allows for subdomains to be included links = links.filter((x) => isSameDomain(x, req.body.url)); @@ -101,8 +152,10 @@ export async function mapController( // remove duplicates that could be due to http/https or www links = removeDuplicateUrls(links); - billTeam(req.auth.team_id, 1).catch(error => { - Logger.error(`Failed to bill team ${req.auth.team_id} for 1 credit: ${error}`); + billTeam(req.auth.team_id, 1).catch((error) => { + Logger.error( + `Failed to bill team ${req.auth.team_id} for 1 credit: ${error}` + ); // Optionally, you could notify an admin or add to a retry queue here }); @@ -110,7 +163,7 @@ export async function mapController( const timeTakenInSeconds = (endTime - startTime) / 1000; const linksToReturn = links.slice(0, limit); - + logJob({ job_id: id, success: links.length > 0, @@ -134,3 +187,51 @@ export async function mapController( scrape_id: req.body.origin?.includes("website") ? id : undefined, }); } + +// Subdomain sitemap url checking + +// // For each result, check for subdomains, get their sitemaps and add them to the links +// const processedUrls = new Set(); +// const processedSubdomains = new Set(); + +// for (const result of links) { +// let url; +// let hostParts; +// try { +// url = new URL(result); +// hostParts = url.hostname.split('.'); +// } catch (e) { +// continue; +// } + +// console.log("hostParts", hostParts); +// // Check if it's a subdomain (more than 2 parts, and not 'www') +// if (hostParts.length > 2 && hostParts[0] !== 'www') { +// const subdomain = hostParts[0]; +// console.log("subdomain", subdomain); +// const subdomainUrl = `${url.protocol}//${subdomain}.${hostParts.slice(-2).join('.')}`; +// console.log("subdomainUrl", subdomainUrl); + +// if (!processedSubdomains.has(subdomainUrl)) { +// processedSubdomains.add(subdomainUrl); + +// const subdomainCrawl = crawlToCrawler(id, { +// originUrl: subdomainUrl, +// crawlerOptions: legacyCrawlerOptions(req.body), +// pageOptions: {}, +// team_id: req.auth.team_id, +// createdAt: Date.now(), +// plan: req.auth.plan, +// }); +// const subdomainSitemap = await subdomainCrawl.tryGetSitemap(); +// if (subdomainSitemap) { +// subdomainSitemap.forEach((x) => { +// if (!processedUrls.has(x.url)) { +// processedUrls.add(x.url); +// links.push(x.url); +// } +// }); +// } +// } +// } +// } diff --git a/apps/api/src/controllers/v1/scrape.ts b/apps/api/src/controllers/v1/scrape.ts index 0835cc2a..f0744c22 100644 --- a/apps/api/src/controllers/v1/scrape.ts +++ b/apps/api/src/controllers/v1/scrape.ts @@ -103,7 +103,7 @@ export async function scrapeController( return; } if(req.body.extract && req.body.formats.includes("extract")) { - creditsToBeBilled = 50; + creditsToBeBilled = 5; } billTeam(req.auth.team_id, creditsToBeBilled).catch(error => { diff --git a/apps/api/src/controllers/v1/types.ts b/apps/api/src/controllers/v1/types.ts index c4e0cf84..ab811067 100644 --- a/apps/api/src/controllers/v1/types.ts +++ b/apps/api/src/controllers/v1/types.ts @@ -30,7 +30,14 @@ export const url = z.preprocess( "URL must have a valid top-level domain or be a valid path" ) .refine( - (x) => checkUrl(x as string), + (x) => { + try { + checkUrl(x as string) + return true; + } catch (_) { + return false; + } + }, "Invalid URL" ) .refine( @@ -63,7 +70,8 @@ export const scrapeOptions = z.object({ ]) .array() .optional() - .default(["markdown"]), + .default(["markdown"]) + .refine(x => !(x.includes("screenshot") && x.includes("screenshot@fullPage")), "You may only specify either screenshot or screenshot@fullPage"), headers: z.record(z.string(), z.string()).optional(), includeTags: z.string().array().optional(), excludeTags: z.string().array().optional(), @@ -257,6 +265,7 @@ export type CrawlStatusParams = { export type CrawlStatusResponse = | ErrorResponse | { + success: true; status: "scraping" | "completed" | "failed" | "cancelled"; completed: number; total: number; @@ -322,6 +331,7 @@ export function legacyScrapeOptions(x: ScrapeOptions): PageOptions { removeTags: x.excludeTags, onlyMainContent: x.onlyMainContent, waitFor: x.waitFor, + headers: x.headers, includeLinks: x.formats.includes("links"), screenshot: x.formats.includes("screenshot"), fullPageScreenshot: x.formats.includes("screenshot@fullPage"), @@ -339,7 +349,7 @@ export function legacyExtractorOptions(x: ExtractOptions): ExtractorOptions { } export function legacyDocumentConverter(doc: any): Document { - if (doc === null || doc === undefined) return doc; + if (doc === null || doc === undefined) return null; if (doc.metadata) { if (doc.metadata.screenshot) { diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts index 58370158..7d8817af 100644 --- a/apps/api/src/index.ts +++ b/apps/api/src/index.ts @@ -201,16 +201,20 @@ if (cluster.isMaster) { Sentry.setupExpressErrorHandler(app); app.use((err: unknown, req: Request<{}, ErrorResponse, undefined>, res: ResponseWithSentry, next: NextFunction) => { + if (err instanceof SyntaxError && 'status' in err && err.status === 400 && 'body' in err) { + return res.status(400).json({ success: false, error: 'Bad request, malformed JSON' }); + } + const id = res.sentry ?? uuidv4(); let verbose = JSON.stringify(err); if (verbose === "{}") { - if (err instanceof Error) { - verbose = JSON.stringify({ - message: err.message, - name: err.name, - stack: err.stack, - }); - } + if (err instanceof Error) { + verbose = JSON.stringify({ + message: err.message, + name: err.name, + stack: err.stack, + }); + } } Logger.error("Error occurred in request! (" + req.path + ") -- ID " + id + " -- " + verbose); diff --git a/apps/api/src/lib/__tests__/html-to-markdown.test.ts b/apps/api/src/lib/__tests__/html-to-markdown.test.ts new file mode 100644 index 00000000..3c68c959 --- /dev/null +++ b/apps/api/src/lib/__tests__/html-to-markdown.test.ts @@ -0,0 +1,40 @@ +import { parseMarkdown } from '../html-to-markdown'; + +describe('parseMarkdown', () => { + it('should correctly convert simple HTML to Markdown', async () => { + const html = '

Hello, world!

'; + const expectedMarkdown = 'Hello, world!'; + await expect(parseMarkdown(html)).resolves.toBe(expectedMarkdown); + }); + + it('should convert complex HTML with nested elements to Markdown', async () => { + const html = '

Hello bold world!

  • List item
'; + const expectedMarkdown = 'Hello **bold** world!\n\n- List item'; + await expect(parseMarkdown(html)).resolves.toBe(expectedMarkdown); + }); + + it('should return empty string when input is empty', async () => { + const html = ''; + const expectedMarkdown = ''; + await expect(parseMarkdown(html)).resolves.toBe(expectedMarkdown); + }); + + it('should handle null input gracefully', async () => { + const html = null; + const expectedMarkdown = ''; + await expect(parseMarkdown(html)).resolves.toBe(expectedMarkdown); + }); + + it('should handle various types of invalid HTML gracefully', async () => { + const invalidHtmls = [ + { html: '

Unclosed tag', expected: 'Unclosed tag' }, + { html: '

Missing closing div', expected: 'Missing closing div' }, + { html: '

Wrong nesting

', expected: '**Wrong nesting**' }, + { html: 'Link without closing tag', expected: '[Link without closing tag](http://example.com)' } + ]; + + for (const { html, expected } of invalidHtmls) { + await expect(parseMarkdown(html)).resolves.toBe(expected); + } + }); +}); diff --git a/apps/api/src/lib/entities.ts b/apps/api/src/lib/entities.ts index dfd17c63..d7ec2a83 100644 --- a/apps/api/src/lib/entities.ts +++ b/apps/api/src/lib/entities.ts @@ -28,7 +28,7 @@ export type PageOptions = { onlyIncludeTags?: string | string[]; includeLinks?: boolean; useFastMode?: boolean; // beta - disableJSDom?: boolean; // beta + disableJsDom?: boolean; // beta atsv?: boolean; // beta }; diff --git a/apps/api/src/lib/go-html-to-md/README.md b/apps/api/src/lib/go-html-to-md/README.md new file mode 100644 index 00000000..4ad510c3 --- /dev/null +++ b/apps/api/src/lib/go-html-to-md/README.md @@ -0,0 +1,7 @@ +To build the go-html-to-md library, run the following command: + +```bash +cd apps/api/src/lib/go-html-to-md +go build -o html-to-markdown.so -buildmode=c-shared html-to-markdown.go +chmod +x html-to-markdown.so +``` \ No newline at end of file diff --git a/apps/api/src/lib/go-html-to-md/go.mod b/apps/api/src/lib/go-html-to-md/go.mod new file mode 100644 index 00000000..0836f441 --- /dev/null +++ b/apps/api/src/lib/go-html-to-md/go.mod @@ -0,0 +1,14 @@ +module html-to-markdown.go + +go 1.19 + +require github.com/JohannesKaufmann/html-to-markdown v1.6.0 + +require ( + github.com/PuerkitoBio/goquery v1.9.2 // indirect + github.com/andybalholm/cascadia v1.3.2 // indirect + github.com/kr/pretty v0.3.0 // indirect + golang.org/x/net v0.25.0 // indirect + gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect +) diff --git a/apps/api/src/lib/go-html-to-md/go.sum b/apps/api/src/lib/go-html-to-md/go.sum new file mode 100644 index 00000000..7961629d --- /dev/null +++ b/apps/api/src/lib/go-html-to-md/go.sum @@ -0,0 +1,93 @@ +github.com/JohannesKaufmann/html-to-markdown v1.6.0 h1:04VXMiE50YYfCfLboJCLcgqF5x+rHJnb1ssNmqpLH/k= +github.com/JohannesKaufmann/html-to-markdown v1.6.0/go.mod h1:NUI78lGg/a7vpEJTz/0uOcYMaibytE4BUOQS8k78yPQ= +github.com/PuerkitoBio/goquery v1.9.2 h1:4/wZksC3KgkQw7SQgkKotmKljk0M6V8TUvA8Wb4yPeE= +github.com/PuerkitoBio/goquery v1.9.2/go.mod h1:GHPCaP0ODyyxqcNoFGYlAprUFH81NuRPd0GX3Zu2Mvk= +github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss= +github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= +github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.6.1 h1:/FiVV8dS/e+YqF2JvO3yXRFbBLTIuSDkuC7aBOAvL+k= +github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= +github.com/sebdah/goldie/v2 v2.5.3 h1:9ES/mNN+HNUbNWpVAlrzuZ7jE+Nrczbj8uFRjM7624Y= +github.com/sebdah/goldie/v2 v2.5.3/go.mod h1:oZ9fp0+se1eapSRjfYbsV/0Hqhbuu3bJVvKI/NNtssI= +github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo= +github.com/sergi/go-diff v1.3.1 h1:xkr+Oxo4BOQKmkn/B9eMK0g5Kg/983T9DqqPHwYqD+8= +github.com/sergi/go-diff v1.3.1/go.mod h1:aMJSSKb2lpPvRNec0+w3fl7LP9IOFzdc9Pa4NFbPK1I= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +github.com/yuin/goldmark v1.7.1 h1:3bajkSilaCbjdKVsKdZjZCLBNPL9pYzrCakKaf4U49U= +github.com/yuin/goldmark v1.7.1/go.mod h1:uzxRWxtg69N339t3louHJ7+O03ezfj6PlliRlaOzY1E= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= +golang.org/x/crypto v0.22.0/go.mod h1:vr6Su+7cTlO45qkww3VDJlzDn0ctJvRgYbC2NvXHt+M= +golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= +golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= +golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= +golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8= +golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= +golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= +golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY= +golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= +golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= +golang.org/x/term v0.19.0/go.mod h1:2CuTdWZ7KHSQwUzKva0cbMg6q2DMI3Mmxp+gKJbskEk= +golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= diff --git a/apps/api/src/lib/go-html-to-md/html-to-markdown.go b/apps/api/src/lib/go-html-to-md/html-to-markdown.go new file mode 100644 index 00000000..9905a69a --- /dev/null +++ b/apps/api/src/lib/go-html-to-md/html-to-markdown.go @@ -0,0 +1,25 @@ +package main + +import ( + "C" + "log" + + md "github.com/JohannesKaufmann/html-to-markdown" + "github.com/JohannesKaufmann/html-to-markdown/plugin" +) + +//export ConvertHTMLToMarkdown +func ConvertHTMLToMarkdown(html *C.char) *C.char { + converter := md.NewConverter("", true, nil) + converter.Use(plugin.GitHubFlavored()) + + markdown, err := converter.ConvertString(C.GoString(html)) + if err != nil { + log.Fatal(err) + } + return C.CString(markdown) +} + +func main() { + // This function is required for the main package +} diff --git a/apps/api/src/lib/html-to-markdown.ts b/apps/api/src/lib/html-to-markdown.ts index 002cb7be..a542a434 100644 --- a/apps/api/src/lib/html-to-markdown.ts +++ b/apps/api/src/lib/html-to-markdown.ts @@ -1,8 +1,68 @@ -export async function parseMarkdown(html: string) { +import koffi from 'koffi'; +import { join } from 'path'; +import "../services/sentry" +import * as Sentry from "@sentry/node"; + +import dotenv from 'dotenv'; +import { Logger } from './logger'; +dotenv.config(); + +// TODO: add a timeout to the Go parser + +class GoMarkdownConverter { + private static instance: GoMarkdownConverter; + private convert: any; + + private constructor() { + const goExecutablePath = join(__dirname, 'go-html-to-md/html-to-markdown.so'); + const lib = koffi.load(goExecutablePath); + this.convert = lib.func('ConvertHTMLToMarkdown', 'string', ['string']); + } + + public static getInstance(): GoMarkdownConverter { + if (!GoMarkdownConverter.instance) { + GoMarkdownConverter.instance = new GoMarkdownConverter(); + } + return GoMarkdownConverter.instance; + } + + public async convertHTMLToMarkdown(html: string): Promise { + return new Promise((resolve, reject) => { + this.convert.async(html, (err: Error, res: string) => { + if (err) { + reject(err); + } else { + resolve(res); + } + }); + }); + } +} + +export async function parseMarkdown(html: string): Promise { + if (!html) { + return ''; + } + + try { + if (process.env.USE_GO_MARKDOWN_PARSER == "true") { + const converter = GoMarkdownConverter.getInstance(); + let markdownContent = await converter.convertHTMLToMarkdown(html); + + markdownContent = processMultiLineLinks(markdownContent); + markdownContent = removeSkipToContentLinks(markdownContent); + Logger.info(`HTML to Markdown conversion using Go parser successful`); + return markdownContent; + } + } catch (error) { + Sentry.captureException(error); + Logger.error(`Error converting HTML to Markdown with Go parser: ${error}`); + } + + // Fallback to TurndownService if Go parser fails or is not enabled var TurndownService = require("turndown"); - var turndownPluginGfm = require('joplin-turndown-plugin-gfm') - + var turndownPluginGfm = require('joplin-turndown-plugin-gfm'); const turndownService = new TurndownService(); turndownService.addRule("inlineLink", { @@ -21,29 +81,20 @@ export async function parseMarkdown(html: string) { }); var gfm = turndownPluginGfm.gfm; turndownService.use(gfm); - let markdownContent = ""; - const turndownPromise = new Promise((resolve, reject) => { - try { - const result = turndownService.turndown(html); - resolve(result); - } catch (error) { - reject("Error converting HTML to Markdown: " + error); - } - }); - - const timeoutPromise = new Promise((resolve, reject) => { - const timeout = 5000; // Timeout in milliseconds - setTimeout(() => reject("Conversion timed out after " + timeout + "ms"), timeout); - }); try { - markdownContent = await Promise.race([turndownPromise, timeoutPromise]); + let markdownContent = await turndownService.turndown(html); + markdownContent = processMultiLineLinks(markdownContent); + markdownContent = removeSkipToContentLinks(markdownContent); + + return markdownContent; } catch (error) { - console.error(error); + console.error("Error converting HTML to Markdown: ", error); return ""; // Optionally return an empty string or handle the error as needed } +} - // multiple line links +function processMultiLineLinks(markdownContent: string): string { let insideLinkContent = false; let newMarkdownContent = ""; let linkOpenCount = 0; @@ -63,12 +114,14 @@ export async function parseMarkdown(html: string) { newMarkdownContent += char; } } - markdownContent = newMarkdownContent; + return newMarkdownContent; +} +function removeSkipToContentLinks(markdownContent: string): string { // Remove [Skip to Content](#page) and [Skip to content](#skip) - markdownContent = markdownContent.replace( + const newMarkdownContent = markdownContent.replace( /\[Skip to Content\]\(#[^\)]*\)/gi, "" ); - return markdownContent; -} + return newMarkdownContent; +} \ No newline at end of file diff --git a/apps/api/src/lib/logger.ts b/apps/api/src/lib/logger.ts index fb0468c2..cb8b4119 100644 --- a/apps/api/src/lib/logger.ts +++ b/apps/api/src/lib/logger.ts @@ -1,3 +1,6 @@ +import { configDotenv } from "dotenv"; +configDotenv(); + enum LogLevel { NONE = 'NONE', // No logs will be output. ERROR = 'ERROR', // For logging error messages that indicate a failure in a specific operation. diff --git a/apps/api/src/lib/scrape-events.ts b/apps/api/src/lib/scrape-events.ts index ed011b78..ad70dfef 100644 --- a/apps/api/src/lib/scrape-events.ts +++ b/apps/api/src/lib/scrape-events.ts @@ -2,6 +2,8 @@ import { Job } from "bullmq"; import type { baseScrapers } from "../scraper/WebScraper/single_url"; import { supabase_service as supabase } from "../services/supabase"; import { Logger } from "./logger"; +import { configDotenv } from "dotenv"; +configDotenv(); export type ScrapeErrorEvent = { type: "error", diff --git a/apps/api/src/lib/supabase-jobs.ts b/apps/api/src/lib/supabase-jobs.ts index cda6fd46..c418a6e0 100644 --- a/apps/api/src/lib/supabase-jobs.ts +++ b/apps/api/src/lib/supabase-jobs.ts @@ -2,6 +2,11 @@ import { supabase_service } from "../services/supabase"; import { Logger } from "./logger"; import * as Sentry from "@sentry/node"; +/** + * Get a single firecrawl_job by ID + * @param jobId ID of Job + * @returns {any | null} Job + */ export const supabaseGetJobById = async (jobId: string) => { const { data, error } = await supabase_service .from("firecrawl_jobs") @@ -20,13 +25,43 @@ export const supabaseGetJobById = async (jobId: string) => { return data; }; +/** + * Get multiple firecrawl_jobs by ID. Use this if you're not requesting a lot (50+) of jobs at once. + * @param jobIds IDs of Jobs + * @returns {any[]} Jobs + */ export const supabaseGetJobsById = async (jobIds: string[]) => { - const { data, error } = await supabase_service.rpc("get_jobs_by_ids", { - job_ids: jobIds, - }); + const { data, error } = await supabase_service + .from("firecrawl_jobs") + .select() + .in("job_id", jobIds); if (error) { - Logger.error(`Error in get_jobs_by_ids: ${error}`); + Logger.error(`Error in supabaseGetJobsById: ${error}`); + Sentry.captureException(error); + return []; + } + + if (!data) { + return []; + } + + return data; +}; + +/** + * Get multiple firecrawl_jobs by crawl ID. Use this if you need a lot of jobs at once. + * @param crawlId ID of crawl + * @returns {any[]} Jobs + */ +export const supabaseGetJobsByCrawlId = async (crawlId: string) => { + const { data, error } = await supabase_service + .from("firecrawl_jobs") + .select() + .eq("crawl_id", crawlId) + + if (error) { + Logger.error(`Error in supabaseGetJobsByCrawlId: ${error}`); Sentry.captureException(error); return []; } diff --git a/apps/api/src/lib/withAuth.ts b/apps/api/src/lib/withAuth.ts index 90cfb449..b45b8973 100644 --- a/apps/api/src/lib/withAuth.ts +++ b/apps/api/src/lib/withAuth.ts @@ -1,6 +1,8 @@ import { AuthResponse } from "../../src/types"; import { Logger } from "./logger"; import * as Sentry from "@sentry/node"; +import { configDotenv } from "dotenv"; +configDotenv(); let warningCount = 0; diff --git a/apps/api/src/main/runWebScraper.ts b/apps/api/src/main/runWebScraper.ts index cd199fa1..f67a1cd0 100644 --- a/apps/api/src/main/runWebScraper.ts +++ b/apps/api/src/main/runWebScraper.ts @@ -12,6 +12,8 @@ import { Document } from "../lib/entities"; import { supabase_service } from "../services/supabase"; import { Logger } from "../lib/logger"; import { ScrapeEvents } from "../lib/scrape-events"; +import { configDotenv } from "dotenv"; +configDotenv(); export async function startWebScraperPipeline({ job, diff --git a/apps/api/src/routes/v1.ts b/apps/api/src/routes/v1.ts index daa9bf43..484ab5dc 100644 --- a/apps/api/src/routes/v1.ts +++ b/apps/api/src/routes/v1.ts @@ -83,7 +83,7 @@ function idempotencyMiddleware(req: Request, res: Response, next: NextFunction) } function blocklistMiddleware(req: Request, res: Response, next: NextFunction) { - if (req.body.url && isUrlBlocked(req.body.url)) { + if (typeof req.body.url === "string" && isUrlBlocked(req.body.url)) { if (!res.headersSent) { return res.status(403).json({ success: false, error: "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions." }); } diff --git a/apps/api/src/scraper/WebScraper/index.ts b/apps/api/src/scraper/WebScraper/index.ts index fc828224..2f7efa47 100644 --- a/apps/api/src/scraper/WebScraper/index.ts +++ b/apps/api/src/scraper/WebScraper/index.ts @@ -589,6 +589,9 @@ export class WebScraperDataProvider { includeLinks: options.pageOptions?.includeLinks ?? true, fullPageScreenshot: options.pageOptions?.fullPageScreenshot ?? false, screenshot: options.pageOptions?.screenshot ?? false, + useFastMode: options.pageOptions?.useFastMode ?? false, + disableJsDom: options.pageOptions?.disableJsDom ?? false, + atsv: options.pageOptions?.atsv ?? false }; this.extractorOptions = options.extractorOptions ?? { mode: "markdown" }; this.replaceAllPathsWithAbsolutePaths = diff --git a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts index aa86ad5e..80ac7924 100644 --- a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts +++ b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts @@ -55,7 +55,7 @@ export async function scrapWithFireEngine({ try { const reqParams = await generateRequestParams(url); let waitParam = reqParams["params"]?.wait ?? waitFor; - let engineParam = reqParams["params"]?.engine ?? reqParams["params"]?.fireEngineOptions?.engine ?? fireEngineOptions?.engine ?? "playwright"; + let engineParam = reqParams["params"]?.engine ?? reqParams["params"]?.fireEngineOptions?.engine ?? fireEngineOptions?.engine ?? "chrome-cdp"; let screenshotParam = reqParams["params"]?.screenshot ?? screenshot; let fullPageScreenshotParam = reqParams["params"]?.fullPageScreenshot ?? fullPageScreenshot; let fireEngineOptionsParam : FireEngineOptions = reqParams["params"]?.fireEngineOptions ?? fireEngineOptions; @@ -69,15 +69,15 @@ export async function scrapWithFireEngine({ let engine = engineParam; // do we want fireEngineOptions as first choice? - Logger.info( - `โ›๏ธ Fire-Engine (${engine}): Scraping ${url} | params: { wait: ${waitParam}, screenshot: ${screenshotParam}, fullPageScreenshot: ${fullPageScreenshot}, method: ${fireEngineOptionsParam?.method ?? "null"} }` - ); - if (pageOptions?.useFastMode) { fireEngineOptionsParam.engine = "tlsclient"; engine = "tlsclient"; } + Logger.info( + `โ›๏ธ Fire-Engine (${engine}): Scraping ${url} | params: { wait: ${waitParam}, screenshot: ${screenshotParam}, fullPageScreenshot: ${fullPageScreenshot}, method: ${fireEngineOptionsParam?.method ?? "null"} }` + ); + // atsv is only available for beta customers const betaCustomersString = process.env.BETA_CUSTOMERS; const betaCustomers = betaCustomersString ? betaCustomersString.split(",") : []; @@ -96,6 +96,7 @@ export async function scrapWithFireEngine({ const _response = await Sentry.startSpan({ name: "Call to fire-engine" }, async span => { + return await axiosInstance.post( process.env.FIRE_ENGINE_BETA_URL + endpoint, { @@ -104,12 +105,13 @@ export async function scrapWithFireEngine({ screenshot: screenshotParam, fullPageScreenshot: fullPageScreenshotParam, headers: headers, - pageOptions: pageOptions, disableJsDom: pageOptions?.disableJsDom ?? false, priority, engine, instantReturn: true, ...fireEngineOptionsParam, + atsv: pageOptions?.atsv ?? false, + scrollXPaths: pageOptions?.scrollXPaths ?? [], }, { headers: { @@ -125,7 +127,7 @@ export async function scrapWithFireEngine({ let checkStatusResponse = await axiosInstance.get(`${process.env.FIRE_ENGINE_BETA_URL}/scrape/${_response.data.jobId}`); while (checkStatusResponse.data.processing && Date.now() - startTime < universalTimeout + waitParam) { - await new Promise(resolve => setTimeout(resolve, 1000)); // wait 1 second + await new Promise(resolve => setTimeout(resolve, 250)); // wait 0.25 seconds checkStatusResponse = await axiosInstance.get(`${process.env.FIRE_ENGINE_BETA_URL}/scrape/${_response.data.jobId}`); } diff --git a/apps/api/src/scraper/WebScraper/single_url.ts b/apps/api/src/scraper/WebScraper/single_url.ts index 11e1fe37..8143bab0 100644 --- a/apps/api/src/scraper/WebScraper/single_url.ts +++ b/apps/api/src/scraper/WebScraper/single_url.ts @@ -28,8 +28,8 @@ const useFireEngine = process.env.FIRE_ENGINE_BETA_URL !== '' && process.env.FIR export const baseScrapers = [ useFireEngine ? "fire-engine;chrome-cdp" : undefined, - useFireEngine ? "fire-engine" : undefined, useScrapingBee ? "scrapingBee" : undefined, + useFireEngine ? "fire-engine" : undefined, useFireEngine ? undefined : "playwright", useScrapingBee ? "scrapingBeeLoad" : undefined, "fetch", @@ -89,22 +89,22 @@ function getScrapingFallbackOrder( let defaultOrder = [ useFireEngine ? "fire-engine;chrome-cdp" : undefined, - useFireEngine ? "fire-engine" : undefined, useScrapingBee ? "scrapingBee" : undefined, + useFireEngine ? "fire-engine" : undefined, useScrapingBee ? "scrapingBeeLoad" : undefined, useFireEngine ? undefined : "playwright", "fetch", ].filter(Boolean); - if (isWaitPresent || isScreenshotPresent || isHeadersPresent) { - defaultOrder = [ - "fire-engine", - useFireEngine ? undefined : "playwright", - ...defaultOrder.filter( - (scraper) => scraper !== "fire-engine" && scraper !== "playwright" - ), - ].filter(Boolean); - } + // if (isWaitPresent || isScreenshotPresent || isHeadersPresent) { + // defaultOrder = [ + // "fire-engine", + // useFireEngine ? undefined : "playwright", + // ...defaultOrder.filter( + // (scraper) => scraper !== "fire-engine" && scraper !== "playwright" + // ), + // ].filter(Boolean); + // } const filteredDefaultOrder = defaultOrder.filter( (scraper: (typeof baseScrapers)[number]) => @@ -146,6 +146,9 @@ export async function scrapSingleUrl( parsePDF: pageOptions.parsePDF ?? true, removeTags: pageOptions.removeTags ?? [], onlyIncludeTags: pageOptions.onlyIncludeTags ?? [], + useFastMode: pageOptions.useFastMode ?? false, + disableJsDom: pageOptions.disableJsDom ?? false, + atsv: pageOptions.atsv ?? false } if (extractorOptions) { @@ -200,6 +203,7 @@ export async function scrapSingleUrl( fireEngineOptions: { engine: engine, atsv: pageOptions.atsv, + disableJsDom: pageOptions.disableJsDom, }, priority, teamId, diff --git a/apps/api/src/scraper/WebScraper/sitemap.ts b/apps/api/src/scraper/WebScraper/sitemap.ts index b1a6a6ff..13dfc26e 100644 --- a/apps/api/src/scraper/WebScraper/sitemap.ts +++ b/apps/api/src/scraper/WebScraper/sitemap.ts @@ -36,17 +36,15 @@ export async function getLinksFromSitemap( const root = parsed.urlset || parsed.sitemapindex; if (root && root.sitemap) { - for (const sitemap of root.sitemap) { - if (sitemap.loc && sitemap.loc.length > 0) { - await getLinksFromSitemap({ sitemapUrl: sitemap.loc[0], allUrls, mode }); - } - } + const sitemapPromises = root.sitemap + .filter(sitemap => sitemap.loc && sitemap.loc.length > 0) + .map(sitemap => getLinksFromSitemap({ sitemapUrl: sitemap.loc[0], allUrls, mode })); + await Promise.all(sitemapPromises); } else if (root && root.url) { - for (const url of root.url) { - if (url.loc && url.loc.length > 0 && !WebCrawler.prototype.isFile(url.loc[0])) { - allUrls.push(url.loc[0]); - } - } + const validUrls = root.url + .filter(url => url.loc && url.loc.length > 0 && !WebCrawler.prototype.isFile(url.loc[0])) + .map(url => url.loc[0]); + allUrls.push(...validUrls); } } catch (error) { Logger.debug(`Error processing sitemapUrl: ${sitemapUrl} | Error: ${error.message}`); diff --git a/apps/api/src/scraper/WebScraper/utils/custom/website_params.ts b/apps/api/src/scraper/WebScraper/utils/custom/website_params.ts index af8d1f34..8169d9d3 100644 --- a/apps/api/src/scraper/WebScraper/utils/custom/website_params.ts +++ b/apps/api/src/scraper/WebScraper/utils/custom/website_params.ts @@ -242,5 +242,13 @@ export const urlSpecificParams = { engine: "chrome-cdp", }, }, + }, + "lorealparis.hu":{ + defaultScraper: "fire-engine", + params:{ + fireEngineOptions:{ + engine: "tlsclient", + }, + }, } }; diff --git a/apps/api/src/scraper/WebScraper/utils/excludeTags.ts b/apps/api/src/scraper/WebScraper/utils/excludeTags.ts index bb9c5194..400ef84f 100644 --- a/apps/api/src/scraper/WebScraper/utils/excludeTags.ts +++ b/apps/api/src/scraper/WebScraper/utils/excludeTags.ts @@ -39,16 +39,8 @@ export const excludeNonMainTags = [ "#search", ".share", "#share", - ".pagination", - "#pagination", ".widget", "#widget", - ".related", - "#related", - ".tag", - "#tag", - ".category", - "#category", ".cookie", "#cookie" ]; diff --git a/apps/api/src/search/fireEngine.ts b/apps/api/src/search/fireEngine.ts index 7c6d8a4d..d5e15656 100644 --- a/apps/api/src/search/fireEngine.ts +++ b/apps/api/src/search/fireEngine.ts @@ -1,10 +1,14 @@ import axios from "axios"; import dotenv from "dotenv"; import { SearchResult } from "../../src/lib/entities"; +import * as Sentry from "@sentry/node"; +import { Logger } from "../lib/logger"; dotenv.config(); -export async function fireEngineMap(q: string, options: { +export async function fireEngineMap( + q: string, + options: { tbs?: string; filter?: string; lang?: string; @@ -12,34 +16,43 @@ export async function fireEngineMap(q: string, options: { location?: string; numResults: number; page?: number; -}): Promise { - let data = JSON.stringify({ - query: q, - lang: options.lang, - country: options.country, - location: options.location, - tbs: options.tbs, - numResults: options.numResults, - page: options.page ?? 1, - }); - - if (!process.env.FIRE_ENGINE_BETA_URL) { - console.warn("(v1/map Beta) Results might differ from cloud offering currently."); - return []; } +): Promise { + try { + let data = JSON.stringify({ + query: q, + lang: options.lang, + country: options.country, + location: options.location, + tbs: options.tbs, + numResults: options.numResults, + page: options.page ?? 1, + }); - let config = { - method: "POST", - url: `${process.env.FIRE_ENGINE_BETA_URL}/search`, - headers: { - "Content-Type": "application/json", - }, - data: data, - }; - const response = await axios(config); - if (response && response) { - return response.data - } else { + if (!process.env.FIRE_ENGINE_BETA_URL) { + console.warn( + "(v1/map Beta) Results might differ from cloud offering currently." + ); + return []; + } + + let config = { + method: "POST", + url: `${process.env.FIRE_ENGINE_BETA_URL}/search`, + headers: { + "Content-Type": "application/json", + }, + data: data, + }; + const response = await axios(config); + if (response && response) { + return response.data; + } else { + return []; + } + } catch (error) { + Logger.error(error); + Sentry.captureException(error); return []; } } diff --git a/apps/api/src/services/billing/credit_billing.ts b/apps/api/src/services/billing/credit_billing.ts index ab00eab9..6a71b40a 100644 --- a/apps/api/src/services/billing/credit_billing.ts +++ b/apps/api/src/services/billing/credit_billing.ts @@ -5,7 +5,7 @@ import { supabase_service } from "../supabase"; import { Logger } from "../../lib/logger"; import { getValue, setValue } from "../redis"; import { redlock } from "../redlock"; - +import * as Sentry from "@sentry/node"; const FREE_CREDITS = 500; @@ -176,9 +176,25 @@ export async function supaCheckTeamCredits(team_id: string, credits: number) { return { success: true, message: "Preview team, no credits used", remainingCredits: Infinity }; } - // Retrieve the team's active subscription and check for available coupons concurrently - const [{ data: subscription, error: subscriptionError }, { data: coupons }] = - await Promise.all([ + + let cacheKeySubscription = `subscription_${team_id}`; + let cacheKeyCoupons = `coupons_${team_id}`; + + // Try to get data from cache first + const [cachedSubscription, cachedCoupons] = await Promise.all([ + getValue(cacheKeySubscription), + getValue(cacheKeyCoupons) + ]); + + let subscription, subscriptionError; + let coupons : {credits: number}[]; + + if (cachedSubscription && cachedCoupons) { + subscription = JSON.parse(cachedSubscription); + coupons = JSON.parse(cachedCoupons); + } else { + // If not in cache, retrieve from database + const [subscriptionResult, couponsResult] = await Promise.all([ supabase_service .from("subscriptions") .select("id, price_id, current_period_start, current_period_end") @@ -192,6 +208,16 @@ export async function supaCheckTeamCredits(team_id: string, credits: number) { .eq("status", "active"), ]); + subscription = subscriptionResult.data; + subscriptionError = subscriptionResult.error; + coupons = couponsResult.data; + + // Cache the results for a minute, sub can be null and that's fine + await setValue(cacheKeySubscription, JSON.stringify(subscription), 60); // Cache for 1 minute, even if null + await setValue(cacheKeyCoupons, JSON.stringify(coupons), 60); // Cache for 1 minute + + } + let couponCredits = 0; if (coupons && coupons.length > 0) { couponCredits = coupons.reduce( @@ -200,53 +226,67 @@ export async function supaCheckTeamCredits(team_id: string, credits: number) { ); } + + // If there are available coupons and they are enough for the operation + if (couponCredits >= credits) { + return { success: true, message: "Sufficient credits available", remainingCredits: couponCredits }; + } // Free credits, no coupons if (!subscription || subscriptionError) { - // If there is no active subscription but there are available coupons - if (couponCredits >= credits) { - return { success: true, message: "Sufficient credits available", remainingCredits: couponCredits }; - } - let creditUsages; let creditUsageError; - let retries = 0; - const maxRetries = 3; - const retryInterval = 2000; // 2 seconds + let totalCreditsUsed = 0; + const cacheKeyCreditUsage = `credit_usage_${team_id}`; - while (retries < maxRetries) { - const result = await supabase_service - .from("credit_usage") - .select("credits_used") - .is("subscription_id", null) - .eq("team_id", team_id); + // Try to get credit usage from cache + const cachedCreditUsage = await getValue(cacheKeyCreditUsage); - creditUsages = result.data; - creditUsageError = result.error; + if (cachedCreditUsage) { + totalCreditsUsed = parseInt(cachedCreditUsage); + } else { + let retries = 0; + const maxRetries = 3; + const retryInterval = 2000; // 2 seconds - if (!creditUsageError) { - break; + while (retries < maxRetries) { + // Reminder, this has an 1000 limit. + const result = await supabase_service + .from("credit_usage") + .select("credits_used") + .is("subscription_id", null) + .eq("team_id", team_id); + + creditUsages = result.data; + creditUsageError = result.error; + + if (!creditUsageError) { + break; + } + + retries++; + if (retries < maxRetries) { + await new Promise(resolve => setTimeout(resolve, retryInterval)); + } } - retries++; - if (retries < maxRetries) { - await new Promise(resolve => setTimeout(resolve, retryInterval)); + if (creditUsageError) { + Logger.error(`Credit usage error after ${maxRetries} attempts: ${creditUsageError}`); + throw new Error( + `Failed to retrieve credit usage for team_id: ${team_id}` + ); } - } - if (creditUsageError) { - Logger.error(`Credit usage error after ${maxRetries} attempts: ${creditUsageError}`); - throw new Error( - `Failed to retrieve credit usage for team_id: ${team_id}` + totalCreditsUsed = creditUsages.reduce( + (acc, usage) => acc + usage.credits_used, + 0 ); - } - const totalCreditsUsed = creditUsages.reduce( - (acc, usage) => acc + usage.credits_used, - 0 - ); + // Cache the result for 30 seconds + await setValue(cacheKeyCreditUsage, totalCreditsUsed.toString(), 30); + } Logger.info(`totalCreditsUsed: ${totalCreditsUsed}`); @@ -254,7 +294,7 @@ export async function supaCheckTeamCredits(team_id: string, credits: number) { end.setDate(end.getDate() + 30); // check if usage is within 80% of the limit const creditLimit = FREE_CREDITS; - const creditUsagePercentage = (totalCreditsUsed + credits) / creditLimit; + const creditUsagePercentage = totalCreditsUsed / creditLimit; // Add a check to ensure totalCreditsUsed is greater than 0 if (totalCreditsUsed > 0 && creditUsagePercentage >= 0.8 && creditUsagePercentage < 1) { @@ -268,7 +308,7 @@ export async function supaCheckTeamCredits(team_id: string, credits: number) { } // 5. Compare the total credits used with the credits allowed by the plan. - if (totalCreditsUsed + credits > FREE_CREDITS) { + if (totalCreditsUsed >= FREE_CREDITS) { // Send email notification for insufficient credits await sendNotification( team_id, @@ -312,7 +352,7 @@ export async function supaCheckTeamCredits(team_id: string, credits: number) { if (creditUsages && creditUsages.length > 0) { totalCreditsUsed = creditUsages[0].total_credits_used; - await setValue(cacheKey, totalCreditsUsed.toString(), 1800); // Cache for 30 minutes + await setValue(cacheKey, totalCreditsUsed.toString(), 500); // Cache for 8 minutes // Logger.info(`Cache set for credit usage: ${totalCreditsUsed}`); } } @@ -325,39 +365,62 @@ export async function supaCheckTeamCredits(team_id: string, credits: number) { // Adjust total credits used by subtracting coupon value const adjustedCreditsUsed = Math.max(0, totalCreditsUsed - couponCredits); - // Get the price details - const { data: price, error: priceError } = await supabase_service - .from("prices") - .select("credits") - .eq("id", subscription.price_id) - .single(); - if (priceError) { - throw new Error( - `Failed to retrieve price for price_id: ${subscription.price_id}` - ); + // Get the price details from cache or database + const priceCacheKey = `price_${subscription.price_id}`; + let price : {credits: number}; + + try { + const cachedPrice = await getValue(priceCacheKey); + if (cachedPrice) { + price = JSON.parse(cachedPrice); + } else { + const { data, error: priceError } = await supabase_service + .from("prices") + .select("credits") + .eq("id", subscription.price_id) + .single(); + + if (priceError) { + throw new Error( + `Failed to retrieve price for price_id: ${subscription.price_id}` + ); + } + + price = data; + // There are only 21 records, so this is super fine + // Cache the price for a long time (e.g., 1 day) + await setValue(priceCacheKey, JSON.stringify(price), 86400); + } + } catch (error) { + Logger.error(`Error retrieving or caching price: ${error}`); + Sentry.captureException(error); + // If errors, just assume it's a big number so user don't get an error + price = { credits: 10000000 }; } const creditLimit = price.credits; - const creditUsagePercentage = (adjustedCreditsUsed + credits) / creditLimit; + + // Removal of + credits + const creditUsagePercentage = adjustedCreditsUsed / creditLimit; // Compare the adjusted total credits used with the credits allowed by the plan - if (adjustedCreditsUsed + credits > price.credits) { - // await sendNotification( - // team_id, - // NotificationType.LIMIT_REACHED, - // subscription.current_period_start, - // subscription.current_period_end - // ); + if (adjustedCreditsUsed >= price.credits) { + await sendNotification( + team_id, + NotificationType.LIMIT_REACHED, + subscription.current_period_start, + subscription.current_period_end + ); return { success: false, message: "Insufficient credits, please upgrade!", remainingCredits: creditLimit - adjustedCreditsUsed }; - } else if (creditUsagePercentage >= 0.8) { + } else if (creditUsagePercentage >= 0.8 && creditUsagePercentage < 1) { // Send email notification for approaching credit limit - // await sendNotification( - // team_id, - // NotificationType.APPROACHING_LIMIT, - // subscription.current_period_start, - // subscription.current_period_end - // ); + await sendNotification( + team_id, + NotificationType.APPROACHING_LIMIT, + subscription.current_period_start, + subscription.current_period_end + ); } return { success: true, message: "Sufficient credits available", remainingCredits: creditLimit - adjustedCreditsUsed }; diff --git a/apps/api/src/services/logging/crawl_log.ts b/apps/api/src/services/logging/crawl_log.ts index f19b0297..3850e05b 100644 --- a/apps/api/src/services/logging/crawl_log.ts +++ b/apps/api/src/services/logging/crawl_log.ts @@ -1,6 +1,7 @@ import { supabase_service } from "../supabase"; import { Logger } from "../../../src/lib/logger"; -import "dotenv/config"; +import { configDotenv } from "dotenv"; +configDotenv(); export async function logCrawl(job_id: string, team_id: string) { const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === 'true'; diff --git a/apps/api/src/services/logging/log_job.ts b/apps/api/src/services/logging/log_job.ts index d4494f09..4d8ee014 100644 --- a/apps/api/src/services/logging/log_job.ts +++ b/apps/api/src/services/logging/log_job.ts @@ -4,6 +4,8 @@ import { FirecrawlJob } from "../../types"; import { posthog } from "../posthog"; import "dotenv/config"; import { Logger } from "../../lib/logger"; +import { configDotenv } from "dotenv"; +configDotenv(); export async function logJob(job: FirecrawlJob) { try { diff --git a/apps/api/src/services/logging/scrape_log.ts b/apps/api/src/services/logging/scrape_log.ts index 30d8fd1e..fbe41653 100644 --- a/apps/api/src/services/logging/scrape_log.ts +++ b/apps/api/src/services/logging/scrape_log.ts @@ -3,6 +3,8 @@ import { ScrapeLog } from "../../types"; import { supabase_service } from "../supabase"; import { PageOptions } from "../../lib/entities"; import { Logger } from "../../lib/logger"; +import { configDotenv } from "dotenv"; +configDotenv(); export async function logScrape( scrapeLog: ScrapeLog, diff --git a/apps/api/src/services/queue-jobs.ts b/apps/api/src/services/queue-jobs.ts index 941b571d..7a698772 100644 --- a/apps/api/src/services/queue-jobs.ts +++ b/apps/api/src/services/queue-jobs.ts @@ -67,6 +67,6 @@ export function waitForJob(jobId: string, timeout: number) { reject((await getScrapeQueue().getJob(jobId)).failedReason); } } - }, 1000); + }, 500); }) } diff --git a/apps/api/src/services/queue-service.ts b/apps/api/src/services/queue-service.ts index 113b3fa3..14dddebe 100644 --- a/apps/api/src/services/queue-service.ts +++ b/apps/api/src/services/queue-service.ts @@ -16,6 +16,14 @@ export function getScrapeQueue() { scrapeQueueName, { connection: redisConnection, + defaultJobOptions: { + removeOnComplete: { + age: 90000, // 25 hours + }, + removeOnFail: { + age: 90000, // 25 hours + }, + }, } // { // settings: { diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts index 6488759f..37e14baf 100644 --- a/apps/api/src/services/queue-worker.ts +++ b/apps/api/src/services/queue-worker.ts @@ -36,6 +36,8 @@ import { } from "../../src/lib/job-priority"; import { PlanType } from "../types"; import { getJobs } from "../../src/controllers/v1/crawl-status"; +import { configDotenv } from "dotenv"; +configDotenv(); if (process.env.ENV === "production") { initSDK({ @@ -446,11 +448,13 @@ async function processJob(job: Job, token: string) { } catch (error) { Logger.error(`๐Ÿ‚ Job errored ${job.id} - ${error}`); - Sentry.captureException(error, { - data: { - job: job.id, - }, - }); + if (!(error instanceof Error && error.message.includes("JSON parsing error(s): "))) { + Sentry.captureException(error, { + data: { + job: job.id, + }, + }); + } if (error instanceof CustomError) { // Here we handle the error, then save the failed job diff --git a/apps/api/src/services/rate-limiter.ts b/apps/api/src/services/rate-limiter.ts index dade8493..51a0ecfa 100644 --- a/apps/api/src/services/rate-limiter.ts +++ b/apps/api/src/services/rate-limiter.ts @@ -6,7 +6,7 @@ const RATE_LIMITS = { crawl: { default: 3, free: 2, - starter: 3, + starter: 10, standard: 5, standardOld: 40, scale: 50, @@ -19,9 +19,9 @@ const RATE_LIMITS = { scrape: { default: 20, free: 10, - starter: 20, + starter: 100, standard: 100, - standardOld: 40, + standardOld: 100, scale: 500, hobby: 20, standardNew: 100, @@ -32,8 +32,8 @@ const RATE_LIMITS = { search: { default: 20, free: 5, - starter: 20, - standard: 40, + starter: 50, + standard: 50, standardOld: 40, scale: 500, hobby: 10, @@ -45,9 +45,9 @@ const RATE_LIMITS = { map:{ default: 20, free: 5, - starter: 20, - standard: 40, - standardOld: 40, + starter: 50, + standard: 50, + standardOld: 50, scale: 500, hobby: 10, standardNew: 50, @@ -104,6 +104,13 @@ export const devBRateLimiter = new RateLimiterRedis({ duration: 60, // Duration in seconds }); +export const manualRateLimiter = new RateLimiterRedis({ + storeClient: redisRateLimitClient, + keyPrefix: "manual", + points: 2000, + duration: 60, // Duration in seconds +}); + export const scrapeStatusRateLimiter = new RateLimiterRedis({ storeClient: redisRateLimitClient, @@ -112,14 +119,18 @@ export const scrapeStatusRateLimiter = new RateLimiterRedis({ duration: 60, // Duration in seconds }); +const testSuiteTokens = ["a01ccae", "6254cf9", "0f96e673", "23befa1b", "69141c4"]; + +const manual = ["69be9e74-7624-4990-b20d-08e0acc70cf6"]; + export function getRateLimiter( mode: RateLimiterMode, token: string, plan?: string, teamId?: string ) { - - if (token.includes("a01ccae") || token.includes("6254cf9") || token.includes("0f96e673") || token.includes("23befa1b")) { + + if (testSuiteTokens.some(testToken => token.includes(testToken))) { return testSuiteRateLimiter; } @@ -127,6 +138,10 @@ export function getRateLimiter( return devBRateLimiter; } + if(teamId && manual.includes(teamId)) { + return manualRateLimiter; + } + const rateLimitConfig = RATE_LIMITS[mode]; // {default : 5} if (!rateLimitConfig) return serverRateLimiter; diff --git a/apps/api/src/services/supabase.ts b/apps/api/src/services/supabase.ts index 414d1925..7636717e 100644 --- a/apps/api/src/services/supabase.ts +++ b/apps/api/src/services/supabase.ts @@ -1,5 +1,7 @@ import { createClient, SupabaseClient } from "@supabase/supabase-js"; import { Logger } from "../lib/logger"; +import { configDotenv } from "dotenv"; +configDotenv(); // SupabaseService class initializes the Supabase client conditionally based on environment variables. class SupabaseService { diff --git a/apps/api/src/services/webhook.ts b/apps/api/src/services/webhook.ts index 56dd5c58..06e5649d 100644 --- a/apps/api/src/services/webhook.ts +++ b/apps/api/src/services/webhook.ts @@ -3,6 +3,8 @@ import { legacyDocumentConverter } from "../../src/controllers/v1/types"; import { Logger } from "../../src/lib/logger"; import { supabase_service } from "./supabase"; import { WebhookEventType } from "../types"; +import { configDotenv } from "dotenv"; +configDotenv(); export const callWebhook = async ( teamId: string, diff --git a/apps/api/v1-openapi.json b/apps/api/v1-openapi.json new file mode 100644 index 00000000..1ff0fb9b --- /dev/null +++ b/apps/api/v1-openapi.json @@ -0,0 +1,823 @@ +{ + "openapi": "3.0.0", + "info": { + "title": "Firecrawl API", + "version": "v1", + "description": "API for interacting with Firecrawl services to perform web scraping and crawling tasks.", + "contact": { + "name": "Firecrawl Support", + "url": "https://firecrawl.dev", + "email": "support@firecrawl.dev" + } + }, + "servers": [ + { + "url": "https://api.firecrawl.dev/v1" + } + ], + "paths": { + "/scrape": { + "post": { + "summary": "Scrape a single URL and optionally extract information using an LLM", + "operationId": "scrapeAndExtractFromUrl", + "tags": ["Scraping"], + "security": [ + { + "bearerAuth": [] + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "url": { + "type": "string", + "format": "uri", + "description": "The URL to scrape" + }, + "formats": { + "type": "array", + "items": { + "type": "string", + "enum": ["markdown", "html", "rawHtml", "links", "screenshot", "extract", "screenshot@fullPage"] + }, + "description": "Formats to include in the output.", + "default": ["markdown"] + }, + "onlyMainContent": { + "type": "boolean", + "description": "Only return the main content of the page excluding headers, navs, footers, etc.", + "default": true + }, + "includeTags": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Tags to include in the output." + }, + "excludeTags": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Tags to exclude from the output." + }, + "headers": { + "type": "object", + "description": "Headers to send with the request. Can be used to send cookies, user-agent, etc." + }, + "waitFor": { + "type": "integer", + "description": "Specify a delay in milliseconds before fetching the content, allowing the page sufficient time to load.", + "default": 0 + }, + "timeout": { + "type": "integer", + "description": "Timeout in milliseconds for the request", + "default": 30000 + }, + "extract": { + "type": "object", + "description": "Extract object", + "properties": { + "schema": { + "type": "object", + "description": "The schema to use for the extraction (Optional)" + }, + "systemPrompt": { + "type": "string", + "description": "The system prompt to use for the extraction (Optional)" + }, + "prompt": { + "type": "string", + "description": "The prompt to use for the extraction without a schema (Optional)" + } + } + } + }, + "required": ["url"] + } + } + } + }, + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ScrapeResponse" + } + } + } + }, + "402": { + "description": "Payment required", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "example": "Payment required to access this resource." + } + } + } + } + } + }, + "429": { + "description": "Too many requests", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "example": "Request rate limit exceeded. Please wait and try again later." + } + } + } + } + } + }, + "500": { + "description": "Server error", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "example": "An unexpected error occurred on the server." + } + } + } + } + } + } + } + } + }, + "/crawl/{id}": { + "parameters": [ + { + "name": "id", + "in": "path", + "description": "The ID of the crawl job", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + } + } + ], + "get": { + "summary": "Get the status of a crawl job", + "operationId": "getCrawlStatus", + "tags": ["Crawling"], + "security": [ + { + "bearerAuth": [] + } + ], + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CrawlStatusResponseObj" + } + } + } + }, + "402": { + "description": "Payment required", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "example": "Payment required to access this resource." + } + } + } + } + } + }, + "429": { + "description": "Too many requests", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "example": "Request rate limit exceeded. Please wait and try again later." + } + } + } + } + } + }, + "500": { + "description": "Server error", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "example": "An unexpected error occurred on the server." + } + } + } + } + } + } + } + }, + "delete": { + "summary": "Cancel a crawl job", + "operationId": "cancelCrawl", + "tags": ["Crawling"], + "security": [ + { + "bearerAuth": [] + } + ], + "responses": { + "200": { + "description": "Successful cancellation", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "success": { + "type": "boolean", + "example": true + }, + "message": { + "type": "string", + "example": "Crawl job successfully cancelled." + } + } + } + } + } + }, + "404": { + "description": "Crawl job not found", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "example": "Crawl job not found." + } + } + } + } + } + }, + "500": { + "description": "Server error", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "example": "An unexpected error occurred on the server." + } + } + } + } + } + } + } + } + }, + "/crawl": { + "post": { + "summary": "Crawl multiple URLs based on options", + "operationId": "crawlUrls", + "tags": ["Crawling"], + "security": [ + { + "bearerAuth": [] + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "url": { + "type": "string", + "format": "uri", + "description": "The base URL to start crawling from" + }, + "excludePaths": { + "type": "array", + "items": { + "type": "string" + }, + "description": "URL patterns to exclude" + }, + "includePaths": { + "type": "array", + "items": { + "type": "string" + }, + "description": "URL patterns to include" + }, + "maxDepth": { + "type": "integer", + "description": "Maximum depth to crawl relative to the entered URL.", + "default": 2 + }, + "ignoreSitemap": { + "type": "boolean", + "description": "Ignore the website sitemap when crawling", + "default": true + }, + "limit": { + "type": "integer", + "description": "Maximum number of pages to crawl", + "default": 10 + }, + "allowBackwardLinks": { + "type": "boolean", + "description": "Enables the crawler to navigate from a specific URL to previously linked pages.", + "default": false + }, + "allowExternalLinks": { + "type": "boolean", + "description": "Allows the crawler to follow links to external websites.", + "default": false + }, + "webhook": { + "type": "string", + "description": "The URL to send the webhook to. This will trigger for crawl started (crawl.started) ,every page crawled (crawl.page) and when the crawl is completed (crawl.completed or crawl.failed). The response will be the same as the `/scrape` endpoint." + }, + "scrapeOptions": { + "type": "object", + "properties": { + "formats": { + "type": "array", + "items": { + "type": "string", + "enum": ["markdown", "html", "rawHtml", "links", "screenshot"] + }, + "description": "Formats to include in the output.", + "default": ["markdown"] + }, + "headers": { + "type": "object", + "description": "Headers to send with the request. Can be used to send cookies, user-agent, etc." + }, + "includeTags": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Tags to include in the output." + }, + "excludeTags": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Tags to exclude from the output." + }, + "onlyMainContent": { + "type": "boolean", + "description": "Only return the main content of the page excluding headers, navs, footers, etc.", + "default": true + }, + "waitFor": { + "type": "integer", + "description": "Wait x amount of milliseconds for the page to load to fetch content", + "default": 123 + } + } + } + }, + "required": ["url"] + } + } + } + }, + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CrawlResponse" + } + } + } + }, + "402": { + "description": "Payment required", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "example": "Payment required to access this resource." + } + } + } + } + } + }, + "429": { + "description": "Too many requests", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "example": "Request rate limit exceeded. Please wait and try again later." + } + } + } + } + } + }, + "500": { + "description": "Server error", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "example": "An unexpected error occurred on the server." + } + } + } + } + } + } + } + } + }, + "/map": { + "post": { + "summary": "Map multiple URLs based on options", + "operationId": "mapUrls", + "tags": ["Mapping"], + "security": [ + { + "bearerAuth": [] + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "url": { + "type": "string", + "format": "uri", + "description": "The base URL to start crawling from" + }, + "search": { + "type": "string", + "description": "Search query to use for mapping. During the Alpha phase, the 'smart' part of the search functionality is limited to 100 search results. However, if map finds more results, there is no limit applied." + }, + "ignoreSitemap": { + "type": "boolean", + "description": "Ignore the website sitemap when crawling", + "default": true + }, + "includeSubdomains": { + "type": "boolean", + "description": "Include subdomains of the website", + "default": false + }, + "limit": { + "type": "integer", + "description": "Maximum number of links to return", + "default": 5000, + "maximum": 5000 + } + }, + "required": ["url"] + } + } + } + }, + "responses": { + "200": { + "description": "Successful response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MapResponse" + } + } + } + }, + "402": { + "description": "Payment required", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "example": "Payment required to access this resource." + } + } + } + } + } + }, + "429": { + "description": "Too many requests", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "example": "Request rate limit exceeded. Please wait and try again later." + } + } + } + } + } + }, + "500": { + "description": "Server error", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "example": "An unexpected error occurred on the server." + } + } + } + } + } + } + } + } + } + }, + "components": { + "securitySchemes": { + "bearerAuth": { + "type": "http", + "scheme": "bearer" + } + }, + "schemas": { + "ScrapeResponse": { + "type": "object", + "properties": { + "success": { + "type": "boolean" + }, + "data": { + "type": "object", + "properties": { + "markdown": { + "type": "string" + }, + "html": { + "type": "string", + "nullable": true, + "description": "HTML version of the content on page if `html` is in `formats`" + }, + "rawHtml": { + "type": "string", + "nullable": true, + "description": "Raw HTML content of the page if `rawHtml` is in `formats`" + }, + "screenshot": { + "type": "string", + "nullable": true, + "description": "Screenshot of the page if `screenshot` is in `formats`" + }, + "links": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of links on the page if `links` is in `formats`" + }, + "metadata": { + "type": "object", + "properties": { + "title": { + "type": "string" + }, + "description": { + "type": "string" + }, + "language": { + "type": "string", + "nullable": true + }, + "sourceURL": { + "type": "string", + "format": "uri" + }, + " ": { + "type": "string" + }, + "statusCode": { + "type": "integer", + "description": "The status code of the page" + }, + "error": { + "type": "string", + "nullable": true, + "description": "The error message of the page" + } + + } + }, + "llm_extraction": { + "type": "object", + "description": "Displayed when using LLM Extraction. Extracted data from the page following the schema defined.", + "nullable": true + }, + "warning": { + "type": "string", + "nullable": true, + "description": "Can be displayed when using LLM Extraction. Warning message will let you know any issues with the extraction." + } + } + } + } + }, + "CrawlStatusResponseObj": { + "type": "object", + "properties": { + "status": { + "type": "string", + "description": "The current status of the crawl. Can be `scraping`, `completed`, or `failed`." + }, + "total": { + "type": "integer", + "description": "The total number of pages that were attempted to be crawled." + }, + "completed": { + "type": "integer", + "description": "The number of pages that have been successfully crawled." + }, + "creditsUsed": { + "type": "integer", + "description": "The number of credits used for the crawl." + }, + "expiresAt": { + "type": "string", + "format": "date-time", + "description": "The date and time when the crawl will expire." + }, + "next": { + "type": "string", + "nullable": true, + "description": "The URL to retrieve the next 10MB of data. Returned if the crawl is not completed or if the response is larger than 10MB." + }, + "data": { + "type": "array", + "description": "The data of the crawl.", + "items": { + "type": "object", + "properties": { + "markdown": { + "type": "string" + }, + "html": { + "type": "string", + "nullable": true, + "description": "HTML version of the content on page if `includeHtml` is true" + }, + "rawHtml": { + "type": "string", + "nullable": true, + "description": "Raw HTML content of the page if `includeRawHtml` is true" + }, + "links": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of links on the page if `includeLinks` is true" + }, + "screenshot": { + "type": "string", + "nullable": true, + "description": "Screenshot of the page if `includeScreenshot` is true" + }, + "metadata": { + "type": "object", + "properties": { + "title": { + "type": "string" + }, + "description": { + "type": "string" + }, + "language": { + "type": "string", + "nullable": true + }, + "sourceURL": { + "type": "string", + "format": "uri" + }, + " ": { + "type": "string" + }, + "statusCode": { + "type": "integer", + "description": "The status code of the page" + }, + "error": { + "type": "string", + "nullable": true, + "description": "The error message of the page" + } + } + } + } + } + } + } + }, + "CrawlResponse": { + "type": "object", + "properties": { + "success": { + "type": "boolean" + }, + "id": { + "type": "string" + }, + "url": { + "type": "string", + "format": "uri" + } + } + }, + "MapResponse": { + "type": "object", + "properties": { + "success": { + "type": "boolean" + }, + "links": { + "type": "array", + "items": { + "type": "string" + } + } + } + } + } + }, + "security": [ + { + "bearerAuth": [] + } + ] +} \ No newline at end of file diff --git a/apps/js-sdk/example.js b/apps/js-sdk/example.js index eb4bc489..c4b21d5f 100644 --- a/apps/js-sdk/example.js +++ b/apps/js-sdk/example.js @@ -1,4 +1,4 @@ -import FirecrawlApp from '@mendable/firecrawl-js'; +import FirecrawlApp from 'firecrawl'; const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY"}); diff --git a/apps/js-sdk/example.ts b/apps/js-sdk/example.ts index 4142416f..7412e479 100644 --- a/apps/js-sdk/example.ts +++ b/apps/js-sdk/example.ts @@ -1,4 +1,4 @@ -import FirecrawlApp, { CrawlStatusResponse, ErrorResponse } from '@mendable/firecrawl-js'; +import FirecrawlApp, { CrawlStatusResponse, ErrorResponse } from 'firecrawl'; const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY"}); diff --git a/apps/js-sdk/firecrawl/build/cjs/index.js b/apps/js-sdk/firecrawl/build/cjs/index.js deleted file mode 100644 index 2908b09d..00000000 --- a/apps/js-sdk/firecrawl/build/cjs/index.js +++ /dev/null @@ -1,347 +0,0 @@ -"use strict"; -var __importDefault = (this && this.__importDefault) || function (mod) { - return (mod && mod.__esModule) ? mod : { "default": mod }; -}; -Object.defineProperty(exports, "__esModule", { value: true }); -exports.CrawlWatcher = void 0; -const axios_1 = __importDefault(require("axios")); -const zod_to_json_schema_1 = require("zod-to-json-schema"); -const isows_1 = require("isows"); -const typescript_event_target_1 = require("typescript-event-target"); -/** - * Main class for interacting with the Firecrawl API. - * Provides methods for scraping, searching, crawling, and mapping web content. - */ -class FirecrawlApp { - /** - * Initializes a new instance of the FirecrawlApp class. - * @param config - Configuration options for the FirecrawlApp instance. - */ - constructor({ apiKey = null, apiUrl = null }) { - this.apiKey = apiKey || ""; - this.apiUrl = apiUrl || "https://api.firecrawl.dev"; - } - /** - * Scrapes a URL using the Firecrawl API. - * @param url - The URL to scrape. - * @param params - Additional parameters for the scrape request. - * @returns The response from the scrape operation. - */ - async scrapeUrl(url, params) { - const headers = { - "Content-Type": "application/json", - Authorization: `Bearer ${this.apiKey}`, - }; - let jsonData = { url, ...params }; - if (jsonData?.extract?.schema) { - let schema = jsonData.extract.schema; - // Try parsing the schema as a Zod schema - try { - schema = (0, zod_to_json_schema_1.zodToJsonSchema)(schema); - } - catch (error) { - } - jsonData = { - ...jsonData, - extract: { - ...jsonData.extract, - schema: schema, - }, - }; - } - try { - const response = await axios_1.default.post(this.apiUrl + `/v1/scrape`, jsonData, { headers }); - if (response.status === 200) { - const responseData = response.data; - if (responseData.success) { - return { - success: true, - warning: responseData.warning, - error: responseData.error, - ...responseData.data - }; - } - else { - throw new Error(`Failed to scrape URL. Error: ${responseData.error}`); - } - } - else { - this.handleError(response, "scrape URL"); - } - } - catch (error) { - throw new Error(error.message); - } - return { success: false, error: "Internal server error." }; - } - /** - * This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API. - * @param query - The search query string. - * @param params - Additional parameters for the search. - * @returns Throws an error advising to use version 0 of the API. - */ - async search(query, params) { - throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0."); - } - /** - * Initiates a crawl job for a URL using the Firecrawl API. - * @param url - The URL to crawl. - * @param params - Additional parameters for the crawl request. - * @param pollInterval - Time in seconds for job status checks. - * @param idempotencyKey - Optional idempotency key for the request. - * @returns The response from the crawl operation. - */ - async crawlUrl(url, params, pollInterval = 2, idempotencyKey) { - const headers = this.prepareHeaders(idempotencyKey); - let jsonData = { url, ...params }; - try { - const response = await this.postRequest(this.apiUrl + `/v1/crawl`, jsonData, headers); - if (response.status === 200) { - const id = response.data.id; - return this.monitorJobStatus(id, headers, pollInterval); - } - else { - this.handleError(response, "start crawl job"); - } - } - catch (error) { - if (error.response?.data?.error) { - throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`); - } - else { - throw new Error(error.message); - } - } - return { success: false, error: "Internal server error." }; - } - async asyncCrawlUrl(url, params, idempotencyKey) { - const headers = this.prepareHeaders(idempotencyKey); - let jsonData = { url, ...params }; - try { - const response = await this.postRequest(this.apiUrl + `/v1/crawl`, jsonData, headers); - if (response.status === 200) { - return response.data; - } - else { - this.handleError(response, "start crawl job"); - } - } - catch (error) { - if (error.response?.data?.error) { - throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`); - } - else { - throw new Error(error.message); - } - } - return { success: false, error: "Internal server error." }; - } - /** - * Checks the status of a crawl job using the Firecrawl API. - * @param id - The ID of the crawl operation. - * @returns The response containing the job status. - */ - async checkCrawlStatus(id) { - if (!id) { - throw new Error("No crawl ID provided"); - } - const headers = this.prepareHeaders(); - try { - const response = await this.getRequest(`${this.apiUrl}/v1/crawl/${id}`, headers); - if (response.status === 200) { - return ({ - success: true, - status: response.data.status, - total: response.data.total, - completed: response.data.completed, - creditsUsed: response.data.creditsUsed, - expiresAt: new Date(response.data.expiresAt), - next: response.data.next, - data: response.data.data, - error: response.data.error - }); - } - else { - this.handleError(response, "check crawl status"); - } - } - catch (error) { - throw new Error(error.message); - } - return { success: false, error: "Internal server error." }; - } - async crawlUrlAndWatch(url, params, idempotencyKey) { - const crawl = await this.asyncCrawlUrl(url, params, idempotencyKey); - if (crawl.success && crawl.id) { - const id = crawl.id; - return new CrawlWatcher(id, this); - } - throw new Error("Crawl job failed to start"); - } - async mapUrl(url, params) { - const headers = this.prepareHeaders(); - let jsonData = { url, ...params }; - try { - const response = await this.postRequest(this.apiUrl + `/v1/map`, jsonData, headers); - if (response.status === 200) { - return response.data; - } - else { - this.handleError(response, "map"); - } - } - catch (error) { - throw new Error(error.message); - } - return { success: false, error: "Internal server error." }; - } - /** - * Prepares the headers for an API request. - * @param idempotencyKey - Optional key to ensure idempotency. - * @returns The prepared headers. - */ - prepareHeaders(idempotencyKey) { - return { - "Content-Type": "application/json", - Authorization: `Bearer ${this.apiKey}`, - ...(idempotencyKey ? { "x-idempotency-key": idempotencyKey } : {}), - }; - } - /** - * Sends a POST request to the specified URL. - * @param url - The URL to send the request to. - * @param data - The data to send in the request. - * @param headers - The headers for the request. - * @returns The response from the POST request. - */ - postRequest(url, data, headers) { - return axios_1.default.post(url, data, { headers }); - } - /** - * Sends a GET request to the specified URL. - * @param url - The URL to send the request to. - * @param headers - The headers for the request. - * @returns The response from the GET request. - */ - getRequest(url, headers) { - return axios_1.default.get(url, { headers }); - } - /** - * Monitors the status of a crawl job until completion or failure. - * @param id - The ID of the crawl operation. - * @param headers - The headers for the request. - * @param checkInterval - Interval in seconds for job status checks. - * @param checkUrl - Optional URL to check the status (used for v1 API) - * @returns The final job status or data. - */ - async monitorJobStatus(id, headers, checkInterval) { - while (true) { - const statusResponse = await this.getRequest(`${this.apiUrl}/v1/crawl/${id}`, headers); - if (statusResponse.status === 200) { - const statusData = statusResponse.data; - if (statusData.status === "completed") { - if ("data" in statusData) { - return statusData; - } - else { - throw new Error("Crawl job completed but no data was returned"); - } - } - else if (["active", "paused", "pending", "queued", "scraping"].includes(statusData.status)) { - checkInterval = Math.max(checkInterval, 2); - await new Promise((resolve) => setTimeout(resolve, checkInterval * 1000)); - } - else { - throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`); - } - } - else { - this.handleError(statusResponse, "check crawl status"); - } - } - } - /** - * Handles errors from API responses. - * @param {AxiosResponse} response - The response from the API. - * @param {string} action - The action being performed when the error occurred. - */ - handleError(response, action) { - if ([402, 408, 409, 500].includes(response.status)) { - const errorMessage = response.data.error || "Unknown error occurred"; - throw new Error(`Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`); - } - else { - throw new Error(`Unexpected error occurred while trying to ${action}. Status code: ${response.status}`); - } - } -} -exports.default = FirecrawlApp; -class CrawlWatcher extends typescript_event_target_1.TypedEventTarget { - constructor(id, app) { - super(); - this.ws = new isows_1.WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey); - this.status = "scraping"; - this.data = []; - const messageHandler = (msg) => { - if (msg.type === "done") { - this.status = "completed"; - this.dispatchTypedEvent("done", new CustomEvent("done", { - detail: { - status: this.status, - data: this.data, - }, - })); - } - else if (msg.type === "error") { - this.status = "failed"; - this.dispatchTypedEvent("error", new CustomEvent("error", { - detail: { - status: this.status, - data: this.data, - error: msg.error, - }, - })); - } - else if (msg.type === "catchup") { - this.status = msg.data.status; - this.data.push(...(msg.data.data ?? [])); - for (const doc of this.data) { - this.dispatchTypedEvent("document", new CustomEvent("document", { - detail: doc, - })); - } - } - else if (msg.type === "document") { - this.dispatchTypedEvent("document", new CustomEvent("document", { - detail: msg.data, - })); - } - }; - this.ws.onmessage = ((ev) => { - if (typeof ev.data !== "string") { - this.ws.close(); - return; - } - const msg = JSON.parse(ev.data); - messageHandler(msg); - }).bind(this); - this.ws.onclose = ((ev) => { - const msg = JSON.parse(ev.reason); - messageHandler(msg); - }).bind(this); - this.ws.onerror = ((_) => { - this.status = "failed"; - this.dispatchTypedEvent("error", new CustomEvent("error", { - detail: { - status: this.status, - data: this.data, - error: "WebSocket error", - }, - })); - }).bind(this); - } - close() { - this.ws.close(); - } -} -exports.CrawlWatcher = CrawlWatcher; diff --git a/apps/js-sdk/firecrawl/build/cjs/package.json b/apps/js-sdk/firecrawl/build/cjs/package.json deleted file mode 100644 index b731bd61..00000000 --- a/apps/js-sdk/firecrawl/build/cjs/package.json +++ /dev/null @@ -1 +0,0 @@ -{"type": "commonjs"} diff --git a/apps/js-sdk/firecrawl/build/esm/index.js b/apps/js-sdk/firecrawl/build/esm/index.js deleted file mode 100644 index 4245cc37..00000000 --- a/apps/js-sdk/firecrawl/build/esm/index.js +++ /dev/null @@ -1,339 +0,0 @@ -import axios from "axios"; -import { zodToJsonSchema } from "zod-to-json-schema"; -import { WebSocket } from "isows"; -import { TypedEventTarget } from "typescript-event-target"; -/** - * Main class for interacting with the Firecrawl API. - * Provides methods for scraping, searching, crawling, and mapping web content. - */ -export default class FirecrawlApp { - /** - * Initializes a new instance of the FirecrawlApp class. - * @param config - Configuration options for the FirecrawlApp instance. - */ - constructor({ apiKey = null, apiUrl = null }) { - this.apiKey = apiKey || ""; - this.apiUrl = apiUrl || "https://api.firecrawl.dev"; - } - /** - * Scrapes a URL using the Firecrawl API. - * @param url - The URL to scrape. - * @param params - Additional parameters for the scrape request. - * @returns The response from the scrape operation. - */ - async scrapeUrl(url, params) { - const headers = { - "Content-Type": "application/json", - Authorization: `Bearer ${this.apiKey}`, - }; - let jsonData = { url, ...params }; - if (jsonData?.extract?.schema) { - let schema = jsonData.extract.schema; - // Try parsing the schema as a Zod schema - try { - schema = zodToJsonSchema(schema); - } - catch (error) { - } - jsonData = { - ...jsonData, - extract: { - ...jsonData.extract, - schema: schema, - }, - }; - } - try { - const response = await axios.post(this.apiUrl + `/v1/scrape`, jsonData, { headers }); - if (response.status === 200) { - const responseData = response.data; - if (responseData.success) { - return { - success: true, - warning: responseData.warning, - error: responseData.error, - ...responseData.data - }; - } - else { - throw new Error(`Failed to scrape URL. Error: ${responseData.error}`); - } - } - else { - this.handleError(response, "scrape URL"); - } - } - catch (error) { - throw new Error(error.message); - } - return { success: false, error: "Internal server error." }; - } - /** - * This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API. - * @param query - The search query string. - * @param params - Additional parameters for the search. - * @returns Throws an error advising to use version 0 of the API. - */ - async search(query, params) { - throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0."); - } - /** - * Initiates a crawl job for a URL using the Firecrawl API. - * @param url - The URL to crawl. - * @param params - Additional parameters for the crawl request. - * @param pollInterval - Time in seconds for job status checks. - * @param idempotencyKey - Optional idempotency key for the request. - * @returns The response from the crawl operation. - */ - async crawlUrl(url, params, pollInterval = 2, idempotencyKey) { - const headers = this.prepareHeaders(idempotencyKey); - let jsonData = { url, ...params }; - try { - const response = await this.postRequest(this.apiUrl + `/v1/crawl`, jsonData, headers); - if (response.status === 200) { - const id = response.data.id; - return this.monitorJobStatus(id, headers, pollInterval); - } - else { - this.handleError(response, "start crawl job"); - } - } - catch (error) { - if (error.response?.data?.error) { - throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`); - } - else { - throw new Error(error.message); - } - } - return { success: false, error: "Internal server error." }; - } - async asyncCrawlUrl(url, params, idempotencyKey) { - const headers = this.prepareHeaders(idempotencyKey); - let jsonData = { url, ...params }; - try { - const response = await this.postRequest(this.apiUrl + `/v1/crawl`, jsonData, headers); - if (response.status === 200) { - return response.data; - } - else { - this.handleError(response, "start crawl job"); - } - } - catch (error) { - if (error.response?.data?.error) { - throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`); - } - else { - throw new Error(error.message); - } - } - return { success: false, error: "Internal server error." }; - } - /** - * Checks the status of a crawl job using the Firecrawl API. - * @param id - The ID of the crawl operation. - * @returns The response containing the job status. - */ - async checkCrawlStatus(id) { - if (!id) { - throw new Error("No crawl ID provided"); - } - const headers = this.prepareHeaders(); - try { - const response = await this.getRequest(`${this.apiUrl}/v1/crawl/${id}`, headers); - if (response.status === 200) { - return ({ - success: true, - status: response.data.status, - total: response.data.total, - completed: response.data.completed, - creditsUsed: response.data.creditsUsed, - expiresAt: new Date(response.data.expiresAt), - next: response.data.next, - data: response.data.data, - error: response.data.error - }); - } - else { - this.handleError(response, "check crawl status"); - } - } - catch (error) { - throw new Error(error.message); - } - return { success: false, error: "Internal server error." }; - } - async crawlUrlAndWatch(url, params, idempotencyKey) { - const crawl = await this.asyncCrawlUrl(url, params, idempotencyKey); - if (crawl.success && crawl.id) { - const id = crawl.id; - return new CrawlWatcher(id, this); - } - throw new Error("Crawl job failed to start"); - } - async mapUrl(url, params) { - const headers = this.prepareHeaders(); - let jsonData = { url, ...params }; - try { - const response = await this.postRequest(this.apiUrl + `/v1/map`, jsonData, headers); - if (response.status === 200) { - return response.data; - } - else { - this.handleError(response, "map"); - } - } - catch (error) { - throw new Error(error.message); - } - return { success: false, error: "Internal server error." }; - } - /** - * Prepares the headers for an API request. - * @param idempotencyKey - Optional key to ensure idempotency. - * @returns The prepared headers. - */ - prepareHeaders(idempotencyKey) { - return { - "Content-Type": "application/json", - Authorization: `Bearer ${this.apiKey}`, - ...(idempotencyKey ? { "x-idempotency-key": idempotencyKey } : {}), - }; - } - /** - * Sends a POST request to the specified URL. - * @param url - The URL to send the request to. - * @param data - The data to send in the request. - * @param headers - The headers for the request. - * @returns The response from the POST request. - */ - postRequest(url, data, headers) { - return axios.post(url, data, { headers }); - } - /** - * Sends a GET request to the specified URL. - * @param url - The URL to send the request to. - * @param headers - The headers for the request. - * @returns The response from the GET request. - */ - getRequest(url, headers) { - return axios.get(url, { headers }); - } - /** - * Monitors the status of a crawl job until completion or failure. - * @param id - The ID of the crawl operation. - * @param headers - The headers for the request. - * @param checkInterval - Interval in seconds for job status checks. - * @param checkUrl - Optional URL to check the status (used for v1 API) - * @returns The final job status or data. - */ - async monitorJobStatus(id, headers, checkInterval) { - while (true) { - const statusResponse = await this.getRequest(`${this.apiUrl}/v1/crawl/${id}`, headers); - if (statusResponse.status === 200) { - const statusData = statusResponse.data; - if (statusData.status === "completed") { - if ("data" in statusData) { - return statusData; - } - else { - throw new Error("Crawl job completed but no data was returned"); - } - } - else if (["active", "paused", "pending", "queued", "scraping"].includes(statusData.status)) { - checkInterval = Math.max(checkInterval, 2); - await new Promise((resolve) => setTimeout(resolve, checkInterval * 1000)); - } - else { - throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`); - } - } - else { - this.handleError(statusResponse, "check crawl status"); - } - } - } - /** - * Handles errors from API responses. - * @param {AxiosResponse} response - The response from the API. - * @param {string} action - The action being performed when the error occurred. - */ - handleError(response, action) { - if ([402, 408, 409, 500].includes(response.status)) { - const errorMessage = response.data.error || "Unknown error occurred"; - throw new Error(`Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`); - } - else { - throw new Error(`Unexpected error occurred while trying to ${action}. Status code: ${response.status}`); - } - } -} -export class CrawlWatcher extends TypedEventTarget { - constructor(id, app) { - super(); - this.ws = new WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey); - this.status = "scraping"; - this.data = []; - const messageHandler = (msg) => { - if (msg.type === "done") { - this.status = "completed"; - this.dispatchTypedEvent("done", new CustomEvent("done", { - detail: { - status: this.status, - data: this.data, - }, - })); - } - else if (msg.type === "error") { - this.status = "failed"; - this.dispatchTypedEvent("error", new CustomEvent("error", { - detail: { - status: this.status, - data: this.data, - error: msg.error, - }, - })); - } - else if (msg.type === "catchup") { - this.status = msg.data.status; - this.data.push(...(msg.data.data ?? [])); - for (const doc of this.data) { - this.dispatchTypedEvent("document", new CustomEvent("document", { - detail: doc, - })); - } - } - else if (msg.type === "document") { - this.dispatchTypedEvent("document", new CustomEvent("document", { - detail: msg.data, - })); - } - }; - this.ws.onmessage = ((ev) => { - if (typeof ev.data !== "string") { - this.ws.close(); - return; - } - const msg = JSON.parse(ev.data); - messageHandler(msg); - }).bind(this); - this.ws.onclose = ((ev) => { - const msg = JSON.parse(ev.reason); - messageHandler(msg); - }).bind(this); - this.ws.onerror = ((_) => { - this.status = "failed"; - this.dispatchTypedEvent("error", new CustomEvent("error", { - detail: { - status: this.status, - data: this.data, - error: "WebSocket error", - }, - })); - }).bind(this); - } - close() { - this.ws.close(); - } -} diff --git a/apps/js-sdk/firecrawl/build/esm/package.json b/apps/js-sdk/firecrawl/build/esm/package.json deleted file mode 100644 index 6990891f..00000000 --- a/apps/js-sdk/firecrawl/build/esm/package.json +++ /dev/null @@ -1 +0,0 @@ -{"type": "module"} diff --git a/apps/js-sdk/firecrawl/package-lock.json b/apps/js-sdk/firecrawl/package-lock.json index ce6a1a4a..81a4a146 100644 --- a/apps/js-sdk/firecrawl/package-lock.json +++ b/apps/js-sdk/firecrawl/package-lock.json @@ -1,19 +1,17 @@ { "name": "@mendable/firecrawl-js", - "version": "1.1.0", + "version": "1.4.4", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@mendable/firecrawl-js", - "version": "1.1.0", + "version": "1.4.4", "license": "MIT", "dependencies": { "axios": "^1.6.8", - "dotenv": "^16.4.5", "isows": "^1.0.4", "typescript-event-target": "^1.1.1", - "uuid": "^9.0.1", "zod": "^3.23.8", "zod-to-json-schema": "^3.23.0" }, @@ -25,9 +23,12 @@ "@types/mocha": "^10.0.6", "@types/node": "^20.12.12", "@types/uuid": "^9.0.8", + "dotenv": "^16.4.5", "jest": "^29.7.0", "ts-jest": "^29.2.2", - "typescript": "^5.4.5" + "tsup": "^8.2.4", + "typescript": "^5.4.5", + "uuid": "^9.0.1" } }, "node_modules/@ampproject/remapping": { @@ -600,6 +601,486 @@ "integrity": "sha512-0hYQ8SB4Db5zvZB4axdMHGwEaQjkZzFjQiN9LVYvIFB2nSUHW9tYpxWriPrWDASIxiaXax83REcLxuSdnGPZtw==", "dev": true }, + "node_modules/@esbuild/aix-ppc64": { + "version": "0.23.1", + "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.23.1.tgz", + "integrity": "sha512-6VhYk1diRqrhBAqpJEdjASR/+WVRtfjpqKuNw11cLiaWpAT/Uu+nokB+UJnevzy/P9C/ty6AOe0dwueMrGh/iQ==", + "cpu": [ + "ppc64" + ], + "dev": true, + "optional": true, + "os": [ + "aix" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/android-arm": { + "version": "0.23.1", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.23.1.tgz", + "integrity": "sha512-uz6/tEy2IFm9RYOyvKl88zdzZfwEfKZmnX9Cj1BHjeSGNuGLuMD1kR8y5bteYmwqKm1tj8m4cb/aKEorr6fHWQ==", + "cpu": [ + "arm" + ], + "dev": true, + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/android-arm64": { + "version": "0.23.1", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.23.1.tgz", + "integrity": "sha512-xw50ipykXcLstLeWH7WRdQuysJqejuAGPd30vd1i5zSyKK3WE+ijzHmLKxdiCMtH1pHz78rOg0BKSYOSB/2Khw==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/android-x64": { + "version": "0.23.1", + "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.23.1.tgz", + "integrity": "sha512-nlN9B69St9BwUoB+jkyU090bru8L0NA3yFvAd7k8dNsVH8bi9a8cUAUSEcEEgTp2z3dbEDGJGfP6VUnkQnlReg==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/darwin-arm64": { + "version": "0.23.1", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.23.1.tgz", + "integrity": "sha512-YsS2e3Wtgnw7Wq53XXBLcV6JhRsEq8hkfg91ESVadIrzr9wO6jJDMZnCQbHm1Guc5t/CdDiFSSfWP58FNuvT3Q==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/darwin-x64": { + "version": "0.23.1", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.23.1.tgz", + "integrity": "sha512-aClqdgTDVPSEGgoCS8QDG37Gu8yc9lTHNAQlsztQ6ENetKEO//b8y31MMu2ZaPbn4kVsIABzVLXYLhCGekGDqw==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/freebsd-arm64": { + "version": "0.23.1", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.23.1.tgz", + "integrity": "sha512-h1k6yS8/pN/NHlMl5+v4XPfikhJulk4G+tKGFIOwURBSFzE8bixw1ebjluLOjfwtLqY0kewfjLSrO6tN2MgIhA==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/freebsd-x64": { + "version": "0.23.1", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.23.1.tgz", + "integrity": "sha512-lK1eJeyk1ZX8UklqFd/3A60UuZ/6UVfGT2LuGo3Wp4/z7eRTRYY+0xOu2kpClP+vMTi9wKOfXi2vjUpO1Ro76g==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-arm": { + "version": "0.23.1", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.23.1.tgz", + "integrity": "sha512-CXXkzgn+dXAPs3WBwE+Kvnrf4WECwBdfjfeYHpMeVxWE0EceB6vhWGShs6wi0IYEqMSIzdOF1XjQ/Mkm5d7ZdQ==", + "cpu": [ + "arm" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-arm64": { + "version": "0.23.1", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.23.1.tgz", + "integrity": "sha512-/93bf2yxencYDnItMYV/v116zff6UyTjo4EtEQjUBeGiVpMmffDNUyD9UN2zV+V3LRV3/on4xdZ26NKzn6754g==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-ia32": { + "version": "0.23.1", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.23.1.tgz", + "integrity": "sha512-VTN4EuOHwXEkXzX5nTvVY4s7E/Krz7COC8xkftbbKRYAl96vPiUssGkeMELQMOnLOJ8k3BY1+ZY52tttZnHcXQ==", + "cpu": [ + "ia32" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-loong64": { + "version": "0.23.1", + "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.23.1.tgz", + "integrity": "sha512-Vx09LzEoBa5zDnieH8LSMRToj7ir/Jeq0Gu6qJ/1GcBq9GkfoEAoXvLiW1U9J1qE/Y/Oyaq33w5p2ZWrNNHNEw==", + "cpu": [ + "loong64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-mips64el": { + "version": "0.23.1", + "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.23.1.tgz", + "integrity": "sha512-nrFzzMQ7W4WRLNUOU5dlWAqa6yVeI0P78WKGUo7lg2HShq/yx+UYkeNSE0SSfSure0SqgnsxPvmAUu/vu0E+3Q==", + "cpu": [ + "mips64el" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-ppc64": { + "version": "0.23.1", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.23.1.tgz", + "integrity": "sha512-dKN8fgVqd0vUIjxuJI6P/9SSSe/mB9rvA98CSH2sJnlZ/OCZWO1DJvxj8jvKTfYUdGfcq2dDxoKaC6bHuTlgcw==", + "cpu": [ + "ppc64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-riscv64": { + "version": "0.23.1", + "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.23.1.tgz", + "integrity": "sha512-5AV4Pzp80fhHL83JM6LoA6pTQVWgB1HovMBsLQ9OZWLDqVY8MVobBXNSmAJi//Csh6tcY7e7Lny2Hg1tElMjIA==", + "cpu": [ + "riscv64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-s390x": { + "version": "0.23.1", + "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.23.1.tgz", + "integrity": "sha512-9ygs73tuFCe6f6m/Tb+9LtYxWR4c9yg7zjt2cYkjDbDpV/xVn+68cQxMXCjUpYwEkze2RcU/rMnfIXNRFmSoDw==", + "cpu": [ + "s390x" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-x64": { + "version": "0.23.1", + "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.23.1.tgz", + "integrity": "sha512-EV6+ovTsEXCPAp58g2dD68LxoP/wK5pRvgy0J/HxPGB009omFPv3Yet0HiaqvrIrgPTBuC6wCH1LTOY91EO5hQ==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/netbsd-x64": { + "version": "0.23.1", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.23.1.tgz", + "integrity": "sha512-aevEkCNu7KlPRpYLjwmdcuNz6bDFiE7Z8XC4CPqExjTvrHugh28QzUXVOZtiYghciKUacNktqxdpymplil1beA==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/openbsd-arm64": { + "version": "0.23.1", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.23.1.tgz", + "integrity": "sha512-3x37szhLexNA4bXhLrCC/LImN/YtWis6WXr1VESlfVtVeoFJBRINPJ3f0a/6LV8zpikqoUg4hyXw0sFBt5Cr+Q==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/openbsd-x64": { + "version": "0.23.1", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.23.1.tgz", + "integrity": "sha512-aY2gMmKmPhxfU+0EdnN+XNtGbjfQgwZj43k8G3fyrDM/UdZww6xrWxmDkuz2eCZchqVeABjV5BpildOrUbBTqA==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/sunos-x64": { + "version": "0.23.1", + "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.23.1.tgz", + "integrity": "sha512-RBRT2gqEl0IKQABT4XTj78tpk9v7ehp+mazn2HbUeZl1YMdaGAQqhapjGTCe7uw7y0frDi4gS0uHzhvpFuI1sA==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "sunos" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/win32-arm64": { + "version": "0.23.1", + "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.23.1.tgz", + "integrity": "sha512-4O+gPR5rEBe2FpKOVyiJ7wNDPA8nGzDuJ6gN4okSA1gEOYZ67N8JPk58tkWtdtPeLz7lBnY6I5L3jdsr3S+A6A==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/win32-ia32": { + "version": "0.23.1", + "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.23.1.tgz", + "integrity": "sha512-BcaL0Vn6QwCwre3Y717nVHZbAa4UBEigzFm6VdsVdT/MbZ38xoj1X9HPkZhbmaBGUD1W8vxAfffbDe8bA6AKnQ==", + "cpu": [ + "ia32" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/win32-x64": { + "version": "0.23.1", + "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.23.1.tgz", + "integrity": "sha512-BHpFFeslkWrXWyUPnbKm+xYYVYruCinGcftSBaa8zoF9hZO4BcSCFUvHVTtzpIY6YzUnYtuEhZ+C9iEXjxnasg==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@isaacs/cliui": { + "version": "8.0.2", + "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-8.0.2.tgz", + "integrity": "sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==", + "dev": true, + "dependencies": { + "string-width": "^5.1.2", + "string-width-cjs": "npm:string-width@^4.2.0", + "strip-ansi": "^7.0.1", + "strip-ansi-cjs": "npm:strip-ansi@^6.0.1", + "wrap-ansi": "^8.1.0", + "wrap-ansi-cjs": "npm:wrap-ansi@^7.0.0" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/@isaacs/cliui/node_modules/ansi-regex": { + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.1.0.tgz", + "integrity": "sha512-7HSX4QQb4CspciLpVFwyRe79O3xsIZDDLER21kERQ71oaPodF8jL725AgJMFAYbooIqolJoRLuM81SpeUkpkvA==", + "dev": true, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/ansi-regex?sponsor=1" + } + }, + "node_modules/@isaacs/cliui/node_modules/ansi-styles": { + "version": "6.2.1", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.1.tgz", + "integrity": "sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug==", + "dev": true, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/@isaacs/cliui/node_modules/emoji-regex": { + "version": "9.2.2", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-9.2.2.tgz", + "integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==", + "dev": true + }, + "node_modules/@isaacs/cliui/node_modules/string-width": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-5.1.2.tgz", + "integrity": "sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA==", + "dev": true, + "dependencies": { + "eastasianwidth": "^0.2.0", + "emoji-regex": "^9.2.2", + "strip-ansi": "^7.0.1" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/@isaacs/cliui/node_modules/strip-ansi": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.0.tgz", + "integrity": "sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==", + "dev": true, + "dependencies": { + "ansi-regex": "^6.0.1" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/strip-ansi?sponsor=1" + } + }, + "node_modules/@isaacs/cliui/node_modules/wrap-ansi": { + "version": "8.1.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-8.1.0.tgz", + "integrity": "sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ==", + "dev": true, + "dependencies": { + "ansi-styles": "^6.1.0", + "string-width": "^5.0.1", + "strip-ansi": "^7.0.1" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, "node_modules/@istanbuljs/load-nyc-config": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/@istanbuljs/load-nyc-config/-/load-nyc-config-1.1.0.tgz", @@ -951,6 +1432,259 @@ "@jridgewell/sourcemap-codec": "^1.4.14" } }, + "node_modules/@nodelib/fs.scandir": { + "version": "2.1.5", + "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", + "integrity": "sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g==", + "dev": true, + "dependencies": { + "@nodelib/fs.stat": "2.0.5", + "run-parallel": "^1.1.9" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/@nodelib/fs.stat": { + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/@nodelib/fs.stat/-/fs.stat-2.0.5.tgz", + "integrity": "sha512-RkhPPp2zrqDAQA/2jNhnztcPAlv64XdhIp7a7454A5ovI7Bukxgt7MX7udwAu3zg1DcpPU0rz3VV1SeaqvY4+A==", + "dev": true, + "engines": { + "node": ">= 8" + } + }, + "node_modules/@nodelib/fs.walk": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/@nodelib/fs.walk/-/fs.walk-1.2.8.tgz", + "integrity": "sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==", + "dev": true, + "dependencies": { + "@nodelib/fs.scandir": "2.1.5", + "fastq": "^1.6.0" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/@pkgjs/parseargs": { + "version": "0.11.0", + "resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz", + "integrity": "sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==", + "dev": true, + "optional": true, + "engines": { + "node": ">=14" + } + }, + "node_modules/@rollup/rollup-android-arm-eabi": { + "version": "4.21.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.21.2.tgz", + "integrity": "sha512-fSuPrt0ZO8uXeS+xP3b+yYTCBUd05MoSp2N/MFOgjhhUhMmchXlpTQrTpI8T+YAwAQuK7MafsCOxW7VrPMrJcg==", + "cpu": [ + "arm" + ], + "dev": true, + "optional": true, + "os": [ + "android" + ] + }, + "node_modules/@rollup/rollup-android-arm64": { + "version": "4.21.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.21.2.tgz", + "integrity": "sha512-xGU5ZQmPlsjQS6tzTTGwMsnKUtu0WVbl0hYpTPauvbRAnmIvpInhJtgjj3mcuJpEiuUw4v1s4BimkdfDWlh7gA==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "android" + ] + }, + "node_modules/@rollup/rollup-darwin-arm64": { + "version": "4.21.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.21.2.tgz", + "integrity": "sha512-99AhQ3/ZMxU7jw34Sq8brzXqWH/bMnf7ZVhvLk9QU2cOepbQSVTns6qoErJmSiAvU3InRqC2RRZ5ovh1KN0d0Q==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@rollup/rollup-darwin-x64": { + "version": "4.21.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.21.2.tgz", + "integrity": "sha512-ZbRaUvw2iN/y37x6dY50D8m2BnDbBjlnMPotDi/qITMJ4sIxNY33HArjikDyakhSv0+ybdUxhWxE6kTI4oX26w==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@rollup/rollup-linux-arm-gnueabihf": { + "version": "4.21.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.21.2.tgz", + "integrity": "sha512-ztRJJMiE8nnU1YFcdbd9BcH6bGWG1z+jP+IPW2oDUAPxPjo9dverIOyXz76m6IPA6udEL12reYeLojzW2cYL7w==", + "cpu": [ + "arm" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-arm-musleabihf": { + "version": "4.21.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.21.2.tgz", + "integrity": "sha512-flOcGHDZajGKYpLV0JNc0VFH361M7rnV1ee+NTeC/BQQ1/0pllYcFmxpagltANYt8FYf9+kL6RSk80Ziwyhr7w==", + "cpu": [ + "arm" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-arm64-gnu": { + "version": "4.21.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.21.2.tgz", + "integrity": "sha512-69CF19Kp3TdMopyteO/LJbWufOzqqXzkrv4L2sP8kfMaAQ6iwky7NoXTp7bD6/irKgknDKM0P9E/1l5XxVQAhw==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-arm64-musl": { + "version": "4.21.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.21.2.tgz", + "integrity": "sha512-48pD/fJkTiHAZTnZwR0VzHrao70/4MlzJrq0ZsILjLW/Ab/1XlVUStYyGt7tdyIiVSlGZbnliqmult/QGA2O2w==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-powerpc64le-gnu": { + "version": "4.21.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-powerpc64le-gnu/-/rollup-linux-powerpc64le-gnu-4.21.2.tgz", + "integrity": "sha512-cZdyuInj0ofc7mAQpKcPR2a2iu4YM4FQfuUzCVA2u4HI95lCwzjoPtdWjdpDKyHxI0UO82bLDoOaLfpZ/wviyQ==", + "cpu": [ + "ppc64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-riscv64-gnu": { + "version": "4.21.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.21.2.tgz", + "integrity": "sha512-RL56JMT6NwQ0lXIQmMIWr1SW28z4E4pOhRRNqwWZeXpRlykRIlEpSWdsgNWJbYBEWD84eocjSGDu/XxbYeCmwg==", + "cpu": [ + "riscv64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-s390x-gnu": { + "version": "4.21.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.21.2.tgz", + "integrity": "sha512-PMxkrWS9z38bCr3rWvDFVGD6sFeZJw4iQlhrup7ReGmfn7Oukrr/zweLhYX6v2/8J6Cep9IEA/SmjXjCmSbrMQ==", + "cpu": [ + "s390x" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-x64-gnu": { + "version": "4.21.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.21.2.tgz", + "integrity": "sha512-B90tYAUoLhU22olrafY3JQCFLnT3NglazdwkHyxNDYF/zAxJt5fJUB/yBoWFoIQ7SQj+KLe3iL4BhOMa9fzgpw==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-x64-musl": { + "version": "4.21.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.21.2.tgz", + "integrity": "sha512-7twFizNXudESmC9oneLGIUmoHiiLppz/Xs5uJQ4ShvE6234K0VB1/aJYU3f/4g7PhssLGKBVCC37uRkkOi8wjg==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-win32-arm64-msvc": { + "version": "4.21.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.21.2.tgz", + "integrity": "sha512-9rRero0E7qTeYf6+rFh3AErTNU1VCQg2mn7CQcI44vNUWM9Ze7MSRS/9RFuSsox+vstRt97+x3sOhEey024FRQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@rollup/rollup-win32-ia32-msvc": { + "version": "4.21.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.21.2.tgz", + "integrity": "sha512-5rA4vjlqgrpbFVVHX3qkrCo/fZTj1q0Xxpg+Z7yIo3J2AilW7t2+n6Q8Jrx+4MrYpAnjttTYF8rr7bP46BPzRw==", + "cpu": [ + "ia32" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@rollup/rollup-win32-x64-msvc": { + "version": "4.21.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.21.2.tgz", + "integrity": "sha512-6UUxd0+SKomjdzuAcp+HAmxw1FlGBnl1v2yEPSabtx4lBfdXHDVsW7+lQkgz9cNFJGY3AWR7+V8P5BqkD9L9nA==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ] + }, "node_modules/@sinclair/typebox": { "version": "0.27.8", "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.27.8.tgz", @@ -1036,6 +1770,12 @@ "dotenv": "*" } }, + "node_modules/@types/estree": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.5.tgz", + "integrity": "sha512-/kYRxGDLWzHOB7q+wtSUQlFrtcdUccpfy+X+9iMBpHK8QLLhx2wIPYuS5DYtR9Wa/YlZAbIovy7qVdB1Aq6Lyw==", + "dev": true + }, "node_modules/@types/graceful-fs": { "version": "4.1.9", "resolved": "https://registry.npmjs.org/@types/graceful-fs/-/graceful-fs-4.1.9.tgz", @@ -1160,6 +1900,12 @@ "url": "https://github.com/chalk/ansi-styles?sponsor=1" } }, + "node_modules/any-promise": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/any-promise/-/any-promise-1.3.0.tgz", + "integrity": "sha512-7UvmKalWRt1wgjL1RrGxoSJW/0QZFIegpeGvZG9kjp8vrRu55XTHbwnqq2GpXm9uLbcuhxm3IqX9OB4MZR1b2A==", + "dev": true + }, "node_modules/anymatch": { "version": "3.1.3", "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-3.1.3.tgz", @@ -1182,6 +1928,15 @@ "sprintf-js": "~1.0.2" } }, + "node_modules/array-union": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/array-union/-/array-union-2.1.0.tgz", + "integrity": "sha512-HGyxoOTYUyCM6stUe6EJgnd4EoewAI7zMdfqO+kGjnlZmBDz/cR5pf8r/cR4Wq60sL/p0IkcjUEEPwS3GFrIyw==", + "dev": true, + "engines": { + "node": ">=8" + } + }, "node_modules/async": { "version": "3.2.5", "resolved": "https://registry.npmjs.org/async/-/async-3.2.5.tgz", @@ -1316,6 +2071,18 @@ "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", "dev": true }, + "node_modules/binary-extensions": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.3.0.tgz", + "integrity": "sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw==", + "dev": true, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/brace-expansion": { "version": "1.1.11", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", @@ -1397,6 +2164,30 @@ "integrity": "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==", "dev": true }, + "node_modules/bundle-require": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/bundle-require/-/bundle-require-5.0.0.tgz", + "integrity": "sha512-GuziW3fSSmopcx4KRymQEJVbZUfqlCqcq7dvs6TYwKRZiegK/2buMxQTPs6MGlNv50wms1699qYO54R8XfRX4w==", + "dev": true, + "dependencies": { + "load-tsconfig": "^0.2.3" + }, + "engines": { + "node": "^12.20.0 || ^14.13.1 || >=16.0.0" + }, + "peerDependencies": { + "esbuild": ">=0.18" + } + }, + "node_modules/cac": { + "version": "6.7.14", + "resolved": "https://registry.npmjs.org/cac/-/cac-6.7.14.tgz", + "integrity": "sha512-b6Ilus+c3RrdDk+JhLKUAQfzzgLEPy6wcXqS7f/xe1EETvsDP6GORG7SFuOs6cID5YkqchW/LXZbX5bc8j7ZcQ==", + "dev": true, + "engines": { + "node": ">=8" + } + }, "node_modules/callsites": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", @@ -1460,6 +2251,30 @@ "node": ">=10" } }, + "node_modules/chokidar": { + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.6.0.tgz", + "integrity": "sha512-7VT13fmjotKpGipCW9JEQAusEPE+Ei8nl6/g4FBAmIm0GOOLMua9NDDo/DWp0ZAxCr3cPq5ZpBqmPAQgDda2Pw==", + "dev": true, + "dependencies": { + "anymatch": "~3.1.2", + "braces": "~3.0.2", + "glob-parent": "~5.1.2", + "is-binary-path": "~2.1.0", + "is-glob": "~4.0.1", + "normalize-path": "~3.0.0", + "readdirp": "~3.6.0" + }, + "engines": { + "node": ">= 8.10.0" + }, + "funding": { + "url": "https://paulmillr.com/funding/" + }, + "optionalDependencies": { + "fsevents": "~2.3.2" + } + }, "node_modules/ci-info": { "version": "3.9.0", "resolved": "https://registry.npmjs.org/ci-info/-/ci-info-3.9.0.tgz", @@ -1540,12 +2355,30 @@ "node": ">= 0.8" } }, + "node_modules/commander": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/commander/-/commander-4.1.1.tgz", + "integrity": "sha512-NOKm8xhkzAjzFx8B2v5OAHT+u5pRQc2UCa2Vq9jYL/31o2wi9mxBA7LIFs3sV5VSC49z6pEhfbMULvShKj26WA==", + "dev": true, + "engines": { + "node": ">= 6" + } + }, "node_modules/concat-map": { "version": "0.0.1", "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==", "dev": true }, + "node_modules/consola": { + "version": "3.2.3", + "resolved": "https://registry.npmjs.org/consola/-/consola-3.2.3.tgz", + "integrity": "sha512-I5qxpzLv+sJhTVEoLYNcTW+bThDCPsit0vLNKShZx6rLtpilNpmmeTPaeqJb9ZE9dV3DGaeby6Vuhrw38WjeyQ==", + "dev": true, + "engines": { + "node": "^14.18.0 || >=16.10.0" + } + }, "node_modules/convert-source-map": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-2.0.0.tgz", @@ -1588,12 +2421,12 @@ } }, "node_modules/debug": { - "version": "4.3.4", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz", - "integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==", + "version": "4.3.7", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.7.tgz", + "integrity": "sha512-Er2nc/H7RrMXZBFCEim6TCmMk02Z8vLC2Rbi1KEBggpo0fS6l0S1nnapwmIi3yW/+GOJap1Krg4w0Hg80oCqgQ==", "dev": true, "dependencies": { - "ms": "2.1.2" + "ms": "^2.1.3" }, "engines": { "node": ">=6.0" @@ -1653,10 +2486,23 @@ "node": "^14.15.0 || ^16.10.0 || >=18.0.0" } }, + "node_modules/dir-glob": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/dir-glob/-/dir-glob-3.0.1.tgz", + "integrity": "sha512-WkrWp9GR4KXfKGYzOLmTuGVi1UWFfws377n9cc55/tb6DuqyF6pcQ5AbiHEshaDpY9v6oaSr2XCDidGmMwdzIA==", + "dev": true, + "dependencies": { + "path-type": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, "node_modules/dotenv": { "version": "16.4.5", "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.5.tgz", "integrity": "sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==", + "dev": true, "engines": { "node": ">=12" }, @@ -1664,6 +2510,12 @@ "url": "https://dotenvx.com" } }, + "node_modules/eastasianwidth": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/eastasianwidth/-/eastasianwidth-0.2.0.tgz", + "integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==", + "dev": true + }, "node_modules/ejs": { "version": "3.1.10", "resolved": "https://registry.npmjs.org/ejs/-/ejs-3.1.10.tgz", @@ -1712,6 +2564,45 @@ "is-arrayish": "^0.2.1" } }, + "node_modules/esbuild": { + "version": "0.23.1", + "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.23.1.tgz", + "integrity": "sha512-VVNz/9Sa0bs5SELtn3f7qhJCDPCF5oMEl5cO9/SSinpE9hbPVvxbd572HH5AKiP7WD8INO53GgfDDhRjkylHEg==", + "dev": true, + "hasInstallScript": true, + "bin": { + "esbuild": "bin/esbuild" + }, + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "@esbuild/aix-ppc64": "0.23.1", + "@esbuild/android-arm": "0.23.1", + "@esbuild/android-arm64": "0.23.1", + "@esbuild/android-x64": "0.23.1", + "@esbuild/darwin-arm64": "0.23.1", + "@esbuild/darwin-x64": "0.23.1", + "@esbuild/freebsd-arm64": "0.23.1", + "@esbuild/freebsd-x64": "0.23.1", + "@esbuild/linux-arm": "0.23.1", + "@esbuild/linux-arm64": "0.23.1", + "@esbuild/linux-ia32": "0.23.1", + "@esbuild/linux-loong64": "0.23.1", + "@esbuild/linux-mips64el": "0.23.1", + "@esbuild/linux-ppc64": "0.23.1", + "@esbuild/linux-riscv64": "0.23.1", + "@esbuild/linux-s390x": "0.23.1", + "@esbuild/linux-x64": "0.23.1", + "@esbuild/netbsd-x64": "0.23.1", + "@esbuild/openbsd-arm64": "0.23.1", + "@esbuild/openbsd-x64": "0.23.1", + "@esbuild/sunos-x64": "0.23.1", + "@esbuild/win32-arm64": "0.23.1", + "@esbuild/win32-ia32": "0.23.1", + "@esbuild/win32-x64": "0.23.1" + } + }, "node_modules/escalade": { "version": "3.1.2", "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.2.tgz", @@ -1791,12 +2682,37 @@ "node": "^14.15.0 || ^16.10.0 || >=18.0.0" } }, + "node_modules/fast-glob": { + "version": "3.3.2", + "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.2.tgz", + "integrity": "sha512-oX2ruAFQwf/Orj8m737Y5adxDQO0LAB7/S5MnxCdTNDd4p6BsyIVsv9JQsATbTSq8KHRpLwIHbVlUNatxd+1Ow==", + "dev": true, + "dependencies": { + "@nodelib/fs.stat": "^2.0.2", + "@nodelib/fs.walk": "^1.2.3", + "glob-parent": "^5.1.2", + "merge2": "^1.3.0", + "micromatch": "^4.0.4" + }, + "engines": { + "node": ">=8.6.0" + } + }, "node_modules/fast-json-stable-stringify": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz", "integrity": "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==", "dev": true }, + "node_modules/fastq": { + "version": "1.17.1", + "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.17.1.tgz", + "integrity": "sha512-sRVD3lWVIXWg6By68ZN7vho9a1pQcN/WBFaAAsDDFzlJjvoGx0P8z7V1t72grFJfJhu3YPZBuu25f7Kaw2jN1w==", + "dev": true, + "dependencies": { + "reusify": "^1.0.4" + } + }, "node_modules/fb-watchman": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/fb-watchman/-/fb-watchman-2.0.2.tgz", @@ -1880,6 +2796,34 @@ } } }, + "node_modules/foreground-child": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-3.3.0.tgz", + "integrity": "sha512-Ld2g8rrAyMYFXBhEqMz8ZAHBi4J4uS1i/CxGMDnjyFWddMXLVcDp051DZfu+t7+ab7Wv6SMqpWmyFIj5UbfFvg==", + "dev": true, + "dependencies": { + "cross-spawn": "^7.0.0", + "signal-exit": "^4.0.1" + }, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/foreground-child/node_modules/signal-exit": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz", + "integrity": "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==", + "dev": true, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, "node_modules/form-data": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz", @@ -1981,6 +2925,18 @@ "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/glob-parent": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", + "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", + "dev": true, + "dependencies": { + "is-glob": "^4.0.1" + }, + "engines": { + "node": ">= 6" + } + }, "node_modules/globals": { "version": "11.12.0", "resolved": "https://registry.npmjs.org/globals/-/globals-11.12.0.tgz", @@ -1990,6 +2946,26 @@ "node": ">=4" } }, + "node_modules/globby": { + "version": "11.1.0", + "resolved": "https://registry.npmjs.org/globby/-/globby-11.1.0.tgz", + "integrity": "sha512-jhIXaOzy1sb8IyocaruWSn1TjmnBVs8Ayhcy83rmxNJ8q2uWKCAj3CnJY+KpGSXCueAPc0i05kVvVKtP1t9S3g==", + "dev": true, + "dependencies": { + "array-union": "^2.1.0", + "dir-glob": "^3.0.1", + "fast-glob": "^3.2.9", + "ignore": "^5.2.0", + "merge2": "^1.4.1", + "slash": "^3.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/graceful-fs": { "version": "4.2.11", "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", @@ -2032,6 +3008,15 @@ "node": ">=10.17.0" } }, + "node_modules/ignore": { + "version": "5.3.2", + "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.3.2.tgz", + "integrity": "sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g==", + "dev": true, + "engines": { + "node": ">= 4" + } + }, "node_modules/import-local": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/import-local/-/import-local-3.1.0.tgz", @@ -2082,6 +3067,18 @@ "integrity": "sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg==", "dev": true }, + "node_modules/is-binary-path": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/is-binary-path/-/is-binary-path-2.1.0.tgz", + "integrity": "sha512-ZMERYes6pDydyuGidse7OsHxtbI7WVeUEozgR/g7rd0xUimYNlvZRE/K2MgZTjWy725IfelLeVcEM97mmtRGXw==", + "dev": true, + "dependencies": { + "binary-extensions": "^2.0.0" + }, + "engines": { + "node": ">=8" + } + }, "node_modules/is-core-module": { "version": "2.13.1", "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.13.1.tgz", @@ -2094,6 +3091,15 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/is-extglob": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", + "integrity": "sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/is-fullwidth-code-point": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", @@ -2112,6 +3118,18 @@ "node": ">=6" } }, + "node_modules/is-glob": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.3.tgz", + "integrity": "sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg==", + "dev": true, + "dependencies": { + "is-extglob": "^2.1.1" + }, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/is-number": { "version": "7.0.0", "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", @@ -2252,6 +3270,21 @@ "node": ">=8" } }, + "node_modules/jackspeak": { + "version": "3.4.3", + "resolved": "https://registry.npmjs.org/jackspeak/-/jackspeak-3.4.3.tgz", + "integrity": "sha512-OGlZQpz2yfahA/Rd1Y8Cd9SIEsqvXkLVoSw/cgwhnhFMDbsQFeZYoJJ7bIZBS9BcamUW96asq/npPWugM+RQBw==", + "dev": true, + "dependencies": { + "@isaacs/cliui": "^8.0.2" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + }, + "optionalDependencies": { + "@pkgjs/parseargs": "^0.11.0" + } + }, "node_modules/jake": { "version": "10.9.1", "resolved": "https://registry.npmjs.org/jake/-/jake-10.9.1.tgz", @@ -2858,6 +3891,15 @@ "url": "https://github.com/chalk/supports-color?sponsor=1" } }, + "node_modules/joycon": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/joycon/-/joycon-3.1.1.tgz", + "integrity": "sha512-34wB/Y7MW7bzjKRjUKTa46I2Z7eV62Rkhva+KkopW7Qvv/OSWBqvkSY7vusOPrNuZcUG3tApvdVgNB8POj3SPw==", + "dev": true, + "engines": { + "node": ">=10" + } + }, "node_modules/js-tokens": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", @@ -2925,12 +3967,33 @@ "node": ">=6" } }, + "node_modules/lilconfig": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-3.1.2.tgz", + "integrity": "sha512-eop+wDAvpItUys0FWkHIKeC9ybYrTGbU41U5K7+bttZZeohvnY7M9dZ5kB21GNWiFT2q1OoPTvncPCgSOVO5ow==", + "dev": true, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/antonk52" + } + }, "node_modules/lines-and-columns": { "version": "1.2.4", "resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz", "integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==", "dev": true }, + "node_modules/load-tsconfig": { + "version": "0.2.5", + "resolved": "https://registry.npmjs.org/load-tsconfig/-/load-tsconfig-0.2.5.tgz", + "integrity": "sha512-IXO6OCs9yg8tMKzfPZ1YmheJbZCiEsnBdcB03l0OcfK9prKnJb96siuHCr5Fl37/yo9DnKU+TLpxzTUspw9shg==", + "dev": true, + "engines": { + "node": "^12.20.0 || ^14.13.1 || >=16.0.0" + } + }, "node_modules/locate-path": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-5.0.0.tgz", @@ -2949,6 +4012,12 @@ "integrity": "sha512-t7j+NzmgnQzTAYXcsHYLgimltOV1MXHtlOWf6GjL9Kj8GK5FInw5JotxvbOs+IvV1/Dzo04/fCGfLVs7aXb4Ag==", "dev": true }, + "node_modules/lodash.sortby": { + "version": "4.7.0", + "resolved": "https://registry.npmjs.org/lodash.sortby/-/lodash.sortby-4.7.0.tgz", + "integrity": "sha512-HDWXG8isMntAyRF5vZ7xKuEvOhT4AhlRt/3czTSjvGUxjYCBVRQY48ViDHyfYz9VIoBkW4TMGQNapx+l3RUwdA==", + "dev": true + }, "node_modules/lru-cache": { "version": "5.1.1", "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz", @@ -3027,6 +4096,15 @@ "integrity": "sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w==", "dev": true }, + "node_modules/merge2": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz", + "integrity": "sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==", + "dev": true, + "engines": { + "node": ">= 8" + } + }, "node_modules/micromatch": { "version": "4.0.5", "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.5.tgz", @@ -3080,12 +4158,32 @@ "node": "*" } }, + "node_modules/minipass": { + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.2.tgz", + "integrity": "sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw==", + "dev": true, + "engines": { + "node": ">=16 || 14 >=14.17" + } + }, "node_modules/ms": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", - "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", "dev": true }, + "node_modules/mz": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/mz/-/mz-2.7.0.tgz", + "integrity": "sha512-z81GNO7nnYMEhrGh9LeymoE4+Yr0Wn5McHIZMK5cfQCl+NDX08sCZgUc9/6MHni9IWuFLm1Z3HTCXu2z9fN62Q==", + "dev": true, + "dependencies": { + "any-promise": "^1.0.0", + "object-assign": "^4.0.1", + "thenify-all": "^1.0.0" + } + }, "node_modules/natural-compare": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/natural-compare/-/natural-compare-1.4.0.tgz", @@ -3125,6 +4223,15 @@ "node": ">=8" } }, + "node_modules/object-assign": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", + "integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/once": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", @@ -3200,6 +4307,12 @@ "node": ">=6" } }, + "node_modules/package-json-from-dist": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/package-json-from-dist/-/package-json-from-dist-1.0.0.tgz", + "integrity": "sha512-dATvCeZN/8wQsGywez1mzHtTlP22H8OEfPrVMLNr4/eGa+ijtLn/6M5f0dY8UKNrC2O9UCU6SSoG3qRKnt7STw==", + "dev": true + }, "node_modules/parse-json": { "version": "5.2.0", "resolved": "https://registry.npmjs.org/parse-json/-/parse-json-5.2.0.tgz", @@ -3251,6 +4364,37 @@ "integrity": "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==", "dev": true }, + "node_modules/path-scurry": { + "version": "1.11.1", + "resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-1.11.1.tgz", + "integrity": "sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA==", + "dev": true, + "dependencies": { + "lru-cache": "^10.2.0", + "minipass": "^5.0.0 || ^6.0.2 || ^7.0.0" + }, + "engines": { + "node": ">=16 || 14 >=14.18" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/path-scurry/node_modules/lru-cache": { + "version": "10.4.3", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.4.3.tgz", + "integrity": "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==", + "dev": true + }, + "node_modules/path-type": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/path-type/-/path-type-4.0.0.tgz", + "integrity": "sha512-gDKb8aZMDeD/tZWs9P6+q0J9Mwkdl6xMV8TjnGP3qJVJ06bdMgkbBlLU8IdfOsIsFz2BW1rNVT3XuNEl8zPAvw==", + "dev": true, + "engines": { + "node": ">=8" + } + }, "node_modules/picocolors": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.0.1.tgz", @@ -3290,6 +4434,48 @@ "node": ">=8" } }, + "node_modules/postcss-load-config": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/postcss-load-config/-/postcss-load-config-6.0.1.tgz", + "integrity": "sha512-oPtTM4oerL+UXmx+93ytZVN82RrlY/wPUV8IeDxFrzIjXOLF1pN+EmKPLbubvKHT2HC20xXsCAH2Z+CKV6Oz/g==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/postcss/" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "dependencies": { + "lilconfig": "^3.1.1" + }, + "engines": { + "node": ">= 18" + }, + "peerDependencies": { + "jiti": ">=1.21.0", + "postcss": ">=8.0.9", + "tsx": "^4.8.1", + "yaml": "^2.4.2" + }, + "peerDependenciesMeta": { + "jiti": { + "optional": true + }, + "postcss": { + "optional": true + }, + "tsx": { + "optional": true + }, + "yaml": { + "optional": true + } + } + }, "node_modules/pretty-format": { "version": "29.7.0", "resolved": "https://registry.npmjs.org/pretty-format/-/pretty-format-29.7.0.tgz", @@ -3334,6 +4520,15 @@ "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==" }, + "node_modules/punycode": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz", + "integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==", + "dev": true, + "engines": { + "node": ">=6" + } + }, "node_modules/pure-rand": { "version": "6.1.0", "resolved": "https://registry.npmjs.org/pure-rand/-/pure-rand-6.1.0.tgz", @@ -3350,12 +4545,44 @@ } ] }, + "node_modules/queue-microtask": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz", + "integrity": "sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ] + }, "node_modules/react-is": { "version": "18.2.0", "resolved": "https://registry.npmjs.org/react-is/-/react-is-18.2.0.tgz", "integrity": "sha512-xWGDIW6x921xtzPkhiULtthJHoJvBbF3q26fzloPCK0hsvxtPVelvftw3zjbHWSkR2km9Z+4uxbDDK/6Zw9B8w==", "dev": true }, + "node_modules/readdirp": { + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz", + "integrity": "sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==", + "dev": true, + "dependencies": { + "picomatch": "^2.2.1" + }, + "engines": { + "node": ">=8.10.0" + } + }, "node_modules/require-directory": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", @@ -3412,6 +4639,74 @@ "node": ">=10" } }, + "node_modules/reusify": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/reusify/-/reusify-1.0.4.tgz", + "integrity": "sha512-U9nH88a3fc/ekCF1l0/UP1IosiuIjyTh7hBvXVMHYgVcfGvt897Xguj2UOLDeI5BG2m7/uwyaLVT6fbtCwTyzw==", + "dev": true, + "engines": { + "iojs": ">=1.0.0", + "node": ">=0.10.0" + } + }, + "node_modules/rollup": { + "version": "4.21.2", + "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.21.2.tgz", + "integrity": "sha512-e3TapAgYf9xjdLvKQCkQTnbTKd4a6jwlpQSJJFokHGaX2IVjoEqkIIhiQfqsi0cdwlOD+tQGuOd5AJkc5RngBw==", + "dev": true, + "dependencies": { + "@types/estree": "1.0.5" + }, + "bin": { + "rollup": "dist/bin/rollup" + }, + "engines": { + "node": ">=18.0.0", + "npm": ">=8.0.0" + }, + "optionalDependencies": { + "@rollup/rollup-android-arm-eabi": "4.21.2", + "@rollup/rollup-android-arm64": "4.21.2", + "@rollup/rollup-darwin-arm64": "4.21.2", + "@rollup/rollup-darwin-x64": "4.21.2", + "@rollup/rollup-linux-arm-gnueabihf": "4.21.2", + "@rollup/rollup-linux-arm-musleabihf": "4.21.2", + "@rollup/rollup-linux-arm64-gnu": "4.21.2", + "@rollup/rollup-linux-arm64-musl": "4.21.2", + "@rollup/rollup-linux-powerpc64le-gnu": "4.21.2", + "@rollup/rollup-linux-riscv64-gnu": "4.21.2", + "@rollup/rollup-linux-s390x-gnu": "4.21.2", + "@rollup/rollup-linux-x64-gnu": "4.21.2", + "@rollup/rollup-linux-x64-musl": "4.21.2", + "@rollup/rollup-win32-arm64-msvc": "4.21.2", + "@rollup/rollup-win32-ia32-msvc": "4.21.2", + "@rollup/rollup-win32-x64-msvc": "4.21.2", + "fsevents": "~2.3.2" + } + }, + "node_modules/run-parallel": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/run-parallel/-/run-parallel-1.2.0.tgz", + "integrity": "sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "dependencies": { + "queue-microtask": "^1.2.2" + } + }, "node_modules/semver": { "version": "6.3.1", "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", @@ -3527,6 +4822,21 @@ "node": ">=8" } }, + "node_modules/string-width-cjs": { + "name": "string-width", + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "dev": true, + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, "node_modules/strip-ansi": { "version": "6.0.1", "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", @@ -3539,6 +4849,19 @@ "node": ">=8" } }, + "node_modules/strip-ansi-cjs": { + "name": "strip-ansi", + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "dev": true, + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, "node_modules/strip-bom": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/strip-bom/-/strip-bom-4.0.0.tgz", @@ -3569,6 +4892,72 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/sucrase": { + "version": "3.35.0", + "resolved": "https://registry.npmjs.org/sucrase/-/sucrase-3.35.0.tgz", + "integrity": "sha512-8EbVDiu9iN/nESwxeSxDKe0dunta1GOlHufmSSXxMD2z2/tMZpDMpvXQGsc+ajGo8y2uYUmixaSRUc/QPoQ0GA==", + "dev": true, + "dependencies": { + "@jridgewell/gen-mapping": "^0.3.2", + "commander": "^4.0.0", + "glob": "^10.3.10", + "lines-and-columns": "^1.1.6", + "mz": "^2.7.0", + "pirates": "^4.0.1", + "ts-interface-checker": "^0.1.9" + }, + "bin": { + "sucrase": "bin/sucrase", + "sucrase-node": "bin/sucrase-node" + }, + "engines": { + "node": ">=16 || 14 >=14.17" + } + }, + "node_modules/sucrase/node_modules/brace-expansion": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz", + "integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==", + "dev": true, + "dependencies": { + "balanced-match": "^1.0.0" + } + }, + "node_modules/sucrase/node_modules/glob": { + "version": "10.4.5", + "resolved": "https://registry.npmjs.org/glob/-/glob-10.4.5.tgz", + "integrity": "sha512-7Bv8RF0k6xjo7d4A/PxYLbUCfb6c+Vpd2/mB2yRDlew7Jb5hEXiCD9ibfO7wpk8i4sevK6DFny9h7EYbM3/sHg==", + "dev": true, + "dependencies": { + "foreground-child": "^3.1.0", + "jackspeak": "^3.1.2", + "minimatch": "^9.0.4", + "minipass": "^7.1.2", + "package-json-from-dist": "^1.0.0", + "path-scurry": "^1.11.1" + }, + "bin": { + "glob": "dist/esm/bin.mjs" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/sucrase/node_modules/minimatch": { + "version": "9.0.5", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz", + "integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==", + "dev": true, + "dependencies": { + "brace-expansion": "^2.0.1" + }, + "engines": { + "node": ">=16 || 14 >=14.17" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, "node_modules/supports-color": { "version": "7.2.0", "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", @@ -3607,6 +4996,27 @@ "node": ">=8" } }, + "node_modules/thenify": { + "version": "3.3.1", + "resolved": "https://registry.npmjs.org/thenify/-/thenify-3.3.1.tgz", + "integrity": "sha512-RVZSIV5IG10Hk3enotrhvz0T9em6cyHBLkH/YAZuKqd8hRkKhSfCGIcP2KUY0EPxndzANBmNllzWPwak+bheSw==", + "dev": true, + "dependencies": { + "any-promise": "^1.0.0" + } + }, + "node_modules/thenify-all": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/thenify-all/-/thenify-all-1.6.0.tgz", + "integrity": "sha512-RNxQH/qI8/t3thXJDwcstUO4zeqo64+Uy/+sNVRBx4Xn2OX+OZ9oP+iJnNFqplFra2ZUVeKCSa2oVWi3T4uVmA==", + "dev": true, + "dependencies": { + "thenify": ">= 3.1.0 < 4" + }, + "engines": { + "node": ">=0.8" + } + }, "node_modules/tmpl": { "version": "1.0.5", "resolved": "https://registry.npmjs.org/tmpl/-/tmpl-1.0.5.tgz", @@ -3634,6 +5044,30 @@ "node": ">=8.0" } }, + "node_modules/tr46": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-1.0.1.tgz", + "integrity": "sha512-dTpowEjclQ7Kgx5SdBkqRzVhERQXov8/l9Ft9dVM9fmg0W0KQSVaXX9T4i6twCPNtYiZM53lpSSUAwJbFPOHxA==", + "dev": true, + "dependencies": { + "punycode": "^2.1.0" + } + }, + "node_modules/tree-kill": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/tree-kill/-/tree-kill-1.2.2.tgz", + "integrity": "sha512-L0Orpi8qGpRG//Nd+H90vFB+3iHnue1zSSGmNOOCh1GLJ7rUKVwV2HvijphGQS2UmhUZewS9VgvxYIdgr+fG1A==", + "dev": true, + "bin": { + "tree-kill": "cli.js" + } + }, + "node_modules/ts-interface-checker": { + "version": "0.1.13", + "resolved": "https://registry.npmjs.org/ts-interface-checker/-/ts-interface-checker-0.1.13.tgz", + "integrity": "sha512-Y/arvbn+rrz3JCKl9C4kVNfTfSm2/mEp5FSz5EsZSANGPSlQrpRI5M4PKF+mJnE52jOO90PnPSc3Ur3bTQw0gA==", + "dev": true + }, "node_modules/ts-jest": { "version": "29.2.2", "resolved": "https://registry.npmjs.org/ts-jest/-/ts-jest-29.2.2.tgz", @@ -3715,6 +5149,69 @@ "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==", "dev": true }, + "node_modules/tsup": { + "version": "8.2.4", + "resolved": "https://registry.npmjs.org/tsup/-/tsup-8.2.4.tgz", + "integrity": "sha512-akpCPePnBnC/CXgRrcy72ZSntgIEUa1jN0oJbbvpALWKNOz1B7aM+UVDWGRGIO/T/PZugAESWDJUAb5FD48o8Q==", + "dev": true, + "dependencies": { + "bundle-require": "^5.0.0", + "cac": "^6.7.14", + "chokidar": "^3.6.0", + "consola": "^3.2.3", + "debug": "^4.3.5", + "esbuild": "^0.23.0", + "execa": "^5.1.1", + "globby": "^11.1.0", + "joycon": "^3.1.1", + "picocolors": "^1.0.1", + "postcss-load-config": "^6.0.1", + "resolve-from": "^5.0.0", + "rollup": "^4.19.0", + "source-map": "0.8.0-beta.0", + "sucrase": "^3.35.0", + "tree-kill": "^1.2.2" + }, + "bin": { + "tsup": "dist/cli-default.js", + "tsup-node": "dist/cli-node.js" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "@microsoft/api-extractor": "^7.36.0", + "@swc/core": "^1", + "postcss": "^8.4.12", + "typescript": ">=4.5.0" + }, + "peerDependenciesMeta": { + "@microsoft/api-extractor": { + "optional": true + }, + "@swc/core": { + "optional": true + }, + "postcss": { + "optional": true + }, + "typescript": { + "optional": true + } + } + }, + "node_modules/tsup/node_modules/source-map": { + "version": "0.8.0-beta.0", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.8.0-beta.0.tgz", + "integrity": "sha512-2ymg6oRBpebeZi9UUNsgQ89bhx01TcTkmNTGnNO88imTmbSgy4nfujrgVEFKWpMTEGA11EDkTt7mqObTPdigIA==", + "dev": true, + "dependencies": { + "whatwg-url": "^7.0.0" + }, + "engines": { + "node": ">= 8" + } + }, "node_modules/type-detect": { "version": "4.0.8", "resolved": "https://registry.npmjs.org/type-detect/-/type-detect-4.0.8.tgz", @@ -3794,6 +5291,7 @@ "version": "9.0.1", "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", + "dev": true, "funding": [ "https://github.com/sponsors/broofa", "https://github.com/sponsors/ctavan" @@ -3825,6 +5323,23 @@ "makeerror": "1.0.12" } }, + "node_modules/webidl-conversions": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-4.0.2.tgz", + "integrity": "sha512-YQ+BmxuTgd6UXZW3+ICGfyqRyHXVlD5GtQr5+qjiNW7bF0cqrzX500HVXPBOvgXb5YnzDd+h0zqyv61KUD7+Sg==", + "dev": true + }, + "node_modules/whatwg-url": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-7.1.0.tgz", + "integrity": "sha512-WUu7Rg1DroM7oQvGWfOiAK21n74Gg+T4elXEQYkOhtyLeWiJFoOGLXPKI/9gzIie9CtwVLm8wtw6YJdKyxSjeg==", + "dev": true, + "dependencies": { + "lodash.sortby": "^4.7.0", + "tr46": "^1.0.1", + "webidl-conversions": "^4.0.2" + } + }, "node_modules/which": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", @@ -3857,6 +5372,24 @@ "url": "https://github.com/chalk/wrap-ansi?sponsor=1" } }, + "node_modules/wrap-ansi-cjs": { + "name": "wrap-ansi", + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", + "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", + "dev": true, + "dependencies": { + "ansi-styles": "^4.0.0", + "string-width": "^4.1.0", + "strip-ansi": "^6.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, "node_modules/wrappy": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json index 7114a625..4b93536f 100644 --- a/apps/js-sdk/firecrawl/package.json +++ b/apps/js-sdk/firecrawl/package.json @@ -1,22 +1,19 @@ { "name": "@mendable/firecrawl-js", - "version": "1.2.2", + "version": "1.4.5", "description": "JavaScript SDK for Firecrawl API", - "main": "build/cjs/index.js", - "types": "types/index.d.ts", - "type": "module", + "main": "dist/index.js", + "types": "dist/index.d.ts", "exports": { - "require": { - "types": "./types/index.d.ts", - "default": "./build/cjs/index.js" - }, - "import": { - "types": "./types/index.d.ts", - "default": "./build/esm/index.js" + "./package.json": "./package.json", + ".": { + "import": "./dist/index.js", + "default": "./dist/index.cjs" } }, + "type": "module", "scripts": { - "build": "tsc --module commonjs --moduleResolution node10 --outDir build/cjs/ && echo '{\"type\": \"commonjs\"}' > build/cjs/package.json && npx tsc --module NodeNext --moduleResolution NodeNext --outDir build/esm/ && echo '{\"type\": \"module\"}' > build/esm/package.json", + "build": "tsup", "build-and-publish": "npm run build && npm publish --access public", "publish-beta": "npm run build && npm publish --access public --tag beta", "test": "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/v1/**/*.test.ts" @@ -29,10 +26,8 @@ "license": "MIT", "dependencies": { "axios": "^1.6.8", - "dotenv": "^16.4.5", "isows": "^1.0.4", "typescript-event-target": "^1.1.1", - "uuid": "^9.0.1", "zod": "^3.23.8", "zod-to-json-schema": "^3.23.0" }, @@ -41,6 +36,8 @@ }, "homepage": "https://github.com/mendableai/firecrawl#readme", "devDependencies": { + "uuid": "^9.0.1", + "dotenv": "^16.4.5", "@jest/globals": "^29.7.0", "@types/axios": "^0.14.0", "@types/dotenv": "^8.2.0", @@ -50,6 +47,7 @@ "@types/uuid": "^9.0.8", "jest": "^29.7.0", "ts-jest": "^29.2.2", + "tsup": "^8.2.4", "typescript": "^5.4.5" }, "keywords": [ diff --git a/apps/js-sdk/firecrawl/src/__tests__/v1/e2e_withAuth/index.test.ts b/apps/js-sdk/firecrawl/src/__tests__/v1/e2e_withAuth/index.test.ts index 9f6c6462..dea55846 100644 --- a/apps/js-sdk/firecrawl/src/__tests__/v1/e2e_withAuth/index.test.ts +++ b/apps/js-sdk/firecrawl/src/__tests__/v1/e2e_withAuth/index.test.ts @@ -1,4 +1,4 @@ -import FirecrawlApp, { CrawlParams, CrawlResponse, CrawlStatusResponse, MapResponse, ScrapeParams, ScrapeResponse } from '../../../index'; +import FirecrawlApp, { type CrawlParams, type CrawlResponse, type CrawlStatusResponse, type MapResponse, type ScrapeResponse } from '../../../index'; import { v4 as uuidv4 } from 'uuid'; import dotenv from 'dotenv'; import { describe, test, expect } from '@jest/globals'; @@ -6,7 +6,7 @@ import { describe, test, expect } from '@jest/globals'; dotenv.config(); const TEST_API_KEY = process.env.TEST_API_KEY; -const API_URL = "http://127.0.0.1:3002"; +const API_URL = process.env.API_URL ?? "https://api.firecrawl.dev"; describe('FirecrawlApp E2E Tests', () => { test.concurrent('should throw error for no API key', async () => { @@ -28,14 +28,22 @@ describe('FirecrawlApp E2E Tests', () => { test.concurrent('should return successful response with valid preview token', async () => { const app = new FirecrawlApp({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL }); - const response = await app.scrapeUrl('https://roastmywebsite.ai') as ScrapeResponse; + const response = await app.scrapeUrl('https://roastmywebsite.ai'); + if (!response.success) { + throw new Error(response.error); + } + expect(response).not.toBeNull(); expect(response?.markdown).toContain("_Roast_"); }, 30000); // 30 seconds timeout test.concurrent('should return successful response for valid scrape', async () => { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); - const response = await app.scrapeUrl('https://roastmywebsite.ai') as ScrapeResponse; + const response = await app.scrapeUrl('https://roastmywebsite.ai'); + if (!response.success) { + throw new Error(response.error); + } + expect(response).not.toBeNull(); expect(response).not.toHaveProperty('content'); // v0 expect(response).not.toHaveProperty('html'); @@ -58,7 +66,11 @@ describe('FirecrawlApp E2E Tests', () => { onlyMainContent: true, timeout: 30000, waitFor: 1000 - }) as ScrapeResponse; + }); + if (!response.success) { + throw new Error(response.error); + } + expect(response).not.toBeNull(); expect(response).not.toHaveProperty('content'); // v0 expect(response.markdown).toContain("_Roast_"); @@ -71,6 +83,7 @@ describe('FirecrawlApp E2E Tests', () => { expect(response.links?.length).toBeGreaterThan(0); expect(response.links?.[0]).toContain("https://"); expect(response.metadata).not.toBeNull(); + expect(response.metadata).not.toBeUndefined(); expect(response.metadata).toHaveProperty("title"); expect(response.metadata).toHaveProperty("description"); expect(response.metadata).toHaveProperty("keywords"); @@ -85,31 +98,58 @@ describe('FirecrawlApp E2E Tests', () => { expect(response.metadata).not.toHaveProperty("pageStatusCode"); expect(response.metadata).toHaveProperty("statusCode"); expect(response.metadata).not.toHaveProperty("pageError"); - expect(response.metadata.error).toBeUndefined(); - expect(response.metadata.title).toBe("Roast My Website"); - expect(response.metadata.description).toBe("Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. ๐ŸŒถ๏ธ"); - expect(response.metadata.keywords).toBe("Roast My Website,Roast,Website,GitHub,Firecrawl"); - expect(response.metadata.robots).toBe("follow, index"); - expect(response.metadata.ogTitle).toBe("Roast My Website"); - expect(response.metadata.ogDescription).toBe("Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. ๐ŸŒถ๏ธ"); - expect(response.metadata.ogUrl).toBe("https://www.roastmywebsite.ai"); - expect(response.metadata.ogImage).toBe("https://www.roastmywebsite.ai/og.png"); - expect(response.metadata.ogLocaleAlternate).toStrictEqual([]); - expect(response.metadata.ogSiteName).toBe("Roast My Website"); - expect(response.metadata.sourceURL).toBe("https://roastmywebsite.ai"); - expect(response.metadata.statusCode).toBe(200); + + if (response.metadata !== undefined) { + expect(response.metadata.error).toBeUndefined(); + expect(response.metadata.title).toBe("Roast My Website"); + expect(response.metadata.description).toBe("Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. ๐ŸŒถ๏ธ"); + expect(response.metadata.keywords).toBe("Roast My Website,Roast,Website,GitHub,Firecrawl"); + expect(response.metadata.robots).toBe("follow, index"); + expect(response.metadata.ogTitle).toBe("Roast My Website"); + expect(response.metadata.ogDescription).toBe("Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. ๐ŸŒถ๏ธ"); + expect(response.metadata.ogUrl).toBe("https://www.roastmywebsite.ai"); + expect(response.metadata.ogImage).toBe("https://www.roastmywebsite.ai/og.png"); + expect(response.metadata.ogLocaleAlternate).toStrictEqual([]); + expect(response.metadata.ogSiteName).toBe("Roast My Website"); + expect(response.metadata.sourceURL).toBe("https://roastmywebsite.ai"); + expect(response.metadata.statusCode).toBe(200); + } + }, 30000); // 30 seconds timeout + + test.concurrent('should return successful response with valid API key and screenshot fullPage', async () => { + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); + const response = await app.scrapeUrl( + 'https://roastmywebsite.ai', { + formats: ['screenshot@fullPage'], + }); + if (!response.success) { + throw new Error(response.error); + } + + expect(response).not.toBeNull(); + expect(response.screenshot).not.toBeUndefined(); + expect(response.screenshot).not.toBeNull(); + expect(response.screenshot).toContain("https://"); }, 30000); // 30 seconds timeout test.concurrent('should return successful response for valid scrape with PDF file', async () => { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); - const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf') as ScrapeResponse; + const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf'); + if (!response.success) { + throw new Error(response.error); + } + expect(response).not.toBeNull(); expect(response?.markdown).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy'); }, 30000); // 30 seconds timeout test.concurrent('should return successful response for valid scrape with PDF file without explicit extension', async () => { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); - const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001') as ScrapeResponse; + const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001'); + if (!response.success) { + throw new Error(response.error); + } + expect(response).not.toBeNull(); expect(response?.markdown).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy'); }, 30000); // 30 seconds timeout @@ -127,7 +167,7 @@ describe('FirecrawlApp E2E Tests', () => { test.concurrent('should return successful response for crawl and wait for completion', async () => { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); - const response = await app.crawlUrl('https://roastmywebsite.ai', {}, true, 30) as CrawlStatusResponse; + const response = await app.crawlUrl('https://roastmywebsite.ai', {}, 30) as CrawlStatusResponse; expect(response).not.toBeNull(); expect(response).toHaveProperty("total"); expect(response.total).toBeGreaterThan(0); @@ -138,21 +178,25 @@ describe('FirecrawlApp E2E Tests', () => { expect(response).toHaveProperty("status"); expect(response.status).toBe("completed"); expect(response).not.toHaveProperty("next"); // wait until done - expect(response.data?.length).toBeGreaterThan(0); - expect(response.data?.[0]).toHaveProperty("markdown"); - expect(response.data?.[0].markdown).toContain("_Roast_"); - expect(response.data?.[0]).not.toHaveProperty('content'); // v0 - expect(response.data?.[0]).not.toHaveProperty("html"); - expect(response.data?.[0]).not.toHaveProperty("rawHtml"); - expect(response.data?.[0]).not.toHaveProperty("screenshot"); - expect(response.data?.[0]).not.toHaveProperty("links"); - expect(response.data?.[0]).toHaveProperty("metadata"); - expect(response.data?.[0].metadata).toHaveProperty("title"); - expect(response.data?.[0].metadata).toHaveProperty("description"); - expect(response.data?.[0].metadata).toHaveProperty("language"); - expect(response.data?.[0].metadata).toHaveProperty("sourceURL"); - expect(response.data?.[0].metadata).toHaveProperty("statusCode"); - expect(response.data?.[0].metadata).not.toHaveProperty("error"); + expect(response.data.length).toBeGreaterThan(0); + expect(response.data[0]).not.toBeNull(); + expect(response.data[0]).not.toBeUndefined(); + if (response.data[0]) { + expect(response.data[0]).toHaveProperty("markdown"); + expect(response.data[0].markdown).toContain("_Roast_"); + expect(response.data[0]).not.toHaveProperty('content'); // v0 + expect(response.data[0]).not.toHaveProperty("html"); + expect(response.data[0]).not.toHaveProperty("rawHtml"); + expect(response.data[0]).not.toHaveProperty("screenshot"); + expect(response.data[0]).not.toHaveProperty("links"); + expect(response.data[0]).toHaveProperty("metadata"); + expect(response.data[0].metadata).toHaveProperty("title"); + expect(response.data[0].metadata).toHaveProperty("description"); + expect(response.data[0].metadata).toHaveProperty("language"); + expect(response.data[0].metadata).toHaveProperty("sourceURL"); + expect(response.data[0].metadata).toHaveProperty("statusCode"); + expect(response.data[0].metadata).not.toHaveProperty("error"); + } }, 60000); // 60 seconds timeout test.concurrent('should return successful response for crawl with options and wait for completion', async () => { @@ -173,7 +217,7 @@ describe('FirecrawlApp E2E Tests', () => { onlyMainContent: true, waitFor: 1000 } - } as CrawlParams, true, 30) as CrawlStatusResponse; + } as CrawlParams, 30) as CrawlStatusResponse; expect(response).not.toBeNull(); expect(response).toHaveProperty("total"); expect(response.total).toBeGreaterThan(0); @@ -184,41 +228,45 @@ describe('FirecrawlApp E2E Tests', () => { expect(response).toHaveProperty("status"); expect(response.status).toBe("completed"); expect(response).not.toHaveProperty("next"); - expect(response.data?.length).toBeGreaterThan(0); - expect(response.data?.[0]).toHaveProperty("markdown"); - expect(response.data?.[0].markdown).toContain("_Roast_"); - expect(response.data?.[0]).not.toHaveProperty('content'); // v0 - expect(response.data?.[0]).toHaveProperty("html"); - expect(response.data?.[0].html).toContain(" { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const uniqueIdempotencyKey = uuidv4(); - const response = await app.crawlUrl('https://roastmywebsite.ai', {}, false, 2, uniqueIdempotencyKey) as CrawlResponse; + const response = await app.asyncCrawlUrl('https://roastmywebsite.ai', {}, uniqueIdempotencyKey) as CrawlResponse; expect(response).not.toBeNull(); expect(response.id).toBeDefined(); - await expect(app.crawlUrl('https://roastmywebsite.ai', {}, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409"); + await expect(app.crawlUrl('https://roastmywebsite.ai', {}, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409"); }); test.concurrent('should check crawl status', async () => { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); - const response = await app.crawlUrl('https://firecrawl.dev', { scrapeOptions: { formats: ['markdown', 'html', 'rawHtml', 'screenshot', 'links']}} as CrawlParams, false) as CrawlResponse; + const response = await app.asyncCrawlUrl('https://firecrawl.dev', { scrapeOptions: { formats: ['markdown', 'html', 'rawHtml', 'screenshot', 'links']}} as CrawlParams) as CrawlResponse; expect(response).not.toBeNull(); expect(response.id).toBeDefined(); @@ -226,7 +274,8 @@ describe('FirecrawlApp E2E Tests', () => { const maxChecks = 15; let checks = 0; - while (statusResponse.status === 'scraping' && checks < maxChecks) { + expect(statusResponse.success).toBe(true); + while ((statusResponse as any).status === 'scraping' && checks < maxChecks) { await new Promise(resolve => setTimeout(resolve, 5000)); expect(statusResponse).not.toHaveProperty("partial_data"); // v0 expect(statusResponse).not.toHaveProperty("current"); // v0 @@ -236,44 +285,55 @@ describe('FirecrawlApp E2E Tests', () => { expect(statusResponse).toHaveProperty("expiresAt"); expect(statusResponse).toHaveProperty("status"); expect(statusResponse).toHaveProperty("next"); - expect(statusResponse.total).toBeGreaterThan(0); - expect(statusResponse.creditsUsed).toBeGreaterThan(0); - expect(statusResponse.expiresAt.getTime()).toBeGreaterThan(Date.now()); - expect(statusResponse.status).toBe("scraping"); - expect(statusResponse.next).toContain("/v1/crawl/"); + expect(statusResponse.success).toBe(true); + if (statusResponse.success === true) { + expect(statusResponse.total).toBeGreaterThan(0); + expect(statusResponse.creditsUsed).toBeGreaterThan(0); + expect(statusResponse.expiresAt.getTime()).toBeGreaterThan(Date.now()); + expect(statusResponse.status).toBe("scraping"); + expect(statusResponse.next).toContain("/v1/crawl/"); + } statusResponse = await app.checkCrawlStatus(response.id) as CrawlStatusResponse; + expect(statusResponse.success).toBe(true); checks++; } expect(statusResponse).not.toBeNull(); expect(statusResponse).toHaveProperty("total"); - expect(statusResponse.total).toBeGreaterThan(0); - expect(statusResponse).toHaveProperty("creditsUsed"); - expect(statusResponse.creditsUsed).toBeGreaterThan(0); - expect(statusResponse).toHaveProperty("expiresAt"); - expect(statusResponse.expiresAt.getTime()).toBeGreaterThan(Date.now()); - expect(statusResponse).toHaveProperty("status"); - expect(statusResponse.status).toBe("completed"); - expect(statusResponse.data?.length).toBeGreaterThan(0); - expect(statusResponse.data?.[0]).toHaveProperty("markdown"); - expect(statusResponse.data?.[0].markdown?.length).toBeGreaterThan(10); - expect(statusResponse.data?.[0]).not.toHaveProperty('content'); // v0 - expect(statusResponse.data?.[0]).toHaveProperty("html"); - expect(statusResponse.data?.[0].html).toContain(" { diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index 8b16adfb..6c859bee 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -1,5 +1,5 @@ -import axios, { AxiosResponse, AxiosRequestHeaders } from "axios"; -import { z } from "zod"; +import axios, { type AxiosResponse, type AxiosRequestHeaders, AxiosError } from "axios"; +import type * as zt from "zod"; import { zodToJsonSchema } from "zod-to-json-schema"; import { WebSocket } from "isows"; import { TypedEventTarget } from "typescript-event-target"; @@ -58,13 +58,13 @@ export interface FirecrawlDocumentMetadata { * Document interface for Firecrawl. * Represents a document retrieved or processed by Firecrawl. */ -export interface FirecrawlDocument { +export interface FirecrawlDocument { url?: string; markdown?: string; html?: string; rawHtml?: string; links?: string[]; - extract?: Record; + extract?: T; screenshot?: string; metadata?: FirecrawlDocumentMetadata; } @@ -73,26 +73,29 @@ export interface FirecrawlDocument { * Parameters for scraping operations. * Defines the options and configurations available for scraping web content. */ -export interface ScrapeParams { - formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "extract" | "full@scrennshot")[]; +export interface CrawlScrapeOptions { + formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract")[]; headers?: Record; includeTags?: string[]; excludeTags?: string[]; onlyMainContent?: boolean; - extract?: { - prompt?: string; - schema?: z.ZodSchema | any; - systemPrompt?: string; - }; waitFor?: number; timeout?: number; } +export interface ScrapeParams extends CrawlScrapeOptions { + extract?: { + prompt?: string; + schema?: LLMSchema; + systemPrompt?: string; + }; +} + /** * Response interface for scraping operations. * Defines the structure of the response received after a scraping operation. */ -export interface ScrapeResponse extends FirecrawlDocument { +export interface ScrapeResponse extends FirecrawlDocument { success: true; warning?: string; error?: string; @@ -110,7 +113,7 @@ export interface CrawlParams { allowBackwardLinks?: boolean; allowExternalLinks?: boolean; ignoreSitemap?: boolean; - scrapeOptions?: ScrapeParams; + scrapeOptions?: CrawlScrapeOptions; webhook?: string; } @@ -131,15 +134,14 @@ export interface CrawlResponse { */ export interface CrawlStatusResponse { success: true; - total: number; + status: "scraping" | "completed" | "failed" | "cancelled"; completed: number; + total: number; creditsUsed: number; expiresAt: Date; - status: "scraping" | "completed" | "failed"; - next: string; - data?: FirecrawlDocument[]; - error?: string; -} + next?: string; + data: FirecrawlDocument[]; +}; /** * Parameters for mapping operations. @@ -184,7 +186,11 @@ export default class FirecrawlApp { * @param config - Configuration options for the FirecrawlApp instance. */ constructor({ apiKey = null, apiUrl = null }: FirecrawlAppConfig) { - this.apiKey = apiKey || ""; + if (typeof apiKey !== "string") { + throw new Error("No API key provided"); + } + + this.apiKey = apiKey; this.apiUrl = apiUrl || "https://api.firecrawl.dev"; } @@ -194,10 +200,10 @@ export default class FirecrawlApp { * @param params - Additional parameters for the scrape request. * @returns The response from the scrape operation. */ - async scrapeUrl( + async scrapeUrl( url: string, - params?: ScrapeParams - ): Promise { + params?: ScrapeParams + ): Promise> | ErrorResponse> { const headers: AxiosRequestHeaders = { "Content-Type": "application/json", Authorization: `Bearer ${this.apiKey}`, @@ -329,9 +335,10 @@ export default class FirecrawlApp { /** * Checks the status of a crawl job using the Firecrawl API. * @param id - The ID of the crawl operation. + * @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`) * @returns The response containing the job status. */ - async checkCrawlStatus(id?: string): Promise { + async checkCrawlStatus(id?: string, getAllData = false): Promise { if (!id) { throw new Error("No crawl ID provided"); } @@ -343,16 +350,28 @@ export default class FirecrawlApp { headers ); if (response.status === 200) { + let allData = response.data.data; + if (getAllData && response.data.status === "completed") { + let statusData = response.data + if ("data" in statusData) { + let data = statusData.data; + while ('next' in statusData) { + statusData = (await this.getRequest(statusData.next, headers)).data; + data = data.concat(statusData.data); + } + allData = data; + } + } return ({ - success: true, + success: response.data.success, status: response.data.status, total: response.data.total, completed: response.data.completed, creditsUsed: response.data.creditsUsed, expiresAt: new Date(response.data.expiresAt), next: response.data.next, - data: response.data.data, - error: response.data.error + data: allData, + error: response.data.error, }) } else { this.handleError(response, "check crawl status"); @@ -433,11 +452,19 @@ export default class FirecrawlApp { * @param headers - The headers for the request. * @returns The response from the GET request. */ - getRequest( + async getRequest( url: string, headers: AxiosRequestHeaders ): Promise { - return axios.get(url, { headers }); + try { + return await axios.get(url, { headers }); + } catch (error) { + if (error instanceof AxiosError && error.response) { + return error.response as AxiosResponse; + } else { + throw error; + } + } } /** @@ -452,7 +479,7 @@ export default class FirecrawlApp { id: string, headers: AxiosRequestHeaders, checkInterval: number - ): Promise { + ): Promise { while (true) { let statusResponse: AxiosResponse = await this.getRequest( `${this.apiUrl}/v1/crawl/${id}`, @@ -460,20 +487,20 @@ export default class FirecrawlApp { ); if (statusResponse.status === 200) { let statusData = statusResponse.data; - if (statusData.status === "completed") { - if ("data" in statusData) { - let data = statusData.data; - while ('next' in statusData) { - statusResponse = await this.getRequest(statusData.next, headers); - statusData = statusResponse.data; - data = data.concat(statusData.data); + if (statusData.status === "completed") { + if ("data" in statusData) { + let data = statusData.data; + while ('next' in statusData) { + statusResponse = await this.getRequest(statusData.next, headers); + statusData = statusResponse.data; + data = data.concat(statusData.data); + } + statusData.data = data; + return statusData; + } else { + throw new Error("Crawl job completed but no data was returned"); } - statusData.data = data; - return statusData; - } else { - throw new Error("Crawl job completed but no data was returned"); - } - } else if ( + } else if ( ["active", "paused", "pending", "queued", "waiting", "scraping"].includes(statusData.status) ) { checkInterval = Math.max(checkInterval, 2); @@ -512,21 +539,21 @@ export default class FirecrawlApp { } interface CrawlWatcherEvents { - document: CustomEvent, + document: CustomEvent>, done: CustomEvent<{ status: CrawlStatusResponse["status"]; - data: FirecrawlDocument[]; + data: FirecrawlDocument[]; }>, error: CustomEvent<{ status: CrawlStatusResponse["status"], - data: FirecrawlDocument[], + data: FirecrawlDocument[], error: string, }>, } export class CrawlWatcher extends TypedEventTarget { private ws: WebSocket; - public data: FirecrawlDocument[]; + public data: FirecrawlDocument[]; public status: CrawlStatusResponse["status"]; constructor(id: string, app: FirecrawlApp) { @@ -547,7 +574,7 @@ export class CrawlWatcher extends TypedEventTarget { type DocumentMessage = { type: "document", - data: FirecrawlDocument, + data: FirecrawlDocument, } type DoneMessage = { type: "done" } diff --git a/apps/js-sdk/firecrawl/tsconfig.json b/apps/js-sdk/firecrawl/tsconfig.json index 56f13ced..1297aed9 100644 --- a/apps/js-sdk/firecrawl/tsconfig.json +++ b/apps/js-sdk/firecrawl/tsconfig.json @@ -1,110 +1,24 @@ { "compilerOptions": { - /* Visit https://aka.ms/tsconfig to read more about this file */ + // See https://www.totaltypescript.com/tsconfig-cheat-sheet + /* Base Options: */ + "esModuleInterop": true, + "skipLibCheck": true, + "target": "es2022", + "allowJs": true, + "resolveJsonModule": true, + "moduleDetection": "force", + "isolatedModules": true, + "verbatimModuleSyntax": true, - /* Projects */ - // "incremental": true, /* Save .tsbuildinfo files to allow for incremental compilation of projects. */ - // "composite": true, /* Enable constraints that allow a TypeScript project to be used with project references. */ - // "tsBuildInfoFile": "./.tsbuildinfo", /* Specify the path to .tsbuildinfo incremental compilation file. */ - // "disableSourceOfProjectReferenceRedirect": true, /* Disable preferring source files instead of declaration files when referencing composite projects. */ - // "disableSolutionSearching": true, /* Opt a project out of multi-project reference checking when editing. */ - // "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */ + /* Strictness */ + "strict": true, + "noUncheckedIndexedAccess": true, + "noImplicitOverride": true, - /* Language and Environment */ - "target": "es2020", /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */ - // "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */ - // "jsx": "preserve", /* Specify what JSX code is generated. */ - // "experimentalDecorators": true, /* Enable experimental support for legacy experimental decorators. */ - // "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */ - // "jsxFactory": "", /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h'. */ - // "jsxFragmentFactory": "", /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */ - // "jsxImportSource": "", /* Specify module specifier used to import the JSX factory functions when using 'jsx: react-jsx*'. */ - // "reactNamespace": "", /* Specify the object invoked for 'createElement'. This only applies when targeting 'react' JSX emit. */ - // "noLib": true, /* Disable including any library files, including the default lib.d.ts. */ - // "useDefineForClassFields": true, /* Emit ECMAScript-standard-compliant class fields. */ - // "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */ - - /* Modules */ - "module": "commonjs", /* Specify what module code is generated. */ - "rootDir": "./src", /* Specify the root folder within your source files. */ - "moduleResolution": "node", /* Specify how TypeScript looks up a file from a given module specifier. */ - // "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */ - // "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */ - // "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */ - // "typeRoots": [], /* Specify multiple folders that act like './node_modules/@types'. */ - // "types": [], /* Specify type package names to be included without being referenced in a source file. */ - // "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */ - // "moduleSuffixes": [], /* List of file name suffixes to search when resolving a module. */ - // "allowImportingTsExtensions": true, /* Allow imports to include TypeScript file extensions. Requires '--moduleResolution bundler' and either '--noEmit' or '--emitDeclarationOnly' to be set. */ - // "resolvePackageJsonExports": true, /* Use the package.json 'exports' field when resolving package imports. */ - // "resolvePackageJsonImports": true, /* Use the package.json 'imports' field when resolving imports. */ - // "customConditions": [], /* Conditions to set in addition to the resolver-specific defaults when resolving imports. */ - // "resolveJsonModule": true, /* Enable importing .json files. */ - // "allowArbitraryExtensions": true, /* Enable importing files with any extension, provided a declaration file is present. */ - // "noResolve": true, /* Disallow 'import's, 'require's or ''s from expanding the number of files TypeScript should add to a project. */ - - /* JavaScript Support */ - // "allowJs": true, /* Allow JavaScript files to be a part of your program. Use the 'checkJS' option to get errors from these files. */ - // "checkJs": true, /* Enable error reporting in type-checked JavaScript files. */ - // "maxNodeModuleJsDepth": 1, /* Specify the maximum folder depth used for checking JavaScript files from 'node_modules'. Only applicable with 'allowJs'. */ - - /* Emit */ - "declaration": true, /* Generate .d.ts files from TypeScript and JavaScript files in your project. */ - // "declarationMap": true, /* Create sourcemaps for d.ts files. */ - // "emitDeclarationOnly": true, /* Only output d.ts files and not JavaScript files. */ - // "sourceMap": true, /* Create source map files for emitted JavaScript files. */ - // "inlineSourceMap": true, /* Include sourcemap files inside the emitted JavaScript. */ - // "outFile": "./", /* Specify a file that bundles all outputs into one JavaScript file. If 'declaration' is true, also designates a file that bundles all .d.ts output. */ - "outDir": "./build", /* Specify an output folder for all emitted files. */ - // "removeComments": true, /* Disable emitting comments. */ - // "noEmit": true, /* Disable emitting files from a compilation. */ - // "importHelpers": true, /* Allow importing helper functions from tslib once per project, instead of including them per-file. */ - // "importsNotUsedAsValues": "remove", /* Specify emit/checking behavior for imports that are only used for types. */ - // "downlevelIteration": true, /* Emit more compliant, but verbose and less performant JavaScript for iteration. */ - // "sourceRoot": "", /* Specify the root path for debuggers to find the reference source code. */ - // "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */ - // "inlineSources": true, /* Include source code in the sourcemaps inside the emitted JavaScript. */ - // "emitBOM": true, /* Emit a UTF-8 Byte Order Mark (BOM) in the beginning of output files. */ - // "newLine": "crlf", /* Set the newline character for emitting files. */ - // "stripInternal": true, /* Disable emitting declarations that have '@internal' in their JSDoc comments. */ - // "noEmitHelpers": true, /* Disable generating custom helper functions like '__extends' in compiled output. */ - // "noEmitOnError": true, /* Disable emitting files if any type checking errors are reported. */ - // "preserveConstEnums": true, /* Disable erasing 'const enum' declarations in generated code. */ - "declarationDir": "./types", /* Specify the output directory for generated declaration files. */ - // "preserveValueImports": true, /* Preserve unused imported values in the JavaScript output that would otherwise be removed. */ - - /* Interop Constraints */ - // "isolatedModules": true, /* Ensure that each file can be safely transpiled without relying on other imports. */ - // "verbatimModuleSyntax": true, /* Do not transform or elide any imports or exports not marked as type-only, ensuring they are written in the output file's format based on the 'module' setting. */ - // "allowSyntheticDefaultImports": true, /* Allow 'import x from y' when a module doesn't have a default export. */ - "esModuleInterop": true, /* Emit additional JavaScript to ease support for importing CommonJS modules. This enables 'allowSyntheticDefaultImports' for type compatibility. */ - // "preserveSymlinks": true, /* Disable resolving symlinks to their realpath. This correlates to the same flag in node. */ - "forceConsistentCasingInFileNames": true, /* Ensure that casing is correct in imports. */ - - /* Type Checking */ - "strict": true, /* Enable all strict type-checking options. */ - // "noImplicitAny": true, /* Enable error reporting for expressions and declarations with an implied 'any' type. */ - // "strictNullChecks": true, /* When type checking, take into account 'null' and 'undefined'. */ - // "strictFunctionTypes": true, /* When assigning functions, check to ensure parameters and the return values are subtype-compatible. */ - // "strictBindCallApply": true, /* Check that the arguments for 'bind', 'call', and 'apply' methods match the original function. */ - // "strictPropertyInitialization": true, /* Check for class properties that are declared but not set in the constructor. */ - // "noImplicitThis": true, /* Enable error reporting when 'this' is given the type 'any'. */ - // "useUnknownInCatchVariables": true, /* Default catch clause variables as 'unknown' instead of 'any'. */ - // "alwaysStrict": true, /* Ensure 'use strict' is always emitted. */ - // "noUnusedLocals": true, /* Enable error reporting when local variables aren't read. */ - // "noUnusedParameters": true, /* Raise an error when a function parameter isn't read. */ - // "exactOptionalPropertyTypes": true, /* Interpret optional property types as written, rather than adding 'undefined'. */ - // "noImplicitReturns": true, /* Enable error reporting for codepaths that do not explicitly return in a function. */ - // "noFallthroughCasesInSwitch": true, /* Enable error reporting for fallthrough cases in switch statements. */ - // "noUncheckedIndexedAccess": true, /* Add 'undefined' to a type when accessed using an index. */ - // "noImplicitOverride": true, /* Ensure overriding members in derived classes are marked with an override modifier. */ - // "noPropertyAccessFromIndexSignature": true, /* Enforces using indexed accessors for keys declared using an indexed type. */ - // "allowUnusedLabels": true, /* Disable error reporting for unused labels. */ - // "allowUnreachableCode": true, /* Disable error reporting for unreachable code. */ - - /* Completeness */ - // "skipDefaultLibCheck": true, /* Skip type checking .d.ts files that are included with TypeScript. */ - "skipLibCheck": true /* Skip type checking all .d.ts files. */ + /* If NOT transpiling with TypeScript: */ + "module": "NodeNext", + "noEmit": true, }, "include": ["src/**/*"], "exclude": ["node_modules", "dist", "**/__tests__/*"] diff --git a/apps/js-sdk/firecrawl/tsup.config.ts b/apps/js-sdk/firecrawl/tsup.config.ts new file mode 100644 index 00000000..b3b7e42d --- /dev/null +++ b/apps/js-sdk/firecrawl/tsup.config.ts @@ -0,0 +1,9 @@ +import { defineConfig } from "tsup"; + +export default defineConfig({ + entryPoints: ["src/index.ts"], + format: ["cjs", "esm"], + dts: true, + outDir: "dist", + clean: true, +}); \ No newline at end of file diff --git a/apps/js-sdk/firecrawl/types/index.d.ts b/apps/js-sdk/firecrawl/types/index.d.ts deleted file mode 100644 index 36356c4e..00000000 --- a/apps/js-sdk/firecrawl/types/index.d.ts +++ /dev/null @@ -1,260 +0,0 @@ -import { AxiosResponse, AxiosRequestHeaders } from "axios"; -import { z } from "zod"; -import { TypedEventTarget } from "typescript-event-target"; -/** - * Configuration interface for FirecrawlApp. - * @param apiKey - Optional API key for authentication. - * @param apiUrl - Optional base URL of the API; defaults to 'https://api.firecrawl.dev'. - */ -export interface FirecrawlAppConfig { - apiKey?: string | null; - apiUrl?: string | null; -} -/** - * Metadata for a Firecrawl document. - * Includes various optional properties for document metadata. - */ -export interface FirecrawlDocumentMetadata { - title?: string; - description?: string; - language?: string; - keywords?: string; - robots?: string; - ogTitle?: string; - ogDescription?: string; - ogUrl?: string; - ogImage?: string; - ogAudio?: string; - ogDeterminer?: string; - ogLocale?: string; - ogLocaleAlternate?: string[]; - ogSiteName?: string; - ogVideo?: string; - dctermsCreated?: string; - dcDateCreated?: string; - dcDate?: string; - dctermsType?: string; - dcType?: string; - dctermsAudience?: string; - dctermsSubject?: string; - dcSubject?: string; - dcDescription?: string; - dctermsKeywords?: string; - modifiedTime?: string; - publishedTime?: string; - articleTag?: string; - articleSection?: string; - sourceURL?: string; - statusCode?: number; - error?: string; - [key: string]: any; -} -/** - * Document interface for Firecrawl. - * Represents a document retrieved or processed by Firecrawl. - */ -export interface FirecrawlDocument { - url?: string; - markdown?: string; - html?: string; - rawHtml?: string; - links?: string[]; - extract?: Record; - screenshot?: string; - metadata?: FirecrawlDocumentMetadata; -} -/** - * Parameters for scraping operations. - * Defines the options and configurations available for scraping web content. - */ -export interface ScrapeParams { - formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "extract" | "full@scrennshot")[]; - headers?: Record; - includeTags?: string[]; - excludeTags?: string[]; - onlyMainContent?: boolean; - extract?: { - prompt?: string; - schema?: z.ZodSchema | any; - systemPrompt?: string; - }; - waitFor?: number; - timeout?: number; -} -/** - * Response interface for scraping operations. - * Defines the structure of the response received after a scraping operation. - */ -export interface ScrapeResponse extends FirecrawlDocument { - success: true; - warning?: string; - error?: string; -} -/** - * Parameters for crawling operations. - * Includes options for both scraping and mapping during a crawl. - */ -export interface CrawlParams { - includePaths?: string[]; - excludePaths?: string[]; - maxDepth?: number; - limit?: number; - allowBackwardLinks?: boolean; - allowExternalLinks?: boolean; - ignoreSitemap?: boolean; - scrapeOptions?: ScrapeParams; - webhook?: string; -} -/** - * Response interface for crawling operations. - * Defines the structure of the response received after initiating a crawl. - */ -export interface CrawlResponse { - id?: string; - url?: string; - success: true; - error?: string; -} -/** - * Response interface for job status checks. - * Provides detailed status of a crawl job including progress and results. - */ -export interface CrawlStatusResponse { - success: true; - total: number; - completed: number; - creditsUsed: number; - expiresAt: Date; - status: "scraping" | "completed" | "failed"; - next: string; - data?: FirecrawlDocument[]; - error?: string; -} -/** - * Parameters for mapping operations. - * Defines options for mapping URLs during a crawl. - */ -export interface MapParams { - search?: string; - ignoreSitemap?: boolean; - includeSubdomains?: boolean; - limit?: number; -} -/** - * Response interface for mapping operations. - * Defines the structure of the response received after a mapping operation. - */ -export interface MapResponse { - success: true; - links?: string[]; - error?: string; -} -/** - * Error response interface. - * Defines the structure of the response received when an error occurs. - */ -export interface ErrorResponse { - success: false; - error: string; -} -/** - * Main class for interacting with the Firecrawl API. - * Provides methods for scraping, searching, crawling, and mapping web content. - */ -export default class FirecrawlApp { - apiKey: string; - apiUrl: string; - /** - * Initializes a new instance of the FirecrawlApp class. - * @param config - Configuration options for the FirecrawlApp instance. - */ - constructor({ apiKey, apiUrl }: FirecrawlAppConfig); - /** - * Scrapes a URL using the Firecrawl API. - * @param url - The URL to scrape. - * @param params - Additional parameters for the scrape request. - * @returns The response from the scrape operation. - */ - scrapeUrl(url: string, params?: ScrapeParams): Promise; - /** - * This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API. - * @param query - The search query string. - * @param params - Additional parameters for the search. - * @returns Throws an error advising to use version 0 of the API. - */ - search(query: string, params?: any): Promise; - /** - * Initiates a crawl job for a URL using the Firecrawl API. - * @param url - The URL to crawl. - * @param params - Additional parameters for the crawl request. - * @param pollInterval - Time in seconds for job status checks. - * @param idempotencyKey - Optional idempotency key for the request. - * @returns The response from the crawl operation. - */ - crawlUrl(url: string, params?: CrawlParams, pollInterval?: number, idempotencyKey?: string): Promise; - asyncCrawlUrl(url: string, params?: CrawlParams, idempotencyKey?: string): Promise; - /** - * Checks the status of a crawl job using the Firecrawl API. - * @param id - The ID of the crawl operation. - * @returns The response containing the job status. - */ - checkCrawlStatus(id?: string): Promise; - crawlUrlAndWatch(url: string, params?: CrawlParams, idempotencyKey?: string): Promise; - mapUrl(url: string, params?: MapParams): Promise; - /** - * Prepares the headers for an API request. - * @param idempotencyKey - Optional key to ensure idempotency. - * @returns The prepared headers. - */ - prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders; - /** - * Sends a POST request to the specified URL. - * @param url - The URL to send the request to. - * @param data - The data to send in the request. - * @param headers - The headers for the request. - * @returns The response from the POST request. - */ - postRequest(url: string, data: any, headers: AxiosRequestHeaders): Promise; - /** - * Sends a GET request to the specified URL. - * @param url - The URL to send the request to. - * @param headers - The headers for the request. - * @returns The response from the GET request. - */ - getRequest(url: string, headers: AxiosRequestHeaders): Promise; - /** - * Monitors the status of a crawl job until completion or failure. - * @param id - The ID of the crawl operation. - * @param headers - The headers for the request. - * @param checkInterval - Interval in seconds for job status checks. - * @param checkUrl - Optional URL to check the status (used for v1 API) - * @returns The final job status or data. - */ - monitorJobStatus(id: string, headers: AxiosRequestHeaders, checkInterval: number): Promise; - /** - * Handles errors from API responses. - * @param {AxiosResponse} response - The response from the API. - * @param {string} action - The action being performed when the error occurred. - */ - handleError(response: AxiosResponse, action: string): void; -} -interface CrawlWatcherEvents { - document: CustomEvent; - done: CustomEvent<{ - status: CrawlStatusResponse["status"]; - data: FirecrawlDocument[]; - }>; - error: CustomEvent<{ - status: CrawlStatusResponse["status"]; - data: FirecrawlDocument[]; - error: string; - }>; -} -export declare class CrawlWatcher extends TypedEventTarget { - private ws; - data: FirecrawlDocument[]; - status: CrawlStatusResponse["status"]; - constructor(id: string, app: FirecrawlApp); - close(): void; -} -export {}; diff --git a/apps/js-sdk/package-lock.json b/apps/js-sdk/package-lock.json index 95dd7d27..975b14e8 100644 --- a/apps/js-sdk/package-lock.json +++ b/apps/js-sdk/package-lock.json @@ -9,8 +9,9 @@ "version": "1.0.0", "license": "ISC", "dependencies": { - "@mendable/firecrawl-js": "^0.0.36", + "@mendable/firecrawl-js": "^1.0.3", "axios": "^1.6.8", + "firecrawl": "^1.2.0", "ts-node": "^10.9.2", "typescript": "^5.4.5", "uuid": "^10.0.0", @@ -422,12 +423,14 @@ } }, "node_modules/@mendable/firecrawl-js": { - "version": "0.0.36", - "resolved": "https://registry.npmjs.org/@mendable/firecrawl-js/-/firecrawl-js-0.0.36.tgz", - "integrity": "sha512-5zQMWUD49r6Q7cxj+QBthQ964Bm9fMooW4E8E4nIca3BMXCeEuQFVf5C3OEWwZf0SjJvR+5Yx2wUbXJWd1wCOA==", + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/@mendable/firecrawl-js/-/firecrawl-js-1.2.2.tgz", + "integrity": "sha512-2A1GzLD0bczlFIlcjxHcm/x8i76ndtV4EUzOfc81oOJ/HbycE2mbT6EUthoL+r4s5A8yO3bKr9o/GxmEn456VA==", "dependencies": { "axios": "^1.6.8", "dotenv": "^16.4.5", + "isows": "^1.0.4", + "typescript-event-target": "^1.1.1", "uuid": "^9.0.1", "zod": "^3.23.8", "zod-to-json-schema": "^3.23.0" @@ -594,6 +597,32 @@ "@esbuild/win32-x64": "0.20.2" } }, + "node_modules/firecrawl": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/firecrawl/-/firecrawl-1.2.0.tgz", + "integrity": "sha512-Sy1BCCvs5FhGc4yxPP7NG9iWnK8RXdvA1ZS/K1Gj+LrEN3iAT2WRzhYET7x8G2bif25F6rHJg57vdVb5sr6RyQ==", + "dependencies": { + "axios": "^1.6.8", + "dotenv": "^16.4.5", + "isows": "^1.0.4", + "typescript-event-target": "^1.1.1", + "uuid": "^9.0.1", + "zod": "^3.23.8", + "zod-to-json-schema": "^3.23.0" + } + }, + "node_modules/firecrawl/node_modules/uuid": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", + "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "bin": { + "uuid": "dist/bin/uuid" + } + }, "node_modules/follow-redirects": { "version": "1.15.6", "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz", @@ -652,6 +681,20 @@ "url": "https://github.com/privatenumber/get-tsconfig?sponsor=1" } }, + "node_modules/isows": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/isows/-/isows-1.0.4.tgz", + "integrity": "sha512-hEzjY+x9u9hPmBom9IIAqdJCwNLax+xrPb51vEPpERoFlIxgmZcHzsT5jKG06nvInKOBGvReAVz80Umed5CczQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/wagmi-dev" + } + ], + "peerDependencies": { + "ws": "*" + } + }, "node_modules/make-error": { "version": "1.3.6", "resolved": "https://registry.npmjs.org/make-error/-/make-error-1.3.6.tgz", @@ -763,6 +806,11 @@ "node": ">=14.17" } }, + "node_modules/typescript-event-target": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/typescript-event-target/-/typescript-event-target-1.1.1.tgz", + "integrity": "sha512-dFSOFBKV6uwaloBCCUhxlD3Pr/P1a/tJdcmPrTXCHlEFD3faj0mztjcGn6VBAhQ0/Bdy8K3VWrrqwbt/ffsYsg==" + }, "node_modules/undici-types": { "version": "5.26.5", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", @@ -786,6 +834,27 @@ "resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz", "integrity": "sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==" }, + "node_modules/ws": { + "version": "8.18.0", + "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.0.tgz", + "integrity": "sha512-8VbfWfHLbbwu3+N6OKsOMpBdT4kXPDDB9cJk2bJ6mh9ucxdlnNvH1e+roYkKmN9Nxw2yjz7VzeO9oOz2zJ04Pw==", + "peer": true, + "engines": { + "node": ">=10.0.0" + }, + "peerDependencies": { + "bufferutil": "^4.0.1", + "utf-8-validate": ">=5.0.2" + }, + "peerDependenciesMeta": { + "bufferutil": { + "optional": true + }, + "utf-8-validate": { + "optional": true + } + } + }, "node_modules/yn": { "version": "3.1.1", "resolved": "https://registry.npmjs.org/yn/-/yn-3.1.1.tgz", diff --git a/apps/js-sdk/package.json b/apps/js-sdk/package.json index b5d919f4..ac3ef038 100644 --- a/apps/js-sdk/package.json +++ b/apps/js-sdk/package.json @@ -13,6 +13,7 @@ "dependencies": { "@mendable/firecrawl-js": "^1.0.3", "axios": "^1.6.8", + "firecrawl": "^1.2.0", "ts-node": "^10.9.2", "typescript": "^5.4.5", "uuid": "^10.0.0", diff --git a/apps/python-sdk/firecrawl/__init__.py b/apps/python-sdk/firecrawl/__init__.py index f178cd61..540ce67e 100644 --- a/apps/python-sdk/firecrawl/__init__.py +++ b/apps/python-sdk/firecrawl/__init__.py @@ -13,7 +13,7 @@ import os from .firecrawl import FirecrawlApp -__version__ = "1.2.3" +__version__ = "1.2.4" # Define the logger for the Firecrawl project logger: logging.Logger = logging.getLogger("firecrawl") diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index 254f4c70..97f4e04f 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -13,7 +13,6 @@ import logging import os import time from typing import Any, Dict, Optional, List -import asyncio import json import requests @@ -229,7 +228,7 @@ class FirecrawlApp: json_data = {'url': url} if params: json_data.update(params) - + # Make the POST request with the prepared headers and JSON data response = requests.post( f'{self.api_url}{endpoint}', @@ -239,7 +238,7 @@ class FirecrawlApp: if response.status_code == 200: response = response.json() if response['success'] and 'links' in response: - return response['links'] + return response else: raise Exception(f'Failed to map URL. Error: {response["error"]}') else: @@ -435,4 +434,4 @@ class CrawlWatcher: self.dispatch_event('document', doc) elif msg['type'] == 'document': self.data.append(msg['data']) - self.dispatch_event('document', msg['data']) \ No newline at end of file + self.dispatch_event('document', msg['data']) diff --git a/apps/python-sdk/pyproject.toml b/apps/python-sdk/pyproject.toml index 969fb051..87cb91f1 100644 --- a/apps/python-sdk/pyproject.toml +++ b/apps/python-sdk/pyproject.toml @@ -12,8 +12,7 @@ dependencies = [ "requests", "python-dotenv", "websockets", - "asyncio", -"nest-asyncio" + "nest-asyncio" ] authors = [{name = "Mendable.ai",email = "nick@mendable.ai"}] maintainers = [{name = "Mendable.ai",email = "nick@mendable.ai"}] diff --git a/apps/python-sdk/requirements.txt b/apps/python-sdk/requirements.txt index 94971fde..db67ceeb 100644 --- a/apps/python-sdk/requirements.txt +++ b/apps/python-sdk/requirements.txt @@ -2,5 +2,4 @@ requests pytest python-dotenv websockets -asyncio nest-asyncio \ No newline at end of file diff --git a/apps/test-suite/tests/scrape.test.ts b/apps/test-suite/tests/scrape.test.ts index ec7b7202..8b2e15d1 100644 --- a/apps/test-suite/tests/scrape.test.ts +++ b/apps/test-suite/tests/scrape.test.ts @@ -31,6 +31,7 @@ describe("Scraping Checkup (E2E)", () => { describe("Scraping website tests with a dataset", () => { it("Should scrape the website and prompt it against OpenAI", async () => { + let totalTimeTaken = 0; let passedTests = 0; const batchSize = 15; // Adjusted to comply with the rate limit of 15 per minute const batchPromises = []; @@ -51,11 +52,16 @@ describe("Scraping Checkup (E2E)", () => { const batchPromise = Promise.all( batch.map(async (websiteData: WebsiteData) => { try { + const startTime = new Date().getTime(); const scrapedContent = await request(TEST_URL || "") - .post("/v0/scrape") + .post("/v1/scrape") .set("Content-Type", "application/json") .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) - .send({ url: websiteData.website, pageOptions: { onlyMainContent: true } }); + .send({ url: websiteData.website }); + + const endTime = new Date().getTime(); + const timeTaken = endTime - startTime; + totalTimeTaken += timeTaken; if (scrapedContent.statusCode !== 200) { console.error(`Failed to scrape ${websiteData.website} ${scrapedContent.statusCode}`); @@ -165,6 +171,7 @@ describe("Scraping Checkup (E2E)", () => { const timeTaken = (endTime - startTime) / 1000; console.log(`Score: ${score}%`); console.log(`Total tokens: ${totalTokens}`); + console.log(`Total time taken: ${totalTimeTaken} miliseconds`); await logErrors(errorLog, timeTaken, totalTokens, score, websitesData.length); diff --git a/apps/test-suite/utils/supabase.ts b/apps/test-suite/utils/supabase.ts index 3e66a991..a1549e24 100644 --- a/apps/test-suite/utils/supabase.ts +++ b/apps/test-suite/utils/supabase.ts @@ -1,5 +1,6 @@ import { createClient, SupabaseClient } from "@supabase/supabase-js"; -import "dotenv/config"; +import { configDotenv } from "dotenv"; +configDotenv(); // SupabaseService class initializes the Supabase client conditionally based on environment variables. class SupabaseService { diff --git a/examples/o1_web_crawler /o1_web_crawler.py b/examples/o1_web_crawler /o1_web_crawler.py new file mode 100644 index 00000000..45bbd1ee --- /dev/null +++ b/examples/o1_web_crawler /o1_web_crawler.py @@ -0,0 +1,152 @@ +import os +from firecrawl import FirecrawlApp +import json +from dotenv import load_dotenv +from openai import OpenAI + +# ANSI color codes +class Colors: + CYAN = '\033[96m' + YELLOW = '\033[93m' + GREEN = '\033[92m' + RED = '\033[91m' + MAGENTA = '\033[95m' + BLUE = '\033[94m' + RESET = '\033[0m' + +# Load environment variables +load_dotenv() + +# Retrieve API keys from environment variables +firecrawl_api_key = os.getenv("FIRECRAWL_API_KEY") +openai_api_key = os.getenv("OPENAI_API_KEY") + +# Initialize the FirecrawlApp and OpenAI client +app = FirecrawlApp(api_key=firecrawl_api_key) +client = OpenAI(api_key=openai_api_key) + +# Find the page that most likely contains the objective +def find_relevant_page_via_map(objective, url, app, client): + try: + print(f"{Colors.CYAN}Understood. The objective is: {objective}{Colors.RESET}") + print(f"{Colors.CYAN}Initiating search on the website: {url}{Colors.RESET}") + + map_prompt = f""" + The map function generates a list of URLs from a website and it accepts a search parameter. Based on the objective of: {objective}, come up with a 1-2 word search parameter that will help us find the information we need. Only respond with 1-2 words nothing else. + """ + + print(f"{Colors.YELLOW}Analyzing objective to determine optimal search parameter...{Colors.RESET}") + completion = client.chat.completions.create( + model="o1-preview", + messages=[ + { + "role": "user", + "content": [ + { + "type": "text", + "text": map_prompt + } + ] + } + ] + ) + + map_search_parameter = completion.choices[0].message.content + print(f"{Colors.GREEN}Optimal search parameter identified: {map_search_parameter}{Colors.RESET}") + + print(f"{Colors.YELLOW}Mapping website using the identified search parameter...{Colors.RESET}") + map_website = app.map_url(url, params={"search": map_search_parameter}) + print(f"{Colors.GREEN}Website mapping completed successfully.{Colors.RESET}") + print(f"{Colors.GREEN}Located {len(map_website)} relevant links.{Colors.RESET}") + return map_website + except Exception as e: + print(f"{Colors.RED}Error encountered during relevant page identification: {str(e)}{Colors.RESET}") + return None + +# Scrape the top 3 pages and see if the objective is met, if so return in json format else return None +def find_objective_in_top_pages(map_website, objective, app, client): + try: + # Get top 3 links from the map result + top_links = map_website[:3] if isinstance(map_website, list) else [] + print(f"{Colors.CYAN}Proceeding to analyze top {len(top_links)} links: {top_links}{Colors.RESET}") + + for link in top_links: + print(f"{Colors.YELLOW}Initiating scrape of page: {link}{Colors.RESET}") + # Scrape the page + scrape_result = app.scrape_url(link, params={'formats': ['markdown']}) + print(f"{Colors.GREEN}Page scraping completed successfully.{Colors.RESET}") + + + # Check if objective is met + check_prompt = f""" + Given the following scraped content and objective, determine if the objective is met. + If it is, extract the relevant information in a simple and concise JSON format. Use only the necessary fields and avoid nested structures if possible. + If the objective is not met with confidence, respond with 'Objective not met'. + + Objective: {objective} + Scraped content: {scrape_result['markdown']} + + Remember: + 1. Only return JSON if you are confident the objective is fully met. + 2. Keep the JSON structure as simple and flat as possible. + 3. Do not include any explanations or markdown formatting in your response. + """ + + completion = client.chat.completions.create( + model="o1-preview", + messages=[ + { + "role": "user", + "content": [ + { + "type": "text", + "text": check_prompt + } + ] + } + ] + ) + + result = completion.choices[0].message.content + + if result != "Objective not met": + print(f"{Colors.GREEN}Objective potentially fulfilled. Relevant information identified.{Colors.RESET}") + try: + return json.loads(result) + except json.JSONDecodeError: + print(f"{Colors.RED}Error in parsing response. Proceeding to next page...{Colors.RESET}") + else: + print(f"{Colors.YELLOW}Objective not met on this page. Proceeding to next link...{Colors.RESET}") + + print(f"{Colors.RED}All available pages analyzed. Objective not fulfilled in examined content.{Colors.RESET}") + return None + + except Exception as e: + print(f"{Colors.RED}Error encountered during page analysis: {str(e)}{Colors.RESET}") + return None + +# Main function to execute the process +def main(): + # Get user input + url = input(f"{Colors.BLUE}Enter the website to crawl: {Colors.RESET}") + objective = input(f"{Colors.BLUE}Enter your objective: {Colors.RESET}") + + print(f"{Colors.YELLOW}Initiating web crawling process...{Colors.RESET}") + # Find the relevant page + map_website = find_relevant_page_via_map(objective, url, app, client) + + if map_website: + print(f"{Colors.GREEN}Relevant pages identified. Proceeding with detailed analysis...{Colors.RESET}") + # Find objective in top pages + result = find_objective_in_top_pages(map_website, objective, app, client) + + if result: + print(f"{Colors.GREEN}Objective successfully fulfilled. Extracted information:{Colors.RESET}") + print(f"{Colors.MAGENTA}{json.dumps(result, indent=2)}{Colors.RESET}") + else: + print(f"{Colors.RED}Unable to fulfill the objective with the available content.{Colors.RESET}") + else: + print(f"{Colors.RED}No relevant pages identified. Consider refining the search parameters or trying a different website.{Colors.RESET}") + +if __name__ == "__main__": + main() diff --git a/examples/o1_web_crawler /requirements.txt b/examples/o1_web_crawler /requirements.txt new file mode 100644 index 00000000..249f8beb --- /dev/null +++ b/examples/o1_web_crawler /requirements.txt @@ -0,0 +1,3 @@ +firecrawl-py +python-dotenv +openai \ No newline at end of file diff --git a/examples/turning_docs_into_api_specs/turning_docs_into_api_specs.py b/examples/turning_docs_into_api_specs/turning_docs_into_api_specs.py new file mode 100644 index 00000000..47b54ede --- /dev/null +++ b/examples/turning_docs_into_api_specs/turning_docs_into_api_specs.py @@ -0,0 +1,137 @@ +# %% +import os +import datetime +import time +from firecrawl import FirecrawlApp +import json +import google.generativeai as genai +from dotenv import load_dotenv + +# Load environment variables +load_dotenv() + +# Retrieve API keys from environment variables +google_api_key = os.getenv("GOOGLE_API_KEY") +firecrawl_api_key = os.getenv("FIRECRAWL_API_KEY") + +# Configure the Google Generative AI module with the API key +genai.configure(api_key=google_api_key) +model = genai.GenerativeModel("gemini-1.5-pro-001") + +# Set the docs URL +docs_url = "https://docs.firecrawl.dev/api-reference" + +# Initialize the FirecrawlApp with your API key +app = FirecrawlApp(api_key=firecrawl_api_key) + +# %% +# Crawl all pages on docs +crawl_result = app.crawl_url(docs_url) +print(f"Total pages crawled: {len(crawl_result['data'])}") + +# %% +# Define the prompt instructions for generating OpenAPI specs +prompt_instructions = """ +Given the following API documentation content, generate an OpenAPI 3.0 specification in JSON format ONLY if you are 100% confident and clear about all details. Focus on extracting the main endpoints, their HTTP methods, parameters, request bodies, and responses. The specification should follow OpenAPI 3.0 structure and conventions. Include only the 200 response for each endpoint. Limit all descriptions to 5 words or less. + +If there is ANY uncertainty, lack of complete information, or if you are not 100% confident about ANY part of the specification, return an empty JSON object {{}}. + +Do not make anything up. Only include information that is explicitly provided in the documentation. If any detail is unclear or missing, do not attempt to fill it in. + +API Documentation Content: +{{content}} + +Generate the OpenAPI 3.0 specification in JSON format ONLY if you are 100% confident about every single detail. Include only the JSON object, no additional text, and ensure it has no errors in the JSON format so it can be parsed. Remember to include only the 200 response for each endpoint and keep all descriptions to 5 words maximum. + +Once again, if there is ANY doubt, uncertainty, or lack of complete information, return an empty JSON object {{}}. + +To reiterate: accuracy is paramount. Do not make anything up. If you are not 100% clear or confident about the entire OpenAPI spec, return an empty JSON object {{}}. +""" + +# %% +# Initialize a list to store all API specs +all_api_specs = [] + +# Process each page in crawl_result +for index, page in enumerate(crawl_result['data']): + if 'markdown' in page: + # Update prompt_instructions with the current page's content + current_prompt = prompt_instructions.replace("{content}", page['markdown']) + try: + # Query the model + response = model.generate_content([current_prompt]) + response_dict = response.to_dict() + response_text = response_dict['candidates'][0]['content']['parts'][0]['text'] + + # Remove the ```json code wrap if present + response_text = response_text.strip().removeprefix('```json').removesuffix('```').strip() + + # Parse JSON + json_data = json.loads(response_text) + + # Add non-empty API specs to the list + if json_data != {}: + all_api_specs.append(json_data) + print(f"API specification generated for page {index}") + else: + print(f"No API specification found for page {index}") + + except json.JSONDecodeError: + print(f"Error parsing JSON response for page {index}") + except Exception as e: + print(f"An error occurred for page {index}: {str(e)}") + +# Print the total number of API specs collected +print(f"Total API specifications collected: {len(all_api_specs)}") + +# %% +# Combine all API specs and keep the most filled out spec for each path and method +combined_spec = { + "openapi": "3.0.0", + "info": { + "title": f"{docs_url} API Specification", + "version": "1.0.0" + }, + "paths": {}, + "components": { + "schemas": {} + } +} + +# Helper function to count properties in an object +def count_properties(obj): + if isinstance(obj, dict): + return sum(count_properties(v) for v in obj.values()) + len(obj) + elif isinstance(obj, list): + return sum(count_properties(item) for item in obj) + else: + return 1 + +# Combine specs, keeping the most detailed version of each path and schema +for spec in all_api_specs: + # Combine paths + if "paths" in spec: + for path, methods in spec["paths"].items(): + if path not in combined_spec["paths"]: + combined_spec["paths"][path] = {} + for method, details in methods.items(): + if method not in combined_spec["paths"][path] or count_properties(details) > count_properties(combined_spec["paths"][path][method]): + combined_spec["paths"][path][method] = details + + # Combine schemas + if "components" in spec and "schemas" in spec["components"]: + for schema_name, schema in spec["components"]["schemas"].items(): + if schema_name not in combined_spec["components"]["schemas"] or count_properties(schema) > count_properties(combined_spec["components"]["schemas"][schema_name]): + combined_spec["components"]["schemas"][schema_name] = schema + +# Print summary of combined spec +print(f"Combined API specification generated") +print(f"Total paths in combined spec: {len(combined_spec['paths'])}") +print(f"Total schemas in combined spec: {len(combined_spec['components']['schemas'])}") + +# Save the combined spec to a JSON file in the same directory as the Python file +output_file = os.path.join(os.path.dirname(__file__), "combined_api_spec.json") +with open(output_file, "w") as f: + json.dump(combined_spec, f, indent=2) + +print(f"Combined API specification saved to {output_file}") diff --git a/img/open-source-cloud.png b/img/open-source-cloud.png new file mode 100644 index 00000000..acc15859 Binary files /dev/null and b/img/open-source-cloud.png differ