diff --git a/apps/api/package.json b/apps/api/package.json index 0da99459..56724de7 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -55,7 +55,7 @@ "@bull-board/api": "^5.20.5", "@bull-board/express": "^5.20.5", "@devil7softwares/pos": "^1.0.2", - "@dqbd/tiktoken": "^1.0.16", + "@dqbd/tiktoken": "^1.0.17", "@nangohq/node": "^0.40.8", "@sentry/cli": "^2.33.1", "@sentry/node": "^8.26.0", @@ -73,6 +73,7 @@ "cacheable-lookup": "^6.1.0", "cheerio": "^1.0.0-rc.12", "cohere": "^1.1.1", + "cohere-ai": "^7.14.0", "cors": "^2.8.5", "cron-parser": "^4.9.0", "date-fns": "^3.6.0", diff --git a/apps/api/pnpm-lock.yaml b/apps/api/pnpm-lock.yaml index c2a9c8a3..4557afa9 100644 --- a/apps/api/pnpm-lock.yaml +++ b/apps/api/pnpm-lock.yaml @@ -24,8 +24,8 @@ importers: specifier: ^1.0.2 version: 1.0.2 '@dqbd/tiktoken': - specifier: ^1.0.16 - version: 1.0.16 + specifier: ^1.0.17 + version: 1.0.17 '@nangohq/node': specifier: ^0.40.8 version: 0.40.8 @@ -77,6 +77,9 @@ importers: cohere: specifier: ^1.1.1 version: 1.1.1 + cohere-ai: + specifier: ^7.14.0 + version: 7.14.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)) cors: specifier: ^2.8.5 version: 2.8.5 @@ -124,7 +127,7 @@ importers: version: 2.9.0 langchain: specifier: ^0.2.8 - version: 0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0) + version: 
0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0) languagedetect: specifier: ^2.0.0 version: 2.0.0 @@ -145,10 +148,10 @@ importers: version: 2.30.1 mongoose: specifier: ^8.4.4 - version: 8.4.4(socks@2.8.3) + version: 8.4.4(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3) natural: specifier: ^7.0.7 - version: 7.0.7(socks@2.8.3) + version: 7.0.7(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3) openai: specifier: ^4.57.0 version: 4.57.0(zod@3.23.8) @@ -301,6 +304,154 @@ packages: '@anthropic-ai/sdk@0.24.3': resolution: {integrity: sha512-916wJXO6T6k8R6BAAcLhLPv/pnLGy7YSEBZXZ1XTFbLcTZE8oTy3oDW9WJf9KKZwMvVcePIfoTSvzXHRcGxkQQ==} + '@aws-crypto/crc32@3.0.0': + resolution: {integrity: sha512-IzSgsrxUcsrejQbPVilIKy16kAT52EwB6zSaI+M3xxIhKh5+aldEyvI+z6erM7TCLB2BJsFrtHjp6/4/sr+3dA==} + + '@aws-crypto/sha256-browser@5.2.0': + resolution: {integrity: sha512-AXfN/lGotSQwu6HNcEsIASo7kWXZ5HYWvfOmSNKDsEqC4OashTp8alTmaz+F7TC2L083SFv5RdB+qU3Vs1kZqw==} + + '@aws-crypto/sha256-js@5.2.0': + resolution: {integrity: sha512-FFQQyu7edu4ufvIZ+OadFpHHOt+eSTBaYaki44c+akjg7qZg9oOQeLlk77F6tSYqjDAFClrHJk9tMf0HdVyOvA==} + engines: {node: '>=16.0.0'} + + '@aws-crypto/supports-web-crypto@5.2.0': + resolution: {integrity: sha512-iAvUotm021kM33eCdNfwIN//F77/IADDSs58i+MDaOqFrVjZo9bAal0NK7HurRuWLLpF1iLX7gbWrjHjeo+YFg==} + + '@aws-crypto/util@3.0.0': + resolution: {integrity: 
sha512-2OJlpeJpCR48CC8r+uKVChzs9Iungj9wkZrl8Z041DWEWvyIHILYKCPNzJghKsivj+S3mLo6BVc7mBNzdxA46w==} + + '@aws-crypto/util@5.2.0': + resolution: {integrity: sha512-4RkU9EsI6ZpBve5fseQlGNUWKMa1RLPQ1dnjnQoe07ldfIzcsGb5hC5W0Dm7u423KWzawlrpbjXBrXCEv9zazQ==} + + '@aws-sdk/client-cognito-identity@3.679.0': + resolution: {integrity: sha512-vJzQ6QpaMu8itJMe3FH1/0rwMjL0ELh63iLTxiAmhiV/SvCwNNoSFLd2HdKxbV0Bg/x8lUiPVq3pl6+cxaIrEQ==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/client-sagemaker@3.679.0': + resolution: {integrity: sha512-n1hTHpQl6LwNkwn4vLmtbwkNoX2jxtiliRd0IaHR1CfAQvKNTfQ52mARWr73hR+/YcVsBzPx8sYKq2XHWArHKQ==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/client-sso-oidc@3.679.0': + resolution: {integrity: sha512-/dBYWcCwbA/id4sFCIVZvf0UsvzHCC68SryxeNQk/PDkY9N4n5yRcMUkZDaEyQCjowc3kY4JOXp2AdUP037nhA==} + engines: {node: '>=16.0.0'} + peerDependencies: + '@aws-sdk/client-sts': ^3.679.0 + + '@aws-sdk/client-sso@3.679.0': + resolution: {integrity: sha512-/0cAvYnpOZTo/Y961F1kx2fhDDLUYZ0SQQ5/75gh3xVImLj7Zw+vp74ieqFbqWLYGMaq8z1Arr9A8zG95mbLdg==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/client-sts@3.679.0': + resolution: {integrity: sha512-3CvrT8w1RjFu1g8vKA5Azfr5V83r2/b68Ock43WE003Bq/5Y38mwmYX7vk0fPHzC3qejt4YMAWk/C3fSKOy25g==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/core@3.679.0': + resolution: {integrity: sha512-CS6PWGX8l4v/xyvX8RtXnBisdCa5+URzKd0L6GvHChype9qKUVxO/Gg6N/y43Hvg7MNWJt9FBPNWIxUB+byJwg==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/credential-provider-cognito-identity@3.679.0': + resolution: {integrity: sha512-XvWd6RPk7TA7tmqITT+NXvJ6ltJP8BUtLO1NAvja4HKExPKR9HAyoOeeH7KM3lVRED4e4LUnLb3fzteH20IXaA==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/credential-provider-env@3.679.0': + resolution: {integrity: sha512-EdlTYbzMm3G7VUNAMxr9S1nC1qUNqhKlAxFU8E7cKsAe8Bp29CD5HAs3POc56AVo9GC4yRIS+/mtlZSmrckzUA==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/credential-provider-http@3.679.0': + resolution: {integrity: 
sha512-ZoKLubW5DqqV1/2a3TSn+9sSKg0T8SsYMt1JeirnuLJF0mCoYFUaWMyvxxKuxPoqvUsaycxKru4GkpJ10ltNBw==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/credential-provider-ini@3.679.0': + resolution: {integrity: sha512-Rg7t8RwUzKcumpipG4neZqaeJ6DF+Bco1+FHn5BZB68jpvwvjBjcQUuWkxj18B6ctYHr1fkunnzeKEn/+vy7+w==} + engines: {node: '>=16.0.0'} + peerDependencies: + '@aws-sdk/client-sts': ^3.679.0 + + '@aws-sdk/credential-provider-node@3.679.0': + resolution: {integrity: sha512-E3lBtaqCte8tWs6Rkssc8sLzvGoJ10TLGvpkijOlz43wPd6xCRh1YLwg6zolf9fVFtEyUs/GsgymiASOyxhFtw==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/credential-provider-process@3.679.0': + resolution: {integrity: sha512-u/p4TV8kQ0zJWDdZD4+vdQFTMhkDEJFws040Gm113VHa/Xo1SYOjbpvqeuFoz6VmM0bLvoOWjxB9MxnSQbwKpQ==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/credential-provider-sso@3.679.0': + resolution: {integrity: sha512-SAtWonhi9asxn0ukEbcE81jkyanKgqpsrtskvYPpO9Z9KOednM4Cqt6h1bfcS9zaHjN2zu815Gv8O7WiV+F/DQ==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/credential-provider-web-identity@3.679.0': + resolution: {integrity: sha512-a74tLccVznXCaBefWPSysUcLXYJiSkeUmQGtalNgJ1vGkE36W5l/8czFiiowdWdKWz7+x6xf0w+Kjkjlj42Ung==} + engines: {node: '>=16.0.0'} + peerDependencies: + '@aws-sdk/client-sts': ^3.679.0 + + '@aws-sdk/credential-providers@3.679.0': + resolution: {integrity: sha512-ZjZZb6OERw/UKlSqcJ24AUJIf/ekDLPZrPpo0kPMV70EQ0GkBiklIZ8qULu9bEcI2I4UIapBKRiXTrK4gA6YHg==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/middleware-host-header@3.679.0': + resolution: {integrity: sha512-y176HuQ8JRY3hGX8rQzHDSbCl9P5Ny9l16z4xmaiLo+Qfte7ee4Yr3yaAKd7GFoJ3/Mhud2XZ37fR015MfYl2w==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/middleware-logger@3.679.0': + resolution: {integrity: sha512-0vet8InEj7nvIvGKk+ch7bEF5SyZ7Us9U7YTEgXPrBNStKeRUsgwRm0ijPWWd0a3oz2okaEwXsFl7G/vI0XiEA==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/middleware-recursion-detection@3.679.0': + resolution: {integrity: 
sha512-sQoAZFsQiW/LL3DfKMYwBoGjYDEnMbA9WslWN8xneCmBAwKo6IcSksvYs23PP8XMIoBGe2I2J9BSr654XWygTQ==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/middleware-user-agent@3.679.0': + resolution: {integrity: sha512-4hdeXhPDURPqQLPd9jCpUEo9fQITXl3NM3W1MwcJpE0gdUM36uXkQOYsTPeeU/IRCLVjK8Htlh2oCaM9iJrLCA==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/protocol-http@3.374.0': + resolution: {integrity: sha512-9WpRUbINdGroV3HiZZIBoJvL2ndoWk39OfwxWs2otxByppJZNN14bg/lvCx5e8ggHUti7IBk5rb0nqQZ4m05pg==} + engines: {node: '>=14.0.0'} + deprecated: This package has moved to @smithy/protocol-http + + '@aws-sdk/region-config-resolver@3.679.0': + resolution: {integrity: sha512-Ybx54P8Tg6KKq5ck7uwdjiKif7n/8g1x+V0V9uTjBjRWqaIgiqzXwKWoPj6NCNkE7tJNtqI4JrNxp/3S3HvmRw==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/signature-v4@3.374.0': + resolution: {integrity: sha512-2xLJvSdzcZZAg0lsDLUAuSQuihzK0dcxIK7WmfuJeF7DGKJFmp9czQmz5f3qiDz6IDQzvgK1M9vtJSVCslJbyQ==} + engines: {node: '>=14.0.0'} + deprecated: This package has moved to @smithy/signature-v4 + + '@aws-sdk/token-providers@3.679.0': + resolution: {integrity: sha512-1/+Zso/x2jqgutKixYFQEGli0FELTgah6bm7aB+m2FAWH4Hz7+iMUsazg6nSWm714sG9G3h5u42Dmpvi9X6/hA==} + engines: {node: '>=16.0.0'} + peerDependencies: + '@aws-sdk/client-sso-oidc': ^3.679.0 + + '@aws-sdk/types@3.679.0': + resolution: {integrity: sha512-NwVq8YvInxQdJ47+zz4fH3BRRLC6lL+WLkvr242PVBbUOLRyK/lkwHlfiKUoeVIMyK5NF+up6TRg71t/8Bny6Q==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/util-endpoints@3.679.0': + resolution: {integrity: sha512-YL6s4Y/1zC45OvddvgE139fjeWSKKPgLlnfrvhVL7alNyY9n7beR4uhoDpNrt5mI6sn9qiBF17790o+xLAXjjg==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/util-locate-window@3.679.0': + resolution: {integrity: sha512-zKTd48/ZWrCplkXpYDABI74rQlbR0DNHs8nH95htfSLj9/mWRSwaGptoxwcihaq/77vi/fl2X3y0a1Bo8bt7RA==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/util-user-agent-browser@3.679.0': + resolution: {integrity: 
sha512-CusSm2bTBG1kFypcsqU8COhnYc6zltobsqs3nRrvYqYaOqtMnuE46K4XTWpnzKgwDejgZGOE+WYyprtAxrPvmQ==} + + '@aws-sdk/util-user-agent-node@3.679.0': + resolution: {integrity: sha512-Bw4uXZ+NU5ed6TNfo4tBbhBSW+2eQxXYjYBGl5gLUNUpg2pDFToQAP6rXBFiwcG52V2ny5oLGiD82SoYuYkAVg==} + engines: {node: '>=16.0.0'} + peerDependencies: + aws-crt: '>=1.0.0' + peerDependenciesMeta: + aws-crt: + optional: true + + '@aws-sdk/util-utf8-browser@3.259.0': + resolution: {integrity: sha512-UvFa/vR+e19XookZF8RzFZBrw2EUkQWxiBW0yYQAhvk3C+QVGl0H3ouca8LDBlBfQKXwmW3huo/59H8rwb1wJw==} + '@babel/code-frame@7.24.6': resolution: {integrity: sha512-ZJhac6FkEd1yhG2AHOmfcXG4ceoLltoCVJjN5XsWN9BifBQr+cHJbWi0h68HZuSORq+3WtJ2z0hwF2NG1b5kcA==} engines: {node: '>=6.9.0'} @@ -514,8 +665,8 @@ packages: engines: {node: '>=0'} deprecated: This package has been renamed to `fast-tag-pos` - '@dqbd/tiktoken@1.0.16': - resolution: {integrity: sha512-4uIrs5qxAwFVFFEP507HZIZhGOsgfaEMEWDXWalr+v+XP+wJwP60EVmkZtQyQe70IsKGVkx5umBxw4NfmU0pPg==} + '@dqbd/tiktoken@1.0.17': + resolution: {integrity: sha512-v2gz0V6DiuR2TsALM32TkBThf6LdjLbxe6HS/nx9/KJxuDX0Z7SGX7N7PvQfqIvRyus42jI9poVUqezc/j/aQw==} '@flydotio/dockerfile@0.4.11': resolution: {integrity: sha512-L52UAfrOhmAn3T4TxpeRofQOSO+Kctg+uraB4nLzo4mvvh+4Z7HYxSi7Dnq0Kirz+xx6fDIc4OMNT1EdaORecA==} @@ -983,6 +1134,216 @@ packages: '@sinonjs/fake-timers@10.3.0': resolution: {integrity: sha512-V4BG07kuYSUkTCSBHG8G8TNhM+F19jXFWnQtzj+we8DrkpSBCee9Z3Ms8yiGer/dlmhe35/Xdgyo3/0rQKg7YA==} + '@smithy/abort-controller@3.1.6': + resolution: {integrity: sha512-0XuhuHQlEqbNQZp7QxxrFTdVWdwxch4vjxYgfInF91hZFkPxf9QDrdQka0KfxFMPqLNzSw0b95uGTrLliQUavQ==} + engines: {node: '>=16.0.0'} + + '@smithy/config-resolver@3.0.10': + resolution: {integrity: sha512-Uh0Sz9gdUuz538nvkPiyv1DZRX9+D15EKDtnQP5rYVAzM/dnYk3P8cg73jcxyOitPgT3mE3OVj7ky7sibzHWkw==} + engines: {node: '>=16.0.0'} + + '@smithy/core@2.5.1': + resolution: {integrity: 
sha512-DujtuDA7BGEKExJ05W5OdxCoyekcKT3Rhg1ZGeiUWaz2BJIWXjZmsG/DIP4W48GHno7AQwRsaCb8NcBgH3QZpg==} + engines: {node: '>=16.0.0'} + + '@smithy/credential-provider-imds@3.2.5': + resolution: {integrity: sha512-4FTQGAsuwqTzVMmiRVTn0RR9GrbRfkP0wfu/tXWVHd2LgNpTY0uglQpIScXK4NaEyXbB3JmZt8gfVqO50lP8wg==} + engines: {node: '>=16.0.0'} + + '@smithy/eventstream-codec@1.1.0': + resolution: {integrity: sha512-3tEbUb8t8an226jKB6V/Q2XU/J53lCwCzULuBPEaF4JjSh+FlCMp7TmogE/Aij5J9DwlsZ4VAD/IRDuQ/0ZtMw==} + + '@smithy/fetch-http-handler@3.2.9': + resolution: {integrity: sha512-hYNVQOqhFQ6vOpenifFME546f0GfJn2OiQ3M0FDmuUu8V/Uiwy2wej7ZXxFBNqdx0R5DZAqWM1l6VRhGz8oE6A==} + + '@smithy/fetch-http-handler@4.0.0': + resolution: {integrity: sha512-MLb1f5tbBO2X6K4lMEKJvxeLooyg7guq48C2zKr4qM7F2Gpkz4dc+hdSgu77pCJ76jVqFBjZczHYAs6dp15N+g==} + + '@smithy/hash-node@3.0.8': + resolution: {integrity: sha512-tlNQYbfpWXHimHqrvgo14DrMAgUBua/cNoz9fMYcDmYej7MAmUcjav/QKQbFc3NrcPxeJ7QClER4tWZmfwoPng==} + engines: {node: '>=16.0.0'} + + '@smithy/invalid-dependency@3.0.8': + resolution: {integrity: sha512-7Qynk6NWtTQhnGTTZwks++nJhQ1O54Mzi7fz4PqZOiYXb4Z1Flpb2yRvdALoggTS8xjtohWUM+RygOtB30YL3Q==} + + '@smithy/is-array-buffer@1.1.0': + resolution: {integrity: sha512-twpQ/n+3OWZJ7Z+xu43MJErmhB/WO/mMTnqR6PwWQShvSJ/emx5d1N59LQZk6ZpTAeuRWrc+eHhkzTp9NFjNRQ==} + engines: {node: '>=14.0.0'} + + '@smithy/is-array-buffer@2.2.0': + resolution: {integrity: sha512-GGP3O9QFD24uGeAXYUjwSTXARoqpZykHadOmA8G5vfJPK0/DC67qa//0qvqrJzL1xc8WQWX7/yc7fwudjPHPhA==} + engines: {node: '>=14.0.0'} + + '@smithy/is-array-buffer@3.0.0': + resolution: {integrity: sha512-+Fsu6Q6C4RSJiy81Y8eApjEB5gVtM+oFKTffg+jSuwtvomJJrhUJBu2zS8wjXSgH/g1MKEWrzyChTBe6clb5FQ==} + engines: {node: '>=16.0.0'} + + '@smithy/middleware-content-length@3.0.10': + resolution: {integrity: sha512-T4dIdCs1d/+/qMpwhJ1DzOhxCZjZHbHazEPJWdB4GDi2HjIZllVzeBEcdJUN0fomV8DURsgOyrbEUzg3vzTaOg==} + engines: {node: '>=16.0.0'} + + '@smithy/middleware-endpoint@3.2.1': + resolution: {integrity: 
sha512-wWO3xYmFm6WRW8VsEJ5oU6h7aosFXfszlz3Dj176pTij6o21oZnzkCLzShfmRaaCHDkBXWBdO0c4sQAvLFP6zA==} + engines: {node: '>=16.0.0'} + + '@smithy/middleware-retry@3.0.25': + resolution: {integrity: sha512-m1F70cPaMBML4HiTgCw5I+jFNtjgz5z5UdGnUbG37vw6kh4UvizFYjqJGHvicfgKMkDL6mXwyPp5mhZg02g5sg==} + engines: {node: '>=16.0.0'} + + '@smithy/middleware-serde@3.0.8': + resolution: {integrity: sha512-Xg2jK9Wc/1g/MBMP/EUn2DLspN8LNt+GMe7cgF+Ty3vl+Zvu+VeZU5nmhveU+H8pxyTsjrAkci8NqY6OuvZnjA==} + engines: {node: '>=16.0.0'} + + '@smithy/middleware-stack@3.0.8': + resolution: {integrity: sha512-d7ZuwvYgp1+3682Nx0MD3D/HtkmZd49N3JUndYWQXfRZrYEnCWYc8BHcNmVsPAp9gKvlurdg/mubE6b/rPS9MA==} + engines: {node: '>=16.0.0'} + + '@smithy/node-config-provider@3.1.9': + resolution: {integrity: sha512-qRHoah49QJ71eemjuS/WhUXB+mpNtwHRWQr77J/m40ewBVVwvo52kYAmb7iuaECgGTTcYxHS4Wmewfwy++ueew==} + engines: {node: '>=16.0.0'} + + '@smithy/node-http-handler@3.2.5': + resolution: {integrity: sha512-PkOwPNeKdvX/jCpn0A8n9/TyoxjGZB8WVoJmm9YzsnAgggTj4CrjpRHlTQw7dlLZ320n1mY1y+nTRUDViKi/3w==} + engines: {node: '>=16.0.0'} + + '@smithy/property-provider@3.1.8': + resolution: {integrity: sha512-ukNUyo6rHmusG64lmkjFeXemwYuKge1BJ8CtpVKmrxQxc6rhUX0vebcptFA9MmrGsnLhwnnqeH83VTU9hwOpjA==} + engines: {node: '>=16.0.0'} + + '@smithy/protocol-http@1.2.0': + resolution: {integrity: sha512-GfGfruksi3nXdFok5RhgtOnWe5f6BndzYfmEXISD+5gAGdayFGpjWu5pIqIweTudMtse20bGbc+7MFZXT1Tb8Q==} + engines: {node: '>=14.0.0'} + + '@smithy/protocol-http@4.1.5': + resolution: {integrity: sha512-hsjtwpIemmCkm3ZV5fd/T0bPIugW1gJXwZ/hpuVubt2hEUApIoUTrf6qIdh9MAWlw0vjMrA1ztJLAwtNaZogvg==} + engines: {node: '>=16.0.0'} + + '@smithy/querystring-builder@3.0.8': + resolution: {integrity: sha512-btYxGVqFUARbUrN6VhL9c3dnSviIwBYD9Rz1jHuN1hgh28Fpv2xjU1HeCeDJX68xctz7r4l1PBnFhGg1WBBPuA==} + engines: {node: '>=16.0.0'} + + '@smithy/querystring-parser@3.0.8': + resolution: {integrity: 
sha512-BtEk3FG7Ks64GAbt+JnKqwuobJNX8VmFLBsKIwWr1D60T426fGrV2L3YS5siOcUhhp6/Y6yhBw1PSPxA5p7qGg==} + engines: {node: '>=16.0.0'} + + '@smithy/service-error-classification@3.0.8': + resolution: {integrity: sha512-uEC/kCCFto83bz5ZzapcrgGqHOh/0r69sZ2ZuHlgoD5kYgXJEThCoTuw/y1Ub3cE7aaKdznb+jD9xRPIfIwD7g==} + engines: {node: '>=16.0.0'} + + '@smithy/shared-ini-file-loader@3.1.9': + resolution: {integrity: sha512-/+OsJRNtoRbtsX0UpSgWVxFZLsJHo/4sTr+kBg/J78sr7iC+tHeOvOJrS5hCpVQ6sWBbhWLp1UNiuMyZhE6pmA==} + engines: {node: '>=16.0.0'} + + '@smithy/signature-v4@1.1.0': + resolution: {integrity: sha512-fDo3m7YqXBs7neciOePPd/X9LPm5QLlDMdIC4m1H6dgNLnXfLMFNIxEfPyohGA8VW9Wn4X8lygnPSGxDZSmp0Q==} + engines: {node: '>=14.0.0'} + + '@smithy/signature-v4@4.2.1': + resolution: {integrity: sha512-NsV1jF4EvmO5wqmaSzlnTVetemBS3FZHdyc5CExbDljcyJCEEkJr8ANu2JvtNbVg/9MvKAWV44kTrGS+Pi4INg==} + engines: {node: '>=16.0.0'} + + '@smithy/smithy-client@3.4.2': + resolution: {integrity: sha512-dxw1BDxJiY9/zI3cBqfVrInij6ShjpV4fmGHesGZZUiP9OSE/EVfdwdRz0PgvkEvrZHpsj2htRaHJfftE8giBA==} + engines: {node: '>=16.0.0'} + + '@smithy/types@1.2.0': + resolution: {integrity: sha512-z1r00TvBqF3dh4aHhya7nz1HhvCg4TRmw51fjMrh5do3h+ngSstt/yKlNbHeb9QxJmFbmN8KEVSWgb1bRvfEoA==} + engines: {node: '>=14.0.0'} + + '@smithy/types@3.6.0': + resolution: {integrity: sha512-8VXK/KzOHefoC65yRgCn5vG1cysPJjHnOVt9d0ybFQSmJgQj152vMn4EkYhGuaOmnnZvCPav/KnYyE6/KsNZ2w==} + engines: {node: '>=16.0.0'} + + '@smithy/url-parser@3.0.8': + resolution: {integrity: sha512-4FdOhwpTW7jtSFWm7SpfLGKIBC9ZaTKG5nBF0wK24aoQKQyDIKUw3+KFWCQ9maMzrgTJIuOvOnsV2lLGW5XjTg==} + + '@smithy/util-base64@3.0.0': + resolution: {integrity: sha512-Kxvoh5Qtt0CDsfajiZOCpJxgtPHXOKwmM+Zy4waD43UoEMA+qPxxa98aE/7ZhdnBFZFXMOiBR5xbcaMhLtznQQ==} + engines: {node: '>=16.0.0'} + + '@smithy/util-body-length-browser@3.0.0': + resolution: {integrity: sha512-cbjJs2A1mLYmqmyVl80uoLTJhAcfzMOyPgjwAYusWKMdLeNtzmMz9YxNl3/jRLoxSS3wkqkf0jwNdtXWtyEBaQ==} + + 
'@smithy/util-body-length-node@3.0.0': + resolution: {integrity: sha512-Tj7pZ4bUloNUP6PzwhN7K386tmSmEET9QtQg0TgdNOnxhZvCssHji+oZTUIuzxECRfG8rdm2PMw2WCFs6eIYkA==} + engines: {node: '>=16.0.0'} + + '@smithy/util-buffer-from@1.1.0': + resolution: {integrity: sha512-9m6NXE0ww+ra5HKHCHig20T+FAwxBAm7DIdwc/767uGWbRcY720ybgPacQNB96JMOI7xVr/CDa3oMzKmW4a+kw==} + engines: {node: '>=14.0.0'} + + '@smithy/util-buffer-from@2.2.0': + resolution: {integrity: sha512-IJdWBbTcMQ6DA0gdNhh/BwrLkDR+ADW5Kr1aZmd4k3DIF6ezMV4R2NIAmT08wQJ3yUK82thHWmC/TnK/wpMMIA==} + engines: {node: '>=14.0.0'} + + '@smithy/util-buffer-from@3.0.0': + resolution: {integrity: sha512-aEOHCgq5RWFbP+UDPvPot26EJHjOC+bRgse5A8V3FSShqd5E5UN4qc7zkwsvJPPAVsf73QwYcHN1/gt/rtLwQA==} + engines: {node: '>=16.0.0'} + + '@smithy/util-config-provider@3.0.0': + resolution: {integrity: sha512-pbjk4s0fwq3Di/ANL+rCvJMKM5bzAQdE5S/6RL5NXgMExFAi6UgQMPOm5yPaIWPpr+EOXKXRonJ3FoxKf4mCJQ==} + engines: {node: '>=16.0.0'} + + '@smithy/util-defaults-mode-browser@3.0.25': + resolution: {integrity: sha512-fRw7zymjIDt6XxIsLwfJfYUfbGoO9CmCJk6rjJ/X5cd20+d2Is7xjU5Kt/AiDt6hX8DAf5dztmfP5O82gR9emA==} + engines: {node: '>= 10.0.0'} + + '@smithy/util-defaults-mode-node@3.0.25': + resolution: {integrity: sha512-H3BSZdBDiVZGzt8TG51Pd2FvFO0PAx/A0mJ0EH8a13KJ6iUCdYnw/Dk/MdC1kTd0eUuUGisDFaxXVXo4HHFL1g==} + engines: {node: '>= 10.0.0'} + + '@smithy/util-endpoints@2.1.4': + resolution: {integrity: sha512-kPt8j4emm7rdMWQyL0F89o92q10gvCUa6sBkBtDJ7nV2+P7wpXczzOfoDJ49CKXe5CCqb8dc1W+ZdLlrKzSAnQ==} + engines: {node: '>=16.0.0'} + + '@smithy/util-hex-encoding@1.1.0': + resolution: {integrity: sha512-7UtIE9eH0u41zpB60Jzr0oNCQ3hMJUabMcKRUVjmyHTXiWDE4vjSqN6qlih7rCNeKGbioS7f/y2Jgym4QZcKFg==} + engines: {node: '>=14.0.0'} + + '@smithy/util-hex-encoding@3.0.0': + resolution: {integrity: sha512-eFndh1WEK5YMUYvy3lPlVmYY/fZcQE1D8oSf41Id2vCeIkKJXPcYDCZD+4+xViI6b1XSd7tE+s5AmXzz5ilabQ==} + engines: {node: '>=16.0.0'} + + '@smithy/util-middleware@1.1.0': + resolution: {integrity: 
sha512-6hhckcBqVgjWAqLy2vqlPZ3rfxLDhFWEmM7oLh2POGvsi7j0tHkbN7w4DFhuBExVJAbJ/qqxqZdRY6Fu7/OezQ==} + engines: {node: '>=14.0.0'} + + '@smithy/util-middleware@3.0.8': + resolution: {integrity: sha512-p7iYAPaQjoeM+AKABpYWeDdtwQNxasr4aXQEA/OmbOaug9V0odRVDy3Wx4ci8soljE/JXQo+abV0qZpW8NX0yA==} + engines: {node: '>=16.0.0'} + + '@smithy/util-retry@3.0.8': + resolution: {integrity: sha512-TCEhLnY581YJ+g1x0hapPz13JFqzmh/pMWL2KEFASC51qCfw3+Y47MrTmea4bUE5vsdxQ4F6/KFbUeSz22Q1ow==} + engines: {node: '>=16.0.0'} + + '@smithy/util-stream@3.2.1': + resolution: {integrity: sha512-R3ufuzJRxSJbE58K9AEnL/uSZyVdHzud9wLS8tIbXclxKzoe09CRohj2xV8wpx5tj7ZbiJaKYcutMm1eYgz/0A==} + engines: {node: '>=16.0.0'} + + '@smithy/util-uri-escape@1.1.0': + resolution: {integrity: sha512-/jL/V1xdVRt5XppwiaEU8Etp5WHZj609n0xMTuehmCqdoOFbId1M+aEeDWZsQ+8JbEB/BJ6ynY2SlYmOaKtt8w==} + engines: {node: '>=14.0.0'} + + '@smithy/util-uri-escape@3.0.0': + resolution: {integrity: sha512-LqR7qYLgZTD7nWLBecUi4aqolw8Mhza9ArpNEQ881MJJIU2sE5iHCK6TdyqqzcDLy0OPe10IY4T8ctVdtynubg==} + engines: {node: '>=16.0.0'} + + '@smithy/util-utf8@1.1.0': + resolution: {integrity: sha512-p/MYV+JmqmPyjdgyN2UxAeYDj9cBqCjp0C/NsTWnnjoZUVqoeZ6IrW915L9CAKWVECgv9lVQGc4u/yz26/bI1A==} + engines: {node: '>=14.0.0'} + + '@smithy/util-utf8@2.3.0': + resolution: {integrity: sha512-R8Rdn8Hy72KKcebgLiv8jQcQkXoLMOGGv5uI1/k0l+snqkOzQ1R0ChUBCxWMlBsFMekWjq0wRudIweFs7sKT5A==} + engines: {node: '>=14.0.0'} + + '@smithy/util-utf8@3.0.0': + resolution: {integrity: sha512-rUeT12bxFnplYDe815GXbq/oixEGHfRFFtcTF3YdDi/JaENIM6aSYYLJydG83UNzLXeRI5K8abYd/8Sp/QM0kA==} + engines: {node: '>=16.0.0'} + + '@smithy/util-waiter@3.1.7': + resolution: {integrity: sha512-d5yGlQtmN/z5eoTtIYgkvOw27US2Ous4VycnXatyoImIF9tzlcpnKqQ/V7qhvJmb2p6xZne1NopCLakdTnkBBQ==} + engines: {node: '>=16.0.0'} + '@stdlib/assert-has-own-property@0.0.7': resolution: {integrity: sha512-3YHwSWiUqGlTLSwxAWxrqaD1PkgcJniGyotJeIt5X0tSNmSW0/c9RWroCImTUUB3zBkyBJ79MyU9Nf4Qgm59fQ==} engines: {node: '>=0.10.0', 
npm: '>2.7.0'} @@ -1473,6 +1834,9 @@ packages: bottleneck@2.19.5: resolution: {integrity: sha512-VHiNCbI1lKdl44tGrhNfU3lup0Tj/ZBMJB5/2ZbNXRCPuRCO7ed2mgcK4r17y+KB2EfuYuRaVlwNbAeaWGSpbw==} + bowser@2.11.0: + resolution: {integrity: sha512-AlcaJBi/pqqJBIQ8U9Mcpc9i8Aqxn88Skv5d+xBX006BY5u8N3mGLHa5Lgppa7L/HfwgwLgZ6NYs+Ag6uUmJRA==} + brace-expansion@1.1.11: resolution: {integrity: sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==} @@ -1603,6 +1967,9 @@ packages: resolution: {integrity: sha512-QVb0dM5HvG+uaxitm8wONl7jltx8dqhfU33DcqtOZcLSVIKSDDLDi7+0LbAKiyI8hD9u42m2YxXSkMGWThaecQ==} engines: {iojs: '>= 1.0.0', node: '>= 0.12.0'} + cohere-ai@7.14.0: + resolution: {integrity: sha512-hSo2/tFV29whjFFtVtdS7kHmtUsjfMO1sgwE/d5bhOE4O7Vkj5G1R9lLIqkIprp/+rrvCq3HGvEaOgry7xRcDA==} + cohere@1.1.1: resolution: {integrity: sha512-D116FKTuauCShJjPuOAFnkyAPMhV/6f403+yPZwyyFY6gErK1AA41y9rQdBvj8eHDZ9sXVJ6TzmzObVfAFh3ig==} @@ -2024,6 +2391,10 @@ packages: fast-safe-stringify@2.1.1: resolution: {integrity: sha512-W+KJc2dmILlPplD/H4K9l9LcAHAfPtP6BY84uVLXQ6Evcz9Lcg33Y2z1IVblT6xdY54PXYVHEv+0Wpq8Io6zkA==} + fast-xml-parser@4.4.1: + resolution: {integrity: sha512-xkjOecfnKGkSsOwtZ5Pz7Us/T6mrbPQrq0nh+aCO5V9nk5NLWmasAHumTKjiPJPWANe+kAZ84Jc8ooJkzZ88Sw==} + hasBin: true + fb-watchman@2.0.2: resolution: {integrity: sha512-p5161BqbuCaSnB8jIbzQHOlpgsPmK5rJVDfDKO91Axs5NC1uu3HRQm6wt9cd9/+GtQQIO53JdGXXoyDpTAsgYA==} @@ -2075,6 +2446,10 @@ packages: form-data-encoder@1.7.2: resolution: {integrity: sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A==} + form-data-encoder@4.0.2: + resolution: {integrity: sha512-KQVhvhK8ZkWzxKxOr56CPulAhH3dobtuQ4+hNQ+HekH/Wp5gSOafqRAeTphQUJAIk0GBvHZgJ2ZGRWd5kphMuw==} + engines: {node: '>= 18'} + form-data@4.0.0: resolution: {integrity: sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==} engines: {node: '>= 6'} @@ -2083,6 +2458,10 @@ packages: 
resolution: {integrity: sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==} engines: {node: '>= 12.20'} + formdata-node@6.0.3: + resolution: {integrity: sha512-8e1++BCiTzUno9v5IZ2J6bv4RU+3UKDmqWUQD0MIMVCd9AdhWkO1gw57oo1mNEX1dMq2EGI+FbWz4B92pscSQg==} + engines: {node: '>= 18'} + formdata-polyfill@4.0.10: resolution: {integrity: sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==} engines: {node: '>=12.20.0'} @@ -2544,6 +2923,9 @@ packages: joplin-turndown-plugin-gfm@1.0.12: resolution: {integrity: sha512-qL4+1iycQjZ1fs8zk3jSRk7cg3ROBUHk7GKtiLAQLFzLPKErnILUvz5DLszSQvz3s1sTjPbywLDISVUtBY6HaA==} + js-base64@3.7.2: + resolution: {integrity: sha512-NnRs6dsyqUXejqk/yv2aiXlAvOs56sLkX6nUdeaNezI5LFFLlsZjOThmwnrcwh5ZZRwZlCMnVAY3CvhIhoVEKQ==} + js-beautify@1.15.1: resolution: {integrity: sha512-ESjNzSlt/sWE8sciZH8kBF8BPlwXPwhR6pWKAw8bw4Bwj+iZcnKW6ONWUutJ7eObuBZQpiIb8S7OYspWrKt7rA==} engines: {node: '>=14'} @@ -3439,6 +3821,10 @@ packages: resolution: {integrity: sha512-MvjoMCJwEarSbUYk5O+nmoSzSutSsTwF85zcHPQ9OrlFoZOYIjaqBAJIqIXjptyD5vThxGq52Xu/MaJzRkIk4Q==} engines: {node: '>=0.6'} + qs@6.11.2: + resolution: {integrity: sha512-tDNIz22aBzCDxLtVH++VnTfzxlfeK5CbqohpSqpJgj1Wg/cQbStNAz3NuqCs5vV+pjBsK4x4pN9HlVh7rcYRiA==} + engines: {node: '>=0.6'} + qs@6.12.2: resolution: {integrity: sha512-x+NLUpx9SYrcwXtX7ob1gnkSems4i/mGZX5SlYxwIau6RrUSODO89TR/XDGGpn5RPWSYIB+aSfuSlV5+CmbTBg==} engines: {node: '>=0.6'} @@ -3772,6 +4158,9 @@ packages: resolution: {integrity: sha512-syeEEd112om/waJ5gOQ+SaYi+setuidQ4ZIPiQREF4yJeegXhn2HKy6C0JYm7uhVQKfMAvuZ22dIRsnoDv7AMw==} engines: {node: '>=12.*'} + strnum@1.0.5: + resolution: {integrity: sha512-J8bbNyKKXl5qYcR36TIO8W3mVGVHrmmxsd5PAItGkmyzwJvybiw2IVq5nqd0i4LSNSkB/sx9VHllbfFdr9k1JA==} + supabase@1.172.2: resolution: {integrity: sha512-h2J6kKEikXnZyurUcCYg215qkQpINOhdWkiclHcWAuVeqXsNrfrYaf1s0qbbcdRyMtrVW48I+VdVTw71Cnn20Q==} engines: {npm: '>=8'} @@ -4208,6 +4597,503 
@@ snapshots: transitivePeerDependencies: - encoding + '@aws-crypto/crc32@3.0.0': + dependencies: + '@aws-crypto/util': 3.0.0 + '@aws-sdk/types': 3.679.0 + tslib: 1.14.1 + + '@aws-crypto/sha256-browser@5.2.0': + dependencies: + '@aws-crypto/sha256-js': 5.2.0 + '@aws-crypto/supports-web-crypto': 5.2.0 + '@aws-crypto/util': 5.2.0 + '@aws-sdk/types': 3.679.0 + '@aws-sdk/util-locate-window': 3.679.0 + '@smithy/util-utf8': 2.3.0 + tslib: 2.6.3 + + '@aws-crypto/sha256-js@5.2.0': + dependencies: + '@aws-crypto/util': 5.2.0 + '@aws-sdk/types': 3.679.0 + tslib: 2.6.3 + + '@aws-crypto/supports-web-crypto@5.2.0': + dependencies: + tslib: 2.6.3 + + '@aws-crypto/util@3.0.0': + dependencies: + '@aws-sdk/types': 3.679.0 + '@aws-sdk/util-utf8-browser': 3.259.0 + tslib: 1.14.1 + + '@aws-crypto/util@5.2.0': + dependencies: + '@aws-sdk/types': 3.679.0 + '@smithy/util-utf8': 2.3.0 + tslib: 2.6.3 + + '@aws-sdk/client-cognito-identity@3.679.0': + dependencies: + '@aws-crypto/sha256-browser': 5.2.0 + '@aws-crypto/sha256-js': 5.2.0 + '@aws-sdk/client-sso-oidc': 3.679.0(@aws-sdk/client-sts@3.679.0) + '@aws-sdk/client-sts': 3.679.0 + '@aws-sdk/core': 3.679.0 + '@aws-sdk/credential-provider-node': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0) + '@aws-sdk/middleware-host-header': 3.679.0 + '@aws-sdk/middleware-logger': 3.679.0 + '@aws-sdk/middleware-recursion-detection': 3.679.0 + '@aws-sdk/middleware-user-agent': 3.679.0 + '@aws-sdk/region-config-resolver': 3.679.0 + '@aws-sdk/types': 3.679.0 + '@aws-sdk/util-endpoints': 3.679.0 + '@aws-sdk/util-user-agent-browser': 3.679.0 + '@aws-sdk/util-user-agent-node': 3.679.0 + '@smithy/config-resolver': 3.0.10 + '@smithy/core': 2.5.1 + '@smithy/fetch-http-handler': 3.2.9 + '@smithy/hash-node': 3.0.8 + '@smithy/invalid-dependency': 3.0.8 + '@smithy/middleware-content-length': 3.0.10 + '@smithy/middleware-endpoint': 3.2.1 + '@smithy/middleware-retry': 3.0.25 + '@smithy/middleware-serde': 3.0.8 + 
'@smithy/middleware-stack': 3.0.8 + '@smithy/node-config-provider': 3.1.9 + '@smithy/node-http-handler': 3.2.5 + '@smithy/protocol-http': 4.1.5 + '@smithy/smithy-client': 3.4.2 + '@smithy/types': 3.6.0 + '@smithy/url-parser': 3.0.8 + '@smithy/util-base64': 3.0.0 + '@smithy/util-body-length-browser': 3.0.0 + '@smithy/util-body-length-node': 3.0.0 + '@smithy/util-defaults-mode-browser': 3.0.25 + '@smithy/util-defaults-mode-node': 3.0.25 + '@smithy/util-endpoints': 2.1.4 + '@smithy/util-middleware': 3.0.8 + '@smithy/util-retry': 3.0.8 + '@smithy/util-utf8': 3.0.0 + tslib: 2.6.3 + transitivePeerDependencies: + - aws-crt + + '@aws-sdk/client-sagemaker@3.679.0': + dependencies: + '@aws-crypto/sha256-browser': 5.2.0 + '@aws-crypto/sha256-js': 5.2.0 + '@aws-sdk/client-sso-oidc': 3.679.0(@aws-sdk/client-sts@3.679.0) + '@aws-sdk/client-sts': 3.679.0 + '@aws-sdk/core': 3.679.0 + '@aws-sdk/credential-provider-node': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0) + '@aws-sdk/middleware-host-header': 3.679.0 + '@aws-sdk/middleware-logger': 3.679.0 + '@aws-sdk/middleware-recursion-detection': 3.679.0 + '@aws-sdk/middleware-user-agent': 3.679.0 + '@aws-sdk/region-config-resolver': 3.679.0 + '@aws-sdk/types': 3.679.0 + '@aws-sdk/util-endpoints': 3.679.0 + '@aws-sdk/util-user-agent-browser': 3.679.0 + '@aws-sdk/util-user-agent-node': 3.679.0 + '@smithy/config-resolver': 3.0.10 + '@smithy/core': 2.5.1 + '@smithy/fetch-http-handler': 3.2.9 + '@smithy/hash-node': 3.0.8 + '@smithy/invalid-dependency': 3.0.8 + '@smithy/middleware-content-length': 3.0.10 + '@smithy/middleware-endpoint': 3.2.1 + '@smithy/middleware-retry': 3.0.25 + '@smithy/middleware-serde': 3.0.8 + '@smithy/middleware-stack': 3.0.8 + '@smithy/node-config-provider': 3.1.9 + '@smithy/node-http-handler': 3.2.5 + '@smithy/protocol-http': 4.1.5 + '@smithy/smithy-client': 3.4.2 + '@smithy/types': 3.6.0 + '@smithy/url-parser': 3.0.8 + '@smithy/util-base64': 3.0.0 + 
'@smithy/util-body-length-browser': 3.0.0 + '@smithy/util-body-length-node': 3.0.0 + '@smithy/util-defaults-mode-browser': 3.0.25 + '@smithy/util-defaults-mode-node': 3.0.25 + '@smithy/util-endpoints': 2.1.4 + '@smithy/util-middleware': 3.0.8 + '@smithy/util-retry': 3.0.8 + '@smithy/util-utf8': 3.0.0 + '@smithy/util-waiter': 3.1.7 + '@types/uuid': 9.0.8 + tslib: 2.6.3 + uuid: 9.0.1 + transitivePeerDependencies: + - aws-crt + + '@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)': + dependencies: + '@aws-crypto/sha256-browser': 5.2.0 + '@aws-crypto/sha256-js': 5.2.0 + '@aws-sdk/client-sts': 3.679.0 + '@aws-sdk/core': 3.679.0 + '@aws-sdk/credential-provider-node': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0) + '@aws-sdk/middleware-host-header': 3.679.0 + '@aws-sdk/middleware-logger': 3.679.0 + '@aws-sdk/middleware-recursion-detection': 3.679.0 + '@aws-sdk/middleware-user-agent': 3.679.0 + '@aws-sdk/region-config-resolver': 3.679.0 + '@aws-sdk/types': 3.679.0 + '@aws-sdk/util-endpoints': 3.679.0 + '@aws-sdk/util-user-agent-browser': 3.679.0 + '@aws-sdk/util-user-agent-node': 3.679.0 + '@smithy/config-resolver': 3.0.10 + '@smithy/core': 2.5.1 + '@smithy/fetch-http-handler': 3.2.9 + '@smithy/hash-node': 3.0.8 + '@smithy/invalid-dependency': 3.0.8 + '@smithy/middleware-content-length': 3.0.10 + '@smithy/middleware-endpoint': 3.2.1 + '@smithy/middleware-retry': 3.0.25 + '@smithy/middleware-serde': 3.0.8 + '@smithy/middleware-stack': 3.0.8 + '@smithy/node-config-provider': 3.1.9 + '@smithy/node-http-handler': 3.2.5 + '@smithy/protocol-http': 4.1.5 + '@smithy/smithy-client': 3.4.2 + '@smithy/types': 3.6.0 + '@smithy/url-parser': 3.0.8 + '@smithy/util-base64': 3.0.0 + '@smithy/util-body-length-browser': 3.0.0 + '@smithy/util-body-length-node': 3.0.0 + '@smithy/util-defaults-mode-browser': 3.0.25 + '@smithy/util-defaults-mode-node': 3.0.25 + '@smithy/util-endpoints': 2.1.4 + '@smithy/util-middleware': 3.0.8 + 
'@smithy/util-retry': 3.0.8 + '@smithy/util-utf8': 3.0.0 + tslib: 2.6.3 + transitivePeerDependencies: + - aws-crt + + '@aws-sdk/client-sso@3.679.0': + dependencies: + '@aws-crypto/sha256-browser': 5.2.0 + '@aws-crypto/sha256-js': 5.2.0 + '@aws-sdk/core': 3.679.0 + '@aws-sdk/middleware-host-header': 3.679.0 + '@aws-sdk/middleware-logger': 3.679.0 + '@aws-sdk/middleware-recursion-detection': 3.679.0 + '@aws-sdk/middleware-user-agent': 3.679.0 + '@aws-sdk/region-config-resolver': 3.679.0 + '@aws-sdk/types': 3.679.0 + '@aws-sdk/util-endpoints': 3.679.0 + '@aws-sdk/util-user-agent-browser': 3.679.0 + '@aws-sdk/util-user-agent-node': 3.679.0 + '@smithy/config-resolver': 3.0.10 + '@smithy/core': 2.5.1 + '@smithy/fetch-http-handler': 3.2.9 + '@smithy/hash-node': 3.0.8 + '@smithy/invalid-dependency': 3.0.8 + '@smithy/middleware-content-length': 3.0.10 + '@smithy/middleware-endpoint': 3.2.1 + '@smithy/middleware-retry': 3.0.25 + '@smithy/middleware-serde': 3.0.8 + '@smithy/middleware-stack': 3.0.8 + '@smithy/node-config-provider': 3.1.9 + '@smithy/node-http-handler': 3.2.5 + '@smithy/protocol-http': 4.1.5 + '@smithy/smithy-client': 3.4.2 + '@smithy/types': 3.6.0 + '@smithy/url-parser': 3.0.8 + '@smithy/util-base64': 3.0.0 + '@smithy/util-body-length-browser': 3.0.0 + '@smithy/util-body-length-node': 3.0.0 + '@smithy/util-defaults-mode-browser': 3.0.25 + '@smithy/util-defaults-mode-node': 3.0.25 + '@smithy/util-endpoints': 2.1.4 + '@smithy/util-middleware': 3.0.8 + '@smithy/util-retry': 3.0.8 + '@smithy/util-utf8': 3.0.0 + tslib: 2.6.3 + transitivePeerDependencies: + - aws-crt + + '@aws-sdk/client-sts@3.679.0': + dependencies: + '@aws-crypto/sha256-browser': 5.2.0 + '@aws-crypto/sha256-js': 5.2.0 + '@aws-sdk/client-sso-oidc': 3.679.0(@aws-sdk/client-sts@3.679.0) + '@aws-sdk/core': 3.679.0 + '@aws-sdk/credential-provider-node': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0) + '@aws-sdk/middleware-host-header': 3.679.0 + 
'@aws-sdk/middleware-logger': 3.679.0 + '@aws-sdk/middleware-recursion-detection': 3.679.0 + '@aws-sdk/middleware-user-agent': 3.679.0 + '@aws-sdk/region-config-resolver': 3.679.0 + '@aws-sdk/types': 3.679.0 + '@aws-sdk/util-endpoints': 3.679.0 + '@aws-sdk/util-user-agent-browser': 3.679.0 + '@aws-sdk/util-user-agent-node': 3.679.0 + '@smithy/config-resolver': 3.0.10 + '@smithy/core': 2.5.1 + '@smithy/fetch-http-handler': 3.2.9 + '@smithy/hash-node': 3.0.8 + '@smithy/invalid-dependency': 3.0.8 + '@smithy/middleware-content-length': 3.0.10 + '@smithy/middleware-endpoint': 3.2.1 + '@smithy/middleware-retry': 3.0.25 + '@smithy/middleware-serde': 3.0.8 + '@smithy/middleware-stack': 3.0.8 + '@smithy/node-config-provider': 3.1.9 + '@smithy/node-http-handler': 3.2.5 + '@smithy/protocol-http': 4.1.5 + '@smithy/smithy-client': 3.4.2 + '@smithy/types': 3.6.0 + '@smithy/url-parser': 3.0.8 + '@smithy/util-base64': 3.0.0 + '@smithy/util-body-length-browser': 3.0.0 + '@smithy/util-body-length-node': 3.0.0 + '@smithy/util-defaults-mode-browser': 3.0.25 + '@smithy/util-defaults-mode-node': 3.0.25 + '@smithy/util-endpoints': 2.1.4 + '@smithy/util-middleware': 3.0.8 + '@smithy/util-retry': 3.0.8 + '@smithy/util-utf8': 3.0.0 + tslib: 2.6.3 + transitivePeerDependencies: + - aws-crt + + '@aws-sdk/core@3.679.0': + dependencies: + '@aws-sdk/types': 3.679.0 + '@smithy/core': 2.5.1 + '@smithy/node-config-provider': 3.1.9 + '@smithy/property-provider': 3.1.8 + '@smithy/protocol-http': 4.1.5 + '@smithy/signature-v4': 4.2.1 + '@smithy/smithy-client': 3.4.2 + '@smithy/types': 3.6.0 + '@smithy/util-middleware': 3.0.8 + fast-xml-parser: 4.4.1 + tslib: 2.6.3 + + '@aws-sdk/credential-provider-cognito-identity@3.679.0': + dependencies: + '@aws-sdk/client-cognito-identity': 3.679.0 + '@aws-sdk/types': 3.679.0 + '@smithy/property-provider': 3.1.8 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + transitivePeerDependencies: + - aws-crt + + '@aws-sdk/credential-provider-env@3.679.0': + dependencies: + 
'@aws-sdk/core': 3.679.0 + '@aws-sdk/types': 3.679.0 + '@smithy/property-provider': 3.1.8 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@aws-sdk/credential-provider-http@3.679.0': + dependencies: + '@aws-sdk/core': 3.679.0 + '@aws-sdk/types': 3.679.0 + '@smithy/fetch-http-handler': 3.2.9 + '@smithy/node-http-handler': 3.2.5 + '@smithy/property-provider': 3.1.8 + '@smithy/protocol-http': 4.1.5 + '@smithy/smithy-client': 3.4.2 + '@smithy/types': 3.6.0 + '@smithy/util-stream': 3.2.1 + tslib: 2.6.3 + + '@aws-sdk/credential-provider-ini@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0)': + dependencies: + '@aws-sdk/client-sts': 3.679.0 + '@aws-sdk/core': 3.679.0 + '@aws-sdk/credential-provider-env': 3.679.0 + '@aws-sdk/credential-provider-http': 3.679.0 + '@aws-sdk/credential-provider-process': 3.679.0 + '@aws-sdk/credential-provider-sso': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)) + '@aws-sdk/credential-provider-web-identity': 3.679.0(@aws-sdk/client-sts@3.679.0) + '@aws-sdk/types': 3.679.0 + '@smithy/credential-provider-imds': 3.2.5 + '@smithy/property-provider': 3.1.8 + '@smithy/shared-ini-file-loader': 3.1.9 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + transitivePeerDependencies: + - '@aws-sdk/client-sso-oidc' + - aws-crt + + '@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0)': + dependencies: + '@aws-sdk/credential-provider-env': 3.679.0 + '@aws-sdk/credential-provider-http': 3.679.0 + '@aws-sdk/credential-provider-ini': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0) + '@aws-sdk/credential-provider-process': 3.679.0 + '@aws-sdk/credential-provider-sso': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)) + '@aws-sdk/credential-provider-web-identity': 3.679.0(@aws-sdk/client-sts@3.679.0) + '@aws-sdk/types': 3.679.0 + 
'@smithy/credential-provider-imds': 3.2.5 + '@smithy/property-provider': 3.1.8 + '@smithy/shared-ini-file-loader': 3.1.9 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + transitivePeerDependencies: + - '@aws-sdk/client-sso-oidc' + - '@aws-sdk/client-sts' + - aws-crt + + '@aws-sdk/credential-provider-process@3.679.0': + dependencies: + '@aws-sdk/core': 3.679.0 + '@aws-sdk/types': 3.679.0 + '@smithy/property-provider': 3.1.8 + '@smithy/shared-ini-file-loader': 3.1.9 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@aws-sdk/credential-provider-sso@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))': + dependencies: + '@aws-sdk/client-sso': 3.679.0 + '@aws-sdk/core': 3.679.0 + '@aws-sdk/token-providers': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)) + '@aws-sdk/types': 3.679.0 + '@smithy/property-provider': 3.1.8 + '@smithy/shared-ini-file-loader': 3.1.9 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + transitivePeerDependencies: + - '@aws-sdk/client-sso-oidc' + - aws-crt + + '@aws-sdk/credential-provider-web-identity@3.679.0(@aws-sdk/client-sts@3.679.0)': + dependencies: + '@aws-sdk/client-sts': 3.679.0 + '@aws-sdk/core': 3.679.0 + '@aws-sdk/types': 3.679.0 + '@smithy/property-provider': 3.1.8 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))': + dependencies: + '@aws-sdk/client-cognito-identity': 3.679.0 + '@aws-sdk/client-sso': 3.679.0 + '@aws-sdk/client-sts': 3.679.0 + '@aws-sdk/core': 3.679.0 + '@aws-sdk/credential-provider-cognito-identity': 3.679.0 + '@aws-sdk/credential-provider-env': 3.679.0 + '@aws-sdk/credential-provider-http': 3.679.0 + '@aws-sdk/credential-provider-ini': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0) + '@aws-sdk/credential-provider-node': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0) + 
'@aws-sdk/credential-provider-process': 3.679.0 + '@aws-sdk/credential-provider-sso': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)) + '@aws-sdk/credential-provider-web-identity': 3.679.0(@aws-sdk/client-sts@3.679.0) + '@aws-sdk/types': 3.679.0 + '@smithy/credential-provider-imds': 3.2.5 + '@smithy/property-provider': 3.1.8 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + transitivePeerDependencies: + - '@aws-sdk/client-sso-oidc' + - aws-crt + + '@aws-sdk/middleware-host-header@3.679.0': + dependencies: + '@aws-sdk/types': 3.679.0 + '@smithy/protocol-http': 4.1.5 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@aws-sdk/middleware-logger@3.679.0': + dependencies: + '@aws-sdk/types': 3.679.0 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@aws-sdk/middleware-recursion-detection@3.679.0': + dependencies: + '@aws-sdk/types': 3.679.0 + '@smithy/protocol-http': 4.1.5 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@aws-sdk/middleware-user-agent@3.679.0': + dependencies: + '@aws-sdk/core': 3.679.0 + '@aws-sdk/types': 3.679.0 + '@aws-sdk/util-endpoints': 3.679.0 + '@smithy/core': 2.5.1 + '@smithy/protocol-http': 4.1.5 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@aws-sdk/protocol-http@3.374.0': + dependencies: + '@smithy/protocol-http': 1.2.0 + tslib: 2.6.3 + + '@aws-sdk/region-config-resolver@3.679.0': + dependencies: + '@aws-sdk/types': 3.679.0 + '@smithy/node-config-provider': 3.1.9 + '@smithy/types': 3.6.0 + '@smithy/util-config-provider': 3.0.0 + '@smithy/util-middleware': 3.0.8 + tslib: 2.6.3 + + '@aws-sdk/signature-v4@3.374.0': + dependencies: + '@smithy/signature-v4': 1.1.0 + tslib: 2.6.3 + + '@aws-sdk/token-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))': + dependencies: + '@aws-sdk/client-sso-oidc': 3.679.0(@aws-sdk/client-sts@3.679.0) + '@aws-sdk/types': 3.679.0 + '@smithy/property-provider': 3.1.8 + '@smithy/shared-ini-file-loader': 3.1.9 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@aws-sdk/types@3.679.0': + 
dependencies: + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@aws-sdk/util-endpoints@3.679.0': + dependencies: + '@aws-sdk/types': 3.679.0 + '@smithy/types': 3.6.0 + '@smithy/util-endpoints': 2.1.4 + tslib: 2.6.3 + + '@aws-sdk/util-locate-window@3.679.0': + dependencies: + tslib: 2.6.3 + + '@aws-sdk/util-user-agent-browser@3.679.0': + dependencies: + '@aws-sdk/types': 3.679.0 + '@smithy/types': 3.6.0 + bowser: 2.11.0 + tslib: 2.6.3 + + '@aws-sdk/util-user-agent-node@3.679.0': + dependencies: + '@aws-sdk/middleware-user-agent': 3.679.0 + '@aws-sdk/types': 3.679.0 + '@smithy/node-config-provider': 3.1.9 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@aws-sdk/util-utf8-browser@3.259.0': + dependencies: + tslib: 2.6.3 + '@babel/code-frame@7.24.6': dependencies: '@babel/highlight': 7.24.6 @@ -4457,7 +5343,7 @@ snapshots: '@devil7softwares/pos@1.0.2': {} - '@dqbd/tiktoken@1.0.16': {} + '@dqbd/tiktoken@1.0.17': {} '@flydotio/dockerfile@0.4.11': dependencies: @@ -4676,13 +5562,13 @@ snapshots: '@jridgewell/resolve-uri': 3.1.2 '@jridgewell/sourcemap-codec': 1.4.15 - '@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))': + 
'@langchain/core@0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))': dependencies: ansi-styles: 5.2.0 camelcase: 6.3.0 decamelize: 1.2.0 js-tiktoken: 1.0.12 - langsmith: 0.1.34(@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)))(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)) + langsmith: 0.1.34(zyeavx4tfqw3smbbpiinhfxxeu) ml-distance: 4.0.1 mustache: 4.2.0 p-queue: 6.6.2 @@ -4694,9 +5580,9 @@ snapshots: - langchain - openai - '@langchain/openai@0.2.1(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))': + 
'@langchain/openai@0.2.1(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))': dependencies: - '@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)) + '@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)) js-tiktoken: 1.0.12 openai: 4.57.0(zod@3.23.8) zod: 3.23.8 @@ -4705,9 +5591,9 @@ snapshots: - encoding - langchain - '@langchain/textsplitters@0.0.3(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))': + 
'@langchain/textsplitters@0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8))': dependencies: - '@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)) + '@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)) js-tiktoken: 1.0.12 transitivePeerDependencies: - langchain @@ -5147,6 +6033,340 @@ snapshots: dependencies: '@sinonjs/commons': 3.0.1 + '@smithy/abort-controller@3.1.6': + dependencies: + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@smithy/config-resolver@3.0.10': + dependencies: + '@smithy/node-config-provider': 3.1.9 + '@smithy/types': 3.6.0 + '@smithy/util-config-provider': 3.0.0 + '@smithy/util-middleware': 3.0.8 + tslib: 2.6.3 + + 
'@smithy/core@2.5.1': + dependencies: + '@smithy/middleware-serde': 3.0.8 + '@smithy/protocol-http': 4.1.5 + '@smithy/types': 3.6.0 + '@smithy/util-body-length-browser': 3.0.0 + '@smithy/util-middleware': 3.0.8 + '@smithy/util-stream': 3.2.1 + '@smithy/util-utf8': 3.0.0 + tslib: 2.6.3 + + '@smithy/credential-provider-imds@3.2.5': + dependencies: + '@smithy/node-config-provider': 3.1.9 + '@smithy/property-provider': 3.1.8 + '@smithy/types': 3.6.0 + '@smithy/url-parser': 3.0.8 + tslib: 2.6.3 + + '@smithy/eventstream-codec@1.1.0': + dependencies: + '@aws-crypto/crc32': 3.0.0 + '@smithy/types': 1.2.0 + '@smithy/util-hex-encoding': 1.1.0 + tslib: 2.6.3 + + '@smithy/fetch-http-handler@3.2.9': + dependencies: + '@smithy/protocol-http': 4.1.5 + '@smithy/querystring-builder': 3.0.8 + '@smithy/types': 3.6.0 + '@smithy/util-base64': 3.0.0 + tslib: 2.6.3 + + '@smithy/fetch-http-handler@4.0.0': + dependencies: + '@smithy/protocol-http': 4.1.5 + '@smithy/querystring-builder': 3.0.8 + '@smithy/types': 3.6.0 + '@smithy/util-base64': 3.0.0 + tslib: 2.6.3 + + '@smithy/hash-node@3.0.8': + dependencies: + '@smithy/types': 3.6.0 + '@smithy/util-buffer-from': 3.0.0 + '@smithy/util-utf8': 3.0.0 + tslib: 2.6.3 + + '@smithy/invalid-dependency@3.0.8': + dependencies: + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@smithy/is-array-buffer@1.1.0': + dependencies: + tslib: 2.6.3 + + '@smithy/is-array-buffer@2.2.0': + dependencies: + tslib: 2.6.3 + + '@smithy/is-array-buffer@3.0.0': + dependencies: + tslib: 2.6.3 + + '@smithy/middleware-content-length@3.0.10': + dependencies: + '@smithy/protocol-http': 4.1.5 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@smithy/middleware-endpoint@3.2.1': + dependencies: + '@smithy/core': 2.5.1 + '@smithy/middleware-serde': 3.0.8 + '@smithy/node-config-provider': 3.1.9 + '@smithy/shared-ini-file-loader': 3.1.9 + '@smithy/types': 3.6.0 + '@smithy/url-parser': 3.0.8 + '@smithy/util-middleware': 3.0.8 + tslib: 2.6.3 + + '@smithy/middleware-retry@3.0.25': + 
dependencies: + '@smithy/node-config-provider': 3.1.9 + '@smithy/protocol-http': 4.1.5 + '@smithy/service-error-classification': 3.0.8 + '@smithy/smithy-client': 3.4.2 + '@smithy/types': 3.6.0 + '@smithy/util-middleware': 3.0.8 + '@smithy/util-retry': 3.0.8 + tslib: 2.6.3 + uuid: 9.0.1 + + '@smithy/middleware-serde@3.0.8': + dependencies: + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@smithy/middleware-stack@3.0.8': + dependencies: + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@smithy/node-config-provider@3.1.9': + dependencies: + '@smithy/property-provider': 3.1.8 + '@smithy/shared-ini-file-loader': 3.1.9 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@smithy/node-http-handler@3.2.5': + dependencies: + '@smithy/abort-controller': 3.1.6 + '@smithy/protocol-http': 4.1.5 + '@smithy/querystring-builder': 3.0.8 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@smithy/property-provider@3.1.8': + dependencies: + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@smithy/protocol-http@1.2.0': + dependencies: + '@smithy/types': 1.2.0 + tslib: 2.6.3 + + '@smithy/protocol-http@4.1.5': + dependencies: + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@smithy/querystring-builder@3.0.8': + dependencies: + '@smithy/types': 3.6.0 + '@smithy/util-uri-escape': 3.0.0 + tslib: 2.6.3 + + '@smithy/querystring-parser@3.0.8': + dependencies: + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@smithy/service-error-classification@3.0.8': + dependencies: + '@smithy/types': 3.6.0 + + '@smithy/shared-ini-file-loader@3.1.9': + dependencies: + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@smithy/signature-v4@1.1.0': + dependencies: + '@smithy/eventstream-codec': 1.1.0 + '@smithy/is-array-buffer': 1.1.0 + '@smithy/types': 1.2.0 + '@smithy/util-hex-encoding': 1.1.0 + '@smithy/util-middleware': 1.1.0 + '@smithy/util-uri-escape': 1.1.0 + '@smithy/util-utf8': 1.1.0 + tslib: 2.6.3 + + '@smithy/signature-v4@4.2.1': + dependencies: + '@smithy/is-array-buffer': 3.0.0 + '@smithy/protocol-http': 4.1.5 + '@smithy/types': 3.6.0 + 
'@smithy/util-hex-encoding': 3.0.0 + '@smithy/util-middleware': 3.0.8 + '@smithy/util-uri-escape': 3.0.0 + '@smithy/util-utf8': 3.0.0 + tslib: 2.6.3 + + '@smithy/smithy-client@3.4.2': + dependencies: + '@smithy/core': 2.5.1 + '@smithy/middleware-endpoint': 3.2.1 + '@smithy/middleware-stack': 3.0.8 + '@smithy/protocol-http': 4.1.5 + '@smithy/types': 3.6.0 + '@smithy/util-stream': 3.2.1 + tslib: 2.6.3 + + '@smithy/types@1.2.0': + dependencies: + tslib: 2.6.3 + + '@smithy/types@3.6.0': + dependencies: + tslib: 2.6.3 + + '@smithy/url-parser@3.0.8': + dependencies: + '@smithy/querystring-parser': 3.0.8 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@smithy/util-base64@3.0.0': + dependencies: + '@smithy/util-buffer-from': 3.0.0 + '@smithy/util-utf8': 3.0.0 + tslib: 2.6.3 + + '@smithy/util-body-length-browser@3.0.0': + dependencies: + tslib: 2.6.3 + + '@smithy/util-body-length-node@3.0.0': + dependencies: + tslib: 2.6.3 + + '@smithy/util-buffer-from@1.1.0': + dependencies: + '@smithy/is-array-buffer': 1.1.0 + tslib: 2.6.3 + + '@smithy/util-buffer-from@2.2.0': + dependencies: + '@smithy/is-array-buffer': 2.2.0 + tslib: 2.6.3 + + '@smithy/util-buffer-from@3.0.0': + dependencies: + '@smithy/is-array-buffer': 3.0.0 + tslib: 2.6.3 + + '@smithy/util-config-provider@3.0.0': + dependencies: + tslib: 2.6.3 + + '@smithy/util-defaults-mode-browser@3.0.25': + dependencies: + '@smithy/property-provider': 3.1.8 + '@smithy/smithy-client': 3.4.2 + '@smithy/types': 3.6.0 + bowser: 2.11.0 + tslib: 2.6.3 + + '@smithy/util-defaults-mode-node@3.0.25': + dependencies: + '@smithy/config-resolver': 3.0.10 + '@smithy/credential-provider-imds': 3.2.5 + '@smithy/node-config-provider': 3.1.9 + '@smithy/property-provider': 3.1.8 + '@smithy/smithy-client': 3.4.2 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@smithy/util-endpoints@2.1.4': + dependencies: + '@smithy/node-config-provider': 3.1.9 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@smithy/util-hex-encoding@1.1.0': + dependencies: + tslib: 2.6.3 + + 
'@smithy/util-hex-encoding@3.0.0': + dependencies: + tslib: 2.6.3 + + '@smithy/util-middleware@1.1.0': + dependencies: + tslib: 2.6.3 + + '@smithy/util-middleware@3.0.8': + dependencies: + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@smithy/util-retry@3.0.8': + dependencies: + '@smithy/service-error-classification': 3.0.8 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + + '@smithy/util-stream@3.2.1': + dependencies: + '@smithy/fetch-http-handler': 4.0.0 + '@smithy/node-http-handler': 3.2.5 + '@smithy/types': 3.6.0 + '@smithy/util-base64': 3.0.0 + '@smithy/util-buffer-from': 3.0.0 + '@smithy/util-hex-encoding': 3.0.0 + '@smithy/util-utf8': 3.0.0 + tslib: 2.6.3 + + '@smithy/util-uri-escape@1.1.0': + dependencies: + tslib: 2.6.3 + + '@smithy/util-uri-escape@3.0.0': + dependencies: + tslib: 2.6.3 + + '@smithy/util-utf8@1.1.0': + dependencies: + '@smithy/util-buffer-from': 1.1.0 + tslib: 2.6.3 + + '@smithy/util-utf8@2.3.0': + dependencies: + '@smithy/util-buffer-from': 2.2.0 + tslib: 2.6.3 + + '@smithy/util-utf8@3.0.0': + dependencies: + '@smithy/util-buffer-from': 3.0.0 + tslib: 2.6.3 + + '@smithy/util-waiter@3.1.7': + dependencies: + '@smithy/abort-controller': 3.1.6 + '@smithy/types': 3.6.0 + tslib: 2.6.3 + '@stdlib/assert-has-own-property@0.0.7': {} '@stdlib/assert-has-symbol-support@0.0.8': @@ -5702,6 +6922,8 @@ snapshots: bottleneck@2.19.5: {} + bowser@2.11.0: {} + brace-expansion@1.1.11: dependencies: balanced-match: 1.0.2 @@ -5851,6 +7073,25 @@ snapshots: co@4.6.0: {} + cohere-ai@7.14.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)): + dependencies: + '@aws-sdk/client-sagemaker': 3.679.0 + '@aws-sdk/credential-providers': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)) + '@aws-sdk/protocol-http': 3.374.0 + '@aws-sdk/signature-v4': 3.374.0 + form-data: 4.0.0 + form-data-encoder: 4.0.2 + formdata-node: 6.0.3 + js-base64: 3.7.2 + node-fetch: 2.7.0 + qs: 6.11.2 + readable-stream: 4.5.2 + url-join: 4.0.1 + transitivePeerDependencies: + 
- '@aws-sdk/client-sso-oidc' + - aws-crt + - encoding + cohere@1.1.1: {} collect-v8-coverage@1.0.2: {} @@ -6247,6 +7488,10 @@ snapshots: fast-safe-stringify@2.1.1: {} + fast-xml-parser@4.4.1: + dependencies: + strnum: 1.0.5 + fb-watchman@2.0.2: dependencies: bser: 2.1.1 @@ -6300,6 +7545,8 @@ snapshots: form-data-encoder@1.7.2: {} + form-data-encoder@4.0.2: {} + form-data@4.0.0: dependencies: asynckit: 0.4.0 @@ -6311,6 +7558,8 @@ snapshots: node-domexception: 1.0.0 web-streams-polyfill: 4.0.0-beta.3 + formdata-node@6.0.3: {} + formdata-polyfill@4.0.10: dependencies: fetch-blob: 3.2.0 @@ -7012,6 +8261,8 @@ snapshots: joplin-turndown-plugin-gfm@1.0.12: {} + js-base64@3.7.2: {} + js-beautify@1.15.1: dependencies: config-chain: 1.1.13 @@ -7074,17 +8325,17 @@ snapshots: kuler@2.0.0: {} - langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0): + langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0): dependencies: - '@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)) - '@langchain/openai': 
0.2.1(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)) - '@langchain/textsplitters': 0.0.3(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)) + '@langchain/core': 0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)) + '@langchain/openai': 0.2.1(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)) + '@langchain/textsplitters': 
0.0.3(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)) binary-extensions: 2.3.0 js-tiktoken: 1.0.12 js-yaml: 4.1.0 jsonpointer: 5.0.1 langchainhub: 0.0.11 - langsmith: 0.1.34(@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)))(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)) + langsmith: 0.1.34(zyeavx4tfqw3smbbpiinhfxxeu) ml-distance: 4.0.1 openapi-types: 12.1.3 p-retry: 4.6.2 @@ -7093,14 +8344,16 @@ snapshots: zod: 3.23.8 zod-to-json-schema: 3.23.1(zod@3.23.8) optionalDependencies: + '@aws-sdk/credential-provider-node': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0) '@supabase/supabase-js': 2.44.2 axios: 1.7.2 cheerio: 1.0.0-rc.12 + fast-xml-parser: 4.4.1 handlebars: 4.7.8 html-to-text: 9.0.5 ioredis: 5.4.1 mammoth: 1.7.2 - mongodb: 6.6.2(socks@2.8.3) + mongodb: 
6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3) pdf-parse: 1.1.1 puppeteer: 22.12.1(typescript@5.4.5) redis: 4.6.14 @@ -7111,7 +8364,7 @@ snapshots: langchainhub@0.0.11: {} - langsmith@0.1.34(@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)))(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)): + langsmith@0.1.34(zyeavx4tfqw3smbbpiinhfxxeu): dependencies: '@types/uuid': 9.0.8 commander: 10.0.1 @@ -7120,8 +8373,8 @@ snapshots: p-retry: 4.6.2 uuid: 9.0.1 optionalDependencies: - '@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)) - langchain: 0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0) + '@langchain/core': 
0.2.12(langchain@0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.57.0(zod@3.23.8)) + langchain: 0.2.8(@aws-sdk/credential-provider-node@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0))(@aws-sdk/client-sts@3.679.0))(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(fast-xml-parser@4.4.1)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3))(openai@4.57.0(zod@3.23.8))(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0) openai: 4.57.0(zod@3.23.8) languagedetect@2.0.0: {} @@ -7325,19 +8578,20 @@ snapshots: '@types/whatwg-url': 11.0.5 whatwg-url: 13.0.0 - mongodb@6.6.2(socks@2.8.3): + mongodb@6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3): dependencies: '@mongodb-js/saslprep': 1.1.7 bson: 6.8.0 mongodb-connection-string-url: 3.0.1 optionalDependencies: + '@aws-sdk/credential-providers': 3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)) socks: 2.8.3 - mongoose@8.4.4(socks@2.8.3): + mongoose@8.4.4(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3): dependencies: bson: 6.8.0 kareem: 2.6.3 - mongodb: 6.6.2(socks@2.8.3) + mongodb: 
6.6.2(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3) mpath: 0.9.0 mquery: 5.0.0 ms: 2.1.3 @@ -7386,7 +8640,7 @@ snapshots: natural-compare@1.4.0: {} - natural@7.0.7(socks@2.8.3): + natural@7.0.7(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3): dependencies: afinn-165: 1.0.4 afinn-165-financialmarketnews: 3.0.0 @@ -7394,7 +8648,7 @@ snapshots: dotenv: 16.4.5 http-server: 14.1.1 memjs: 1.3.2 - mongoose: 8.4.4(socks@2.8.3) + mongoose: 8.4.4(@aws-sdk/credential-providers@3.679.0(@aws-sdk/client-sso-oidc@3.679.0(@aws-sdk/client-sts@3.679.0)))(socks@2.8.3) pg: 8.12.0 redis: 4.6.14 safe-stable-stringify: 2.4.3 @@ -7818,6 +9072,10 @@ snapshots: dependencies: side-channel: 1.0.6 + qs@6.11.2: + dependencies: + side-channel: 1.0.6 + qs@6.12.2: dependencies: side-channel: 1.0.6 @@ -8161,6 +9419,8 @@ snapshots: '@types/node': 20.14.1 qs: 6.12.2 + strnum@1.0.5: {} + supabase@1.172.2: dependencies: bin-links: 4.0.4 diff --git a/apps/api/requests.http b/apps/api/requests.http index 4ce40b2c..0e3b9206 100644 --- a/apps/api/requests.http +++ b/apps/api/requests.http @@ -46,4 +46,32 @@ content-type: application/json @batchScrapeId = {{batchScrape.response.body.$.id}} # @name batchScrapeStatus GET {{baseUrl}}/v1/crawl/{{batchScrapeId}} HTTP/1.1 -Authorization: Bearer {{$dotenv TEST_API_KEY}} \ No newline at end of file +Authorization: Bearer {{$dotenv TEST_API_KEY}} + + +### Map Website +# @name map +POST {{baseUrl}}/v1/map HTTP/1.1 +Authorization: Bearer {{$dotenv TEST_API_KEY}} +content-type: application/json + +{ + "url": "firecrawl.dev", + "sitemapOnly": true +} + +### Extract +# @name extract +POST {{baseUrl}}/v1/extract HTTP/1.1 +Authorization: Bearer {{$dotenv TEST_API_KEY}} +content-type: application/json + +{ + "urls": ["firecrawl.dev"], + "prompt": "What is the title, description and main product of the page?", + "schema": { + "title": 
"string", + "description": "string", + "mainProduct": "string" + } +} \ No newline at end of file diff --git a/apps/api/src/__tests__/e2e_extract/index.test.ts b/apps/api/src/__tests__/e2e_extract/index.test.ts new file mode 100644 index 00000000..525ff6a2 --- /dev/null +++ b/apps/api/src/__tests__/e2e_extract/index.test.ts @@ -0,0 +1,249 @@ +import request from "supertest"; +import dotenv from "dotenv"; +import { + FirecrawlCrawlResponse, + FirecrawlCrawlStatusResponse, + FirecrawlScrapeResponse, +} from "../../types"; + +dotenv.config(); +const TEST_URL = "http://127.0.0.1:3002"; + +describe("E2E Tests for Extract API Routes", () => { + it.concurrent("should return authors of blog posts on firecrawl.dev", async () => { + const response = await request(TEST_URL) + .post("/v1/extract") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ + urls: ["https://firecrawl.dev/*"], + prompt: "Who are the authors of the blog posts?", + schema: { + type: "object", + properties: { authors: { type: "array", items: { type: "string" } } }, + }, + }); + + console.log(response.body); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("data"); + expect(response.body.data).toHaveProperty("authors"); + + let gotItRight = 0; + for (const author of response.body.data?.authors) { + if (author.includes("Caleb Peffer")) gotItRight++; + if (author.includes("Gergő Móricz")) gotItRight++; + if (author.includes("Eric Ciarla")) gotItRight++; + if (author.includes("Nicolas Camara")) gotItRight++; + if (author.includes("Jon")) gotItRight++; + if (author.includes("Wendong")) gotItRight++; + + } + + expect(gotItRight).toBeGreaterThan(1); + }, 60000); + + it.concurrent("should return founders of firecrawl.dev (allowExternalLinks = true)", async () => { + const response = await request(TEST_URL) + .post("/v1/extract") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", 
"application/json") + .send({ + urls: ["firecrawl.dev/*"], + prompt: "Who are the founders of the company?", + allowExternalLinks: true, + schema: { + type: "object", + properties: { founders: { type: "array", items: { type: "string" } } }, + }, + }); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("data"); + expect(response.body.data).toHaveProperty("founders"); + + console.log(response.body.data?.founders); + let gotItRight = 0; + for (const founder of response.body.data?.founders) { + if (founder.includes("Caleb")) gotItRight++; + if (founder.includes("Eric")) gotItRight++; + if (founder.includes("Nicolas")) gotItRight++; + if (founder.includes("nick")) gotItRight++; + if (founder.includes("eric")) gotItRight++; + if (founder.includes("jon-noronha")) gotItRight++; + + } + + expect(gotItRight).toBeGreaterThanOrEqual(2); + }, 60000); + + it.concurrent("should return hiring opportunities on firecrawl.dev (allowExternalLinks = true)", async () => { + const response = await request(TEST_URL) + .post("/v1/extract") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ + urls: ["https://firecrawl.dev/*"], + prompt: "What are they hiring for?", + allowExternalLinks: true, + schema: { + type: "array", + items: { + type: "string" + }, + required: ["items"] + }, + }); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("data"); + console.log(response.body.data); + + let gotItRight = 0; + for (const hiring of response.body.data?.items) { + if (hiring.includes("Developer Support Engineer")) gotItRight++; + if (hiring.includes("Dev Ops Engineer")) gotItRight++; + if (hiring.includes("Founding Web Automation Engineer")) gotItRight++; + } + + expect(gotItRight).toBeGreaterThan(2); + }, 60000); + + it.concurrent("should return PCI DSS compliance for Fivetran", async () => { + const response = await request(TEST_URL) + .post("/v1/extract") + 
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ + urls: ["fivetran.com/*"], + prompt: "Does Fivetran have PCI DSS compliance?", + allowExternalLinks: true, + schema: { + type: "object", + properties: { + pciDssCompliance: { type: "boolean" } + } + }, + }); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("data"); + expect(response.body.data?.pciDssCompliance).toBe(true); + }, 60000); + + it.concurrent("should return Azure Data Connectors for Fivetran", async () => { + const response = await request(TEST_URL) + .post("/v1/extract") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ + urls: ["fivetran.com/*"], + prompt: "What are the Azure Data Connectors they offer?", + schema: { + type: "array", + items: { + type: "object", + properties: { + connector: { type: "string" }, + description: { type: "string" }, + supportsCaptureDelete: { type: "boolean" } + } + } + } + }) + + console.log(response.body); + // expect(response.statusCode).toBe(200); + // expect(response.body).toHaveProperty("data"); + // expect(response.body.data?.pciDssCompliance).toBe(true); + }, 60000); + + it.concurrent("should return Greenhouse Applicant Tracking System for Abnormal Security", async () => { + const response = await request(TEST_URL) + .post("/v1/extract") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ + urls: ["https://careers.abnormalsecurity.com/jobs/6119456003?gh_jid=6119456003"], + prompt: "what applicant tracking system is this company using?", + schema: { + type: "object", + properties: { + isGreenhouseATS: { type: "boolean" }, + answer: { type: "string" } + } + }, + allowExternalLinks: true + }) + + console.log(response.body); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("data"); + 
expect(response.body.data?.isGreenhouseATS).toBe(true); + }, 60000); + + it.concurrent("should return mintlify api components", async () => { + const response = await request(TEST_URL) + .post("/v1/extract") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ + urls: ["https://mintlify.com/docs/*"], + prompt: "what are the 4 API components?", + schema: { + type: "array", + items: { + type: "object", + properties: { + component: { type: "string" } + } + }, + required: ["items"] + }, + allowExternalLinks: true + }) + + console.log(response.body.data?.items); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("data"); + expect(response.body.data?.items.length).toBe(4); + let gotItRight = 0; + for (const component of response.body.data?.items) { + if (component.component.toLowerCase().includes("parameter")) gotItRight++; + if (component.component.toLowerCase().includes("response")) gotItRight++; + if (component.component.toLowerCase().includes("expandable")) gotItRight++; + if (component.component.toLowerCase().includes("sticky")) gotItRight++; + if (component.component.toLowerCase().includes("examples")) gotItRight++; + + } + expect(gotItRight).toBeGreaterThan(2); + }, 60000); + + it.concurrent("should return information about Eric Ciarla", async () => { + const response = await request(TEST_URL) + .post("/v1/extract") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ + urls: ["https://ericciarla.com/"], + prompt: "Who is Eric Ciarla? Where does he work? 
Where did he go to school?", + schema: { + type: "object", + properties: { + name: { type: "string" }, + work: { type: "string" }, + education: { type: "string" } + }, + required: ["name", "work", "education"] + }, + allowExternalLinks: true + }) + + console.log(response.body.data); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("data"); + expect(response.body.data?.name).toBe("Eric Ciarla"); + expect(response.body.data?.work).toBeDefined(); + expect(response.body.data?.education).toBeDefined(); + }, 60000); + + + +}); diff --git a/apps/api/src/__tests__/e2e_map/index.test.ts b/apps/api/src/__tests__/e2e_map/index.test.ts new file mode 100644 index 00000000..b065dff1 --- /dev/null +++ b/apps/api/src/__tests__/e2e_map/index.test.ts @@ -0,0 +1,117 @@ +import request from "supertest"; +import dotenv from "dotenv"; + +dotenv.config(); +const TEST_URL = "http://127.0.0.1:3002"; + +describe("E2E Tests for Map API Routes", () => { + it.concurrent( + "(feat-search)should return links containing 'smart-crawl'", + async () => { + const response = await request(TEST_URL) + .post("/v1/map") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ + url: "https://firecrawl.dev", + sitemapOnly: false, + search: "smart-crawl", + }); + + console.log(response.body); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("links"); + expect(response.body.links.length).toBeGreaterThan(0); + expect(response.body.links[0]).toContain("firecrawl.dev/smart-crawl"); + }, + 60000 + ); + + it.concurrent( + "(feat-subdomains) should return mapped links for firecrawl.dev with subdomains included", + async () => { + const response = await request(TEST_URL) + .post("/v1/map") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ + url: "https://firecrawl.dev", + sitemapOnly: false, + includeSubdomains: true, + }); + + 
console.log(response.body); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("links"); + expect(response.body.links.length).toBeGreaterThan(0); + expect(response.body.links[response.body.links.length - 1]).toContain( + "docs.firecrawl.dev" + ); + }, + 60000 + ); + + it.concurrent( + "(feat-sitemap-only) should return mapped links for firecrawl.dev with sitemap only", + async () => { + const response = await request(TEST_URL) + .post("/v1/map") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ + url: "https://firecrawl.dev", + sitemapOnly: true, + }); + + console.log(response.body); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("links"); + expect(response.body.links.length).toBeGreaterThan(0); + expect(response.body.links[response.body.links.length - 1]).not.toContain( + "docs.firecrawl.dev" + ); + }, + 60000 + ); + + it.concurrent( + "(feat-limit) should return mapped links for firecrawl.dev with a limit", + async () => { + const response = await request(TEST_URL) + .post("/v1/map") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ + url: "https://firecrawl.dev", + sitemapOnly: false, + limit: 10, + }); + + console.log(response.body); + expect(response.statusCode).toBe(200); + expect(response.body).toHaveProperty("links"); + expect(response.body.links.length).toBeLessThanOrEqual(10); + }, + 60000 + ); + + it.concurrent( + "(feat-sitemap-large) should return more than 1900 links for geekflare sitemap", + async () => { + const response = await request(TEST_URL) + .post("/v1/map") + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .send({ + url: "https://geekflare.com/sitemap_index.xml", + sitemapOnly: true, + }); + + console.log(response.body); + expect(response.statusCode).toBe(200); + 
expect(response.body).toHaveProperty("links"); + expect(response.body.links.length).toBeGreaterThan(1900); + }, + 60000 + ); +}); diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts new file mode 100644 index 00000000..e5f2595c --- /dev/null +++ b/apps/api/src/controllers/v1/extract.ts @@ -0,0 +1,265 @@ +import { Request, Response } from "express"; +import { + // Document, + RequestWithAuth, + ExtractRequest, + extractRequestSchema, + ExtractResponse, + MapDocument, + scrapeOptions, +} from "./types"; +import { Document } from "../../lib/entities"; +import Redis from "ioredis"; +import { configDotenv } from "dotenv"; +import { performRanking } from "../../lib/ranker"; +import { billTeam } from "../../services/billing/credit_billing"; +import { logJob } from "../../services/logging/log_job"; +import { logger } from "../../lib/logger"; +import { getScrapeQueue } from "../../services/queue-service"; +import { waitForJob } from "../../services/queue-jobs"; +import { addScrapeJob } from "../../services/queue-jobs"; +import { PlanType } from "../../types"; +import { getJobPriority } from "../../lib/job-priority"; +import { generateOpenAICompletions } from "../../scraper/scrapeURL/transformers/llmExtract"; +import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist"; +import { getMapResults } from "./map"; +import { buildDocument } from "../../lib/extract/build-document"; + +configDotenv(); +const redis = new Redis(process.env.REDIS_URL!); + +const MAX_EXTRACT_LIMIT = 100; +const MAX_RANKING_LIMIT = 10; +const INITIAL_SCORE_THRESHOLD = 0.75; +const FALLBACK_SCORE_THRESHOLD = 0.5; +const MIN_REQUIRED_LINKS = 1; + +/** + * Extracts data from the provided URLs based on the request parameters. + * Currently in beta. + * @param req - The request object containing authentication and extraction details. + * @param res - The response object to send the extraction results. 
+ * @returns A promise that resolves when the extraction process is complete. + */ +export async function extractController( + req: RequestWithAuth<{}, ExtractResponse, ExtractRequest>, + res: Response +) { + const selfHosted = process.env.USE_DB_AUTHENTICATION !== "true"; + + req.body = extractRequestSchema.parse(req.body); + + const id = crypto.randomUUID(); + let links: string[] = []; + let docs: Document[] = []; + const earlyReturn = false; + + // Process all URLs in parallel + const urlPromises = req.body.urls.map(async (url) => { + if (url.includes('/*') || req.body.allowExternalLinks) { + // Handle glob pattern URLs + const baseUrl = url.replace('/*', ''); + // const pathPrefix = baseUrl.split('/').slice(3).join('/'); // Get path after domain if any + + const allowExternalLinks = req.body.allowExternalLinks ?? true; + let urlWithoutWww = baseUrl.replace("www.", ""); + let mapUrl = req.body.prompt && allowExternalLinks + ? `${req.body.prompt} ${urlWithoutWww}` + : req.body.prompt ? `${req.body.prompt} site:${urlWithoutWww}` + : `site:${urlWithoutWww}`; + + const mapResults = await getMapResults({ + url: baseUrl, + search: req.body.prompt, + teamId: req.auth.team_id, + plan: req.auth.plan, + allowExternalLinks, + origin: req.body.origin, + limit: req.body.limit, + // If we're self-hosted, we don't want to ignore the sitemap, due to our fire-engine mapping + ignoreSitemap: !selfHosted ? 
true : false, + includeMetadata: true, + includeSubdomains: req.body.includeSubdomains, + }); + + let mappedLinks = mapResults.links as MapDocument[]; + // Limit number of links to MAX_EXTRACT_LIMIT + mappedLinks = mappedLinks.slice(0, MAX_EXTRACT_LIMIT); + + let mappedLinksRerank = mappedLinks.map(x => `url: ${x.url}, title: ${x.title}, description: ${x.description}`); + + // Filter by path prefix if present + // wrong + // if (pathPrefix) { + // mappedLinks = mappedLinks.filter(x => x.url && x.url.includes(`/${pathPrefix}/`)); + // } + + if (req.body.prompt) { + // Get similarity scores between the search query and each link's context + const linksAndScores = await performRanking(mappedLinksRerank, mappedLinks.map(l => l.url), mapUrl); + + // First try with high threshold + let filteredLinks = filterAndProcessLinks(mappedLinks, linksAndScores, INITIAL_SCORE_THRESHOLD); + + // If we don't have enough high-quality links, try with lower threshold + if (filteredLinks.length < MIN_REQUIRED_LINKS) { + logger.info(`Only found ${filteredLinks.length} links with score > ${INITIAL_SCORE_THRESHOLD}. Trying lower threshold...`); + filteredLinks = filterAndProcessLinks(mappedLinks, linksAndScores, FALLBACK_SCORE_THRESHOLD); + + if (filteredLinks.length === 0) { + // If still no results, take top N results regardless of score + logger.warn(`No links found with score > ${FALLBACK_SCORE_THRESHOLD}. 
Taking top ${MIN_REQUIRED_LINKS} results.`); + filteredLinks = linksAndScores + .sort((a, b) => b.score - a.score) + .slice(0, MIN_REQUIRED_LINKS) + .map(x => mappedLinks.find(link => link.url === x.link)) + .filter((x): x is MapDocument => x !== undefined && x.url !== undefined && !isUrlBlocked(x.url)); + } + } + + mappedLinks = filteredLinks.slice(0, MAX_RANKING_LIMIT); + } + + return mappedLinks.map(x => x.url) as string[]; + + } else { + // Handle direct URLs without glob pattern + if (!isUrlBlocked(url)) { + return [url]; + } + return []; + } + }); + + // Wait for all URL processing to complete and flatten results + const processedUrls = await Promise.all(urlPromises); + links.push(...processedUrls.flat()); + + if (links.length === 0) { + return res.status(400).json({ + success: false, + error: "No valid URLs found to scrape. Try adjusting your search criteria or including more URLs." + }); + } + + // Scrape all links in parallel with retries + const scrapePromises = links.map(async (url) => { + const origin = req.body.origin || "api"; + const timeout = Math.floor((req.body.timeout || 40000) * 0.7) || 30000; // Use 70% of total timeout for individual scrapes + const jobId = crypto.randomUUID(); + + const jobPriority = await getJobPriority({ + plan: req.auth.plan as PlanType, + team_id: req.auth.team_id, + basePriority: 10, + }); + + await addScrapeJob( + { + url, + mode: "single_urls", + team_id: req.auth.team_id, + scrapeOptions: scrapeOptions.parse({}), + internalOptions: {}, + plan: req.auth.plan!, + origin, + is_scrape: true, + }, + {}, + jobId, + jobPriority + ); + + try { + const doc = await waitForJob(jobId, timeout); + await getScrapeQueue().remove(jobId); + if (earlyReturn) { + return null; + } + return doc; + } catch (e) { + logger.error(`Error in scrapeController: ${e}`); + if (e instanceof Error && (e.message.startsWith("Job wait") || e.message === "timeout")) { + throw { + status: 408, + error: "Request timed out" + }; + } else { + throw { + 
status: 500, + error: `(Internal server error) - ${(e && e.message) ? e.message : e}` + }; + } + } + }); + + try { + const results = await Promise.all(scrapePromises); + docs.push(...results.filter(doc => doc !== null).map(x => x!)); + } catch (e) { + return res.status(e.status).json({ + success: false, + error: e.error + }); + } + + const completions = await generateOpenAICompletions( + logger.child({ method: "extractController/generateOpenAICompletions" }), + { + mode: "llm", + systemPrompt: "Always prioritize using the provided content to answer the question. Do not make up an answer. Be concise and follow the schema if provided.", + prompt: req.body.prompt, + schema: req.body.schema, + }, + docs.map(x => buildDocument(x)).join('\n') + ); + + // TODO: change this later + // While on beta, we're billing 5 credits per link discovered/scraped. + billTeam(req.auth.team_id, req.acuc?.sub_id, links.length * 5).catch(error => { + logger.error(`Failed to bill team ${req.auth.team_id} for ${links.length * 5} credits: ${error}`); + }); + + let data = completions.extract ?? {}; + let warning = completions.warning; + + logJob({ + job_id: id, + success: true, + message: "Extract completed", + num_docs: 1, + docs: data, + time_taken: (new Date().getTime() - Date.now()) / 1000, + team_id: req.auth.team_id, + mode: "extract", + url: req.body.urls.join(", "), + scrapeOptions: req.body, + origin: req.body.origin ?? "api", + num_tokens: completions.numTokens ?? 0 + }); + + return res.status(200).json({ + success: true, + data: data, + scrape_id: id, + warning: warning + }); +} + +/** + * Filters links based on their similarity score to the search query. + * @param mappedLinks - The list of mapped links to filter. + * @param linksAndScores - The list of links and their similarity scores. + * @param threshold - The score threshold to filter by. + * @returns The filtered list of links. 
+ */ +function filterAndProcessLinks( + mappedLinks: MapDocument[], + linksAndScores: { link: string, linkWithContext: string, score: number, originalIndex: number }[], + threshold: number +): MapDocument[] { + return linksAndScores + .filter(x => x.score > threshold) + .map(x => mappedLinks.find(link => link.url === x.link)) + .filter((x): x is MapDocument => x !== undefined && x.url !== undefined && !isUrlBlocked(x.url)); +} \ No newline at end of file diff --git a/apps/api/src/controllers/v1/map.ts b/apps/api/src/controllers/v1/map.ts index ab9a5ed7..9a0a5eb6 100644 --- a/apps/api/src/controllers/v1/map.ts +++ b/apps/api/src/controllers/v1/map.ts @@ -1,6 +1,6 @@ import { Response } from "express"; import { v4 as uuidv4 } from "uuid"; -import { mapRequestSchema, RequestWithAuth, scrapeOptions } from "./types"; +import { MapDocument, mapRequestSchema, RequestWithAuth, scrapeOptions } from "./types"; import { crawlToCrawler, StoredCrawl } from "../../lib/crawl-redis"; import { MapResponse, MapRequest } from "./types"; import { configDotenv } from "dotenv"; @@ -25,37 +25,61 @@ const MAX_MAP_LIMIT = 5000; // Max Links that "Smart /map" can return const MAX_FIRE_ENGINE_RESULTS = 1000; -export async function mapController( - req: RequestWithAuth<{}, MapResponse, MapRequest>, - res: Response -) { - const startTime = new Date().getTime(); - - req.body = mapRequestSchema.parse(req.body); - - const limit: number = req.body.limit ?? 
MAX_MAP_LIMIT; +interface MapResult { + success: boolean; + links: string[] | any[]; + scrape_id?: string; + job_id: string; + time_taken: number; +} +export async function getMapResults({ + url, + search, + limit = MAX_MAP_LIMIT, + ignoreSitemap = false, + includeSubdomains = true, + crawlerOptions = {}, + teamId, + plan, + origin, + includeMetadata = false, + allowExternalLinks +}: { + url: string; + search?: string; + limit?: number; + ignoreSitemap?: boolean; + includeSubdomains?: boolean; + crawlerOptions?: any; + teamId: string; + plan?: string; + origin?: string; + includeMetadata?: boolean; + allowExternalLinks?: boolean; +}): Promise { const id = uuidv4(); - let links: string[] = [req.body.url]; + let links: string[] = [url]; + let mapResults: MapDocument[] = []; const sc: StoredCrawl = { - originUrl: req.body.url, + originUrl: url, crawlerOptions: { - ...req.body, - limit: req.body.sitemapOnly ? 10000000 : limit, + ...crawlerOptions, + limit: crawlerOptions.sitemapOnly ? 10000000 : limit, scrapeOptions: undefined, }, scrapeOptions: scrapeOptions.parse({}), internalOptions: {}, - team_id: req.auth.team_id, + team_id: teamId, createdAt: Date.now(), - plan: req.auth.plan, + plan: plan, }; const crawler = crawlToCrawler(id, sc); // If sitemapOnly is true, only get links from sitemap - if (req.body.sitemapOnly) { + if (crawlerOptions.sitemapOnly) { const sitemap = await crawler.tryGetSitemap(true, true); if (sitemap !== null) { sitemap.forEach((x) => { @@ -73,19 +97,18 @@ export async function mapController( // links = links.slice(1, limit); // don't slice, unnecessary } } else { - let urlWithoutWww = req.body.url.replace("www.", ""); + let urlWithoutWww = url.replace("www.", ""); - let mapUrl = req.body.search - ? `"${req.body.search}" site:${urlWithoutWww}` - : `site:${req.body.url}`; + let mapUrl = search && allowExternalLinks + ? `${search} ${urlWithoutWww}` + : search ? 
`${search} site:${urlWithoutWww}` + : `site:${url}`; const resultsPerPage = 100; - const maxPages = Math.ceil( - Math.min(MAX_FIRE_ENGINE_RESULTS, limit) / resultsPerPage - ); + const maxPages = Math.ceil(Math.min(MAX_FIRE_ENGINE_RESULTS, limit) / resultsPerPage); const cacheKey = `fireEngineMap:${mapUrl}`; - const cachedResult = null; + const cachedResult = await redis.get(cacheKey); let allResults: any[] = []; let pagePromises: Promise[] = []; @@ -110,7 +133,7 @@ export async function mapController( // Parallelize sitemap fetch with serper search const [sitemap, ...searchResults] = await Promise.all([ - req.body.ignoreSitemap ? null : crawler.tryGetSitemap(true), + ignoreSitemap ? null : crawler.tryGetSitemap(true), ...(cachedResult ? [] : pagePromises), ]); @@ -124,7 +147,7 @@ export async function mapController( }); } - let mapResults = allResults + mapResults = allResults .flat() .filter((result) => result !== null && result !== undefined); @@ -134,7 +157,7 @@ export async function mapController( } if (mapResults.length > 0) { - if (req.body.search) { + if (search) { // Ensure all map results are first, maintaining their order links = [ mapResults[0].url, @@ -149,9 +172,8 @@ export async function mapController( } // Perform cosine similarity between the search query and the list of links - if (req.body.search) { - const searchQuery = req.body.search.toLowerCase(); - + if (search) { + const searchQuery = search.toLowerCase(); links = performCosineSimilarity(links, searchQuery); } @@ -166,95 +188,75 @@ export async function mapController( .filter((x) => x !== null) as string[]; // allows for subdomains to be included - links = links.filter((x) => isSameDomain(x, req.body.url)); + links = links.filter((x) => isSameDomain(x, url)); // if includeSubdomains is false, filter out subdomains - if (!req.body.includeSubdomains) { - links = links.filter((x) => isSameSubdomain(x, req.body.url)); + if (!includeSubdomains) { + links = links.filter((x) => isSameSubdomain(x, 
url)); } // remove duplicates that could be due to http/https or www links = removeDuplicateUrls(links); - links.slice(0, limit); } + const linksToReturn = crawlerOptions.sitemapOnly ? links : links.slice(0, limit); + + return { + success: true, + links: includeMetadata ? mapResults : linksToReturn, + scrape_id: origin?.includes("website") ? id : undefined, + job_id: id, + time_taken: (new Date().getTime() - Date.now()) / 1000, + }; +} + +export async function mapController( + req: RequestWithAuth<{}, MapResponse, MapRequest>, + res: Response +) { + req.body = mapRequestSchema.parse(req.body); + + const result = await getMapResults({ + url: req.body.url, + search: req.body.search, + limit: req.body.limit, + ignoreSitemap: req.body.ignoreSitemap, + includeSubdomains: req.body.includeSubdomains, + crawlerOptions: req.body, + origin: req.body.origin, + teamId: req.auth.team_id, + plan: req.auth.plan, + }); + + // Bill the team billTeam(req.auth.team_id, req.acuc?.sub_id, 1).catch((error) => { logger.error( `Failed to bill team ${req.auth.team_id} for 1 credit: ${error}` ); - // Optionally, you could notify an admin or add to a retry queue here }); - const endTime = new Date().getTime(); - const timeTakenInSeconds = (endTime - startTime) / 1000; - + // Log the job logJob({ - job_id: id, - success: links.length > 0, + job_id: result.job_id, + success: result.links.length > 0, message: "Map completed", - num_docs: links.length, - docs: links, - time_taken: timeTakenInSeconds, + num_docs: result.links.length, + docs: result.links, + time_taken: result.time_taken, team_id: req.auth.team_id, - mode: "map", + mode: "map", url: req.body.url, crawlerOptions: {}, scrapeOptions: {}, - origin: req.body.origin, + origin: req.body.origin ?? "api", num_tokens: 0, }); - return res.status(200).json({ - success: true, - links: links, - scrape_id: req.body.origin?.includes("website") ? 
id : undefined, - }); -} + const response = { + success: true as const, + links: result.links, + scrape_id: result.scrape_id + }; -// Subdomain sitemap url checking - -// // For each result, check for subdomains, get their sitemaps and add them to the links -// const processedUrls = new Set(); -// const processedSubdomains = new Set(); - -// for (const result of links) { -// let url; -// let hostParts; -// try { -// url = new URL(result); -// hostParts = url.hostname.split('.'); -// } catch (e) { -// continue; -// } - -// console.log("hostParts", hostParts); -// // Check if it's a subdomain (more than 2 parts, and not 'www') -// if (hostParts.length > 2 && hostParts[0] !== 'www') { -// const subdomain = hostParts[0]; -// console.log("subdomain", subdomain); -// const subdomainUrl = `${url.protocol}//${subdomain}.${hostParts.slice(-2).join('.')}`; -// console.log("subdomainUrl", subdomainUrl); - -// if (!processedSubdomains.has(subdomainUrl)) { -// processedSubdomains.add(subdomainUrl); - -// const subdomainCrawl = crawlToCrawler(id, { -// originUrl: subdomainUrl, -// crawlerOptions: legacyCrawlerOptions(req.body), -// pageOptions: {}, -// team_id: req.auth.team_id, -// createdAt: Date.now(), -// plan: req.auth.plan, -// }); -// const subdomainSitemap = await subdomainCrawl.tryGetSitemap(); -// if (subdomainSitemap) { -// subdomainSitemap.forEach((x) => { -// if (!processedUrls.has(x.url)) { -// processedUrls.add(x.url); -// links.push(x.url); -// } -// }); -// } -// } -// } -// } + return res.status(200).json(response); +} \ No newline at end of file diff --git a/apps/api/src/controllers/v1/types.ts b/apps/api/src/controllers/v1/types.ts index eefcd10e..f354c640 100644 --- a/apps/api/src/controllers/v1/types.ts +++ b/apps/api/src/controllers/v1/types.ts @@ -151,8 +151,25 @@ export const scrapeOptions = z.object({ }).strict(strictMessage) + export type ScrapeOptions = z.infer; +export const extractV1Options = z.object({ + urls: url.array(), + prompt: 
z.string().optional(), + schema: z.any().optional(), + limit: z.number().int().positive().finite().safe().optional(), + ignoreSitemap: z.boolean().default(false), + includeSubdomains: z.boolean().default(true), + allowExternalLinks: z.boolean().default(false), + origin: z.string().optional().default("api"), + timeout: z.number().int().positive().finite().safe().default(60000) +}).strict(strictMessage) + +export type ExtractV1Options = z.infer; +export const extractRequestSchema = extractV1Options; +export type ExtractRequest = z.infer; + export const scrapeRequestSchema = scrapeOptions.omit({ timeout: true }).extend({ url, origin: z.string().optional().default("api"), @@ -173,6 +190,8 @@ export const scrapeRequestSchema = scrapeOptions.omit({ timeout: true }).extend( return obj; }); + + export type ScrapeRequest = z.infer; export type ScrapeRequestInput = z.input; @@ -340,6 +359,21 @@ export interface ScrapeResponseRequestTest { error?: string; } +export type ExtractResponse = + | ErrorResponse + | { + success: true; + warning?: string; + data: z.infer; + scrape_id?: string; + }; + +export interface ExtractResponseRequestTest { + statusCode: number; + body: ExtractResponse; + error?: string; +} + export type CrawlResponse = | ErrorResponse | { @@ -496,6 +530,13 @@ export function fromLegacyCrawlerOptions(x: any): { crawlOptions: CrawlerOptions }; } + + +export interface MapDocument { + url: string; + title?: string; + description?: string; +} export function fromLegacyScrapeOptions(pageOptions: PageOptions, extractorOptions: ExtractorOptions | undefined, timeout: number | undefined): { scrapeOptions: ScrapeOptions, internalOptions: InternalOptions } { return { scrapeOptions: scrapeOptions.parse({ diff --git a/apps/api/src/lib/cache.ts b/apps/api/src/lib/cache.ts new file mode 100644 index 00000000..896d9429 --- /dev/null +++ b/apps/api/src/lib/cache.ts @@ -0,0 +1,50 @@ +import IORedis from "ioredis"; +import { ScrapeOptions } from "../controllers/v1/types"; +import 
{ InternalOptions } from "../scraper/scrapeURL"; +import { logger as _logger } from "./logger"; +const logger = _logger.child({module: "cache"}); + +export const cacheRedis = process.env.CACHE_REDIS_URL ? new IORedis(process.env.CACHE_REDIS_URL, { + maxRetriesPerRequest: null, +}) : null; + +export function cacheKey(url: string, scrapeOptions: ScrapeOptions, internalOptions: InternalOptions): string | null { + if (!cacheRedis) return null; + + // these options disqualify a cache + if (internalOptions.v0CrawlOnlyUrls || internalOptions.forceEngine || internalOptions.v0UseFastMode || internalOptions.atsv + || (scrapeOptions.actions && scrapeOptions.actions.length > 0) + ) { + return null; + } + + return "cache:" + url + ":waitFor:" + scrapeOptions.waitFor; +} + +export type CacheEntry = { + url: string; + html: string; + statusCode: number; + error?: string; +}; + +export async function saveEntryToCache(key: string, entry: CacheEntry) { + if (!cacheRedis) return; + + try { + await cacheRedis.set(key, JSON.stringify(entry)); + } catch (error) { + logger.warn("Failed to save to cache", { key, error }); + } +} + +export async function getEntryFromCache(key: string): Promise { + if (!cacheRedis) return null; + + try { + return JSON.parse(await cacheRedis.get(key) ?? 
"null"); + } catch (error) { + logger.warn("Failed to get from cache", { key, error }); + return null; + } +} diff --git a/apps/api/src/lib/extract/build-document.ts b/apps/api/src/lib/extract/build-document.ts new file mode 100644 index 00000000..66417a07 --- /dev/null +++ b/apps/api/src/lib/extract/build-document.ts @@ -0,0 +1,15 @@ +import { Document } from "../../controllers/v1/types"; + +export function buildDocument(document: Document): string { + const metadata = document.metadata; + const markdown = document.markdown; + + // for each key in the metadata allow up to 250 characters + const metadataString = Object.entries(metadata).map(([key, value]) => { + return `${key}: ${value?.toString().slice(0, 250)}`; + }).join('\n'); + + const documentMetadataString = `\n- - - - - Page metadata - - - - -\n${metadataString}`; + const documentString = `${markdown}${documentMetadataString}`; + return documentString; +} diff --git a/apps/api/src/lib/extract/completions.ts b/apps/api/src/lib/extract/completions.ts new file mode 100644 index 00000000..34a5a215 --- /dev/null +++ b/apps/api/src/lib/extract/completions.ts @@ -0,0 +1,124 @@ +// use llmExtract.ts instead + +// import OpenAI from "openai"; +// import { encoding_for_model } from "@dqbd/tiktoken"; +// import { TiktokenModel } from "@dqbd/tiktoken"; +// import { ExtractOptions } from "../../controllers/v1/types"; +// import { Document } from "../entities"; +// import { z } from "zod"; + +// const maxTokens = 32000; +// const modifier = 4; + +// export class LLMRefusalError extends Error { +// constructor(refusal: string) { +// super("LLM refused to extract the website's content"); +// this.name = "LLMRefusalError"; +// } +// } + +// interface GenerateCompletionsParams { +// systemPrompt?: string; +// prompt?: string; +// schema?: any; +// pagesContent: string; +// } + +// export async function generateBasicCompletion(prompt: string) { +// const openai = new OpenAI(); +// const model: TiktokenModel = +// 
(process.env.MODEL_NAME as TiktokenModel) ?? "gpt-4o-mini"; + +// const completion = await openai.chat.completions.create({ +// model, +// messages: [{ role: "user", content: prompt }], +// }); + +// return completion.choices[0].message.content; +// } + +// export async function generateFinalExtraction({ +// pagesContent, +// systemPrompt, +// prompt, +// schema, +// }: GenerateCompletionsParams): Promise<{ +// content: string; +// metadata: { numTokens: number; warning: string }; +// }> { +// const openai = new OpenAI(); +// const model: TiktokenModel = +// (process.env.MODEL_NAME as TiktokenModel) ?? "gpt-4o-mini"; + +// let extractionContent = pagesContent; +// let numTokens = 0; +// let warning = ""; + +// const encoder = encoding_for_model(model); +// try { +// const tokens = encoder.encode(extractionContent); +// numTokens = tokens.length; +// } catch (error) { +// extractionContent = extractionContent.slice(0, maxTokens * modifier); +// warning = `Failed to derive number of LLM tokens the extraction might use -- the input has been automatically trimmed to the maximum number of tokens (${maxTokens}) we support.`; +// } finally { +// encoder.free(); +// } + +// if (numTokens > maxTokens) { +// extractionContent = extractionContent.slice(0, maxTokens * modifier); +// warning = `The extraction content would have used more tokens (${numTokens}) than the maximum we allow (${maxTokens}). -- the input has been automatically trimmed.`; +// } + +// if (schema && (schema.type === "array" || schema._type === "ZodArray")) { +// schema = { +// type: "object", +// properties: { +// items: schema, +// }, +// required: ["items"], +// additionalProperties: false, +// }; +// } else if (schema) { +// schema.additionalProperties = false; +// schema.required = Object.keys(schema.properties); +// } + +// const jsonCompletion = await openai.beta.chat.completions.parse({ +// temperature: 0, +// model, +// messages: [ +// { role: "system", content: systemPrompt ?? 
"" }, +// { role: "user", content: [{ type: "text", text: extractionContent }] }, +// { +// role: "user", +// content: prompt +// ? `Transform the above content into structured JSON output based on the following user request: ${prompt}` +// : "Transform the above content into structured JSON output.", +// }, +// ], +// response_format: schema +// ? { +// type: "json_schema", +// json_schema: { +// name: "websiteContent", +// schema: schema, +// strict: true, +// }, +// } +// : { type: "json_object" }, +// }); + +// if (jsonCompletion.choices[0].message.refusal !== null) { +// throw new LLMRefusalError(jsonCompletion.choices[0].message.refusal); +// } + +// const extraction = jsonCompletion.choices[0].message.parsed; +// return { +// content: extraction ?? "", +// metadata: { +// numTokens, +// warning, +// }, +// }; +// } diff --git a/apps/api/src/lib/extract/reranker.ts b/apps/api/src/lib/extract/reranker.ts new file mode 100644 index 00000000..30aca441 --- /dev/null +++ b/apps/api/src/lib/extract/reranker.ts @@ -0,0 +1,22 @@ +import { CohereClient } from "cohere-ai"; +import { MapDocument } from "../../controllers/v1/types"; +const cohere = new CohereClient({ + token: process.env.COHERE_API_KEY, +}); + +export async function rerankDocuments( + documents: (string | Record)[], + query: string, + topN = 3, + model = "rerank-english-v3.0" +) { + const rerank = await cohere.v2.rerank({ + documents, + query, + topN, + model, + returnDocuments: true, + }); + + return rerank.results.sort((a, b) => b.relevanceScore - a.relevanceScore).map(x => ({ document: x.document, index: x.index, relevanceScore: x.relevanceScore })); +} diff --git a/apps/api/src/lib/ranker.test.ts b/apps/api/src/lib/ranker.test.ts new file mode 100644 index 00000000..6d17a08b --- /dev/null +++ b/apps/api/src/lib/ranker.test.ts @@ -0,0 +1,68 @@ +import { performRanking } from './ranker'; + +describe('performRanking', () => { + it('should rank links based on similarity to search query', async () => { + 
const linksWithContext = [ + 'url: https://example.com/dogs, title: All about dogs, description: Learn about different dog breeds', + 'url: https://example.com/cats, title: Cat care guide, description: Everything about cats', + 'url: https://example.com/pets, title: General pet care, description: Care for all types of pets' + ]; + + const links = [ + 'https://example.com/dogs', + 'https://example.com/cats', + 'https://example.com/pets' + ]; + + const searchQuery = 'cats training'; + + const result = await performRanking(linksWithContext, links, searchQuery); + + // Should return array of objects with link, linkWithContext, score, originalIndex + expect(result).toBeInstanceOf(Array); + expect(result.length).toBe(3); + + // First result should be the cats page since query is about cats + expect(result[0].link).toBe('https://example.com/cats'); + + // Each result should have required properties + result.forEach(item => { + expect(item).toHaveProperty('link'); + expect(item).toHaveProperty('linkWithContext'); + expect(item).toHaveProperty('score'); + expect(item).toHaveProperty('originalIndex'); + expect(typeof item.score).toBe('number'); + expect(item.score).toBeGreaterThanOrEqual(0); + expect(item.score).toBeLessThanOrEqual(1); + }); + + // Scores should be in descending order + for (let i = 1; i < result.length; i++) { + expect(result[i].score).toBeLessThanOrEqual(result[i-1].score); + } + }); + + it('should handle empty inputs', async () => { + const result = await performRanking([], [], ''); + expect(result).toEqual([]); + }); + + it('should maintain original order for equal scores', async () => { + const linksWithContext = [ + 'url: https://example.com/1, title: Similar content A, description: test', + 'url: https://example.com/2, title: Similar content B, description: test' + ]; + + const links = [ + 'https://example.com/1', + 'https://example.com/2' + ]; + + const searchQuery = 'test'; + + const result = await performRanking(linksWithContext, links, 
searchQuery); + + // If scores are equal, original order should be maintained + expect(result[0].originalIndex).toBeLessThan(result[1].originalIndex); + }); +}); diff --git a/apps/api/src/lib/ranker.ts b/apps/api/src/lib/ranker.ts new file mode 100644 index 00000000..e7fa235c --- /dev/null +++ b/apps/api/src/lib/ranker.ts @@ -0,0 +1,92 @@ +import axios from 'axios'; +import { configDotenv } from 'dotenv'; +import OpenAI from "openai"; + +configDotenv(); + +const openai = new OpenAI({ + apiKey: process.env.OPENAI_API_KEY, +}); + +async function getEmbedding(text: string) { + const embedding = await openai.embeddings.create({ + model: "text-embedding-ada-002", + input: text, + encoding_format: "float", + }); + + return embedding.data[0].embedding; +} + +const cosineSimilarity = (vec1: number[], vec2: number[]): number => { + const dotProduct = vec1.reduce((sum, val, i) => sum + val * vec2[i], 0); + const magnitude1 = Math.sqrt( + vec1.reduce((sum, val) => sum + val * val, 0) + ); + const magnitude2 = Math.sqrt( + vec2.reduce((sum, val) => sum + val * val, 0) + ); + if (magnitude1 === 0 || magnitude2 === 0) return 0; + return dotProduct / (magnitude1 * magnitude2); +}; + +// Function to convert text to vector +const textToVector = (searchQuery: string, text: string): number[] => { + const words = searchQuery.toLowerCase().split(/\W+/); + return words.map((word) => { + const count = (text.toLowerCase().match(new RegExp(word, "g")) || []) + .length; + return count / text.length; + }); +}; + +async function performRanking(linksWithContext: string[], links: string[], searchQuery: string) { + try { + // Handle invalid inputs + if (!searchQuery || !linksWithContext.length || !links.length) { + return []; + } + + // Sanitize search query by removing null characters before it is sent for embedding + const sanitizedQuery = searchQuery.replace(/\u0000/g, ""); + + // Generate embeddings for the search query + const queryEmbedding = await getEmbedding(sanitizedQuery); + + // Generate embeddings for each link and calculate similarity 
+ const linksAndScores = await Promise.all(linksWithContext.map(async (linkWithContext, index) => { + try { + const linkEmbedding = await getEmbedding(linkWithContext); + const score = cosineSimilarity(queryEmbedding, linkEmbedding); + + return { + link: links[index], + linkWithContext, + score, + originalIndex: index + }; + } catch (err) { + // If embedding fails for a link, return with score 0 + return { + link: links[index], + linkWithContext, + score: 0, + originalIndex: index + }; + } + })); + + // Sort links based on similarity scores while preserving original order for equal scores + linksAndScores.sort((a, b) => { + const scoreDiff = b.score - a.score; + return scoreDiff === 0 ? a.originalIndex - b.originalIndex : scoreDiff; + }); + + return linksAndScores; + } catch (error) { + console.error(`Error performing semantic search: ${error}`); + return []; + } +} + +export { performRanking }; diff --git a/apps/api/src/lib/timeout.ts b/apps/api/src/lib/timeout.ts index fd0e5ade..46d34a5a 100644 --- a/apps/api/src/lib/timeout.ts +++ b/apps/api/src/lib/timeout.ts @@ -1 +1 @@ -export const axiosTimeout = 3000; \ No newline at end of file +export const axiosTimeout = 5000; \ No newline at end of file diff --git a/apps/api/src/routes/v0.ts b/apps/api/src/routes/v0.ts index 3a7bda65..2169c2bd 100644 --- a/apps/api/src/routes/v0.ts +++ b/apps/api/src/routes/v0.ts @@ -27,4 +27,4 @@ v0Router.post("/v0/search", searchController); // Health/Probe routes v0Router.get("/v0/health/liveness", livenessController); -v0Router.get("/v0/health/readiness", readinessController); +v0Router.get("/v0/health/readiness", readinessController); \ No newline at end of file diff --git a/apps/api/src/routes/v1.ts b/apps/api/src/routes/v1.ts index 3eaace3b..048e1efc 100644 --- a/apps/api/src/routes/v1.ts +++ b/apps/api/src/routes/v1.ts @@ -18,6 +18,7 @@ import { logger } from "../lib/logger"; import { scrapeStatusController } from "../controllers/v1/scrape-status"; import { 
concurrencyCheckController } from "../controllers/v1/concurrency-check"; import { batchScrapeController } from "../controllers/v1/batch-scrape"; +import { extractController } from "../controllers/v1/extract"; // import { crawlPreviewController } from "../../src/controllers/v1/crawlPreview"; // import { crawlJobStatusPreviewController } from "../../src/controllers/v1/status"; // import { searchController } from "../../src/controllers/v1/search"; @@ -98,7 +99,7 @@ function idempotencyMiddleware(req: Request, res: Response, next: NextFunction) function blocklistMiddleware(req: Request, res: Response, next: NextFunction) { if (typeof req.body.url === "string" && isUrlBlocked(req.body.url)) { if (!res.headersSent) { - return res.status(403).json({ success: false, error: "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions." }); + return res.status(403).json({ success: false, error: "URL is blocked intentionally. Firecrawl currently does not support social media scraping due to policy restrictions." 
}); } } next(); @@ -178,6 +179,13 @@ v1Router.ws( crawlStatusWSController ); +v1Router.post( + "/extract", + authMiddleware(RateLimiterMode.Scrape), + checkCreditsMiddleware(1), + wrap(extractController) +); + // v1Router.post("/crawlWebsitePreview", crawlPreviewController); @@ -199,3 +207,4 @@ v1Router.delete( // Health/Probe routes // v1Router.get("/health/liveness", livenessController); // v1Router.get("/health/readiness", readinessController); + diff --git a/apps/api/src/scraper/scrapeURL/engines/cache/index.ts b/apps/api/src/scraper/scrapeURL/engines/cache/index.ts new file mode 100644 index 00000000..9506be0f --- /dev/null +++ b/apps/api/src/scraper/scrapeURL/engines/cache/index.ts @@ -0,0 +1,19 @@ +import { cacheKey, getEntryFromCache } from "../../../../lib/cache"; +import { EngineScrapeResult } from ".."; +import { Meta } from "../.."; +import { EngineError } from "../../error"; + +export async function scrapeCache(meta: Meta): Promise { + const key = cacheKey(meta.url, meta.options, meta.internalOptions); + if (key === null) throw new EngineError("Scrape not eligible for caching"); + + const entry = await getEntryFromCache(key); + if (entry === null) throw new EngineError("Cache missed"); + + return { + url: entry.url, + html: entry.html, + statusCode: entry.statusCode, + error: entry.error, + }; +} \ No newline at end of file diff --git a/apps/api/src/scraper/scrapeURL/engines/fire-engine/checkStatus.ts b/apps/api/src/scraper/scrapeURL/engines/fire-engine/checkStatus.ts index 1f1bcc69..2c67e196 100644 --- a/apps/api/src/scraper/scrapeURL/engines/fire-engine/checkStatus.ts +++ b/apps/api/src/scraper/scrapeURL/engines/fire-engine/checkStatus.ts @@ -103,4 +103,4 @@ export async function fireEngineCheckStatus(logger: Logger, jobId: string): Prom } }); } -} \ No newline at end of file +} diff --git a/apps/api/src/scraper/scrapeURL/engines/index.ts b/apps/api/src/scraper/scrapeURL/engines/index.ts index aadef7fc..d2305d9c 100644 --- 
a/apps/api/src/scraper/scrapeURL/engines/index.ts +++ b/apps/api/src/scraper/scrapeURL/engines/index.ts @@ -6,14 +6,17 @@ import { scrapePDF } from "./pdf"; import { scrapeURLWithScrapingBee } from "./scrapingbee"; import { scrapeURLWithFetch } from "./fetch"; import { scrapeURLWithPlaywright } from "./playwright"; +import { scrapeCache } from "./cache"; -export type Engine = "fire-engine;chrome-cdp" | "fire-engine;playwright" | "fire-engine;tlsclient" | "scrapingbee" | "scrapingbeeLoad" | "playwright" | "fetch" | "pdf" | "docx"; +export type Engine = "fire-engine;chrome-cdp" | "fire-engine;playwright" | "fire-engine;tlsclient" | "scrapingbee" | "scrapingbeeLoad" | "playwright" | "fetch" | "pdf" | "docx" | "cache"; const useScrapingBee = process.env.SCRAPING_BEE_API_KEY !== '' && process.env.SCRAPING_BEE_API_KEY !== undefined; const useFireEngine = process.env.FIRE_ENGINE_BETA_URL !== '' && process.env.FIRE_ENGINE_BETA_URL !== undefined; const usePlaywright = process.env.PLAYWRIGHT_MICROSERVICE_URL !== '' && process.env.PLAYWRIGHT_MICROSERVICE_URL !== undefined; +const useCache = process.env.CACHE_REDIS_URL !== '' && process.env.CACHE_REDIS_URL !== undefined; export const engines: Engine[] = [ + // ...(useCache ? [ "cache" as const ] : []), ...(useFireEngine ? [ "fire-engine;chrome-cdp" as const, "fire-engine;playwright" as const, "fire-engine;tlsclient" as const ] : []), ...(useScrapingBee ? [ "scrapingbee" as const, "scrapingbeeLoad" as const ] : []), ...(usePlaywright ? 
[ "playwright" as const ] : []), @@ -74,6 +77,7 @@ export type EngineScrapeResult = { const engineHandlers: { [E in Engine]: (meta: Meta) => Promise } = { + "cache": scrapeCache, "fire-engine;chrome-cdp": scrapeURLWithFireEngineChromeCDP, "fire-engine;playwright": scrapeURLWithFireEnginePlaywright, "fire-engine;tlsclient": scrapeURLWithFireEngineTLSClient, @@ -95,6 +99,22 @@ export const engineOptions: { quality: number, } } = { + "cache": { + features: { + "actions": false, + "waitFor": true, + "screenshot": false, + "screenshot@fullScreen": false, + "pdf": false, // TODO: figure this out + "docx": false, // TODO: figure this out + "atsv": false, + "location": false, + "mobile": false, + "skipTlsVerification": false, + "useFastMode": false, + }, + quality: 1000, // cache should always be tried first + }, "fire-engine;chrome-cdp": { features: { "actions": true, diff --git a/apps/api/src/scraper/scrapeURL/transformers/cache.ts b/apps/api/src/scraper/scrapeURL/transformers/cache.ts new file mode 100644 index 00000000..e0c09c44 --- /dev/null +++ b/apps/api/src/scraper/scrapeURL/transformers/cache.ts @@ -0,0 +1,26 @@ +import { Document } from "../../../controllers/v1/types"; +import { Meta } from ".."; +import { CacheEntry, cacheKey, saveEntryToCache } from "../../../lib/cache"; + +export function saveToCache(meta: Meta, document: Document): Document { + if (document.metadata.statusCode! < 200 || document.metadata.statusCode! >= 300) return document; + + if (document.rawHtml === undefined) { + throw new Error("rawHtml is undefined -- this transformer is being called out of order"); + } + + const key = cacheKey(meta.url, meta.options, meta.internalOptions); + + if (key !== null) { + const entry: CacheEntry = { + html: document.rawHtml!, + statusCode: document.metadata.statusCode!, + url: document.metadata.url ?? document.metadata.sourceURL!, + error: document.metadata.error ?? 
undefined, + }; + + saveEntryToCache(key, entry); + } + + return document; +} \ No newline at end of file diff --git a/apps/api/src/scraper/scrapeURL/transformers/index.ts b/apps/api/src/scraper/scrapeURL/transformers/index.ts index d839f8bc..b8063f7e 100644 --- a/apps/api/src/scraper/scrapeURL/transformers/index.ts +++ b/apps/api/src/scraper/scrapeURL/transformers/index.ts @@ -7,6 +7,7 @@ import { extractMetadata } from "../lib/extractMetadata"; import { performLLMExtract } from "./llmExtract"; import { uploadScreenshot } from "./uploadScreenshot"; import { removeBase64Images } from "./removeBase64Images"; +import { saveToCache } from "./cache"; export type Transformer = (meta: Meta, document: Document) => Document | Promise; @@ -104,6 +105,7 @@ export function coerceFieldsToFormats(meta: Meta, document: Document): Document // TODO: allow some of these to run in parallel export const transformerStack: Transformer[] = [ + saveToCache, deriveHTMLFromRawHTML, deriveMarkdownFromHTML, deriveLinksFromHTML, diff --git a/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts b/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts index 69a92197..3866683a 100644 --- a/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts +++ b/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts @@ -58,32 +58,33 @@ function normalizeSchema(x: any): any { } } -async function generateOpenAICompletions(logger: Logger, document: Document, options: ExtractOptions): Promise { +export async function generateOpenAICompletions(logger: Logger, options: ExtractOptions, markdown?: string, previousWarning?: string): Promise<{ extract: any, numTokens: number, warning: string | undefined }> { + let extract: any; + let warning: string | undefined; + const openai = new OpenAI(); const model: TiktokenModel = (process.env.MODEL_NAME as TiktokenModel) ?? 
"gpt-4o-mini"; - if (document.markdown === undefined) { + if (markdown === undefined) { throw new Error("document.markdown is undefined -- this is unexpected"); } - let extractionContent = document.markdown; - // count number of tokens let numTokens = 0; const encoder = encoding_for_model(model as TiktokenModel); try { // Encode the message into tokens - const tokens = encoder.encode(extractionContent); + const tokens = encoder.encode(markdown); // Return the number of tokens numTokens = tokens.length; } catch (error) { - logger.warn("Calculating num tokens of string failed", { error, extractionContent }); + logger.warn("Calculating num tokens of string failed", { error, markdown }); - extractionContent = extractionContent.slice(0, maxTokens * modifier); + markdown = markdown.slice(0, maxTokens * modifier); - const warning = "Failed to derive number of LLM tokens the extraction might use -- the input has been automatically trimmed to the maximum number of tokens (" + maxTokens + ") we support."; - document.warning = document.warning === undefined ? warning : " " + warning; + let w = "Failed to derive number of LLM tokens the extraction might use -- the input has been automatically trimmed to the maximum number of tokens (" + maxTokens + ") we support."; + warning = previousWarning === undefined ? w : w + " " + previousWarning; } finally { // Free the encoder resources after use encoder.free(); @@ -91,10 +92,10 @@ async function generateOpenAICompletions(logger: Logger, document: Document, opt if (numTokens > maxTokens) { // trim the document to the maximum number of tokens, tokens != characters - extractionContent = extractionContent.slice(0, maxTokens * modifier); + markdown = markdown.slice(0, maxTokens * modifier); - const warning = "The extraction content would have used more tokens (" + numTokens + ") than the maximum we allow (" + maxTokens + "). -- the input has been automatically trimmed."; - document.warning = document.warning === undefined ? 
warning : " " + warning; + const w = "The extraction content would have used more tokens (" + numTokens + ") than the maximum we allow (" + maxTokens + "). -- the input has been automatically trimmed."; + warning = previousWarning === undefined ? w : w + " " + previousWarning; } let schema = options.schema; @@ -107,12 +108,22 @@ async function generateOpenAICompletions(logger: Logger, document: Document, opt required: ["items"], additionalProperties: false, }; + } else if (schema && typeof schema === 'object' && !schema.type) { + schema = { + type: "object", + properties: Object.fromEntries( + Object.entries(schema).map(([key, value]) => [key, { type: value }]) + ), + required: Object.keys(schema), + additionalProperties: false + }; } schema = normalizeSchema(schema); const jsonCompletion = await openai.beta.chat.completions.parse({ model, + temperature: 0, messages: [ { role: "system", @@ -120,7 +131,7 @@ async function generateOpenAICompletions(logger: Logger, document: Document, opt }, { role: "user", - content: [{ type: "text", text: extractionContent }], + content: [{ type: "text", text: markdown }], }, { role: "user", @@ -143,26 +154,35 @@ async function generateOpenAICompletions(logger: Logger, document: Document, opt throw new LLMRefusalError(jsonCompletion.choices[0].message.refusal); } - document.extract = jsonCompletion.choices[0].message.parsed; + extract = jsonCompletion.choices[0].message.parsed; - if (document.extract === null && jsonCompletion.choices[0].message.content !== null) { + if (extract === null && jsonCompletion.choices[0].message.content !== null) { try { - document.extract = JSON.parse(jsonCompletion.choices[0].message.content); + extract = JSON.parse(jsonCompletion.choices[0].message.content); } catch (e) { logger.error("Failed to parse returned JSON, no schema specified.", { error: e }); throw new LLMRefusalError("Failed to parse returned JSON. 
Please specify a schema in the extract object."); } } - if (options.schema && options.schema.type === "array") { - document.extract = document.extract?.items; + // If the user actually wants the wrapping items object, they can list "items" as 'required' in their own schema + // otherwise, we just return the items array (the internal wrapper schema always requires "items", so the user's schema is checked) + if (options.schema && options.schema.type === "array" && !options.schema.required?.includes("items")) { + extract = extract?.items; } - return document; + return { extract, warning, numTokens }; } export async function performLLMExtract(meta: Meta, document: Document): Promise { if (meta.options.formats.includes("extract")) { - document = await generateOpenAICompletions(meta.logger.child({ method: "performLLMExtract/generateOpenAICompletions" }), document, meta.options.extract!); + const { extract, warning } = await generateOpenAICompletions( + meta.logger.child({ method: "performLLMExtract/generateOpenAICompletions" }), + meta.options.extract!, + document.markdown, + document.warning, + ); + document.extract = extract; + document.warning = warning ?? document.warning; // keep an earlier warning when extraction adds none } return document; diff --git a/apps/api/src/services/queue-jobs.ts b/apps/api/src/services/queue-jobs.ts index e4a5ace8..bc2debfe 100644 --- a/apps/api/src/services/queue-jobs.ts +++ b/apps/api/src/services/queue-jobs.ts @@ -109,6 +109,6 @@ export function waitForJob(jobId: string, timeout: number): Promise } } } - }, 500); + }, 250); }) } diff --git a/apps/api/src/types.ts b/apps/api/src/types.ts index cc04ca18..06ff1d48 100644 --- a/apps/api/src/types.ts +++ b/apps/api/src/types.ts @@ -106,6 +106,15 @@ export interface FirecrawlCrawlStatusResponse { error?: string; } +export interface FirecrawlExtractResponse { + statusCode: number; + body: { + success: boolean; + data: any[]; + }; + error?: string; +} + export enum RateLimiterMode { Crawl = "crawl", CrawlStatus = "crawlStatus", diff --git a/apps/js-sdk/example.js b/apps/js-sdk/example.js index c4b21d5f..21bdb2a1 100644 --- a/apps/js-sdk/example.js +++ 
b/apps/js-sdk/example.js @@ -1,4 +1,5 @@ import FirecrawlApp from 'firecrawl'; +import { z } from 'zod'; const app = new FirecrawlApp({apiKey: "fc-YOUR_API_KEY"}); @@ -42,6 +43,18 @@ const main = async () => { const mapResult = await app.mapUrl('https://firecrawl.dev'); console.log(mapResult) + // Extract information from a website using LLM: + const extractSchema = z.object({ + title: z.string(), + description: z.string(), + links: z.array(z.string()) + }); + + const extractResult = await app.extract(['https://firecrawl.dev'], { + prompt: "Extract the title, description, and links from the website", + schema: extractSchema + }); + console.log(extractResult); // Crawl a website with WebSockets: const watch = await app.crawlUrlAndWatch('mendable.ai', { excludePaths: ['blog/*'], limit: 5}); diff --git a/apps/js-sdk/example.ts b/apps/js-sdk/example.ts index 7412e479..a8fff30a 100644 --- a/apps/js-sdk/example.ts +++ b/apps/js-sdk/example.ts @@ -42,6 +42,19 @@ const main = async () => { const mapResult = await app.mapUrl('https://firecrawl.dev'); console.log(mapResult) + // // Extract information from a website using LLM: + // const extractSchema = z.object({ + // title: z.string(), + // description: z.string(), + // links: z.array(z.string()) + // }); + + // const extractResult = await app.extractUrls(['https://firecrawl.dev'], { + // prompt: "Extract the title, description, and links from the website", + // schema: extractSchema + // }); + // console.log(extractResult); + // Crawl a website with WebSockets: const watch = await app.crawlUrlAndWatch('mendable.ai', { excludePaths: ['blog/*'], limit: 5}); diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json index 65540840..8f3682c2 100644 --- a/apps/js-sdk/firecrawl/package.json +++ b/apps/js-sdk/firecrawl/package.json @@ -1,6 +1,6 @@ { "name": "@mendable/firecrawl-js", - "version": "1.8.5", + "version": "1.9.0", "description": "JavaScript SDK for Firecrawl API", "main": "dist/index.js", 
"types": "dist/index.d.ts", diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index e6824dcb..6fb8ad2e 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -236,6 +236,27 @@ export interface MapResponse { error?: string; } +/** + * Parameters for extracting information from URLs. + * Defines options for extracting information from URLs. + */ +export interface ExtractParams { + prompt: string; + schema?: zt.ZodSchema; + systemPrompt?: string; + allowExternalLinks?: boolean; +} + +/** + * Response interface for extracting information from URLs. + * Defines the structure of the response received after extracting information from URLs. + */ +export interface ExtractResponse { + success: true; + data: zt.infer; + error?: string; +} + /** * Error response interface. * Defines the structure of the response received when an error occurs. @@ -245,7 +266,6 @@ export interface ErrorResponse { error: string; } - /** * Custom error class for Firecrawl. * Extends the built-in Error class to include a status code. @@ -679,6 +699,44 @@ export default class FirecrawlApp { return { success: false, error: "Internal server error." }; } + /** + * Extracts information from URLs using the Firecrawl API. + * @param url - The URL to extract information from. + * @param params - Additional parameters for the extract request. + * @returns The response from the extract operation. + */ + async extract(urls: string[], params?: ExtractParams): Promise { + const headers = this.prepareHeaders(); + + if (!params?.prompt) { + throw new FirecrawlError("Prompt is required", 400); + } + + let jsonData: { urls: string[] } & ExtractParams= { urls, ...params }; + let jsonSchema: any; + try { + jsonSchema = params?.schema ? zodToJsonSchema(params.schema) : undefined; + } catch (error: any) { + throw new FirecrawlError("Invalid schema. 
Use a valid Zod schema.", 400); + } + + try { + const response: AxiosResponse = await this.postRequest( + this.apiUrl + `/v1/extract`, + { ...jsonData, schema: jsonSchema }, + headers + ); + if (response.status === 200) { + return response.data as ExtractResponse; + } else { + this.handleError(response, "extract"); + } + } catch (error: any) { + throw new FirecrawlError(error.message, 500); + } + return { success: false, error: "Internal server error." }; + } + /** * Prepares the headers for an API request. * @param idempotencyKey - Optional key to ensure idempotency. diff --git a/apps/python-sdk/example.py b/apps/python-sdk/example.py index e7c80b30..686b7676 100644 --- a/apps/python-sdk/example.py +++ b/apps/python-sdk/example.py @@ -2,6 +2,8 @@ import time import nest_asyncio import uuid from firecrawl.firecrawl import FirecrawlApp +from pydantic import BaseModel, Field +from typing import List app = FirecrawlApp(api_key="fc-") @@ -50,9 +52,6 @@ print(crawl_status) # LLM Extraction: # Define schema to extract contents into using pydantic -from pydantic import BaseModel, Field -from typing import List - class ArticleSchema(BaseModel): title: str points: int @@ -115,6 +114,22 @@ llm_extraction_result = app2.scrape_url('https://news.ycombinator.com', { map_result = app.map_url('https://firecrawl.dev', { 'search': 'blog' }) print(map_result) +# Extract URLs: +class ExtractSchema(BaseModel): + title: str + description: str + links: List[str] + +# Define the schema using Pydantic +extract_schema = ExtractSchema.schema() + +# Perform the extraction +extract_result = app.extract(['https://firecrawl.dev'], { + 'prompt': "Extract the title, description, and links from the website", + 'schema': extract_schema +}) +print(extract_result) + # Crawl a website with WebSockets: # inside an async function... 
nest_asyncio.apply()
diff --git a/apps/python-sdk/firecrawl/__init__.py b/apps/python-sdk/firecrawl/__init__.py
index cb897b7e..d39b77a8 100644
--- a/apps/python-sdk/firecrawl/__init__.py
+++ b/apps/python-sdk/firecrawl/__init__.py
@@ -13,7 +13,7 @@ import os
 
 from .firecrawl import FirecrawlApp # noqa
 
-__version__ = "1.5.0"
+__version__ = "1.6.0"
 
 # Define the logger for the Firecrawl project
 logger: logging.Logger = logging.getLogger("firecrawl")
diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py
index c2693c3d..bb87906c 100644
--- a/apps/python-sdk/firecrawl/firecrawl.py
+++ b/apps/python-sdk/firecrawl/firecrawl.py
@@ -12,15 +12,40 @@ Classes:
 import logging
 import os
 import time
-from typing import Any, Dict, Optional, List
+from typing import Any, Dict, Optional, List, Union
 import json
 
 import requests
+import pydantic
 import websockets
 
 logger : logging.Logger = logging.getLogger("firecrawl")
 
 class FirecrawlApp:
+    class ExtractParams(pydantic.BaseModel):
+        """
+        Parameters for the extract operation.
+        """
+        prompt: str
+        schema_: Optional[Any] = pydantic.Field(None, alias='schema')
+        system_prompt: Optional[str] = None
+        allow_external_links: Optional[bool] = False
+
+    class ExtractResponse(pydantic.BaseModel):
+        """
+        Response from the extract operation.
+        """
+        success: bool
+        data: Optional[Any] = None
+        error: Optional[str] = None
+
+    class ErrorResponse(pydantic.BaseModel):
+        """
+        Error response.
+        """
+        success: bool
+        error: str
+
     def __init__(self, api_key: Optional[str] = None, api_url: Optional[str] = None) -> None:
         """
         Initialize the FirecrawlApp instance with API key, API URL.
@@ -434,6 +459,66 @@ class FirecrawlApp:
         else:
             self._handle_error(response, 'check batch scrape status')
 
+
+    def extract(self, urls: List[str], params: Optional[ExtractParams] = None) -> Union[ExtractResponse, ErrorResponse]:
+        """
+        Extracts information from URLs using the Firecrawl API.
+
+        Args:
+            urls (List[str]): The URLs to extract information from.
+            params (Optional[ExtractParams]): Parameters for the extract request,
+                given either as an ExtractParams model or as a plain dict.
+
+        Returns:
+            Union[ExtractResponse, ErrorResponse]: The response from the extract operation.
+
+        Raises:
+            ValueError: If the prompt or schema is missing, or if the request fails.
+        """
+        headers = self._prepare_headers()
+
+        # Accept an ExtractParams model as well as the plain dict used by callers.
+        if isinstance(params, pydantic.BaseModel):
+            params = {
+                'prompt': params.prompt,
+                'schema': params.schema_,
+                'system_prompt': params.system_prompt,
+                'allow_external_links': params.allow_external_links,
+            }
+
+        if not params or not params.get('prompt'):
+            raise ValueError("Prompt is required")
+
+        schema = params.get('schema')
+        if not schema:
+            raise ValueError("Schema is required for extraction")
+
+        # A Pydantic model is converted to a JSON schema; anything else (e.g. a
+        # dict produced by Model.schema()) is sent as-is instead of being dropped.
+        jsonSchema = schema.schema() if hasattr(schema, 'schema') else schema
+
+        # Send only the camelCase names the request uses; the snake_case
+        # spellings are translated below rather than forwarded alongside them.
+        translated = ('schema', 'system_prompt', 'allow_external_links')
+        jsonData = {'urls': urls, **{k: v for k, v in params.items() if k not in translated}}
+        jsonData['schema'] = jsonSchema
+        jsonData['allowExternalLinks'] = params.get(
+            'allow_external_links', jsonData.get('allowExternalLinks', False)
+        )
+        if params.get('system_prompt') is not None:
+            jsonData['systemPrompt'] = params['system_prompt']
+
+        try:
+            response = self._post_request(f'{self.api_url}/v1/extract', jsonData, headers)
+            if response.status_code == 200:
+                return response.json()
+            else:
+                self._handle_error(response, "extract")
+        except Exception as e:
+            raise ValueError(str(e), 500) from e
+
+        return {'success': False, 'error': "Internal server error."}
+
     def _prepare_headers(self, idempotency_key: Optional[str] = None) -> Dict[str, str]:
         """
         Prepare the headers for API requests.