diff --git a/apps/api/package.json b/apps/api/package.json index 48aa780092..699878b72e 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -113,7 +113,7 @@ "joplin-turndown-plugin-gfm": "^1.0.12", "jsdom": "^26.0.0", "koffi": "^2.9.0", - "lodash": "^4.17.21", + "lodash": "^4.17.23", "marked": "^14.1.2", "ollama-ai-provider": "^1.2.0", "openai": "^5.20.2", diff --git a/apps/api/pnpm-lock.yaml b/apps/api/pnpm-lock.yaml index 521a382323..24c5dcde85 100644 --- a/apps/api/pnpm-lock.yaml +++ b/apps/api/pnpm-lock.yaml @@ -146,8 +146,8 @@ importers: specifier: ^2.9.0 version: 2.9.0 lodash: - specifier: ^4.17.21 - version: 4.17.21 + specifier: ^4.17.23 + version: 4.17.23 marked: specifier: ^14.1.2 version: 14.1.2 @@ -4771,8 +4771,8 @@ packages: lodash.repeat@4.1.0: resolution: {integrity: sha512-eWsgQW89IewS95ZOcr15HHCX6FVDxq3f2PNUIng3fyzsPev9imFQxIYdFZ6crl8L56UR6ZlGDLcEb3RZsCSSqw==} - lodash@4.17.21: - resolution: {integrity: sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==} + lodash@4.17.23: + resolution: {integrity: sha512-LgVTMpQtIopCi79SJeDiP0TfWi5CNEc/L/aRdTh3yIvmZXTnheWpKjSZhnvMl8iXbC1tFg9gdHHDMLoV7CnG+w==} log-update@6.1.0: resolution: {integrity: sha512-9ie8ItPR6tjY5uYJh8K/Zrv/RMZ5VOlOWvtZdEHYSTFKZfIBPQa9tOAEeAWhd+AnIneLJ22w5fjOYtoutpWq5w==} @@ -6897,7 +6897,7 @@ snapshots: idb-keyval: 6.2.1 ox: 0.6.9(typescript@5.8.3)(zod@3.25.76) preact: 10.24.2 - viem: 2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@4.1.12) + viem: 2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76) zustand: 5.0.3(react@18.3.1)(use-sync-external-store@1.4.0(react@18.3.1)) transitivePeerDependencies: - '@types/react' @@ -6971,7 +6971,7 @@ snapshots: idb-keyval: 6.2.1 ox: 0.6.9(typescript@5.8.3)(zod@3.25.76) preact: 10.24.2 - viem: 2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@4.1.12) + viem: 2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76) zustand: 5.0.3(react@18.3.1)(use-sync-external-store@1.4.0(react@18.3.1)) transitivePeerDependencies: - '@types/react' @@ -7174,11 +7174,11 @@ snapshots: ethereum-cryptography: 2.2.1 micro-ftch: 0.3.1 - '@gemini-wallet/core@0.2.0(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76))': + '@gemini-wallet/core@0.2.0(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@4.1.12))': dependencies: '@metamask/rpc-errors': 7.0.2 eventemitter3: 5.0.1 - viem: 2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@4.1.12) + viem: 2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76) transitivePeerDependencies: - supports-color @@ -7707,7 +7707,7 @@ snapshots: '@types/debug': 4.1.12 '@types/lodash': 4.17.20 debug: 4.4.3 - lodash: 4.17.21 + lodash: 4.17.23 pony-cause: 2.1.11 semver: 7.7.3 uuid: 9.0.1 @@ -8504,7 +8504,7 @@ snapshots: dependencies: big.js: 6.2.2 dayjs: 1.11.13 - viem: 2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@4.1.12) + viem: 2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76) transitivePeerDependencies: - bufferutil - typescript @@ -8517,7 +8517,7 @@ snapshots: '@reown/appkit-wallet': 1.7.8(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10) '@walletconnect/universal-provider': 2.21.0(bufferutil@4.0.9)(encoding@0.1.13)(ioredis@5.6.1)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76) valtio: 1.13.2(react@18.3.1) - viem: 
2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@4.1.12) + viem: 2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76) transitivePeerDependencies: - '@azure/app-configuration' - '@azure/cosmos' @@ -8663,7 +8663,7 @@ snapshots: '@walletconnect/logger': 2.1.2 '@walletconnect/universal-provider': 2.21.0(bufferutil@4.0.9)(encoding@0.1.13)(ioredis@5.6.1)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76) valtio: 1.13.2(react@18.3.1) - viem: 2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@4.1.12) + viem: 2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76) transitivePeerDependencies: - '@azure/app-configuration' - '@azure/cosmos' @@ -8716,7 +8716,7 @@ snapshots: '@walletconnect/universal-provider': 2.21.0(bufferutil@4.0.9)(encoding@0.1.13)(ioredis@5.6.1)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76) bs58: 6.0.0 valtio: 1.13.2(react@18.3.1) - viem: 2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@4.1.12) + viem: 2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76) transitivePeerDependencies: - '@azure/app-configuration' - '@azure/cosmos' @@ -8757,7 +8757,7 @@ snapshots: '@safe-global/safe-apps-sdk@9.1.0(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76)': dependencies: '@safe-global/safe-gateway-typescript-sdk': 3.23.1 - viem: 2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@4.1.12) + viem: 2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76) transitivePeerDependencies: - bufferutil - typescript @@ -9762,19 +9762,19 @@ snapshots: '@vercel/oidc@3.0.3': {} - '@wagmi/connectors@5.11.2(@tanstack/react-query@5.84.1(react@18.3.1))(@wagmi/core@2.21.2(@tanstack/query-core@5.83.1)(react@18.3.1)(typescript@5.8.3)(use-sync-external-store@1.4.0(react@18.3.1))(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76)))(bufferutil@4.0.9)(encoding@0.1.13)(ioredis@5.6.1)(react@18.3.1)(typescript@5.8.3)(use-sync-external-store@1.4.0(react@18.3.1))(utf-8-validate@5.0.10)(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76))(wagmi@2.17.5(@tanstack/query-core@5.83.1)(@tanstack/react-query@5.84.1(react@18.3.1))(bufferutil@4.0.9)(encoding@0.1.13)(ioredis@5.6.1)(react@18.3.1)(typescript@5.8.3)(utf-8-validate@5.0.10)(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76))(zod@3.25.76))(zod@3.25.76)': + '@wagmi/connectors@5.11.2(@tanstack/react-query@5.84.1(react@18.3.1))(@wagmi/core@2.21.2(@tanstack/query-core@5.83.1)(react@18.3.1)(typescript@5.8.3)(use-sync-external-store@1.4.0(react@18.3.1))(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@4.1.12)))(bufferutil@4.0.9)(encoding@0.1.13)(ioredis@5.6.1)(react@18.3.1)(typescript@5.8.3)(use-sync-external-store@1.4.0(react@18.3.1))(utf-8-validate@5.0.10)(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@4.1.12))(wagmi@2.17.5(@tanstack/query-core@5.83.1)(@tanstack/react-query@5.84.1(react@18.3.1))(bufferutil@4.0.9)(encoding@0.1.13)(ioredis@5.6.1)(react@18.3.1)(typescript@5.8.3)(utf-8-validate@5.0.10)(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@4.1.12))(zod@3.25.76))(zod@3.25.76)': dependencies: '@base-org/account': 1.1.1(bufferutil@4.0.9)(react@18.3.1)(typescript@5.8.3)(use-sync-external-store@1.4.0(react@18.3.1))(utf-8-validate@5.0.10)(zod@3.25.76) '@coinbase/wallet-sdk': 
4.3.6(bufferutil@4.0.9)(react@18.3.1)(typescript@5.8.3)(use-sync-external-store@1.4.0(react@18.3.1))(utf-8-validate@5.0.10)(zod@3.25.76) - '@gemini-wallet/core': 0.2.0(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76)) + '@gemini-wallet/core': 0.2.0(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@4.1.12)) '@metamask/sdk': 0.33.1(bufferutil@4.0.9)(encoding@0.1.13)(utf-8-validate@5.0.10) '@safe-global/safe-apps-provider': 0.18.6(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76) '@safe-global/safe-apps-sdk': 9.1.0(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76) - '@wagmi/core': 2.21.2(@tanstack/query-core@5.83.1)(react@18.3.1)(typescript@5.8.3)(use-sync-external-store@1.4.0(react@18.3.1))(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76)) + '@wagmi/core': 2.21.2(@tanstack/query-core@5.83.1)(react@18.3.1)(typescript@5.8.3)(use-sync-external-store@1.4.0(react@18.3.1))(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@4.1.12)) '@walletconnect/ethereum-provider': 2.21.1(bufferutil@4.0.9)(encoding@0.1.13)(ioredis@5.6.1)(react@18.3.1)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76) cbw-sdk: '@coinbase/wallet-sdk@3.9.3' - porto: 0.2.19(@tanstack/react-query@5.84.1(react@18.3.1))(@wagmi/core@2.21.2(@tanstack/query-core@5.83.1)(react@18.3.1)(typescript@5.8.3)(use-sync-external-store@1.4.0(react@18.3.1))(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76)))(react@18.3.1)(typescript@5.8.3)(use-sync-external-store@1.4.0(react@18.3.1))(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76))(wagmi@2.17.5(@tanstack/query-core@5.83.1)(@tanstack/react-query@5.84.1(react@18.3.1))(bufferutil@4.0.9)(encoding@0.1.13)(ioredis@5.6.1)(react@18.3.1)(typescript@5.8.3)(utf-8-validate@5.0.10)(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76))(zod@3.25.76)) - viem: 2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@4.1.12) + porto: 0.2.19(@tanstack/react-query@5.84.1(react@18.3.1))(@wagmi/core@2.21.2(@tanstack/query-core@5.83.1)(react@18.3.1)(typescript@5.8.3)(use-sync-external-store@1.4.0(react@18.3.1))(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@4.1.12)))(react@18.3.1)(typescript@5.8.3)(use-sync-external-store@1.4.0(react@18.3.1))(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@4.1.12))(wagmi@2.17.5(@tanstack/query-core@5.83.1)(@tanstack/react-query@5.84.1(react@18.3.1))(bufferutil@4.0.9)(encoding@0.1.13)(ioredis@5.6.1)(react@18.3.1)(typescript@5.8.3)(utf-8-validate@5.0.10)(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@4.1.12))(zod@3.25.76)) + viem: 2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76) optionalDependencies: typescript: 5.8.3 transitivePeerDependencies: @@ -9808,11 +9808,11 @@ snapshots: - wagmi - zod - '@wagmi/core@2.21.2(@tanstack/query-core@5.83.1)(react@18.3.1)(typescript@5.8.3)(use-sync-external-store@1.4.0(react@18.3.1))(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76))': + '@wagmi/core@2.21.2(@tanstack/query-core@5.83.1)(react@18.3.1)(typescript@5.8.3)(use-sync-external-store@1.4.0(react@18.3.1))(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@4.1.12))': dependencies: eventemitter3: 5.0.1 mipd: 0.0.7(typescript@5.8.3) - viem: 
2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@4.1.12) + viem: 2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76) zustand: 5.0.0(react@18.3.1)(use-sync-external-store@1.4.0(react@18.3.1)) optionalDependencies: '@tanstack/query-core': 5.83.1 @@ -10365,11 +10365,6 @@ snapshots: typescript: 5.8.3 zod: 3.25.76 - abitype@1.0.8(typescript@5.8.3)(zod@4.1.12): - optionalDependencies: - typescript: 5.8.3 - zod: 4.1.12 - abitype@1.1.1(typescript@5.8.3)(zod@3.22.4): optionalDependencies: typescript: 5.8.3 @@ -10497,7 +10492,7 @@ snapshots: async@2.6.4: dependencies: - lodash: 4.17.21 + lodash: 4.17.23 async@3.2.5: {} @@ -12485,7 +12480,7 @@ snapshots: lodash.repeat@4.1.0: {} - lodash@4.17.21: {} + lodash@4.17.23: {} log-update@6.1.0: dependencies: @@ -12832,21 +12827,6 @@ snapshots: transitivePeerDependencies: - zod - ox@0.8.6(typescript@5.8.3)(zod@4.1.12): - dependencies: - '@adraffy/ens-normalize': 1.11.1 - '@noble/ciphers': 1.3.0 - '@noble/curves': 1.9.6 - '@noble/hashes': 1.8.0 - '@scure/bip32': 1.7.0 - '@scure/bip39': 1.6.0 - abitype: 1.1.1(typescript@5.8.3)(zod@4.1.12) - eventemitter3: 5.0.1 - optionalDependencies: - typescript: 5.8.3 - transitivePeerDependencies: - - zod - ox@0.9.8(typescript@5.8.3)(zod@4.1.12): dependencies: '@adraffy/ens-normalize': 1.11.1 @@ -13057,21 +13037,21 @@ snapshots: pony-cause@2.1.11: {} - porto@0.2.19(@tanstack/react-query@5.84.1(react@18.3.1))(@wagmi/core@2.21.2(@tanstack/query-core@5.83.1)(react@18.3.1)(typescript@5.8.3)(use-sync-external-store@1.4.0(react@18.3.1))(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76)))(react@18.3.1)(typescript@5.8.3)(use-sync-external-store@1.4.0(react@18.3.1))(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76))(wagmi@2.17.5(@tanstack/query-core@5.83.1)(@tanstack/react-query@5.84.1(react@18.3.1))(bufferutil@4.0.9)(encoding@0.1.13)(ioredis@5.6.1)(react@18.3.1)(typescript@5.8.3)(utf-8-validate@5.0.10)(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76))(zod@3.25.76)): + porto@0.2.19(@tanstack/react-query@5.84.1(react@18.3.1))(@wagmi/core@2.21.2(@tanstack/query-core@5.83.1)(react@18.3.1)(typescript@5.8.3)(use-sync-external-store@1.4.0(react@18.3.1))(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@4.1.12)))(react@18.3.1)(typescript@5.8.3)(use-sync-external-store@1.4.0(react@18.3.1))(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@4.1.12))(wagmi@2.17.5(@tanstack/query-core@5.83.1)(@tanstack/react-query@5.84.1(react@18.3.1))(bufferutil@4.0.9)(encoding@0.1.13)(ioredis@5.6.1)(react@18.3.1)(typescript@5.8.3)(utf-8-validate@5.0.10)(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@4.1.12))(zod@3.25.76)): dependencies: - '@wagmi/core': 2.21.2(@tanstack/query-core@5.83.1)(react@18.3.1)(typescript@5.8.3)(use-sync-external-store@1.4.0(react@18.3.1))(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76)) + '@wagmi/core': 2.21.2(@tanstack/query-core@5.83.1)(react@18.3.1)(typescript@5.8.3)(use-sync-external-store@1.4.0(react@18.3.1))(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@4.1.12)) hono: 4.9.10 idb-keyval: 6.2.2 mipd: 0.0.7(typescript@5.8.3) ox: 0.9.8(typescript@5.8.3)(zod@4.1.12) - viem: 2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@4.1.12) + viem: 2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76) zod: 
4.1.12 zustand: 5.0.8(react@18.3.1)(use-sync-external-store@1.4.0(react@18.3.1)) optionalDependencies: '@tanstack/react-query': 5.84.1(react@18.3.1) react: 18.3.1 typescript: 5.8.3 - wagmi: 2.17.5(@tanstack/query-core@5.83.1)(@tanstack/react-query@5.84.1(react@18.3.1))(bufferutil@4.0.9)(encoding@0.1.13)(ioredis@5.6.1)(react@18.3.1)(typescript@5.8.3)(utf-8-validate@5.0.10)(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76))(zod@4.1.12) + wagmi: 2.17.5(@tanstack/query-core@5.83.1)(@tanstack/react-query@5.84.1(react@18.3.1))(bufferutil@4.0.9)(encoding@0.1.13)(ioredis@5.6.1)(react@18.3.1)(typescript@5.8.3)(utf-8-validate@5.0.10)(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@4.1.12))(zod@4.1.12) transitivePeerDependencies: - '@types/react' - immer @@ -13236,7 +13216,7 @@ snapshots: redis-info@3.1.0: dependencies: - lodash: 4.17.21 + lodash: 4.17.23 redis-parser@3.0.0: dependencies: @@ -13986,35 +13966,18 @@ snapshots: - utf-8-validate - zod - viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@4.1.12): - dependencies: - '@noble/curves': 1.9.2 - '@noble/hashes': 1.8.0 - '@scure/bip32': 1.7.0 - '@scure/bip39': 1.6.0 - abitype: 1.0.8(typescript@5.8.3)(zod@4.1.12) - isows: 1.0.7(ws@8.18.2(bufferutil@4.0.9)(utf-8-validate@5.0.10)) - ox: 0.8.6(typescript@5.8.3)(zod@4.1.12) - ws: 8.18.2(bufferutil@4.0.9)(utf-8-validate@5.0.10) - optionalDependencies: - typescript: 5.8.3 - transitivePeerDependencies: - - bufferutil - - utf-8-validate - - zod - w3c-xmlserializer@5.0.0: dependencies: xml-name-validator: 5.0.0 - wagmi@2.17.5(@tanstack/query-core@5.83.1)(@tanstack/react-query@5.84.1(react@18.3.1))(bufferutil@4.0.9)(encoding@0.1.13)(ioredis@5.6.1)(react@18.3.1)(typescript@5.8.3)(utf-8-validate@5.0.10)(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76))(zod@4.1.12): + wagmi@2.17.5(@tanstack/query-core@5.83.1)(@tanstack/react-query@5.84.1(react@18.3.1))(bufferutil@4.0.9)(encoding@0.1.13)(ioredis@5.6.1)(react@18.3.1)(typescript@5.8.3)(utf-8-validate@5.0.10)(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@4.1.12))(zod@4.1.12): dependencies: '@tanstack/react-query': 5.84.1(react@18.3.1) - '@wagmi/connectors': 5.11.2(@tanstack/react-query@5.84.1(react@18.3.1))(@wagmi/core@2.21.2(@tanstack/query-core@5.83.1)(react@18.3.1)(typescript@5.8.3)(use-sync-external-store@1.4.0(react@18.3.1))(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76)))(bufferutil@4.0.9)(encoding@0.1.13)(ioredis@5.6.1)(react@18.3.1)(typescript@5.8.3)(use-sync-external-store@1.4.0(react@18.3.1))(utf-8-validate@5.0.10)(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76))(wagmi@2.17.5(@tanstack/query-core@5.83.1)(@tanstack/react-query@5.84.1(react@18.3.1))(bufferutil@4.0.9)(encoding@0.1.13)(ioredis@5.6.1)(react@18.3.1)(typescript@5.8.3)(utf-8-validate@5.0.10)(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76))(zod@3.25.76))(zod@3.25.76) - '@wagmi/core': 2.21.2(@tanstack/query-core@5.83.1)(react@18.3.1)(typescript@5.8.3)(use-sync-external-store@1.4.0(react@18.3.1))(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76)) + '@wagmi/connectors': 
5.11.2(@tanstack/react-query@5.84.1(react@18.3.1))(@wagmi/core@2.21.2(@tanstack/query-core@5.83.1)(react@18.3.1)(typescript@5.8.3)(use-sync-external-store@1.4.0(react@18.3.1))(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@4.1.12)))(bufferutil@4.0.9)(encoding@0.1.13)(ioredis@5.6.1)(react@18.3.1)(typescript@5.8.3)(use-sync-external-store@1.4.0(react@18.3.1))(utf-8-validate@5.0.10)(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@4.1.12))(wagmi@2.17.5(@tanstack/query-core@5.83.1)(@tanstack/react-query@5.84.1(react@18.3.1))(bufferutil@4.0.9)(encoding@0.1.13)(ioredis@5.6.1)(react@18.3.1)(typescript@5.8.3)(utf-8-validate@5.0.10)(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@4.1.12))(zod@3.25.76))(zod@3.25.76) + '@wagmi/core': 2.21.2(@tanstack/query-core@5.83.1)(react@18.3.1)(typescript@5.8.3)(use-sync-external-store@1.4.0(react@18.3.1))(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@4.1.12)) react: 18.3.1 use-sync-external-store: 1.4.0(react@18.3.1) - viem: 2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@4.1.12) + viem: 2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76) optionalDependencies: typescript: 5.8.3 transitivePeerDependencies: @@ -14223,8 +14186,8 @@ snapshots: '@solana-program/token-2022': 0.4.2(@solana/kit@2.3.0(fastestsmallesttextencoderdecoder@1.0.22)(typescript@5.8.3)(ws@8.18.0(bufferutil@4.0.9)(utf-8-validate@5.0.10)))(@solana/sysvars@2.3.0(fastestsmallesttextencoderdecoder@1.0.22)(typescript@5.8.3)) '@solana/kit': 2.3.0(fastestsmallesttextencoderdecoder@1.0.22)(typescript@5.8.3)(ws@8.18.0(bufferutil@4.0.9)(utf-8-validate@5.0.10)) '@solana/transaction-confirmation': 2.3.0(fastestsmallesttextencoderdecoder@1.0.22)(typescript@5.8.3)(ws@8.18.0(bufferutil@4.0.9)(utf-8-validate@5.0.10)) - viem: 2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@4.1.12) - wagmi: 2.17.5(@tanstack/query-core@5.83.1)(@tanstack/react-query@5.84.1(react@18.3.1))(bufferutil@4.0.9)(encoding@0.1.13)(ioredis@5.6.1)(react@18.3.1)(typescript@5.8.3)(utf-8-validate@5.0.10)(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76))(zod@4.1.12) + viem: 2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@3.25.76) + wagmi: 2.17.5(@tanstack/query-core@5.83.1)(@tanstack/react-query@5.84.1(react@18.3.1))(bufferutil@4.0.9)(encoding@0.1.13)(ioredis@5.6.1)(react@18.3.1)(typescript@5.8.3)(utf-8-validate@5.0.10)(viem@2.33.2(bufferutil@4.0.9)(typescript@5.8.3)(utf-8-validate@5.0.10)(zod@4.1.12))(zod@4.1.12) zod: 3.25.76 transitivePeerDependencies: - '@azure/app-configuration' diff --git a/apps/api/src/__tests__/snips/v2/crawl.test.ts b/apps/api/src/__tests__/snips/v2/crawl.test.ts index 82111f9e56..2d4e439dae 100644 --- a/apps/api/src/__tests__/snips/v2/crawl.test.ts +++ b/apps/api/src/__tests__/snips/v2/crawl.test.ts @@ -13,6 +13,7 @@ import { crawl, crawlOngoing, crawlStart, + map, Identity, idmux, scrapeTimeout, @@ -23,6 +24,13 @@ import { describe, it, expect } from "@jest/globals"; let identity: Identity; +const normalizeUrlForCompare = (value: string) => { + const url = new URL(value); + url.hash = ""; + const href = url.href; + return href.endsWith("/") ? 
href.slice(0, -1) : href; +}; + beforeAll(async () => { identity = await idmux({ name: "crawl", @@ -69,6 +77,97 @@ describe("Crawl tests", () => { 10 * scrapeTimeout, ); + concurrentIf(ALLOW_TEST_SUITE_WEBSITE)( + "works with sitemap: only", + async () => { + const results = await crawl( + { + url: base, + limit: 10, + sitemap: "only", + }, + identity, + ); + + expect(results.completed).toBeGreaterThan(0); + }, + 10 * scrapeTimeout, + ); + + concurrentIf(ALLOW_TEST_SUITE_WEBSITE)( + "sitemap-only results are subset of map-only + start URL", + async () => { + const mapResponse = await map( + { + url: base, + sitemap: "only", + includeSubdomains: false, + ignoreQueryParameters: false, + limit: 500, + }, + identity, + ); + + expect(mapResponse.statusCode).toBe(200); + expect(mapResponse.body.success).toBe(true); + + const sitemapUrls = new Set( + mapResponse.body.links.map(link => normalizeUrlForCompare(link.url)), + ); + const baseNormalized = normalizeUrlForCompare(base); + + const results = await crawl( + { + url: base, + limit: 50, + sitemap: "only", + }, + identity, + ); + + expect(results.success).toBe(true); + if (results.success) { + for (const page of results.data) { + const pageUrl = + page.metadata.url ?? page.metadata.sourceURL ?? base; + const normalized = normalizeUrlForCompare(pageUrl); + expect( + normalized === baseNormalized || sitemapUrls.has(normalized), + ).toBe(true); + } + } + }, + 10 * scrapeTimeout, + ); + + concurrentIf(TEST_PRODUCTION)( + "no sitemap found -> start URL only", + async () => { + const noSitemapUrl = "https://example.com"; + const results = await crawl( + { + url: noSitemapUrl, + limit: 10, + sitemap: "only", + }, + identity, + ); + + expect(results.success).toBe(true); + if (results.success) { + expect(results.data.length).toBe(1); + const pageUrl = + results.data[0].metadata.url ?? + results.data[0].metadata.sourceURL ?? 
+ noSitemapUrl; + expect(normalizeUrlForCompare(pageUrl)).toBe( + normalizeUrlForCompare(noSitemapUrl), + ); + } + }, + 10 * scrapeTimeout, + ); + concurrentIf(ALLOW_TEST_SUITE_WEBSITE)( "filters URLs properly", async () => { diff --git a/apps/api/src/__tests__/snips/v2/types-validation.test.ts b/apps/api/src/__tests__/snips/v2/types-validation.test.ts index cb1e98b8ed..cdee18ca67 100644 --- a/apps/api/src/__tests__/snips/v2/types-validation.test.ts +++ b/apps/api/src/__tests__/snips/v2/types-validation.test.ts @@ -19,6 +19,7 @@ import { BatchScrapeRequestInput, SearchRequest, SearchRequestInput, + toV2CrawlerOptions, } from "../../../controllers/v2/types"; describe("V2 Types Validation", () => { @@ -606,11 +607,11 @@ describe("V2 Types Validation", () => { it("should handle sitemap enum values", () => { const input: CrawlRequestInput = { url: "https://example.com", - sitemap: "include", + sitemap: "only", }; const result = crawlRequestSchema.parse(input); - expect(result.sitemap).toBe("include"); + expect(result.sitemap).toBe("only"); }); it("should reject invalid sitemap value", () => { @@ -621,6 +622,15 @@ describe("V2 Types Validation", () => { expect(() => crawlRequestSchema.parse(input)).toThrow(); }); + + it("should map sitemapOnly to sitemap=only", () => { + const result = toV2CrawlerOptions({ + sitemapOnly: true, + ignoreSitemap: false, + }); + + expect(result.sitemap).toBe("only"); + }); }); describe("mapRequestSchema", () => { diff --git a/apps/api/src/controllers/v2/crawl-params-preview.ts b/apps/api/src/controllers/v2/crawl-params-preview.ts index cdeaf2b28c..312a3af9af 100644 --- a/apps/api/src/controllers/v2/crawl-params-preview.ts +++ b/apps/api/src/controllers/v2/crawl-params-preview.ts @@ -29,7 +29,7 @@ type CrawlParamsPreviewResponse = crawlEntireDomain?: boolean; allowExternalLinks?: boolean; allowSubdomains?: boolean; - sitemap?: "skip" | "include"; + sitemap?: "skip" | "include" | "only"; ignoreQueryParameters?: boolean; deduplicateSimilarURLs?: boolean; delay?: number; diff --git a/apps/api/src/controllers/v2/types.ts b/apps/api/src/controllers/v2/types.ts index f7599f7861..4cb2b3beb6 100644 --- a/apps/api/src/controllers/v2/types.ts +++ b/apps/api/src/controllers/v2/types.ts @@ -868,7 +868,7 @@ export const crawlerOptions = z.strictObject({ allowExternalLinks: z.boolean().prefault(false), allowSubdomains: z.boolean().prefault(false), ignoreRobotsTxt: z.boolean().prefault(false), - sitemap: z.enum(["skip", "include"]).prefault("include"), + sitemap: z.enum(["skip", "include", "only"]).prefault("include"), deduplicateSimilarURLs: z.boolean().prefault(true), ignoreQueryParameters: z.boolean().prefault(false), regexOnFullURL: z.boolean().prefault(false), @@ -1283,6 +1283,7 @@ export function toV0CrawlerOptions(x: CrawlerOptions) { allowSubdomains: x.allowSubdomains, ignoreRobotsTxt: x.ignoreRobotsTxt, ignoreSitemap: x.sitemap === "skip", + sitemapOnly: x.sitemap === "only", deduplicateSimilarURLs: x.deduplicateSimilarURLs, ignoreQueryParameters: x.ignoreQueryParameters, regexOnFullURL: x.regexOnFullURL, @@ -1301,7 +1302,7 @@ export function toV2CrawlerOptions(x: any): CrawlerOptions { allowExternalLinks: x.allowExternalContentLinks, allowSubdomains: x.allowSubdomains, ignoreRobotsTxt: x.ignoreRobotsTxt, - sitemap: x.ignoreSitemap ? "skip" : "include", + sitemap: x.sitemapOnly ? "only" : x.ignoreSitemap ? 
"skip" : "include", deduplicateSimilarURLs: x.deduplicateSimilarURLs, ignoreQueryParameters: x.ignoreQueryParameters, regexOnFullURL: x.regexOnFullURL, @@ -1326,7 +1327,7 @@ function fromV0CrawlerOptions( allowExternalLinks: x.allowExternalContentLinks, allowSubdomains: x.allowSubdomains, ignoreRobotsTxt: x.ignoreRobotsTxt, - sitemap: x.ignoreSitemap ? "skip" : "include", + sitemap: x.sitemapOnly ? "only" : x.ignoreSitemap ? "skip" : "include", deduplicateSimilarURLs: x.deduplicateSimilarURLs, ignoreQueryParameters: x.ignoreQueryParameters, regexOnFullURL: x.regexOnFullURL, diff --git a/apps/api/src/services/worker/scrape-worker.ts b/apps/api/src/services/worker/scrape-worker.ts index 20aa1bbbdf..c5e0f0356d 100644 --- a/apps/api/src/services/worker/scrape-worker.ts +++ b/apps/api/src/services/worker/scrape-worker.ts @@ -356,78 +356,81 @@ async function processJob(job: NuQJob) { doc.metadata.url ?? doc.metadata.sourceURL ?? sc.originUrl!, ); - const links = await crawler.filterLinks( - await crawler.extractLinksFromHTML( - rawHtml ?? "", - doc.metadata?.url ?? doc.metadata?.sourceURL ?? sc.originUrl!, - ), - Infinity, - sc.crawlerOptions?.maxDepth ?? 10, - ); - logger.debug("Discovered " + links.links.length + " links...", { - linksLength: links.links.length, - }); + if (!sc.crawlerOptions?.sitemapOnly) { + const links = await crawler.filterLinks( + await crawler.extractLinksFromHTML( + rawHtml ?? "", + doc.metadata?.url ?? doc.metadata?.sourceURL ?? sc.originUrl!, + ), + Infinity, + sc.crawlerOptions?.maxDepth ?? 10, + ); + logger.debug("Discovered " + links.links.length + " links...", { + linksLength: links.links.length, + }); - // Store robots blocked URLs in Redis set - for (const [url, reason] of links.denialReasons) { - if (reason === "URL blocked by robots.txt") { - await recordRobotsBlocked(job.data.crawl_id, url); + // Store robots blocked URLs in Redis set + for (const [url, reason] of links.denialReasons) { + if (reason === "URL blocked by robots.txt") { + await recordRobotsBlocked(job.data.crawl_id, url); + } } - } - for (const link of links.links) { - if (await lockURL(job.data.crawl_id, sc, link)) { - // This seems to work really welel - const jobPriority = await getJobPriority({ - team_id: sc.team_id, - basePriority: job.data.crawl_id ? 20 : 10, - }); - const jobId = uuidv7(); - - logger.debug( - "Determined job priority " + - jobPriority + - " for URL " + - JSON.stringify(link), - { jobPriority, url: link }, - ); - - await addScrapeJob( - { - url: link, - mode: "single_urls", + for (const link of links.links) { + if (await lockURL(job.data.crawl_id, sc, link)) { + // This seems to work really welel + const jobPriority = await getJobPriority({ team_id: sc.team_id, - scrapeOptions: scrapeOptions.parse(sc.scrapeOptions), - internalOptions: sc.internalOptions, - crawlerOptions: { - ...sc.crawlerOptions, - currentDiscoveryDepth: - (job.data.crawlerOptions?.currentDiscoveryDepth ?? 0) + 1, + basePriority: job.data.crawl_id ? 20 : 10, + }); + const jobId = uuidv7(); + + logger.debug( + "Determined job priority " + + jobPriority + + " for URL " + + JSON.stringify(link), + { jobPriority, url: link }, + ); + + await addScrapeJob( + { + url: link, + mode: "single_urls", + team_id: sc.team_id, + scrapeOptions: scrapeOptions.parse(sc.scrapeOptions), + internalOptions: sc.internalOptions, + crawlerOptions: { + ...sc.crawlerOptions, + currentDiscoveryDepth: + (job.data.crawlerOptions?.currentDiscoveryDepth ?? 
0) + + 1, + }, + origin: job.data.origin, + integration: job.data.integration, + crawl_id: job.data.crawl_id, + requestId: job.data.requestId, + webhook: job.data.webhook, + v1: job.data.v1, + zeroDataRetention: job.data.zeroDataRetention, + apiKeyId: job.data.apiKeyId, }, - origin: job.data.origin, - integration: job.data.integration, - crawl_id: job.data.crawl_id, - requestId: job.data.requestId, - webhook: job.data.webhook, - v1: job.data.v1, - zeroDataRetention: job.data.zeroDataRetention, - apiKeyId: job.data.apiKeyId, - }, - jobId, - jobPriority, - ); - - await addCrawlJob(job.data.crawl_id, jobId, logger); - logger.debug("Added job for URL " + JSON.stringify(link), { - jobPriority, - url: link, - newJobId: jobId, - }); - } else { - // TODO: removed this, ok? too many 'not useful' logs (?) Mogery! - // logger.debug("Could not lock URL " + JSON.stringify(link), { - // url: link, - // }); + jobId, + jobPriority, + ); + + await addCrawlJob(job.data.crawl_id, jobId, logger); + logger.debug("Added job for URL " + JSON.stringify(link), { + jobPriority, + url: link, + newJobId: jobId, + }); + } else { + // TODO: removed this, ok? too many 'not useful' logs (?) Mogery! + // logger.debug("Could not lock URL " + JSON.stringify(link), { + // url: link, + // }); + } } } @@ -719,7 +722,7 @@ async function kickoffGetIndexLinks( crawler: WebCrawler, url: string, ) { - if (sc.crawlerOptions.ignoreSitemap) { + if (sc.crawlerOptions.ignoreSitemap || sc.crawlerOptions.sitemapOnly) { return []; } diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json index 1c0eeeb373..9357ae19f3 100644 --- a/apps/js-sdk/firecrawl/package.json +++ b/apps/js-sdk/firecrawl/package.json @@ -1,6 +1,6 @@ { "name": "@mendable/firecrawl-js", - "version": "4.11.2", + "version": "4.11.3", "description": "JavaScript SDK for Firecrawl API", "main": "dist/index.js", "types": "dist/index.d.ts", diff --git a/apps/js-sdk/firecrawl/src/__tests__/e2e/v2/usage.test.ts b/apps/js-sdk/firecrawl/src/__tests__/e2e/v2/usage.test.ts index e11d7527e0..8cbb617521 100644 --- a/apps/js-sdk/firecrawl/src/__tests__/e2e/v2/usage.test.ts +++ b/apps/js-sdk/firecrawl/src/__tests__/e2e/v2/usage.test.ts @@ -23,15 +23,16 @@ describe("v2.usage e2e", () => { expect(typeof resp.maxConcurrency).toBe("number"); }, 60_000); - test("get_credit_usage", async () => { - const resp = await client.getCreditUsage(); - expect(typeof resp.remainingCredits).toBe("number"); - }, 60_000); - - test("get_token_usage", async () => { - const resp = await client.getTokenUsage(); - expect(typeof resp.remainingTokens).toBe("number"); - }, 60_000); + // NOTE: Disabled, broken on central team due to overflow + // test("get_credit_usage", async () => { + // const resp = await client.getCreditUsage(); + // expect(typeof resp.remainingCredits).toBe("number"); + // }, 60_000); + + // test("get_token_usage", async () => { + // const resp = await client.getTokenUsage(); + // expect(typeof resp.remainingTokens).toBe("number"); + // }, 60_000); test("get_queue_status", async () => { const resp = await client.getQueueStatus(); diff --git a/apps/js-sdk/firecrawl/src/__tests__/unit/v2/zodSchemaToJson.test.ts b/apps/js-sdk/firecrawl/src/__tests__/unit/v2/zodSchemaToJson.test.ts new file mode 100644 index 0000000000..6a20b236df --- /dev/null +++ b/apps/js-sdk/firecrawl/src/__tests__/unit/v2/zodSchemaToJson.test.ts @@ -0,0 +1,107 @@ +import { describe, test, expect } from "@jest/globals"; +import { z } from "zod"; +import { + isZodSchema, + zodSchemaToJsonSchema, + 
looksLikeZodShape, +} from "../../../utils/zodSchemaToJson"; + +describe("zodSchemaToJson utility", () => { + test("isZodSchema detects Zod schemas and rejects non-Zod values", () => { + expect(isZodSchema(z.object({ name: z.string() }))).toBe(true); + expect(isZodSchema(z.string())).toBe(true); + expect(isZodSchema(z.number())).toBe(true); + expect(isZodSchema(z.array(z.string()))).toBe(true); + expect(isZodSchema(z.enum(["A", "B"]))).toBe(true); + expect(isZodSchema(z.union([z.string(), z.number()]))).toBe(true); + expect(isZodSchema(z.string().optional())).toBe(true); + expect(isZodSchema(z.string().nullable())).toBe(true); + + expect(isZodSchema(null)).toBe(false); + expect(isZodSchema(undefined)).toBe(false); + expect(isZodSchema({ name: "test" })).toBe(false); + expect(isZodSchema({ type: "object", properties: {} })).toBe(false); + expect(isZodSchema("string")).toBe(false); + expect(isZodSchema(42)).toBe(false); + expect(isZodSchema([1, 2, 3])).toBe(false); + }); + + test("zodSchemaToJsonSchema converts Zod schemas to JSON Schema", () => { + const simpleSchema = z.object({ name: z.string() }); + const simpleResult = zodSchemaToJsonSchema(simpleSchema) as Record<string, unknown>; + expect(simpleResult.type).toBe("object"); + expect(simpleResult.properties).toBeDefined(); + expect((simpleResult.properties as Record<string, unknown>).name).toBeDefined(); + + const complexSchema = z.object({ + id: z.string().uuid(), + name: z.string().min(1).max(100), + age: z.number().min(0).max(150).optional(), + tags: z.array(z.string()), + status: z.enum(["active", "inactive"]), + metadata: z.object({ + createdAt: z.string(), + nested: z.object({ value: z.number() }), + }), + }); + const complexResult = zodSchemaToJsonSchema(complexSchema) as Record<string, unknown>; + expect(complexResult.type).toBe("object"); + expect(complexResult.properties).toBeDefined(); + expect(complexResult.required).toContain("id"); + expect(complexResult.required).not.toContain("age"); + + const enumResult = zodSchemaToJsonSchema(z.enum(["a", "b", "c"])) as Record<string, unknown>; + expect(enumResult.enum).toEqual(["a", "b", "c"]); + + const arrayResult = zodSchemaToJsonSchema(z.array(z.number())) as Record<string, unknown>; + expect(arrayResult.type).toBe("array"); + expect(arrayResult.items).toBeDefined(); + }); + + test("zodSchemaToJsonSchema passes through non-Zod values unchanged", () => { + const jsonSchema = { type: "object", properties: { name: { type: "string" } } }; + expect(zodSchemaToJsonSchema(jsonSchema)).toEqual(jsonSchema); + expect(zodSchemaToJsonSchema(null)).toBe(null); + expect(zodSchemaToJsonSchema(undefined)).toBe(undefined); + expect(zodSchemaToJsonSchema("string")).toBe("string"); + expect(zodSchemaToJsonSchema(42)).toBe(42); + expect(zodSchemaToJsonSchema({ foo: "bar" })).toEqual({ foo: "bar" }); + }); + + test("looksLikeZodShape detects .shape property misuse", () => { + const schema = z.object({ title: z.string(), count: z.number() }); + expect(looksLikeZodShape(schema.shape)).toBe(true); + expect(looksLikeZodShape(schema)).toBe(false); + expect(looksLikeZodShape(null)).toBe(false); + expect(looksLikeZodShape(undefined)).toBe(false); + expect(looksLikeZodShape({ name: "test" })).toBe(false); + expect(looksLikeZodShape({})).toBe(false); + expect(looksLikeZodShape([1, 2, 3])).toBe(false); + expect(looksLikeZodShape({ type: "object", properties: {} })).toBe(false); + }); + + test("SDK-like usage: convert Zod schema or pass through JSON schema", () => { + const zodSchema = z.object({ + name: z.string(), + email: z.string().email(), + age: z.number().min(0), + }); + + if
(isZodSchema(zodSchema)) { + const result = zodSchemaToJsonSchema(zodSchema) as Record<string, unknown>; + expect(result.type).toBe("object"); + expect(result.properties).toBeDefined(); + } else { + throw new Error("Should detect Zod schema"); + } + + const existingJsonSchema = { + type: "object" as const, + properties: { title: { type: "string" as const } }, + required: ["title"] as string[], + }; + + expect(isZodSchema(existingJsonSchema)).toBe(false); + expect(zodSchemaToJsonSchema(existingJsonSchema)).toEqual(existingJsonSchema); + }); +}); diff --git a/apps/js-sdk/firecrawl/src/utils/zodSchemaToJson.ts b/apps/js-sdk/firecrawl/src/utils/zodSchemaToJson.ts new file mode 100644 index 0000000000..8f1dc70e25 --- /dev/null +++ b/apps/js-sdk/firecrawl/src/utils/zodSchemaToJson.ts @@ -0,0 +1,67 @@ +import { zodToJsonSchema as zodToJsonSchemaLib } from "zod-to-json-schema"; + +type SchemaConverter = (schema: unknown) => unknown; + +export function isZodSchema(value: unknown): boolean { + if (!value || typeof value !== "object") return false; + const schema = value as Record<string, unknown>; + + const hasV3Markers = + "_def" in schema && + (typeof schema.safeParse === "function" || + typeof schema.parse === "function"); + + const hasV4Markers = "_zod" in schema && typeof schema._zod === "object"; + + return hasV3Markers || hasV4Markers; +} + +function isZodV4Schema(schema: unknown): boolean { + if (!schema || typeof schema !== "object") return false; + return "_zod" in schema && typeof (schema as Record<string, unknown>)._zod === "object"; +} + +function tryZodV4Conversion(schema: unknown): Record<string, unknown> | null { + if (!isZodV4Schema(schema)) return null; + + try { + const zodModule = (schema as Record<string, unknown>).constructor?.prototype?.constructor; + if (zodModule && typeof (zodModule as Record<string, unknown>).toJSONSchema === "function") { + return (zodModule as { toJSONSchema: SchemaConverter }).toJSONSchema(schema) as Record<string, unknown>; + } + } catch { + // V4 conversion not available + } + + return null; +} + +export function zodSchemaToJsonSchema(schema: unknown): Record<string, unknown> | unknown { + if (!isZodSchema(schema)) { + return schema; + } + + const v4Result = tryZodV4Conversion(schema); + if (v4Result) { + return v4Result; + } + + try { + return zodToJsonSchemaLib(schema as Parameters<typeof zodToJsonSchemaLib>[0]) as Record<string, unknown>; + } catch { + return schema; + } +} + +export function looksLikeZodShape(obj: unknown): boolean { + if (!obj || typeof obj !== "object" || Array.isArray(obj)) return false; + const values = Object.values(obj); + if (values.length === 0) return false; + return values.some( + (v) => + v && + typeof v === "object" && + (v as Record<string, unknown>)._def && + typeof (v as Record<string, unknown>).safeParse === "function" + ); +} diff --git a/apps/js-sdk/firecrawl/src/v1/index.ts b/apps/js-sdk/firecrawl/src/v1/index.ts index 21e935f651..ed72cb763f 100644 --- a/apps/js-sdk/firecrawl/src/v1/index.ts +++ b/apps/js-sdk/firecrawl/src/v1/index.ts @@ -1,6 +1,6 @@ import axios, { type AxiosResponse, type AxiosRequestHeaders, AxiosError } from "axios"; import * as zt from "zod"; -import { zodToJsonSchema } from "zod-to-json-schema"; +import { zodSchemaToJsonSchema } from "../utils/zodSchemaToJson"; import { TypedEventTarget } from "typescript-event-target"; /** @@ -707,36 +707,21 @@ export default class FirecrawlApp { } as AxiosRequestHeaders; let jsonData: any = { url, ...params, origin: typeof (params as any).origin === "string" && (params as any).origin.includes("mcp") ?
(params as any).origin : `js-sdk@${this.version}` }; if (jsonData?.extract?.schema) { - let schema = jsonData.extract.schema; - - // Try parsing the schema as a Zod schema - try { - schema = zodToJsonSchema(schema); - } catch (error) { - - } jsonData = { ...jsonData, extract: { ...jsonData.extract, - schema: schema, + schema: zodSchemaToJsonSchema(jsonData.extract.schema), }, }; } if (jsonData?.jsonOptions?.schema) { - let schema = jsonData.jsonOptions.schema; - // Try parsing the schema as a Zod schema - try { - schema = zodToJsonSchema(schema); - } catch (error) { - - } jsonData = { ...jsonData, jsonOptions: { ...jsonData.jsonOptions, - schema: schema, + schema: zodSchemaToJsonSchema(jsonData.jsonOptions.schema), }, }; } @@ -793,21 +778,13 @@ export default class FirecrawlApp { }; if (jsonData?.scrapeOptions?.extract?.schema) { - let schema = jsonData.scrapeOptions.extract.schema; - - // Try parsing the schema as a Zod schema - try { - schema = zodToJsonSchema(schema); - } catch (error) { - - } jsonData = { ...jsonData, scrapeOptions: { ...jsonData.scrapeOptions, extract: { ...jsonData.scrapeOptions.extract, - schema: schema, + schema: zodSchemaToJsonSchema(jsonData.scrapeOptions.extract.schema), }, }, }; @@ -1105,36 +1082,20 @@ export default class FirecrawlApp { const headers = this.prepareHeaders(idempotencyKey); let jsonData: any = { urls, webhook, ignoreInvalidURLs, maxConcurrency, ...params, origin: typeof (params as any).origin === "string" && (params as any).origin.includes("mcp") ? (params as any).origin : `js-sdk@${this.version}` }; if (jsonData?.extract?.schema) { - let schema = jsonData.extract.schema; - - // Try parsing the schema as a Zod schema - try { - schema = zodToJsonSchema(schema); - } catch (error) { - - } jsonData = { ...jsonData, extract: { ...jsonData.extract, - schema: schema, + schema: zodSchemaToJsonSchema(jsonData.extract.schema), }, }; } if (jsonData?.jsonOptions?.schema) { - let schema = jsonData.jsonOptions.schema; - - // Try parsing the schema as a Zod schema - try { - schema = zodToJsonSchema(schema); - } catch (error) { - - } jsonData = { ...jsonData, jsonOptions: { ...jsonData.jsonOptions, - schema: schema, + schema: zodSchemaToJsonSchema(jsonData.jsonOptions.schema), }, }; } @@ -1326,20 +1287,7 @@ export default class FirecrawlApp { const headers = this.prepareHeaders(); let jsonData: { urls?: string[] } & ExtractParams = { urls: urls, ...params }; - let jsonSchema: any; - try { - if (!params?.schema) { - jsonSchema = undefined; - } else { - try { - jsonSchema = zodToJsonSchema(params.schema as zt.ZodType); - } catch (_) { - jsonSchema = params.schema; - } - } - } catch (error: any) { - throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400); - } + const jsonSchema = params?.schema ? zodSchemaToJsonSchema(params.schema) : undefined; try { const response: AxiosResponse = await this.postRequest( @@ -1396,22 +1344,8 @@ export default class FirecrawlApp { idempotencyKey?: string ): Promise { const headers = this.prepareHeaders(idempotencyKey); - let jsonData: any = { urls, ...params }; - let jsonSchema: any; - - try { - if (!params?.schema) { - jsonSchema = undefined; - } else { - try { - jsonSchema = zodToJsonSchema(params.schema as zt.ZodType); - } catch (_) { - jsonSchema = params.schema; - } - } - } catch (error: any) { - throw new FirecrawlError("Invalid schema. 
Schema must be either a valid Zod schema or JSON schema object.", 400); - } + const jsonData: any = { urls, ...params }; + const jsonSchema = params?.schema ? zodSchemaToJsonSchema(params.schema) : undefined; try { const response: AxiosResponse = await this.postRequest( @@ -1780,18 +1714,11 @@ export default class FirecrawlApp { let jsonData: any = { query, ...params, origin: typeof (params as any).origin === "string" && (params as any).origin.includes("mcp") ? (params as any).origin : `js-sdk@${this.version}` }; if (jsonData?.jsonOptions?.schema) { - let schema = jsonData.jsonOptions.schema; - // Try parsing the schema as a Zod schema - try { - schema = zodToJsonSchema(schema); - } catch (error) { - // Ignore error if schema can't be parsed as Zod - } jsonData = { ...jsonData, jsonOptions: { ...jsonData.jsonOptions, - schema: schema, + schema: zodSchemaToJsonSchema(jsonData.jsonOptions.schema), }, }; } diff --git a/apps/js-sdk/firecrawl/src/v2/methods/agent.ts b/apps/js-sdk/firecrawl/src/v2/methods/agent.ts index 9200cfce2f..ad7b4c482c 100644 --- a/apps/js-sdk/firecrawl/src/v2/methods/agent.ts +++ b/apps/js-sdk/firecrawl/src/v2/methods/agent.ts @@ -1,7 +1,7 @@ import { type AgentResponse, type AgentStatusResponse, type AgentWebhookConfig } from "../types"; import { HttpClient } from "../utils/httpClient"; import { normalizeAxiosError, throwForBadResponse } from "../utils/errorHandler"; -import { zodToJsonSchema } from "zod-to-json-schema"; +import { isZodSchema, zodSchemaToJsonSchema } from "../../utils/zodSchemaToJson"; import type { ZodTypeAny } from "zod"; function prepareAgentPayload(args: { @@ -18,9 +18,7 @@ function prepareAgentPayload(args: { if (args.urls) body.urls = args.urls; body.prompt = args.prompt; if (args.schema != null) { - const s: any = args.schema; - const isZod = s && (typeof s.safeParse === "function" || typeof s.parse === "function") && s._def; - body.schema = isZod ? zodToJsonSchema(s) : args.schema; + body.schema = isZodSchema(args.schema) ? zodSchemaToJsonSchema(args.schema) : args.schema; } if (args.integration && args.integration.trim()) body.integration = args.integration.trim(); if (args.maxCredits !== null && args.maxCredits !== undefined) body.maxCredits = args.maxCredits; diff --git a/apps/js-sdk/firecrawl/src/v2/methods/extract.ts b/apps/js-sdk/firecrawl/src/v2/methods/extract.ts index bf9bca09f4..11e523dca7 100644 --- a/apps/js-sdk/firecrawl/src/v2/methods/extract.ts +++ b/apps/js-sdk/firecrawl/src/v2/methods/extract.ts @@ -2,7 +2,7 @@ import { type ExtractResponse, type ScrapeOptions, type AgentOptions } from "../ import { HttpClient } from "../utils/httpClient"; import { ensureValidScrapeOptions } from "../utils/validation"; import { normalizeAxiosError, throwForBadResponse } from "../utils/errorHandler"; -import { zodToJsonSchema } from "zod-to-json-schema"; +import { isZodSchema, zodSchemaToJsonSchema } from "../../utils/zodSchemaToJson"; import type { ZodTypeAny } from "zod"; function prepareExtractPayload(args: { @@ -22,9 +22,7 @@ function prepareExtractPayload(args: { if (args.urls) body.urls = args.urls; if (args.prompt != null) body.prompt = args.prompt; if (args.schema != null) { - const s: any = args.schema; - const isZod = s && (typeof s.safeParse === "function" || typeof s.parse === "function") && s._def; - body.schema = isZod ? zodToJsonSchema(s) : args.schema; + body.schema = isZodSchema(args.schema) ? 
zodSchemaToJsonSchema(args.schema) : args.schema; } if (args.systemPrompt != null) body.systemPrompt = args.systemPrompt; if (args.allowExternalLinks != null) body.allowExternalLinks = args.allowExternalLinks; diff --git a/apps/js-sdk/firecrawl/src/v2/types.ts b/apps/js-sdk/firecrawl/src/v2/types.ts index 71e6d4e0a5..02be363360 100644 --- a/apps/js-sdk/firecrawl/src/v2/types.ts +++ b/apps/js-sdk/firecrawl/src/v2/types.ts @@ -460,7 +460,7 @@ export interface CrawlOptions { excludePaths?: string[] | null; includePaths?: string[] | null; maxDiscoveryDepth?: number | null; - sitemap?: 'skip' | 'include'; + sitemap?: 'skip' | 'include' | 'only'; ignoreQueryParameters?: boolean; limit?: number | null; crawlEntireDomain?: boolean; diff --git a/apps/js-sdk/firecrawl/src/v2/utils/validation.ts b/apps/js-sdk/firecrawl/src/v2/utils/validation.ts index c154a0938f..bfb1ab1ae6 100644 --- a/apps/js-sdk/firecrawl/src/v2/utils/validation.ts +++ b/apps/js-sdk/firecrawl/src/v2/utils/validation.ts @@ -1,18 +1,5 @@ import { type FormatOption, type JsonFormat, type ScrapeOptions, type ScreenshotFormat, type ChangeTrackingFormat } from "../types"; -import { zodToJsonSchema } from "zod-to-json-schema"; - -/** - * Detects if an object looks like a Zod schema's `.shape` property. - * When users mistakenly pass `schema.shape` instead of `schema`, the object - * will have Zod types as values but won't be a Zod schema itself. - */ -function looksLikeZodShape(obj: unknown): boolean { - if (!obj || typeof obj !== "object" || Array.isArray(obj)) return false; - const values = Object.values(obj); - if (values.length === 0) return false; - // Check if at least one value looks like a Zod type - return values.some((v: any) => v && typeof v === "object" && v._def && typeof v.safeParse === "function"); -} +import { isZodSchema, zodSchemaToJsonSchema, looksLikeZodShape } from "../../utils/zodSchemaToJson"; export function ensureValidFormats(formats?: FormatOption[]): void { if (!formats) return; @@ -28,17 +15,10 @@ export function ensureValidFormats(formats?: FormatOption[]): void { if (!j.prompt && !j.schema) { throw new Error("json format requires either 'prompt' or 'schema' (or both)"); } - // Flexibility: allow passing a Zod schema. Convert to JSON schema internally. - const maybeSchema: any = j.schema as any; - const isZod = !!maybeSchema && (typeof maybeSchema.safeParse === "function" || typeof maybeSchema.parse === "function") && !!maybeSchema._def; - if (isZod) { - try { - (j as any).schema = zodToJsonSchema(maybeSchema); - } catch { - // If conversion fails, leave as-is; server-side may still handle, or request will fail explicitly - } + const maybeSchema = j.schema; + if (isZodSchema(maybeSchema)) { + (j as any).schema = zodSchemaToJsonSchema(maybeSchema); } else if (looksLikeZodShape(maybeSchema)) { - // User likely passed schema.shape instead of the schema itself throw new Error( "json format schema appears to be a Zod schema's .shape property. " + "Pass the Zod schema directly (e.g., `schema: MySchema`) instead of `schema: MySchema.shape`. 
" + @@ -49,14 +29,9 @@ export function ensureValidFormats(formats?: FormatOption[]): void { } if ((fmt as ChangeTrackingFormat).type === "changeTracking") { const ct = fmt as ChangeTrackingFormat; - const maybeSchema: any = ct.schema as any; - const isZod = !!maybeSchema && (typeof maybeSchema.safeParse === "function" || typeof maybeSchema.parse === "function") && !!maybeSchema._def; - if (isZod) { - try { - (ct as any).schema = zodToJsonSchema(maybeSchema); - } catch { - // Best-effort conversion; if it fails, leave original value - } + const maybeSchema = ct.schema; + if (isZodSchema(maybeSchema)) { + (ct as any).schema = zodSchemaToJsonSchema(maybeSchema); } else if (looksLikeZodShape(maybeSchema)) { throw new Error( "changeTracking format schema appears to be a Zod schema's .shape property. " + diff --git a/apps/python-sdk/README.md b/apps/python-sdk/README.md index 253553b68e..50aa82ca27 100644 --- a/apps/python-sdk/README.md +++ b/apps/python-sdk/README.md @@ -87,6 +87,32 @@ crawl_status = firecrawl.get_crawl_status("") print(crawl_status) ``` +### Manual Pagination (v2) + +Crawl and batch scrape status responses may include a `next` URL when more data is available. The SDK auto-paginates by default; to page manually, disable auto-pagination and pass the opaque `next` URL back to the SDK. + +```python +from firecrawl.v2.types import PaginationConfig + +# Crawl: fetch one page at a time +crawl_job = firecrawl.start_crawl("https://firecrawl.dev", limit=100) +status = firecrawl.get_crawl_status( + crawl_job.id, + pagination_config=PaginationConfig(auto_paginate=False), +) +if status.next: + page2 = firecrawl.get_crawl_status_page(status.next) + +# Batch scrape: fetch one page at a time +batch_job = firecrawl.start_batch_scrape(["https://firecrawl.dev"]) +status = firecrawl.get_batch_scrape_status( + batch_job.id, + pagination_config=PaginationConfig(auto_paginate=False), +) +if status.next: + page2 = firecrawl.get_batch_scrape_status_page(status.next) +``` + ### Cancelling a Crawl To cancel an asynchronous crawl job, use the `cancel_crawl` method. It takes the job ID of the asynchronous crawl as a parameter and returns the cancellation status. @@ -184,4 +210,4 @@ firecrawl = Firecrawl(api_key="YOUR_API_KEY") doc_v1 = firecrawl.v1.scrape_url('https://firecrawl.dev', formats=['markdown', 'html']) crawl_v1 = firecrawl.v1.crawl_url('https://firecrawl.dev', limit=100) map_v1 = firecrawl.v1.map_url('https://firecrawl.dev') -``` \ No newline at end of file +``` diff --git a/apps/python-sdk/example_pagination.py b/apps/python-sdk/example_pagination.py index 106d578caf..589ff93923 100644 --- a/apps/python-sdk/example_pagination.py +++ b/apps/python-sdk/example_pagination.py @@ -18,7 +18,8 @@ print(f"Next URL: {crawl_result.next}") # Should be None since auto-pagination is enabled # Example 2: Manual crawl with pagination control -# Use this when you need to control how many pages to fetch or want to process results incrementally +# Use this when you need to control how many pages to fetch or want to process results incrementally. +# The next URL is opaque; pass it back to the SDK to fetch the next page. 
print("\n=== Example 2: Manual crawl with pagination control ===") crawl_job = firecrawl.start_crawl("https://example.com", limit=100) @@ -27,6 +28,9 @@ status = firecrawl.get_crawl_status(crawl_job.id, pagination_config=pagination_config) print(f"Documents from first page: {len(status.data)}") print(f"Next URL: {status.next}") # Will show the next page URL +if status.next: + next_page = firecrawl.get_crawl_status_page(status.next) + print(f"Documents from next page: {len(next_page.data)}") # Example 3: Limited pagination - fetch only 3 pages # Useful for controlling memory usage or processing time @@ -68,11 +72,16 @@ print(f"Batch scrape documents: {len(batch_result.data)}") # Example 8: Manual batch scrape with pagination control -# Use this when you need to control how many pages to fetch or want to process results incrementally +# Use this when you need to control how many pages to fetch or want to process results incrementally. +# The next URL is opaque; pass it back to the SDK to fetch the next page. print("\n=== Example 8: Manual batch scrape with pagination control ===") batch_job = firecrawl.start_batch_scrape(urls) -status = firecrawl.get_batch_scrape_status(batch_job.id) +status = firecrawl.get_batch_scrape_status(batch_job.id, pagination_config=PaginationConfig(auto_paginate=False)) print(f"Batch scrape documents: {len(status.data)}") +print(f"Next URL: {status.next}") +if status.next: + next_page = firecrawl.get_batch_scrape_status_page(status.next) + print(f"Batch scrape next page documents: {len(next_page.data)}") # Example 9: Async usage print("\n=== Example 9: Async pagination ===") @@ -94,6 +103,9 @@ async def async_example(): pagination_config=pagination_config ) print(f"Async crawl with pagination: {len(status.data)}") + if status.next: + next_page = await async_client.get_crawl_status_page(status.next) + print(f"Async crawl next page documents: {len(next_page.data)}") # Run async example # asyncio.run(async_example()) diff --git a/apps/python-sdk/firecrawl/__init__.py b/apps/python-sdk/firecrawl/__init__.py index 3ab4358b01..76deaa9be2 100644 --- a/apps/python-sdk/firecrawl/__init__.py +++ b/apps/python-sdk/firecrawl/__init__.py @@ -17,7 +17,7 @@ V1ChangeTrackingOptions, ) -__version__ = "4.13.1" +__version__ = "4.13.3" # Define the logger for the Firecrawl project logger: logging.Logger = logging.getLogger("firecrawl") diff --git a/apps/python-sdk/firecrawl/__tests__/e2e/v2/test_batch_scrape.py b/apps/python-sdk/firecrawl/__tests__/e2e/v2/test_batch_scrape.py index f5fb8ecbe0..a521d9b04c 100644 --- a/apps/python-sdk/firecrawl/__tests__/e2e/v2/test_batch_scrape.py +++ b/apps/python-sdk/firecrawl/__tests__/e2e/v2/test_batch_scrape.py @@ -1,8 +1,9 @@ import os +import time import pytest from dotenv import load_dotenv from firecrawl import Firecrawl -from firecrawl.v2.types import ScrapeOptions +from firecrawl.v2.types import ScrapeOptions, PaginationConfig load_dotenv() @@ -48,6 +49,39 @@ def test_start_batch_minimal_and_status(self): assert job.status in ["scraping", "completed", "failed"] assert job.total >= 0 + def test_get_batch_scrape_status_page(self): + """Fetch a single batch scrape page using the next URL.""" + urls = [f"https://docs.firecrawl.dev?batch={i}" for i in range(15)] + + start_resp = self.client.start_batch_scrape( + urls, + formats=["markdown"], + ignore_invalid_urls=True, + ) + assert start_resp.id is not None + + pagination_config = PaginationConfig(auto_paginate=False) + deadline = time.time() + 120 + status_job = None + while time.time() < 
deadline: + status_job = self.client.get_batch_scrape_status( + start_resp.id, + pagination_config=pagination_config, + ) + if status_job.next: + break + if status_job.status in ["completed", "failed", "cancelled"]: + break + time.sleep(2) + + assert status_job is not None + if not status_job.next: + pytest.skip("Batch scrape completed without pagination; skipping page fetch.") + + next_page = self.client.get_batch_scrape_status_page(status_job.next) + assert isinstance(next_page.data, list) + assert next_page.status in ["scraping", "completed", "failed", "cancelled"] + def test_wait_batch_with_all_params(self): """Blocking waiter with JSON and changeTracking formats plus many options.""" urls = [ @@ -103,4 +137,3 @@ def test_cancel_batch(self): cancelled = self.client.cancel_batch_scrape(start_resp.id) assert cancelled is True - diff --git a/apps/python-sdk/firecrawl/__tests__/e2e/v2/test_crawl.py b/apps/python-sdk/firecrawl/__tests__/e2e/v2/test_crawl.py index 2544445af3..6fa88780c0 100644 --- a/apps/python-sdk/firecrawl/__tests__/e2e/v2/test_crawl.py +++ b/apps/python-sdk/firecrawl/__tests__/e2e/v2/test_crawl.py @@ -3,7 +3,7 @@ import os from dotenv import load_dotenv from firecrawl import Firecrawl -from firecrawl.v2.types import ScrapeOptions +from firecrawl.v2.types import ScrapeOptions, PaginationConfig load_dotenv() @@ -66,6 +66,33 @@ def test_get_crawl_status(self): assert status_job.next is None assert isinstance(status_job.data, list) + def test_get_crawl_status_page(self): + """Fetch a single crawl page using the next URL.""" + start_job = self.client.start_crawl("https://docs.firecrawl.dev", limit=25) + assert start_job.id is not None + + pagination_config = PaginationConfig(auto_paginate=False) + deadline = time.time() + 120 + status_job = None + while time.time() < deadline: + status_job = self.client.get_crawl_status( + start_job.id, + pagination_config=pagination_config, + ) + if status_job.next: + break + if status_job.status in ["completed", "failed", "cancelled"]: + break + time.sleep(2) + + assert status_job is not None + if not status_job.next: + pytest.skip("Crawl completed without pagination; skipping page fetch.") + + next_page = self.client.get_crawl_status_page(status_job.next) + assert isinstance(next_page.data, list) + assert next_page.status in ["scraping", "completed", "failed", "cancelled"] + def test_cancel_crawl(self): """Test canceling a crawl.""" start_job = self.client.start_crawl("https://docs.firecrawl.dev", limit=3) @@ -275,4 +302,4 @@ def test_crawl_params_preview(self): assert params_data is not None assert params_data.limit is not None or params_data.include_paths is not None or params_data.max_discovery_depth is not None assert 'blog/.*' in params_data.include_paths - assert 'docs/.*' in params_data.include_paths \ No newline at end of file + assert 'docs/.*' in params_data.include_paths diff --git a/apps/python-sdk/firecrawl/__tests__/unit/v2/methods/test_crawl_params.py b/apps/python-sdk/firecrawl/__tests__/unit/v2/methods/test_crawl_params.py index 039870fb82..81ad035215 100644 --- a/apps/python-sdk/firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +++ b/apps/python-sdk/firecrawl/__tests__/unit/v2/methods/test_crawl_params.py @@ -42,7 +42,7 @@ def test_crawl_params_data_creation(self): assert data.include_paths is None assert data.exclude_paths is None assert data.max_discovery_depth is None - assert data.ignore_sitemap is False + assert data.sitemap is None assert data.limit is None assert data.crawl_entire_domain is False assert 
data.allow_external_links is False @@ -67,4 +67,4 @@ def test_crawl_params_data_with_values(self): assert data.limit == 50 assert data.crawl_entire_domain is True assert data.allow_external_links is False - assert data.warning == "Test warning" \ No newline at end of file + assert data.warning == "Test warning" diff --git a/apps/python-sdk/firecrawl/__tests__/unit/v2/methods/test_pagination.py b/apps/python-sdk/firecrawl/__tests__/unit/v2/methods/test_pagination.py index d05c562c7c..a872db84cb 100644 --- a/apps/python-sdk/firecrawl/__tests__/unit/v2/methods/test_pagination.py +++ b/apps/python-sdk/firecrawl/__tests__/unit/v2/methods/test_pagination.py @@ -14,10 +14,18 @@ Document, DocumentMetadata ) -from firecrawl.v2.methods.crawl import get_crawl_status, _fetch_all_pages -from firecrawl.v2.methods.batch import get_batch_scrape_status, _fetch_all_batch_pages -from firecrawl.v2.methods.aio.crawl import get_crawl_status as get_crawl_status_async, _fetch_all_pages_async -from firecrawl.v2.methods.aio.batch import get_batch_scrape_status as get_batch_scrape_status_async, _fetch_all_batch_pages_async +from firecrawl.v2.methods.crawl import get_crawl_status, get_crawl_status_page, _fetch_all_pages +from firecrawl.v2.methods.batch import get_batch_scrape_status, get_batch_scrape_status_page, _fetch_all_batch_pages +from firecrawl.v2.methods.aio.crawl import ( + get_crawl_status as get_crawl_status_async, + get_crawl_status_page as get_crawl_status_page_async, + _fetch_all_pages_async, +) +from firecrawl.v2.methods.aio.batch import ( + get_batch_scrape_status as get_batch_scrape_status_async, + get_batch_scrape_status_page as get_batch_scrape_status_page_async, + _fetch_all_batch_pages_async, +) class TestPaginationConfig: @@ -123,6 +131,59 @@ def test_get_crawl_status_propagates_request_timeout(self): self.mock_client.get.assert_called_with( f"/v2/crawl/{self.job_id}", timeout=timeout_seconds ) + + def test_get_crawl_status_page(self): + """Test get_crawl_status_page returns a single page.""" + mock_response = Mock() + mock_response.ok = True + mock_response.json.return_value = { + "success": True, + "status": "completed", + "completed": 10, + "total": 20, + "creditsUsed": 5, + "expiresAt": "2024-01-01T00:00:00Z", + "next": "https://api.firecrawl.dev/v2/crawl/test-crawl-123?page=3", + "data": [self.sample_doc], + } + + self.mock_client.get.return_value = mock_response + next_url = "https://api.firecrawl.dev/v2/crawl/test-crawl-123?page=2" + + result = get_crawl_status_page(self.mock_client, next_url) + + assert result.status == "completed" + assert result.next == "https://api.firecrawl.dev/v2/crawl/test-crawl-123?page=3" + assert len(result.data) == 1 + self.mock_client.get.assert_called_with(next_url, timeout=None) + + def test_get_crawl_status_page_propagates_request_timeout(self): + """Ensure request_timeout is forwarded to crawl status page requests.""" + mock_response = Mock() + mock_response.ok = True + mock_response.json.return_value = { + "success": True, + "status": "completed", + "completed": 1, + "total": 1, + "creditsUsed": 1, + "expiresAt": "2024-01-01T00:00:00Z", + "next": None, + "data": [self.sample_doc], + } + + self.mock_client.get.return_value = mock_response + + next_url = "https://api.firecrawl.dev/v2/crawl/test-crawl-123?page=2" + timeout_seconds = 4.2 + result = get_crawl_status_page( + self.mock_client, + next_url, + request_timeout=timeout_seconds, + ) + + assert result.status == "completed" + self.mock_client.get.assert_called_with(next_url, timeout=timeout_seconds) def 
test_get_crawl_status_with_pagination(self): """Test get_crawl_status with auto_paginate=True.""" @@ -326,6 +387,59 @@ def test_get_batch_scrape_status_no_pagination(self): assert result.next == "https://api.firecrawl.dev/v2/batch/scrape/test-batch-123?page=2" assert len(result.data) == 1 assert isinstance(result.data[0], Document) + + def test_get_batch_scrape_status_page(self): + """Test get_batch_scrape_status_page returns a single page.""" + mock_response = Mock() + mock_response.ok = True + mock_response.json.return_value = { + "success": True, + "status": "completed", + "completed": 10, + "total": 20, + "creditsUsed": 5, + "expiresAt": "2024-01-01T00:00:00Z", + "next": "https://api.firecrawl.dev/v2/batch/scrape/test-batch-123?page=3", + "data": [self.sample_doc], + } + + self.mock_client.get.return_value = mock_response + next_url = "https://api.firecrawl.dev/v2/batch/scrape/test-batch-123?page=2" + + result = get_batch_scrape_status_page(self.mock_client, next_url) + + assert result.status == "completed" + assert result.next == "https://api.firecrawl.dev/v2/batch/scrape/test-batch-123?page=3" + assert len(result.data) == 1 + self.mock_client.get.assert_called_with(next_url, timeout=None) + + def test_get_batch_scrape_status_page_propagates_request_timeout(self): + """Ensure request_timeout is forwarded to batch status page requests.""" + mock_response = Mock() + mock_response.ok = True + mock_response.json.return_value = { + "success": True, + "status": "completed", + "completed": 1, + "total": 1, + "creditsUsed": 1, + "expiresAt": "2024-01-01T00:00:00Z", + "next": None, + "data": [self.sample_doc], + } + + self.mock_client.get.return_value = mock_response + + next_url = "https://api.firecrawl.dev/v2/batch/scrape/test-batch-123?page=2" + timeout_seconds = 2.7 + result = get_batch_scrape_status_page( + self.mock_client, + next_url, + request_timeout=timeout_seconds, + ) + + assert result.status == "completed" + self.mock_client.get.assert_called_with(next_url, timeout=timeout_seconds) def test_get_batch_scrape_status_with_pagination(self): """Test get_batch_scrape_status with auto_paginate=True.""" @@ -493,6 +607,61 @@ async def test_get_crawl_status_async_propagates_request_timeout(self): f"/v2/crawl/{self.job_id}", timeout=timeout_seconds ) + @pytest.mark.asyncio + async def test_get_crawl_status_page_async(self): + """Test async get_crawl_status_page returns a single page.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "success": True, + "status": "completed", + "completed": 10, + "total": 20, + "creditsUsed": 5, + "expiresAt": "2024-01-01T00:00:00Z", + "next": "https://api.firecrawl.dev/v2/crawl/test-async-123?page=3", + "data": [self.sample_doc], + } + + self.mock_client.get.return_value = mock_response + next_url = "https://api.firecrawl.dev/v2/crawl/test-async-123?page=2" + + result = await get_crawl_status_page_async(self.mock_client, next_url) + + assert result.status == "completed" + assert result.next == "https://api.firecrawl.dev/v2/crawl/test-async-123?page=3" + assert len(result.data) == 1 + self.mock_client.get.assert_awaited_with(next_url, timeout=None) + + @pytest.mark.asyncio + async def test_get_crawl_status_page_async_propagates_request_timeout(self): + """Ensure async request_timeout is forwarded to crawl status page requests.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "success": True, + "status": "completed", + "completed": 1, + "total": 1, + 
"creditsUsed": 1, + "expiresAt": "2024-01-01T00:00:00Z", + "next": None, + "data": [self.sample_doc], + } + + self.mock_client.get.return_value = mock_response + + next_url = "https://api.firecrawl.dev/v2/crawl/test-async-123?page=2" + timeout_seconds = 6.1 + result = await get_crawl_status_page_async( + self.mock_client, + next_url, + request_timeout=timeout_seconds, + ) + + assert result.status == "completed" + self.mock_client.get.assert_awaited_with(next_url, timeout=timeout_seconds) + @pytest.mark.asyncio async def test_get_batch_scrape_status_async_with_pagination(self): """Test async get_batch_scrape_status with pagination.""" @@ -534,6 +703,61 @@ async def test_get_batch_scrape_status_async_with_pagination(self): assert result.next is None assert len(result.data) == 2 assert self.mock_client.get.call_count == 2 + + @pytest.mark.asyncio + async def test_get_batch_scrape_status_page_async(self): + """Test async get_batch_scrape_status_page returns a single page.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "success": True, + "status": "completed", + "completed": 10, + "total": 20, + "creditsUsed": 5, + "expiresAt": "2024-01-01T00:00:00Z", + "next": "https://api.firecrawl.dev/v2/batch/scrape/test-async-123?page=3", + "data": [self.sample_doc], + } + + self.mock_client.get.return_value = mock_response + next_url = "https://api.firecrawl.dev/v2/batch/scrape/test-async-123?page=2" + + result = await get_batch_scrape_status_page_async(self.mock_client, next_url) + + assert result.status == "completed" + assert result.next == "https://api.firecrawl.dev/v2/batch/scrape/test-async-123?page=3" + assert len(result.data) == 1 + self.mock_client.get.assert_awaited_with(next_url, timeout=None) + + @pytest.mark.asyncio + async def test_get_batch_scrape_status_page_async_propagates_request_timeout(self): + """Ensure async request_timeout is forwarded to batch status page requests.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "success": True, + "status": "completed", + "completed": 1, + "total": 1, + "creditsUsed": 1, + "expiresAt": "2024-01-01T00:00:00Z", + "next": None, + "data": [self.sample_doc], + } + + self.mock_client.get.return_value = mock_response + + next_url = "https://api.firecrawl.dev/v2/batch/scrape/test-async-123?page=2" + timeout_seconds = 4.4 + result = await get_batch_scrape_status_page_async( + self.mock_client, + next_url, + request_timeout=timeout_seconds, + ) + + assert result.status == "completed" + self.mock_client.get.assert_awaited_with(next_url, timeout=timeout_seconds) @pytest.mark.asyncio async def test_fetch_all_pages_async_limits(self): diff --git a/apps/python-sdk/firecrawl/client.py b/apps/python-sdk/firecrawl/client.py index db5c1c0a90..c15936fe0d 100644 --- a/apps/python-sdk/firecrawl/client.py +++ b/apps/python-sdk/firecrawl/client.py @@ -61,6 +61,7 @@ def __init__(self, client_instance: Optional[V2FirecrawlClient]): self.crawl = client_instance.crawl self.start_crawl = client_instance.start_crawl self.get_crawl_status = client_instance.get_crawl_status + self.get_crawl_status_page = client_instance.get_crawl_status_page self.cancel_crawl = client_instance.cancel_crawl self.get_crawl_errors = client_instance.get_crawl_errors self.get_active_crawls = client_instance.get_active_crawls @@ -78,6 +79,7 @@ def __init__(self, client_instance: Optional[V2FirecrawlClient]): self.start_batch_scrape = client_instance.start_batch_scrape 
self.get_batch_scrape_status = client_instance.get_batch_scrape_status + self.get_batch_scrape_status_page = client_instance.get_batch_scrape_status_page self.cancel_batch_scrape = client_instance.cancel_batch_scrape self.batch_scrape = client_instance.batch_scrape self.get_batch_scrape_errors = client_instance.get_batch_scrape_errors @@ -127,6 +129,7 @@ def __init__(self, client_instance: Optional[AsyncFirecrawlClient] = None): self.start_crawl = client_instance.start_crawl self.wait_crawl = client_instance.wait_crawl self.get_crawl_status = client_instance.get_crawl_status + self.get_crawl_status_page = client_instance.get_crawl_status_page self.cancel_crawl = client_instance.cancel_crawl self.get_crawl_errors = client_instance.get_crawl_errors self.get_active_crawls = client_instance.get_active_crawls @@ -144,6 +147,7 @@ def __init__(self, client_instance: Optional[AsyncFirecrawlClient] = None): self.start_batch_scrape = client_instance.start_batch_scrape self.get_batch_scrape_status = client_instance.get_batch_scrape_status + self.get_batch_scrape_status_page = client_instance.get_batch_scrape_status_page self.cancel_batch_scrape = client_instance.cancel_batch_scrape self.wait_batch_scrape = client_instance.wait_batch_scrape self.batch_scrape = client_instance.batch_scrape @@ -198,6 +202,7 @@ def __init__(self, api_key: str = None, api_url: str = "https://api.firecrawl.de self.start_crawl = self._v2_client.start_crawl self.crawl_params_preview = self._v2_client.crawl_params_preview self.get_crawl_status = self._v2_client.get_crawl_status + self.get_crawl_status_page = self._v2_client.get_crawl_status_page self.cancel_crawl = self._v2_client.cancel_crawl self.get_crawl_errors = self._v2_client.get_crawl_errors self.get_active_crawls = self._v2_client.get_active_crawls @@ -205,6 +210,7 @@ def __init__(self, api_key: str = None, api_url: str = "https://api.firecrawl.de self.start_batch_scrape = self._v2_client.start_batch_scrape self.get_batch_scrape_status = self._v2_client.get_batch_scrape_status + self.get_batch_scrape_status_page = self._v2_client.get_batch_scrape_status_page self.cancel_batch_scrape = self._v2_client.cancel_batch_scrape self.batch_scrape = self._v2_client.batch_scrape self.get_batch_scrape_errors = self._v2_client.get_batch_scrape_errors @@ -248,6 +254,7 @@ def __init__(self, api_key: str = None, api_url: str = "https://api.firecrawl.de self.start_crawl = self._v2_client.start_crawl self.get_crawl_status = self._v2_client.get_crawl_status + self.get_crawl_status_page = self._v2_client.get_crawl_status_page self.cancel_crawl = self._v2_client.cancel_crawl self.crawl = self._v2_client.crawl self.get_crawl_errors = self._v2_client.get_crawl_errors @@ -256,6 +263,7 @@ def __init__(self, api_key: str = None, api_url: str = "https://api.firecrawl.de self.start_batch_scrape = self._v2_client.start_batch_scrape self.get_batch_scrape_status = self._v2_client.get_batch_scrape_status + self.get_batch_scrape_status_page = self._v2_client.get_batch_scrape_status_page self.cancel_batch_scrape = self._v2_client.cancel_batch_scrape self.batch_scrape = self._v2_client.batch_scrape self.get_batch_scrape_errors = self._v2_client.get_batch_scrape_errors @@ -278,4 +286,4 @@ def __init__(self, api_key: str = None, api_url: str = "https://api.firecrawl.de # Export Firecrawl as an alias for FirecrawlApp FirecrawlApp = Firecrawl -AsyncFirecrawlApp = AsyncFirecrawl \ No newline at end of file +AsyncFirecrawlApp = AsyncFirecrawl diff --git a/apps/python-sdk/firecrawl/v2/client.py 
b/apps/python-sdk/firecrawl/v2/client.py index 71e5b0bdde..8a476169ed 100644 --- a/apps/python-sdk/firecrawl/v2/client.py +++ b/apps/python-sdk/firecrawl/v2/client.py @@ -229,7 +229,8 @@ def crawl( exclude_paths: Optional[List[str]] = None, include_paths: Optional[List[str]] = None, max_discovery_depth: Optional[int] = None, - ignore_sitemap: bool = False, + sitemap: Optional[Literal["only", "include", "skip"]] = None, + ignore_sitemap: Optional[bool] = None, ignore_query_parameters: bool = False, limit: Optional[int] = None, crawl_entire_domain: bool = False, @@ -254,7 +255,8 @@ def crawl( exclude_paths: Patterns of URLs to exclude include_paths: Patterns of URLs to include max_discovery_depth: Maximum depth for finding new URLs - ignore_sitemap: Skip sitemap.xml processing + sitemap: Sitemap usage mode ("only" | "include" | "skip") + ignore_sitemap: Deprecated alias for sitemap ("skip" when true, "include" when false) ignore_query_parameters: Ignore URL parameters limit: Maximum pages to crawl crawl_entire_domain: Follow parent directory links @@ -277,25 +279,32 @@ def crawl( Exception: If the crawl fails to start or complete TimeoutError: If timeout is reached """ - request = CrawlRequest( - url=url, - prompt=prompt, - exclude_paths=exclude_paths, - include_paths=include_paths, - max_discovery_depth=max_discovery_depth, - ignore_sitemap=ignore_sitemap, - ignore_query_parameters=ignore_query_parameters, - limit=limit, - crawl_entire_domain=crawl_entire_domain, - allow_external_links=allow_external_links, - allow_subdomains=allow_subdomains, - delay=delay, - max_concurrency=max_concurrency, - webhook=webhook, - scrape_options=scrape_options, - zero_data_retention=zero_data_retention, - integration=integration, - ) + resolved_sitemap = sitemap + if resolved_sitemap is None and ignore_sitemap is not None: + resolved_sitemap = "skip" if ignore_sitemap else "include" + + request_kwargs = { + "url": url, + "prompt": prompt, + "exclude_paths": exclude_paths, + "include_paths": include_paths, + "max_discovery_depth": max_discovery_depth, + "ignore_query_parameters": ignore_query_parameters, + "limit": limit, + "crawl_entire_domain": crawl_entire_domain, + "allow_external_links": allow_external_links, + "allow_subdomains": allow_subdomains, + "delay": delay, + "max_concurrency": max_concurrency, + "webhook": webhook, + "scrape_options": scrape_options, + "zero_data_retention": zero_data_retention, + "integration": integration, + } + if resolved_sitemap is not None: + request_kwargs["sitemap"] = resolved_sitemap + + request = CrawlRequest(**request_kwargs) return crawl_module.crawl( self.http_client, @@ -313,7 +322,8 @@ def start_crawl( exclude_paths: Optional[List[str]] = None, include_paths: Optional[List[str]] = None, max_discovery_depth: Optional[int] = None, - ignore_sitemap: bool = False, + sitemap: Optional[Literal["only", "include", "skip"]] = None, + ignore_sitemap: Optional[bool] = None, ignore_query_parameters: bool = False, limit: Optional[int] = None, crawl_entire_domain: bool = False, @@ -335,7 +345,8 @@ def start_crawl( exclude_paths: Patterns of URLs to exclude include_paths: Patterns of URLs to include max_discovery_depth: Maximum depth for finding new URLs - ignore_sitemap: Skip sitemap.xml processing + sitemap: Sitemap usage mode ("only" | "include" | "skip") + ignore_sitemap: Deprecated alias for sitemap ("skip" when true, "include" when false) ignore_query_parameters: Ignore URL parameters limit: Maximum pages to crawl crawl_entire_domain: Follow parent directory links @@ 
-354,25 +365,32 @@ def start_crawl( ValueError: If request is invalid Exception: If the crawl operation fails to start """ - request = CrawlRequest( - url=url, - prompt=prompt, - exclude_paths=exclude_paths, - include_paths=include_paths, - max_discovery_depth=max_discovery_depth, - ignore_sitemap=ignore_sitemap, - ignore_query_parameters=ignore_query_parameters, - limit=limit, - crawl_entire_domain=crawl_entire_domain, - allow_external_links=allow_external_links, - allow_subdomains=allow_subdomains, - delay=delay, - max_concurrency=max_concurrency, - webhook=webhook, - scrape_options=scrape_options, - zero_data_retention=zero_data_retention, - integration=integration, - ) + resolved_sitemap = sitemap + if resolved_sitemap is None and ignore_sitemap is not None: + resolved_sitemap = "skip" if ignore_sitemap else "include" + + request_kwargs = { + "url": url, + "prompt": prompt, + "exclude_paths": exclude_paths, + "include_paths": include_paths, + "max_discovery_depth": max_discovery_depth, + "ignore_query_parameters": ignore_query_parameters, + "limit": limit, + "crawl_entire_domain": crawl_entire_domain, + "allow_external_links": allow_external_links, + "allow_subdomains": allow_subdomains, + "delay": delay, + "max_concurrency": max_concurrency, + "webhook": webhook, + "scrape_options": scrape_options, + "zero_data_retention": zero_data_retention, + "integration": integration, + } + if resolved_sitemap is not None: + request_kwargs["sitemap"] = resolved_sitemap + + request = CrawlRequest(**request_kwargs) return crawl_module.start_crawl(self.http_client, request) @@ -405,6 +423,28 @@ def get_crawl_status( pagination_config=pagination_config, request_timeout=request_timeout, ) + + def get_crawl_status_page( + self, + next_url: str, + *, + request_timeout: Optional[float] = None, + ) -> CrawlJob: + """ + Fetch a single page of crawl results using a next URL. + + Args: + next_url: Opaque next URL from a prior crawl status response + request_timeout: Timeout (in seconds) for the HTTP request + + Returns: + CrawlJob with the page data and next URL (if any) + """ + return crawl_module.get_crawl_status_page( + self.http_client, + next_url, + request_timeout=request_timeout, + ) def get_crawl_errors(self, crawl_id: str) -> CrawlErrorsResponse: """ @@ -723,6 +763,27 @@ def get_batch_scrape_status( pagination_config=pagination_config ) + def get_batch_scrape_status_page( + self, + next_url: str, + *, + request_timeout: Optional[float] = None, + ): + """Fetch a single page of batch scrape results using a next URL. + + Args: + next_url: Opaque next URL from a prior batch scrape status response + request_timeout: Timeout (in seconds) for the HTTP request + + Returns: + BatchScrapeJob with the page data and next URL (if any) + """ + return batch_module.get_batch_scrape_status_page( + self.http_client, + next_url, + request_timeout=request_timeout, + ) + def cancel_batch_scrape(self, job_id: str) -> bool: """Cancel a running batch scrape job. 
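The sitemap handling added above keeps the deprecated ignore_sitemap keyword working by resolving it to the new mode before the CrawlRequest is built. A short sketch of the two equivalent call styles (illustrative URL and limit; only keyword arguments present in the signatures above are used):

# Sketch of the sitemap / ignore_sitemap resolution described in the docstrings above.
from firecrawl import Firecrawl

firecrawl = Firecrawl(api_key="fc-YOUR-API-KEY")  # placeholder key

# Preferred: pass the sitemap mode directly ("only" | "include" | "skip").
job_a = firecrawl.start_crawl("https://example.com", limit=10, sitemap="only")

# Deprecated alias: ignore_sitemap=True resolves to sitemap="skip",
# ignore_sitemap=False resolves to sitemap="include"; an explicit `sitemap`
# argument always takes precedence over the alias.
job_b = firecrawl.start_crawl("https://example.com", limit=10, ignore_sitemap=True)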
@@ -977,4 +1038,4 @@ def batch_scrape( poll_interval=poll_interval, timeout=wait_timeout, ) - \ No newline at end of file + diff --git a/apps/python-sdk/firecrawl/v2/client_async.py b/apps/python-sdk/firecrawl/v2/client_async.py index ee20f34e37..5bd4829777 100644 --- a/apps/python-sdk/firecrawl/v2/client_async.py +++ b/apps/python-sdk/firecrawl/v2/client_async.py @@ -81,6 +81,13 @@ async def search( return await async_search.search(self.async_http_client, request) async def start_crawl(self, url: str, **kwargs) -> CrawlResponse: + sitemap = kwargs.pop("sitemap", None) + ignore_sitemap = kwargs.pop("ignore_sitemap", None) + if sitemap is None and ignore_sitemap is not None: + sitemap = "skip" if ignore_sitemap else "include" + if sitemap is not None: + kwargs["sitemap"] = sitemap + request = CrawlRequest(url=url, **kwargs) return await async_crawl.start_crawl(self.async_http_client, request) @@ -171,6 +178,28 @@ async def get_crawl_status( request_timeout=request_timeout, ) + async def get_crawl_status_page( + self, + next_url: str, + *, + request_timeout: Optional[float] = None, + ) -> CrawlJob: + """ + Fetch a single page of crawl results using a next URL. + + Args: + next_url: Opaque next URL from a prior crawl status response + request_timeout: Timeout (in seconds) for the HTTP request + + Returns: + CrawlJob with the page data and next URL (if any) + """ + return await async_crawl.get_crawl_status_page( + self.async_http_client, + next_url, + request_timeout=request_timeout, + ) + async def cancel_crawl(self, job_id: str) -> bool: return await async_crawl.cancel_crawl(self.async_http_client, job_id) @@ -241,6 +270,18 @@ async def get_batch_scrape_status( pagination_config=pagination_config ) + async def get_batch_scrape_status_page( + self, + next_url: str, + *, + request_timeout: Optional[float] = None, + ): + return await async_batch.get_batch_scrape_status_page( + self.async_http_client, + next_url, + request_timeout=request_timeout, + ) + async def cancel_batch_scrape(self, job_id: str) -> bool: return await async_batch.cancel_batch_scrape(self.async_http_client, job_id) @@ -414,4 +455,3 @@ def watcher( timeout: Optional[int] = None, ) -> AsyncWatcher: return AsyncWatcher(self, job_id, kind=kind, poll_interval=poll_interval, timeout=timeout) - diff --git a/apps/python-sdk/firecrawl/v2/methods/aio/batch.py b/apps/python-sdk/firecrawl/v2/methods/aio/batch.py index 958cd78db0..4585f44439 100644 --- a/apps/python-sdk/firecrawl/v2/methods/aio/batch.py +++ b/apps/python-sdk/firecrawl/v2/methods/aio/batch.py @@ -7,6 +7,29 @@ from ...methods.batch import validate_batch_urls import time +def _parse_batch_scrape_documents(data_list: Optional[List[Any]]) -> List[Document]: + documents: List[Document] = [] + for doc in data_list or []: + if isinstance(doc, dict): + normalized = normalize_document_input(doc) + documents.append(Document(**normalized)) + return documents + + +def _parse_batch_scrape_status_response(body: Dict[str, Any]) -> Dict[str, Any]: + if not body.get("success"): + raise Exception(body.get("error", "Unknown error occurred")) + + return { + "status": body.get("status"), + "completed": body.get("completed", 0), + "total": body.get("total", 0), + "credits_used": body.get("creditsUsed"), + "expires_at": body.get("expiresAt"), + "next": body.get("next"), + "data": _parse_batch_scrape_documents(body.get("data", []) or []), + } + def _prepare(urls: List[str], *, options: Optional[ScrapeOptions] = None, **kwargs) -> Dict[str, Any]: if not urls: raise ValueError("URLs list cannot be 
empty") @@ -68,35 +91,66 @@ async def get_batch_scrape_status( if response.status_code >= 400: handle_response_error(response, "get batch scrape status") body = response.json() - if not body.get("success"): - raise Exception(body.get("error", "Unknown error occurred")) - docs: List[Document] = [] - for doc in body.get("data", []) or []: - if isinstance(doc, dict): - normalized = normalize_document_input(doc) - docs.append(Document(**normalized)) + payload = _parse_batch_scrape_status_response(body) + docs = payload["data"] # Handle pagination if requested auto_paginate = pagination_config.auto_paginate if pagination_config else True - if auto_paginate and body.get("next"): + if auto_paginate and payload["next"]: docs = await _fetch_all_batch_pages_async( client, - body.get("next"), + payload["next"], docs, pagination_config ) return BatchScrapeJob( - status=body.get("status"), - completed=body.get("completed", 0), - total=body.get("total", 0), - credits_used=body.get("creditsUsed"), - expires_at=body.get("expiresAt"), - next=body.get("next") if not auto_paginate else None, + status=payload["status"], + completed=payload["completed"], + total=payload["total"], + credits_used=payload["credits_used"], + expires_at=payload["expires_at"], + next=payload["next"] if not auto_paginate else None, data=docs, ) +async def get_batch_scrape_status_page( + client: AsyncHttpClient, + next_url: str, + *, + request_timeout: Optional[float] = None, +) -> BatchScrapeJob: + """ + Fetch a single page of batch scrape results using the provided next URL. + + Args: + client: Async HTTP client instance + next_url: Opaque next URL from a prior batch scrape status response + request_timeout: Timeout (in seconds) for the HTTP request + + Returns: + BatchScrapeJob with the page data and next URL (if any) + + Raises: + Exception: If the request fails or returns an error response + """ + response = await client.get(next_url, timeout=request_timeout) + if response.status_code >= 400: + handle_response_error(response, "get batch scrape status page") + body = response.json() + payload = _parse_batch_scrape_status_response(body) + return BatchScrapeJob( + status=payload["status"], + completed=payload["completed"], + total=payload["total"], + credits_used=payload["credits_used"], + expires_at=payload["expires_at"], + next=payload["next"], + data=payload["data"], + ) + + async def _fetch_all_batch_pages_async( client: AsyncHttpClient, next_url: str, @@ -145,25 +199,24 @@ async def _fetch_all_batch_pages_async( break page_data = response.json() - - if not page_data.get("success"): + try: + page_payload = _parse_batch_scrape_status_response(page_data) + except Exception: break # Add documents from this page - for doc in page_data.get("data", []) or []: - if isinstance(doc, dict): - # Check max_results limit - if (max_results is not None) and (len(documents) >= max_results): - break - normalized = normalize_document_input(doc) - documents.append(Document(**normalized)) + for document in page_payload["data"]: + # Check max_results limit + if (max_results is not None) and (len(documents) >= max_results): + break + documents.append(document) # Check if we hit max_results limit if (max_results is not None) and (len(documents) >= max_results): break # Get next URL - current_url = page_data.get("next") + current_url = page_payload["next"] page_count += 1 return documents @@ -185,4 +238,3 @@ async def get_batch_scrape_errors(client: AsyncHttpClient, job_id: str) -> Dict[ if not body.get("success"): raise Exception(body.get("error", 
"Unknown error occurred")) return body - diff --git a/apps/python-sdk/firecrawl/v2/methods/aio/crawl.py b/apps/python-sdk/firecrawl/v2/methods/aio/crawl.py index 5d1d788303..3547353452 100644 --- a/apps/python-sdk/firecrawl/v2/methods/aio/crawl.py +++ b/apps/python-sdk/firecrawl/v2/methods/aio/crawl.py @@ -43,7 +43,7 @@ def _prepare_crawl_request(request: CrawlRequest) -> dict: "include_paths": "includePaths", "exclude_paths": "excludePaths", "max_discovery_depth": "maxDiscoveryDepth", - "ignore_sitemap": "ignoreSitemap", + "sitemap": "sitemap", "ignore_query_parameters": "ignoreQueryParameters", "crawl_entire_domain": "crawlEntireDomain", "allow_external_links": "allowExternalLinks", @@ -61,6 +61,30 @@ def _prepare_crawl_request(request: CrawlRequest) -> dict: return data +def _parse_crawl_documents(data_list: Optional[List[Any]]) -> List[Document]: + documents: List[Document] = [] + for doc_data in data_list or []: + if isinstance(doc_data, dict): + normalized = normalize_document_input(doc_data) + documents.append(Document(**normalized)) + return documents + + +def _parse_crawl_status_response(body: Dict[str, Any]) -> Dict[str, Any]: + if not body.get("success"): + raise Exception(body.get("error", "Unknown error occurred")) + + return { + "status": body.get("status"), + "completed": body.get("completed", 0), + "total": body.get("total", 0), + "credits_used": body.get("creditsUsed", 0), + "expires_at": body.get("expiresAt"), + "next": body.get("next"), + "data": _parse_crawl_documents(body.get("data", [])), + } + + async def start_crawl(client: AsyncHttpClient, request: CrawlRequest) -> CrawlResponse: """ Start a crawl job for a website. @@ -114,34 +138,66 @@ async def get_crawl_status( if response.status_code >= 400: handle_response_error(response, "get crawl status") body = response.json() - if body.get("success"): - documents = [] - for doc_data in body.get("data", []): - if isinstance(doc_data, dict): - normalized = normalize_document_input(doc_data) - documents.append(Document(**normalized)) - - # Handle pagination if requested - auto_paginate = pagination_config.auto_paginate if pagination_config else True - if auto_paginate and body.get("next"): - documents = await _fetch_all_pages_async( - client, - body.get("next"), - documents, - pagination_config, - request_timeout=request_timeout, - ) - - return CrawlJob( - status=body.get("status"), - completed=body.get("completed", 0), - total=body.get("total", 0), - credits_used=body.get("creditsUsed", 0), - expires_at=body.get("expiresAt"), - next=body.get("next") if not auto_paginate else None, - data=documents, + payload = _parse_crawl_status_response(body) + + documents = payload["data"] + + # Handle pagination if requested + auto_paginate = pagination_config.auto_paginate if pagination_config else True + if auto_paginate and payload["next"]: + documents = await _fetch_all_pages_async( + client, + payload["next"], + documents, + pagination_config, + request_timeout=request_timeout, ) - raise Exception(body.get("error", "Unknown error occurred")) + + return CrawlJob( + status=payload["status"], + completed=payload["completed"], + total=payload["total"], + credits_used=payload["credits_used"], + expires_at=payload["expires_at"], + next=payload["next"] if not auto_paginate else None, + data=documents, + ) + + +async def get_crawl_status_page( + client: AsyncHttpClient, + next_url: str, + *, + request_timeout: Optional[float] = None, +) -> CrawlJob: + """ + Fetch a single page of crawl results using the provided next URL. 
+ + Args: + client: Async HTTP client instance + next_url: Opaque next URL from a prior crawl status response + request_timeout: Timeout (in seconds) for the HTTP request + + Returns: + CrawlJob with the page data and next URL (if any) + + Raises: + Exception: If the request fails or returns an error response + """ + response = await client.get(next_url, timeout=request_timeout) + if response.status_code >= 400: + handle_response_error(response, "get crawl status page") + body = response.json() + payload = _parse_crawl_status_response(body) + return CrawlJob( + status=payload["status"], + completed=payload["completed"], + total=payload["total"], + credits_used=payload["credits_used"], + expires_at=payload["expires_at"], + next=payload["next"], + data=payload["data"], + ) async def _fetch_all_pages_async( @@ -195,25 +251,24 @@ async def _fetch_all_pages_async( break page_data = response.json() - - if not page_data.get("success"): + try: + page_payload = _parse_crawl_status_response(page_data) + except Exception: break # Add documents from this page - for doc_data in page_data.get("data", []): - if isinstance(doc_data, dict): - # Check max_results limit - if (max_results is not None) and (len(documents) >= max_results): - break - normalized = normalize_document_input(doc_data) - documents.append(Document(**normalized)) + for document in page_payload["data"]: + # Check max_results limit + if (max_results is not None) and (len(documents) >= max_results): + break + documents.append(document) # Check if we hit max_results limit if (max_results is not None) and (len(documents) >= max_results): break # Get next URL - current_url = page_data.get("next") + current_url = page_payload["next"] page_count += 1 return documents @@ -272,7 +327,7 @@ async def crawl_params_preview(client: AsyncHttpClient, request: CrawlParamsRequ "includePaths": "include_paths", "excludePaths": "exclude_paths", "maxDiscoveryDepth": "max_discovery_depth", - "ignoreSitemap": "ignore_sitemap", + "sitemap": "sitemap", "ignoreQueryParameters": "ignore_query_parameters", "crawlEntireDomain": "crawl_entire_domain", "allowExternalLinks": "allow_external_links", @@ -348,4 +403,3 @@ async def get_active_crawls(client: AsyncHttpClient) -> ActiveCrawlsResponse: "options": c.get("options"), }) return ActiveCrawlsResponse(success=True, crawls=[ActiveCrawl(**nc) for nc in normalized]) - diff --git a/apps/python-sdk/firecrawl/v2/methods/batch.py b/apps/python-sdk/firecrawl/v2/methods/batch.py index 53d2ea44dc..2b3477d663 100644 --- a/apps/python-sdk/firecrawl/v2/methods/batch.py +++ b/apps/python-sdk/firecrawl/v2/methods/batch.py @@ -18,6 +18,30 @@ from ..types import CrawlErrorsResponse +def _parse_batch_scrape_documents(data_list: Optional[List[Any]]) -> List[Document]: + documents: List[Document] = [] + for doc in data_list or []: + if isinstance(doc, dict): + normalized = normalize_document_input(doc) + documents.append(Document(**normalized)) + return documents + + +def _parse_batch_scrape_status_response(body: Dict[str, Any]) -> Dict[str, Any]: + if not body.get("success"): + raise Exception(body.get("error", "Unknown error occurred")) + + return { + "status": body.get("status"), + "completed": body.get("completed", 0), + "total": body.get("total", 0), + "credits_used": body.get("creditsUsed"), + "expires_at": body.get("expiresAt"), + "next": body.get("next"), + "data": _parse_batch_scrape_documents(body.get("data", []) or []), + } + + def start_batch_scrape( client: HttpClient, urls: List[str], @@ -104,37 +128,69 @@ def 
get_batch_scrape_status( # Parse response body = response.json() - if not body.get("success"): - raise Exception(body.get("error", "Unknown error occurred")) - - # Convert documents - documents: List[Document] = [] - for doc in body.get("data", []) or []: - if isinstance(doc, dict): - normalized = normalize_document_input(doc) - documents.append(Document(**normalized)) + payload = _parse_batch_scrape_status_response(body) + documents = payload["data"] # Handle pagination if requested auto_paginate = pagination_config.auto_paginate if pagination_config else True - if auto_paginate and body.get("next"): + if auto_paginate and payload["next"]: documents = _fetch_all_batch_pages( client, - body.get("next"), + payload["next"], documents, pagination_config ) return BatchScrapeJob( - status=body.get("status"), - completed=body.get("completed", 0), - total=body.get("total", 0), - credits_used=body.get("creditsUsed"), - expires_at=body.get("expiresAt"), - next=body.get("next") if not auto_paginate else None, + status=payload["status"], + completed=payload["completed"], + total=payload["total"], + credits_used=payload["credits_used"], + expires_at=payload["expires_at"], + next=payload["next"] if not auto_paginate else None, data=documents, ) +def get_batch_scrape_status_page( + client: HttpClient, + next_url: str, + *, + request_timeout: Optional[float] = None, +) -> BatchScrapeJob: + """ + Fetch a single page of batch scrape results using the provided next URL. + + Args: + client: HTTP client instance + next_url: Opaque next URL from a prior batch scrape status response + request_timeout: Timeout (in seconds) for the HTTP request + + Returns: + BatchScrapeJob with the page data and next URL (if any) + + Raises: + Exception: If the request fails or returns an error response + """ + response = client.get(next_url, timeout=request_timeout) + + if not response.ok: + handle_response_error(response, "get batch scrape status page") + + body = response.json() + payload = _parse_batch_scrape_status_response(body) + + return BatchScrapeJob( + status=payload["status"], + completed=payload["completed"], + total=payload["total"], + credits_used=payload["credits_used"], + expires_at=payload["expires_at"], + next=payload["next"], + data=payload["data"], + ) + + def _fetch_all_batch_pages( client: HttpClient, next_url: str, @@ -183,25 +239,24 @@ def _fetch_all_batch_pages( break page_data = response.json() - - if not page_data.get("success"): + try: + page_payload = _parse_batch_scrape_status_response(page_data) + except Exception: break # Add documents from this page - for doc in page_data.get("data", []) or []: - if isinstance(doc, dict): - # Check max_results limit - if max_results is not None and len(documents) >= max_results: - break - normalized = normalize_document_input(doc) - documents.append(Document(**normalized)) + for document in page_payload["data"]: + # Check max_results limit + if max_results is not None and len(documents) >= max_results: + break + documents.append(document) # Check if we hit max_results limit after adding all docs from this page if max_results is not None and len(documents) >= max_results: break # Get next URL - current_url = page_data.get("next") + current_url = page_payload["next"] page_count += 1 return documents @@ -496,4 +551,4 @@ def get_batch_scrape_errors(client: HttpClient, job_id: str) -> CrawlErrorsRespo "errors": payload.get("errors", []), "robots_blocked": payload.get("robotsBlocked", payload.get("robots_blocked", [])), } - return CrawlErrorsResponse(**normalized) \ 
No newline at end of file + return CrawlErrorsResponse(**normalized) diff --git a/apps/python-sdk/firecrawl/v2/methods/crawl.py b/apps/python-sdk/firecrawl/v2/methods/crawl.py index 5c91b02d4c..1730fc7263 100644 --- a/apps/python-sdk/firecrawl/v2/methods/crawl.py +++ b/apps/python-sdk/firecrawl/v2/methods/crawl.py @@ -106,6 +106,29 @@ def _prepare_crawl_request(request: CrawlRequest) -> dict: return data +def _parse_crawl_documents(data_list: Optional[List[Any]]) -> List[Document]: + documents: List[Document] = [] + for doc_data in data_list or []: + if isinstance(doc_data, dict): + documents.append(Document(**normalize_document_input(doc_data))) + return documents + + +def _parse_crawl_status_response(response_data: Dict[str, Any]) -> Dict[str, Any]: + if not response_data.get("success"): + raise Exception(response_data.get("error", "Unknown error occurred")) + + return { + "status": response_data.get("status"), + "completed": response_data.get("completed", 0), + "total": response_data.get("total", 0), + "credits_used": response_data.get("creditsUsed", 0), + "expires_at": response_data.get("expiresAt"), + "next": response_data.get("next"), + "data": _parse_crawl_documents(response_data.get("data", [])), + } + + def start_crawl(client: HttpClient, request: CrawlRequest) -> CrawlResponse: """ Start a crawl job for a website. @@ -175,47 +198,74 @@ def get_crawl_status( # Parse response response_data = response.json() - if response_data.get("success"): - # The API returns status fields at the top level, not in a data field - - # Convert documents - documents = [] - data_list = response_data.get("data", []) - for doc_data in data_list: - if isinstance(doc_data, str): - # Handle case where API returns just URLs - this shouldn't happen for crawl - # but we'll handle it gracefully - continue - else: - documents.append(Document(**normalize_document_input(doc_data))) - - # Handle pagination if requested - auto_paginate = pagination_config.auto_paginate if pagination_config else True - if auto_paginate and response_data.get("next") and not ( - pagination_config - and pagination_config.max_results is not None - and len(documents) >= pagination_config.max_results - ): - documents = _fetch_all_pages( - client, - response_data.get("next"), - documents, - pagination_config, - request_timeout=request_timeout, - ) - - # Create CrawlJob with current status and data - return CrawlJob( - status=response_data.get("status"), - completed=response_data.get("completed", 0), - total=response_data.get("total", 0), - credits_used=response_data.get("creditsUsed", 0), - expires_at=response_data.get("expiresAt"), - next=response_data.get("next", None) if not auto_paginate else None, - data=documents + payload = _parse_crawl_status_response(response_data) + + documents = payload["data"] + + # Handle pagination if requested + auto_paginate = pagination_config.auto_paginate if pagination_config else True + if auto_paginate and payload["next"] and not ( + pagination_config + and pagination_config.max_results is not None + and len(documents) >= pagination_config.max_results + ): + documents = _fetch_all_pages( + client, + payload["next"], + documents, + pagination_config, + request_timeout=request_timeout, ) - else: - raise Exception(response_data.get("error", "Unknown error occurred")) + + # Create CrawlJob with current status and data + return CrawlJob( + status=payload["status"], + completed=payload["completed"], + total=payload["total"], + credits_used=payload["credits_used"], + expires_at=payload["expires_at"], + 
next=payload["next"] if not auto_paginate else None, + data=documents, + ) + + +def get_crawl_status_page( + client: HttpClient, + next_url: str, + *, + request_timeout: Optional[float] = None, +) -> CrawlJob: + """ + Fetch a single page of crawl results using the provided next URL. + + Args: + client: HTTP client instance + next_url: Opaque next URL from a prior crawl status response + request_timeout: Timeout (in seconds) for the HTTP request + + Returns: + CrawlJob with the page data and next URL (if any) + + Raises: + Exception: If the request fails or returns an error response + """ + response = client.get(next_url, timeout=request_timeout) + + if not response.ok: + handle_response_error(response, "get crawl status page") + + response_data = response.json() + payload = _parse_crawl_status_response(response_data) + + return CrawlJob( + status=payload["status"], + completed=payload["completed"], + total=payload["total"], + credits_used=payload["credits_used"], + expires_at=payload["expires_at"], + next=payload["next"], + data=payload["data"], + ) def _fetch_all_pages( @@ -270,26 +320,24 @@ def _fetch_all_pages( page_data = response.json() - if not page_data.get("success"): + try: + page_payload = _parse_crawl_status_response(page_data) + except Exception: break # Add documents from this page - data_list = page_data.get("data", []) - for doc_data in data_list: - if isinstance(doc_data, str): - continue - else: - # Check max_results limit BEFORE adding each document - if max_results is not None and len(documents) >= max_results: - break - documents.append(Document(**normalize_document_input(doc_data))) + for document in page_payload["data"]: + # Check max_results limit BEFORE adding each document + if max_results is not None and len(documents) >= max_results: + break + documents.append(document) # Check if we hit max_results limit if max_results is not None and len(documents) >= max_results: break # Get next URL - current_url = page_data.get("next") + current_url = page_payload["next"] page_count += 1 return documents diff --git a/apps/python-sdk/firecrawl/v2/types.py b/apps/python-sdk/firecrawl/v2/types.py index 6738709397..297332a042 100644 --- a/apps/python-sdk/firecrawl/v2/types.py +++ b/apps/python-sdk/firecrawl/v2/types.py @@ -572,7 +572,7 @@ class CrawlRequest(BaseModel): exclude_paths: Optional[List[str]] = None include_paths: Optional[List[str]] = None max_discovery_depth: Optional[int] = None - sitemap: Literal["skip", "include"] = "include" + sitemap: Literal["skip", "include", "only"] = "include" ignore_query_parameters: bool = False limit: Optional[int] = None crawl_entire_domain: bool = False @@ -664,7 +664,7 @@ class CrawlParamsData(BaseModel): include_paths: Optional[List[str]] = None exclude_paths: Optional[List[str]] = None max_discovery_depth: Optional[int] = None - ignore_sitemap: bool = False + sitemap: Optional[Literal["skip", "include", "only"]] = None ignore_query_parameters: bool = False limit: Optional[int] = None crawl_entire_domain: bool = False diff --git a/apps/test-suite/package.json b/apps/test-suite/package.json index 9ebc39cb90..1419dfc26b 100644 --- a/apps/test-suite/package.json +++ b/apps/test-suite/package.json @@ -33,6 +33,7 @@ }, "pnpm": { "overrides": { + "lodash@>=4.0.0 <=4.17.22": ">=4.17.23", "braces@<3.0.3": ">=3.0.3", "micromatch@<4.0.8": ">=4.0.8", "nanoid@<3.3.8": ">=3.3.8", diff --git a/apps/test-suite/pnpm-lock.yaml b/apps/test-suite/pnpm-lock.yaml index 8f578a6f48..f17a458b27 100644 --- a/apps/test-suite/pnpm-lock.yaml +++ 
b/apps/test-suite/pnpm-lock.yaml @@ -5,6 +5,7 @@ settings: excludeLinksFromLockfile: false overrides: + lodash@>=4.0.0 <=4.17.22: '>=4.17.23' braces@<3.0.3: '>=3.0.3' micromatch@<4.0.8: '>=4.0.8' nanoid@<3.3.8: '>=3.3.8' @@ -3432,8 +3433,8 @@ packages: lodash.once@4.1.1: resolution: {integrity: sha512-Sb487aTOCr9drQVL8pIxOzVhafOjZN9UU54hiN8PU3uAiSV7lx1yYNpbNmex2PK6dSJoNTSJUUswT651yww3Mg==} - lodash@4.17.21: - resolution: {integrity: sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==} + lodash@4.17.23: + resolution: {integrity: sha512-LgVTMpQtIopCi79SJeDiP0TfWi5CNEc/L/aRdTh3yIvmZXTnheWpKjSZhnvMl8iXbC1tFg9gdHHDMLoV7CnG+w==} log-symbols@3.0.0: resolution: {integrity: sha512-dSkNGuI7iG3mfvDzUuYZyvk5dD9ocYCYzNU6CYDE6+Xqd+gwme6Z00NS3dUh8mq/73HaEtT7m6W+yUPtU6BZnQ==} @@ -4345,7 +4346,7 @@ snapshots: deep-for-each: 3.0.0 espree: 9.6.1 jsonpath-plus: 10.3.0 - lodash: 4.17.21 + lodash: 4.17.23 ms: 2.1.3 transitivePeerDependencies: - supports-color @@ -4373,7 +4374,7 @@ snapshots: got: 11.8.6 hpagent: 0.1.2 https-proxy-agent: 5.0.1 - lodash: 4.17.21 + lodash: 4.17.23 ms: 2.1.3 protobufjs: 7.3.2 socket.io-client: 4.7.5 @@ -8059,7 +8060,7 @@ snapshots: chalk: 4.1.2 debug: 4.4.1(supports-color@8.1.1) jmespath: 0.16.0 - lodash: 4.17.21 + lodash: 4.17.23 transitivePeerDependencies: - supports-color @@ -8159,7 +8160,7 @@ snapshots: joi: 17.13.3 js-yaml: 3.14.1 jsonwebtoken: 9.0.3 - lodash: 4.17.21 + lodash: 4.17.23 moment: 2.30.1 nanoid: 5.1.5 ora: 4.1.1 @@ -8181,7 +8182,7 @@ snapshots: async@2.6.4: dependencies: - lodash: 4.17.21 + lodash: 4.17.23 async@3.2.5: {} @@ -8649,7 +8650,7 @@ snapshots: dependencies: extend: 3.0.2 json-bigint: 1.0.0 - lodash: 4.17.21 + lodash: 4.17.23 minimist: 1.2.8 rc: 1.2.8 @@ -9723,7 +9724,7 @@ snapshots: lodash.once@4.1.1: {} - lodash@4.17.21: {} + lodash@4.17.23: {} log-symbols@3.0.0: dependencies:
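Beyond turning auto-pagination off entirely, the _fetch_all_pages / _fetch_all_batch_pages helpers above also honour a max_results cap read from PaginationConfig. A closing sketch of what that looks like from the caller's side (illustrative URLs and cap; treat the exact PaginationConfig field set as an assumption based on the helpers shown here):

# Sketch: bound auto-pagination so a large batch scrape stops early.
from firecrawl import Firecrawl
from firecrawl.v2.types import PaginationConfig

firecrawl = Firecrawl(api_key="fc-YOUR-API-KEY")  # placeholder key

urls = [f"https://docs.firecrawl.dev?batch={i}" for i in range(15)]  # illustrative URLs
job = firecrawl.start_batch_scrape(urls, formats=["markdown"], ignore_invalid_urls=True)

# Auto-paginate, but stop following `next` URLs once 50 documents are collected,
# matching the max_results checks in the pagination helpers above.
# (In practice you would poll until the job reports "completed".)
status = firecrawl.get_batch_scrape_status(
    job.id,
    pagination_config=PaginationConfig(max_results=50),
)
print(f"Collected {len(status.data)} documents (capped at 50)")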