Patch summary: adapt the Apify SDK to crawlee v4's `StorageClient` interface.

  * packages/apify/src/actor.ts wraps `apifyClient` in the new
    `ApifyStorageClient` adapter before handing it to `serviceLocator` /
    `StorageManager.openStorage`, and reads the request queue's
    `totalRequestCount` through `getMetadata()` instead of `get()`.
  * packages/apify/src/apify_storage_client.ts (new) implements the adapter:
    each factory resolves a `name` to an ID via `getOrCreate(name)` and
    returns the matching apify-client resource client.
  * packages/apify/src/key_value_store.ts makes `getPublicUrl` async and
    fetches `urlSigningSecretKey` from the store metadata on demand.
  * test/ helpers and expectations are updated to the new wiring.

NOTE(review): this patch was reconstructed from a whitespace-collapsed copy.
Generic type arguments that were missing there (`Promise<...>` return types,
`AsyncLocalStorage<...>`) have been restored from the neighbouring casts and
comments, and context-line indentation assumes the repository's 4-space
Prettier layout. Confirm both against the original commit before applying;
the `index` blob hashes below are the ones from the collapsed copy.

diff --git a/packages/apify/src/actor.ts b/packages/apify/src/actor.ts
index 0a8f1e0601..45ff47086e 100644
--- a/packages/apify/src/actor.ts
+++ b/packages/apify/src/actor.ts
@@ -43,6 +43,7 @@ import { decryptInputSecrets } from '@apify/input_secrets';
 import log from '@apify/log';
 import { addTimeoutToPromise } from '@apify/timeout';
 
+import { ApifyStorageClient } from './apify_storage_client.js';
 import type { ChargeOptions, ChargeResult } from './charging.js';
 import { ChargingManager } from './charging.js';
 import type { ConfigurationOptions } from './configuration.js';
@@ -495,7 +496,9 @@ export class Actor {
         if (this.isAtHome()) {
             // availableMemoryRatio and disableBrowserSandbox are now set via
             // conditional defaults in the Configuration constructor (isAtHome check)
-            serviceLocator.setStorageClient(this.apifyClient);
+            serviceLocator.setStorageClient(
+                new ApifyStorageClient(this.apifyClient),
+            );
             serviceLocator.setEventManager(this.eventManager);
         } else if (options.storage) {
             serviceLocator.setStorageClient(options.storage);
@@ -1310,7 +1313,7 @@ export class Actor {
 
         // eslint-disable-next-line dot-notation
         queue['initialCount'] =
-            (await queue.client.get())?.totalRequestCount ?? 0;
+            (await queue.client.getMetadata())?.totalRequestCount ?? 0;
 
         return queue;
     }
@@ -2235,13 +2238,10 @@ export class Actor {
         id?: string,
         options: OpenStorageOptions = {},
     ) {
-        const client = options.forceCloud ? this.apifyClient : undefined;
-        return StorageManager.openStorage(
-            storageClass,
-            id,
-            client,
-            this.config,
-        );
+        const client = options.forceCloud
+            ? new ApifyStorageClient(this.apifyClient)
+            : undefined;
+        return StorageManager.openStorage(storageClass, id, client);
     }
 
     private _ensureActorInit(methodCalled: string) {
diff --git a/packages/apify/src/apify_storage_client.ts b/packages/apify/src/apify_storage_client.ts
new file mode 100644
index 0000000000..6301411010
--- /dev/null
+++ b/packages/apify/src/apify_storage_client.ts
@@ -0,0 +1,68 @@
+import type {
+    CreateDatasetClientOptions,
+    CreateKeyValueStoreClientOptions,
+    CreateRequestQueueClientOptions,
+    DatasetClient,
+    KeyValueStoreClient,
+    RequestQueueClient,
+    StorageClient,
+} from '@crawlee/types';
+import type { ApifyClient } from 'apify-client';
+
+/**
+ * Bridges `apify-client`'s synchronous resource accessors (`dataset(id)`,
+ * `keyValueStore(id)`, `requestQueue(id, options?)`) to crawlee v4's
+ * `StorageClient` interface (async factory methods accepting either an `id`
+ * or a `name`).
+ *
+ * When only a `name` is provided, we resolve it to a concrete ID via the
+ * collection client's `getOrCreate(name)` — matching the behaviour the SDK
+ * relied on in v3 when storages were opened by name.
+ */
+export class ApifyStorageClient implements StorageClient {
+    constructor(private readonly client: ApifyClient) {}
+
+    async createDatasetClient(
+        options?: CreateDatasetClientOptions,
+    ): Promise<DatasetClient> {
+        const id =
+            options?.id ??
+            (options?.name
+                ? (await this.client.datasets().getOrCreate(options.name)).id
+                : undefined);
+        // apify-client's resource clients overlap with `@crawlee/types`' shapes
+        // but don't yet implement the v4-added members (`getMetadata`,
+        // `getRecordPublicUrl`). Cast through for now; a follow-up should
+        // bring apify-client into structural alignment.
+        return this.client.dataset(id ?? '') as unknown as DatasetClient;
+    }
+
+    async createKeyValueStoreClient(
+        options?: CreateKeyValueStoreClientOptions,
+    ): Promise<KeyValueStoreClient> {
+        const id =
+            options?.id ??
+            (options?.name
+                ? (await this.client.keyValueStores().getOrCreate(options.name))
+                      .id
+                : undefined);
+        return this.client.keyValueStore(
+            id ?? '',
+        ) as unknown as KeyValueStoreClient;
+    }
+
+    async createRequestQueueClient(
+        options?: CreateRequestQueueClientOptions,
+    ): Promise<RequestQueueClient> {
+        const id =
+            options?.id ??
+            (options?.name
+                ? (await this.client.requestQueues().getOrCreate(options.name))
+                      .id
+                : undefined);
+        return this.client.requestQueue(
+            id ?? '',
+            options?.clientKey ? { clientKey: options.clientKey } : undefined,
+        ) as unknown as RequestQueueClient;
+    }
+}
diff --git a/packages/apify/src/key_value_store.ts b/packages/apify/src/key_value_store.ts
index a26a12e8f1..356180bdcd 100644
--- a/packages/apify/src/key_value_store.ts
+++ b/packages/apify/src/key_value_store.ts
@@ -1,12 +1,18 @@
 import type { StorageManagerOptions } from '@crawlee/core';
 import { KeyValueStore as CoreKeyValueStore } from '@crawlee/core';
+import type { KeyValueStoreInfo } from '@crawlee/types';
 
 import { createHmacSignature } from '@apify/utilities';
 
 import type { Configuration } from './configuration.js';
 
-// @ts-ignore newer crawlee versions already declare this method in core
-const { getPublicUrl } = CoreKeyValueStore.prototype;
+// crawlee v4 dropped the `storageObject` cache from `KeyValueStore`, so the
+// per-store `urlSigningSecretKey` (which is part of the platform's metadata
+// response but not declared on `@crawlee/types`' `KeyValueStoreInfo`) has to
+// be fetched on demand and accessed through a structural-typed augmentation.
+type ApifyKeyValueStoreInfo = KeyValueStoreInfo & {
+    urlSigningSecretKey?: string;
+};
 
 /**
  * @inheritDoc
@@ -15,24 +21,35 @@ export class KeyValueStore extends CoreKeyValueStore {
     /**
      * Returns a URL for the given key that may be used to publicly
      * access the value in the remote key-value store.
+     *
+     * On the Apify platform the URL is signed with the store's
+     * `urlSigningSecretKey` so that anyone with the URL can read the record
+     * without authentication. Locally we delegate to crawlee's default
+     * implementation (which produces a `file://` URL or returns `undefined`).
      */
-    override getPublicUrl(key: string): string {
+    override async getPublicUrl(key: string): Promise<string | undefined> {
         const config = this.config as Configuration;
-        if (!config.isAtHome && getPublicUrl) {
-            return getPublicUrl.call(this, key);
+        if (!config.isAtHome) {
+            return super.getPublicUrl(key);
         }
 
         const publicUrl = new URL(
             `${config.apiPublicBaseUrl}/v2/key-value-stores/${this.id}/records/${key}`,
         );
 
-        if (this.storageObject?.urlSigningSecretKey) {
+        // `client` is `private` on `CoreKeyValueStore`; bypass the visibility
+        // check to fetch the per-store secret. There is no public crawlee API
+        // surface for this yet — track upstream exposure as a follow-up.
+        const metadata = (await (
+            this as unknown as {
+                client: { getMetadata(): Promise<unknown> };
+            }
+        ).client.getMetadata()) as ApifyKeyValueStoreInfo;
+
+        if (metadata?.urlSigningSecretKey) {
             publicUrl.searchParams.append(
                 'signature',
-                createHmacSignature(
-                    this.storageObject.urlSigningSecretKey as string,
-                    key,
-                ),
+                createHmacSignature(metadata.urlSigningSecretKey, key),
             );
         }
 
@@ -49,6 +66,3 @@ export class KeyValueStore extends CoreKeyValueStore {
         return super.open(storeIdOrName, options) as unknown as KeyValueStore;
     }
 }
-
-// @ts-ignore newer crawlee versions already declare this method in core
-CoreKeyValueStore.prototype.getPublicUrl = KeyValueStore.prototype.getPublicUrl;
diff --git a/test/MemoryStorageEmulator.ts b/test/MemoryStorageEmulator.ts
index c5d4511236..b061ea2473 100644
--- a/test/MemoryStorageEmulator.ts
+++ b/test/MemoryStorageEmulator.ts
@@ -1,14 +1,39 @@
+import { AsyncLocalStorage } from 'node:async_hooks';
 import { rm } from 'node:fs/promises';
 import { resolve } from 'node:path';
 
-import { StorageManager } from '@crawlee/core';
+import { serviceLocator } from '@crawlee/core';
 import { MemoryStorage } from '@crawlee/memory-storage';
-import { Configuration } from 'apify';
+import { Actor, Configuration } from 'apify';
 import { ensureDir } from 'fs-extra';
 
 import log from '@apify/log';
 import { cryptoRandomObjectId } from '@apify/utilities';
 
+function resetGlobalState() {
+    serviceLocator.reset();
+    // The SDK's `Configuration` keeps its own static singleton (separate
+    // from crawlee's serviceLocator), and `Actor` caches a default
+    // instance with the resolved config. Both must be cleared so each
+    // test starts with a fresh config that reads the env vars it just set.
+    (
+        Configuration as unknown as { globalConfig?: Configuration }
+    ).globalConfig = undefined;
+    // eslint-disable-next-line no-underscore-dangle -- `_instance` is the upstream Actor singleton field
+    (Actor as unknown as { _instance?: Actor })._instance = undefined;
+    // `Actor.init()` calls `Configuration.storage.enterWith(this.config)`,
+    // which sticks the resolved config onto the *outer* async context
+    // (vitest's test runner). `enterWith(undefined)` from a child context
+    // (this beforeEach) doesn't propagate back up, so on Node 22 the next
+    // test still sees the stale store. Replace the entire AsyncLocalStorage
+    // instance to guarantee `getStore()` returns `undefined` everywhere.
+    (
+        Configuration as unknown as {
+            storage: AsyncLocalStorage<Configuration>;
+        }
+    ).storage = new AsyncLocalStorage();
+}
+
 const LOCAL_EMULATION_DIR = resolve(
     __dirname,
     '..',
@@ -20,7 +45,10 @@ export class MemoryStorageEmulator {
     protected localStorageDirectories: string[] = [];
 
     async init(dirName = cryptoRandomObjectId(10)) {
-        StorageManager.clearCache();
+        // crawlee v4 dropped `StorageManager.clearCache()` and
+        // `Configuration.useStorageClient()`; reset the service locator
+        // and re-register the in-memory client instead.
+        resetGlobalState();
         const localStorageDir = resolve(LOCAL_EMULATION_DIR, dirName);
         this.localStorageDirectories.push(localStorageDir);
         await ensureDir(localStorageDir);
@@ -28,7 +56,7 @@ export class MemoryStorageEmulator {
         const storage = new MemoryStorage({
             localDataDirectory: localStorageDir,
         });
-        Configuration.getGlobalConfig().useStorageClient(storage);
+        serviceLocator.setStorageClient(storage);
         log.debug(
             `Initialized emulated memory storage in folder ${localStorageDir}`,
         );
@@ -40,7 +68,7 @@ export class MemoryStorageEmulator {
         });
 
         await Promise.all(promises);
-        StorageManager.clearCache();
+        resetGlobalState();
     }
 
     static toString() {
diff --git a/test/apify/actor.test.ts b/test/apify/actor.test.ts
index b3cfe7e84b..d93032cb1f 100644
--- a/test/apify/actor.test.ts
+++ b/test/apify/actor.test.ts
@@ -764,13 +764,26 @@ describe('Actor', () => {
                 'openStorage',
             );
 
+            // crawlee v4's `RequestQueueClient` exposes metadata via
+            // `getMetadata()` (the v3 `get()` was dropped).
             const mockRQ = {
-                client: { get: () => ({ totalRequestCount: 10 }) },
+                client: {
+                    getMetadata: async () => ({ totalRequestCount: 10 }),
+                },
             };
             openStorageSpy.mockImplementationOnce(async () => mockRQ);
             const queue = await sdk.openRequestQueue(queueId, options);
 
-            expect(openStorageSpy).toBeCalledWith(queueId, sdk.apifyClient);
+            // The SDK now wraps `apifyClient` in an `ApifyStorageClient`
+            // adapter to satisfy crawlee v4's `StorageClient` interface.
+            expect(openStorageSpy).toBeCalledWith(
+                queueId,
+                expect.objectContaining({
+                    createDatasetClient: expect.any(Function),
+                    createKeyValueStoreClient: expect.any(Function),
+                    createRequestQueueClient: expect.any(Function),
+                }),
+            );
             expect(openStorageSpy).toBeCalledTimes(1);
 
             // @ts-expect-error private prop
@@ -789,7 +802,11 @@ describe('Actor', () => {
             expect(mockOpenStorage).toBeCalledTimes(1);
             expect(mockOpenStorage).toBeCalledWith(
                 datasetName,
-                sdk.apifyClient,
+                expect.objectContaining({
+                    createDatasetClient: expect.any(Function),
+                    createKeyValueStoreClient: expect.any(Function),
+                    createRequestQueueClient: expect.any(Function),
+                }),
             );
         });
     });