Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
392dcea
feat: redesign Configuration class for v4
B4nan Mar 13, 2026
18a23d9
fix: update configuration integration for crawlee Configuration redesign
B4nan Mar 25, 2026
4f7f6f9
fix: preserve storageClientOptions pass-through in Actor.newClient
B4nan Apr 20, 2026
b6c34ea
chore: bump crawlee to ^4.0.0-beta.49 and finish config-redesign inte…
B4nan Apr 30, 2026
a99be15
fix(actor): adapt PlatformEventManager to crawlee v4 EventManager API
B4nan Apr 30, 2026
ccf96c5
fix: adapt SDK storage layer to crawlee v4 StorageClient interface
B4nan Apr 30, 2026
93d2cae
fix: adapt SDK ProxyConfiguration to crawlee v4 API
B4nan Apr 30, 2026
9d11039
test(actor): reset service locator and Actor singleton between newCli…
B4nan Apr 30, 2026
c1b4815
test(events): use serviceLocator.setEventManager and reset between cases
B4nan Apr 30, 2026
90f0f87
test: migrate MemoryStorageEmulator to crawlee v4 service locator
B4nan Apr 30, 2026
075907c
test(events): seed env vars before resolving Configuration
B4nan Apr 30, 2026
be6811f
test: also reset SDK Configuration.globalConfig between cases
B4nan Apr 30, 2026
c890b8d
chore: fix import sort in actor.ts after ApifyStorageClient addition
B4nan Apr 30, 2026
a4325b9
chore: fix import sort in proxy_configuration.ts
B4nan Apr 30, 2026
58ce536
chore: silence no-underscore-dangle for Actor._instance reset
B4nan Apr 30, 2026
5a34b34
chore: prettier
B4nan Apr 30, 2026
c01e89d
chore: prettier
B4nan Apr 30, 2026
56f0c3e
chore: prettier
B4nan Apr 30, 2026
31344fe
chore: prettier
B4nan Apr 30, 2026
fda4317
test: also reset SDK Configuration.globalConfig and Actor singleton o…
B4nan Apr 30, 2026
99e5683
test: align actor.test.ts mocks/expectations with v4 StorageClient ad…
B4nan Apr 30, 2026
4fd8e3e
test(reboot): use serviceLocator.getEventManager (v4 replacement)
B4nan Apr 30, 2026
b42b603
test(actor): adapt to crawlee v4 eager Configuration resolution
B4nan Apr 30, 2026
2d90549
test(getInput): reset cached singletons before each fresh Actor build
B4nan Apr 30, 2026
ecacdc6
fix(proxy): preserve v3 rotation/username/validation semantics
B4nan Apr 30, 2026
41e6689
test(getInput): also overwrite actor.config explicitly for env-var re…
B4nan Apr 30, 2026
ae1d911
fix(proxy): support legacy (sessionId, options) two-arg form; trim Pr…
B4nan Apr 30, 2026
327eb31
fix(proxy): decode username; reset cached singletons in createProxyCo…
B4nan Apr 30, 2026
09fdf11
test(actor): import SDK Configuration in test file (not crawlee's)
B4nan Apr 30, 2026
5b6598a
chore: add cheerio as devDep — workaround missing @crawlee/linkedom dep
B4nan Apr 30, 2026
32634c2
test: clear Configuration AsyncLocalStorage between tests (Node 22 fix)
B4nan Apr 30, 2026
2711b95
chore: bump crawlee to ^4.0.0-beta.51 and drop cheerio workaround
B4nan Apr 30, 2026
40d296c
fix(proxy): drop tieredProxyUrls/tieredProxyConfig support
B4nan Apr 30, 2026
6a8a6bc
test: replace Configuration.storage with fresh AsyncLocalStorage on r…
B4nan Apr 30, 2026
6e28da6
merge: event-manager-v4-adapt
B4nan Apr 30, 2026
7288a0f
Merge remote-tracking branch 'origin/fix/storage-client-v4-adapt' int…
B4nan Apr 30, 2026
f0ced4f
merge: storage-client-v4-adapt
B4nan Apr 30, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,770 changes: 970 additions & 800 deletions package-lock.json

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@
"@apify/input_secrets": "^1.1.72",
"@apify/tsconfig": "^0.1.1",
"@commitlint/config-conventional": "^19.8.1",
"@crawlee/core": "^4.0.0-beta.51",
"@crawlee/types": "^4.0.0-beta.51",
"@crawlee/utils": "^4.0.0-beta.51",
"@playwright/browser-chromium": "^1.52.0",
"@types/content-type": "^1.1.8",
"@types/fs-extra": "^11.0.4",
Expand All @@ -78,10 +81,7 @@
"@types/tough-cookie": "^4.0.5",
"@types/ws": "^8.18.1",
"commitlint": "^19.8.1",
"crawlee": "^4.0.0-beta.0",
"@crawlee/core": "^4.0.0-beta.0",
"@crawlee/types": "^4.0.0-beta.0",
"@crawlee/utils": "^4.0.0-beta.0",
"crawlee": "^4.0.0-beta.51",
"eslint": "^9.27.0",
"eslint-config-prettier": "^10.1.5",
"fs-extra": "^11.3.0",
Expand Down
9 changes: 5 additions & 4 deletions packages/apify/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -53,15 +53,16 @@
"@apify/log": "^2.5.18",
"@apify/timeout": "^0.3.2",
"@apify/utilities": "^2.15.5",
"@crawlee/core": "^4.0.0-beta.0",
"@crawlee/types": "^4.0.0-beta.0",
"@crawlee/utils": "^4.0.0-beta.0",
"@crawlee/core": "^4.0.0-beta.51",
"@crawlee/types": "^4.0.0-beta.51",
"@crawlee/utils": "^4.0.0-beta.51",
"apify-client": "^2.12.4",
"fs-extra": "^11.3.0",
"got-scraping": "^4.1.1",
"ow": "^2.0.0",
"semver": "^7.7.2",
"tslib": "^2.8.1",
"ws": "^8.18.2"
"ws": "^8.18.2",
"zod": "^3.24.0 || ^4.0.0"
}
}
89 changes: 38 additions & 51 deletions packages/apify/src/actor.ts
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@
import { createPrivateKey } from 'node:crypto';

import type {
ConfigurationOptions,
EventManager,
EventTypeName,
IStorage,
RecordOptions,
UseStateOptions,
} from '@crawlee/core';
import {
Configuration as CoreConfiguration,
Dataset,
EventType,
purgeDefaultStorages,
RequestQueue,
serviceLocator,
StorageManager,
} from '@crawlee/core';
import type {
Expand Down Expand Up @@ -44,8 +43,10 @@ import { decryptInputSecrets } from '@apify/input_secrets';
import log from '@apify/log';
import { addTimeoutToPromise } from '@apify/timeout';

import { ApifyStorageClient } from './apify_storage_client.js';
import type { ChargeOptions, ChargeResult } from './charging.js';
import { ChargingManager } from './charging.js';
import type { ConfigurationOptions } from './configuration.js';
import { Configuration } from './configuration.js';
import { KeyValueStore } from './key_value_store.js';
import { PlatformEventManager } from './platform_event_manager.js';
Expand Down Expand Up @@ -490,19 +491,21 @@ export class Actor<Data extends Dictionary = Dictionary> {
printOutdatedSdkWarning();

// reset global config instance to respect APIFY_ prefixed env vars
CoreConfiguration.globalConfig = Configuration.getGlobalConfig();
serviceLocator.setConfiguration(Configuration.getGlobalConfig());

if (this.isAtHome()) {
this.config.set('availableMemoryRatio', 1);
this.config.set('disableBrowserSandbox', true); // for browser launcher, adds `--no-sandbox` to args
this.config.useStorageClient(this.apifyClient);
this.config.useEventManager(this.eventManager);
// availableMemoryRatio and disableBrowserSandbox are now set via
// conditional defaults in the Configuration constructor (isAtHome check)
serviceLocator.setStorageClient(
new ApifyStorageClient(this.apifyClient),
);
serviceLocator.setEventManager(this.eventManager);
} else if (options.storage) {
this.config.useStorageClient(options.storage);
serviceLocator.setStorageClient(options.storage);
}

// Init the event manager the config uses
await this.config.getEventManager().init();
await serviceLocator.getEventManager().init();
log.debug(`Events initialized`);

await purgeDefaultStorages({
Expand Down Expand Up @@ -534,8 +537,8 @@ export class Actor<Data extends Dictionary = Dictionary> {
options.exit ??= true;
options.exitCode ??= EXIT_CODES.SUCCESS;
options.timeoutSecs ??= 30;
const client = this.config.getStorageClient();
const events = this.config.getEventManager();
const client = serviceLocator.getStorageClient();
const events = serviceLocator.getEventManager();

// Close the event manager and emit the final PERSIST_STATE event
await events.close();
Expand Down Expand Up @@ -601,14 +604,14 @@ export class Actor<Data extends Dictionary = Dictionary> {
* @ignore
*/
on(event: EventTypeName, listener: (...args: any[]) => any): void {
this.config.getEventManager().on(event, listener);
serviceLocator.getEventManager().on(event, listener);
}

/**
* @ignore
*/
off(event: EventTypeName, listener?: (...args: any[]) => any): void {
this.config.getEventManager().off(event, listener);
serviceLocator.getEventManager().off(event, listener);
}

/**
Expand Down Expand Up @@ -776,12 +779,10 @@ export class Actor<Data extends Dictionary = Dictionary> {
}

const {
customAfterSleepMillis = this.config.get(
'metamorphAfterSleepMillis',
),
customAfterSleepMillis = this.config.metamorphAfterSleepMillis,
...metamorphOpts
} = options;
const runId = this.config.get('actorRunId')!;
const runId = this.config.actorRunId!;
await this.apifyClient
.run(runId)
.metamorph(targetActorId, input, metamorphOpts);
Expand Down Expand Up @@ -815,27 +816,24 @@ export class Actor<Data extends Dictionary = Dictionary> {
this.isRebooting = true;

// Waiting for all the listeners to finish, as `.reboot()` kills the container.
const eventManager = serviceLocator.getEventManager();
await Promise.all([
// `persistState` for individual RequestLists, RequestQueue... instances to be persisted
...this.config
.getEventManager()
...eventManager
.listeners(EventType.PERSIST_STATE)
.map(async (x) => x()),
.map(async (x: (...args: any[]) => any) => x()),
// `migrating` to pause Apify crawlers
...this.config
.getEventManager()
...eventManager
.listeners(EventType.MIGRATING)
.map(async (x) => x()),
.map(async (x: (...args: any[]) => any) => x()),
]);

const runId = this.config.get('actorRunId')!;
const runId = this.config.actorRunId!;
await this.apifyClient.run(runId).reboot();

// Wait some time for container to be stopped.
const {
customAfterSleepMillis = this.config.get(
'metamorphAfterSleepMillis',
),
customAfterSleepMillis = this.config.metamorphAfterSleepMillis,
} = options;
await sleep(customAfterSleepMillis);
}
Expand Down Expand Up @@ -873,7 +871,7 @@ export class Actor<Data extends Dictionary = Dictionary> {
return undefined;
}

const runId = this.config.get('actorRunId')!;
const runId = this.config.actorRunId!;
if (!runId) {
throw new Error(
`Environment variable ${ACTOR_ENV_VARS.RUN_ID} is not set!`,
Expand Down Expand Up @@ -924,7 +922,7 @@ export class Actor<Data extends Dictionary = Dictionary> {
break;
}

const client = this.config.getStorageClient();
const client = serviceLocator.getStorageClient();

// just to be sure, this should be fast
await addTimeoutToPromise(
Expand All @@ -937,7 +935,7 @@ export class Actor<Data extends Dictionary = Dictionary> {
'Setting status message timed out after 1s',
).catch((e) => log.warning(e.message));

const runId = this.config.get('actorRunId')!;
const runId = this.config.actorRunId!;

if (runId) {
// just to be sure, this should be fast
Expand Down Expand Up @@ -1213,13 +1211,9 @@ export class Actor<Data extends Dictionary = Dictionary> {
async getInput<T = Dictionary | string | Buffer>(): Promise<T | null> {
this._ensureActorInit('getInput');

const inputSecretsPrivateKeyFile = this.config.get(
'inputSecretsPrivateKeyFile',
);
const inputSecretsPrivateKeyPassphrase = this.config.get(
'inputSecretsPrivateKeyPassphrase',
);
const input = await this.getValue<T>(this.config.get('inputKey'));
const { inputSecretsPrivateKeyFile } = this.config;
const { inputSecretsPrivateKeyPassphrase } = this.config;
const input = await this.getValue<T>(this.config.inputKey);
if (
ow.isValid(input, ow.object.nonEmpty) &&
inputSecretsPrivateKeyFile &&
Expand Down Expand Up @@ -1319,7 +1313,7 @@ export class Actor<Data extends Dictionary = Dictionary> {

// eslint-disable-next-line dot-notation
queue['initialCount'] =
(await queue.client.get())?.totalRequestCount ?? 0;
(await queue.client.getMetadata())?.totalRequestCount ?? 0;

return queue;
}
Expand Down Expand Up @@ -1476,18 +1470,14 @@ export class Actor<Data extends Dictionary = Dictionary> {
* @ignore
*/
newClient(options: ApifyClientOptions = {}): ApifyClient {
const { storageDir, ...storageClientOptions } = this.config.get(
'storageClientOptions',
) as Dictionary;
const { apifyVersion, crawleeVersion } = getSystemInfo();
return new ApifyClient({
baseUrl: this.config.get('apiBaseUrl'),
token: this.config.get('token'),
baseUrl: this.config.apiBaseUrl,
token: this.config.token,
userAgentSuffix: [
`SDK/${apifyVersion}`,
`Crawlee/${crawleeVersion}`,
],
...storageClientOptions,
...options, // allow overriding the instance configuration
});
}
Expand Down Expand Up @@ -2245,13 +2235,10 @@ export class Actor<Data extends Dictionary = Dictionary> {
id?: string,
options: OpenStorageOptions = {},
) {
const client = options.forceCloud ? this.apifyClient : undefined;
return StorageManager.openStorage<T>(
storageClass,
id,
client,
this.config,
);
const client = options.forceCloud
? new ApifyStorageClient(this.apifyClient)
: undefined;
return StorageManager.openStorage<T>(storageClass, id, client);
}

private _ensureActorInit(methodCalled: string) {
Expand Down
68 changes: 68 additions & 0 deletions packages/apify/src/apify_storage_client.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import type {
CreateDatasetClientOptions,
CreateKeyValueStoreClientOptions,
CreateRequestQueueClientOptions,
DatasetClient,
KeyValueStoreClient,
RequestQueueClient,
StorageClient,
} from '@crawlee/types';
import type { ApifyClient } from 'apify-client';

/**
 * Adapter that exposes an `ApifyClient` through crawlee v4's `StorageClient`
 * interface.
 *
 * `apify-client` hands out resource clients synchronously by ID
 * (`dataset(id)`, `keyValueStore(id)`, `requestQueue(id, options?)`), while
 * `StorageClient` uses async factory methods that accept either an `id` or a
 * `name`. Each factory below first settles on a concrete ID and then
 * delegates to the matching synchronous accessor.
 *
 * ID resolution (shared by all three factories):
 * - an explicit `id` is used as-is;
 * - otherwise a non-empty `name` is resolved through the corresponding
 *   collection client's `getOrCreate(name)`;
 * - when neither is available, an empty string is passed to the accessor.
 */
export class ApifyStorageClient implements StorageClient {
    constructor(private readonly client: ApifyClient) {}

    /**
     * Returns the `apify-client` dataset client for the requested storage.
     *
     * @param options Optional `id` or `name` identifying the dataset.
     */
    async createDatasetClient(
        options?: CreateDatasetClientOptions,
    ): Promise<DatasetClient> {
        const datasetId = await this.resolveStorageId(options, async (name) =>
            this.client.datasets().getOrCreate(name),
        );
        // apify-client's resource clients overlap with the `@crawlee/types`
        // shapes but do not yet implement the members added in v4
        // (`getMetadata`, `getRecordPublicUrl`), hence the double cast.
        // A follow-up should bring apify-client into structural alignment.
        return this.client.dataset(datasetId) as unknown as DatasetClient;
    }

    /**
     * Returns the `apify-client` key-value store client for the requested storage.
     *
     * @param options Optional `id` or `name` identifying the key-value store.
     */
    async createKeyValueStoreClient(
        options?: CreateKeyValueStoreClientOptions,
    ): Promise<KeyValueStoreClient> {
        const storeId = await this.resolveStorageId(options, async (name) =>
            this.client.keyValueStores().getOrCreate(name),
        );
        return this.client.keyValueStore(
            storeId,
        ) as unknown as KeyValueStoreClient;
    }

    /**
     * Returns the `apify-client` request queue client for the requested storage.
     *
     * @param options Optional `id` or `name` identifying the queue; a truthy
     *   `clientKey` is forwarded to `requestQueue()` as `{ clientKey }`.
     */
    async createRequestQueueClient(
        options?: CreateRequestQueueClientOptions,
    ): Promise<RequestQueueClient> {
        const queueId = await this.resolveStorageId(options, async (name) =>
            this.client.requestQueues().getOrCreate(name),
        );
        // Only build the options object when a client key was actually supplied.
        const queueOptions = options?.clientKey
            ? { clientKey: options.clientKey }
            : undefined;
        return this.client.requestQueue(
            queueId,
            queueOptions,
        ) as unknown as RequestQueueClient;
    }

    /**
     * Picks the storage ID to hand to the `apify-client` accessor.
     *
     * @param options The factory options carrying an optional `id` / `name`.
     * @param getOrCreate Looks up (or creates) the storage by name; invoked
     *   only when no `id` was given and `name` is non-empty.
     * @returns The explicit `id`, the ID obtained via `getOrCreate`, or `''`.
     */
    private async resolveStorageId(
        options: { id?: string | null; name?: string | null } | undefined,
        getOrCreate: (name: string) => Promise<{ id: string }>,
    ): Promise<string> {
        // An explicit ID always wins (only `null`/`undefined` fall through).
        if (options?.id != null) {
            return options.id;
        }
        // No usable name either: fall back to the empty string.
        if (!options?.name) {
            return '';
        }
        const storage = await getOrCreate(options.name);
        return storage.id ?? '';
    }
}
13 changes: 6 additions & 7 deletions packages/apify/src/charging.ts
Original file line number Diff line number Diff line change
Expand Up @@ -87,20 +87,19 @@ export class ChargingManager {
private apifyClient: ApifyClient;

constructor(configuration: Configuration, apifyClient: ApifyClient) {
this.maxTotalChargeUsd =
configuration.get('maxTotalChargeUsd') || Infinity; // convert `0` to `Infinity` in case the value is an empty string
this.isAtHome = configuration.get('isAtHome');
this.actorRunId = configuration.get('actorRunId');
this.purgeChargingLogDataset = configuration.get('purgeOnStart');
this.useChargingLogDataset = configuration.get('useChargingLogDataset');
this.maxTotalChargeUsd = configuration.maxTotalChargeUsd || Infinity; // convert `0` to `Infinity` in case the value is an empty string
this.isAtHome = !!configuration.isAtHome;
this.actorRunId = configuration.actorRunId;
this.purgeChargingLogDataset = configuration.purgeOnStart;
this.useChargingLogDataset = configuration.useChargingLogDataset;

if (this.useChargingLogDataset && this.isAtHome) {
throw new Error(
'Using the ACTOR_USE_CHARGING_LOG_DATASET environment variable is only supported in a local development environment',
);
}

if (configuration.get('testPayPerEvent')) {
if (configuration.testPayPerEvent) {
if (this.isAtHome) {
throw new Error(
'Using the ACTOR_TEST_PAY_PER_EVENT environment variable is only supported in a local development environment',
Expand Down
Loading
Loading