Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions electron/gateway/startup-recovery.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,14 @@ const TRANSIENT_START_ERROR_PATTERNS: RegExp[] = [
/Port \d+ still occupied after \d+ms/i,
];

/**
 * Stderr signatures emitted when the gateway is already supervised by systemd.
 * In that situation it cannot be launched as a child process, so a startup
 * retry would only reproduce the same failure.
 */
const SYSTEMD_CONFLICT_PATTERNS: RegExp[] = [/already running under systemd/i];

/**
 * Normalizes a raw log line by stripping leading and trailing whitespace.
 *
 * @param value - The log line as received.
 * @returns The same text without surrounding whitespace.
 */
function normalizeLogLine(value: string): string {
  const trimmed = value.trim();
  return trimmed;
}
Expand Down Expand Up @@ -75,6 +83,24 @@ export function isTransientGatewayStartError(error: unknown): boolean {
return TRANSIENT_START_ERROR_PATTERNS.some((pattern) => pattern.test(errorText));
}

/**
 * Tells whether a piece of gateway stderr text reports that the gateway is
 * already supervised by systemd. ClawX cannot own the process in that case,
 * and retrying startup will only produce the same result.
 *
 * @param text - Stderr text to inspect; surrounding whitespace is ignored.
 * @returns `true` if the trimmed text matches a systemd-conflict pattern,
 *   `false` for blank input or when nothing matches.
 */
export function isSystemdConflictSignal(text: string): boolean {
  const candidate = text.trim();

  // Blank input carries no signal.
  if (candidate.length === 0) {
    return false;
  }

  for (const conflictPattern of SYSTEMD_CONFLICT_PATTERNS) {
    if (conflictPattern.test(candidate)) {
      return true;
    }
  }
  return false;
}

/**
 * Scans the stderr lines captured during startup for a systemd conflict.
 *
 * @param startupStderrLines - Stderr lines collected from the startup attempt.
 * @returns `true` as soon as one line signals a systemd conflict, otherwise
 *   `false` (including for an empty list).
 */
export function hasSystemdConflictSignal(startupStderrLines: string[]): boolean {
  const foundConflict = startupStderrLines.some((stderrLine) => isSystemdConflictSignal(stderrLine));
  return foundConflict;
}

export type GatewayStartupRecoveryAction = 'repair' | 'retry' | 'fail';

export function getGatewayStartupRecoveryAction(options: {
Expand All @@ -84,6 +110,13 @@ export function getGatewayStartupRecoveryAction(options: {
attempt: number;
maxAttempts: number;
}): GatewayStartupRecoveryAction {
// If the gateway reports it's already managed by systemd, retrying will not
// help. Fail immediately so the user gets a clear error state instead of
// a long retry loop.
if (hasSystemdConflictSignal(options.startupStderrLines)) {
return 'fail';
}

if (shouldAttemptConfigAutoRepair(
options.startupError,
options.startupStderrLines,
Expand Down
8 changes: 8 additions & 0 deletions electron/gateway/startup-stderr.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,14 @@ export function classifyGatewayStderrMessage(message: string): GatewayStderrClas
return { level: 'debug', normalized: msg };
}

// The gateway binary reports this when it detects a systemd supervisor.
// ClawX will fail fast rather than retry, so downgrade to debug to avoid
// flooding the log with repeated identical lines during the (brief) window
// before the process exits.
if (msg.includes('already running under systemd')) {
return { level: 'debug', normalized: msg };
Comment on lines +45 to +46

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Keep systemd conflict line visible in production logs

Downgrading `already running under systemd` to `debug` hides the only specific root-cause signal in packaged builds, because the logger defaults to `INFO` there and debug messages are suppressed. In the failure path, startup still surfaces a generic error (`Gateway process exited before becoming ready`), so this change makes real systemd conflicts much harder to diagnose from user logs/support bundles even though the app now fails fast.

Useful? React with 👍 / 👎.

}

return { level: 'warn', normalized: msg };
}

Expand Down
9 changes: 7 additions & 2 deletions electron/utils/channel-config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -795,8 +795,13 @@ export async function getChannelConfig(channelType: string, accountId?: string):
return accounts[resolvedAccountId];
}

// Backward compat: fall back to flat top-level config (legacy format without accounts)
if (!accounts || Object.keys(accounts).length === 0) {
// Backward compat: fall back to flat top-level config (legacy format without accounts).
// Only apply when no specific account ID is requested, or when the default account is
// requested, to avoid treating the legacy flat config as data for a newly-generated
// account ID. Without this guard, isSameConfigValues() falsely reports "no change"
// when a user adds a second account with the same credentials as the existing one
// stored in legacy flat format, silently skipping the save (fixes #831).
if ((!accounts || Object.keys(accounts).length === 0) && (!accountId || resolvedAccountId === DEFAULT_ACCOUNT_ID)) {
return channelSection;
}

Expand Down
58 changes: 58 additions & 0 deletions tests/unit/gateway-startup-recovery.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
hasInvalidConfigFailureSignal,
isInvalidConfigSignal,
shouldAttemptConfigAutoRepair,
isSystemdConflictSignal,
hasSystemdConflictSignal,
getGatewayStartupRecoveryAction,
} from '@electron/gateway/startup-recovery';

Check failure on line 9 in tests/unit/gateway-startup-recovery.test.ts

View workflow job for this annotation

GitHub Actions / check

'getGatewayStartupRecoveryAction' is already defined

describe('gateway startup recovery heuristics', () => {
it('detects invalid-config signal from stderr lines', () => {
Expand Down Expand Up @@ -48,5 +51,60 @@
expect(isInvalidConfigSignal('Run: openclaw doctor --fix')).toBe(true);
expect(isInvalidConfigSignal('Gateway ready after 3 attempts')).toBe(false);
});

describe('systemd conflict detection', () => {
  // Resolves the recovery action for a first attempt (of three) that failed
  // with the generic "exited before becoming ready" error and the given
  // stderr lines, with no config repair attempted yet.
  const resolveAction = (startupStderrLines: string[]) =>
    getGatewayStartupRecoveryAction({
      startupError: new Error('Gateway process exited before becoming ready (code=1)'),
      startupStderrLines,
      configRepairAttempted: false,
      attempt: 1,
      maxAttempts: 3,
    });

  it('detects already-running-under-systemd signal', () => {
    // Timestamped gateway output, the bare phrase, and an upper-case variant.
    const conflictSamples = [
      '2026-03-27T13:08:36.125+11:00 [gateway] already running under systemd; waiting 5000ms before retrying startup',
      'already running under systemd',
      'ALREADY RUNNING UNDER SYSTEMD',
    ];
    for (const sample of conflictSamples) {
      expect(isSystemdConflictSignal(sample)).toBe(true);
    }
  });

  it('does not false-positive on unrelated messages', () => {
    const unrelatedSamples = [
      'Gateway process exited (code=1)',
      'WebSocket closed before handshake',
      '',
    ];
    for (const sample of unrelatedSamples) {
      expect(isSystemdConflictSignal(sample)).toBe(false);
    }
  });

  it('hasSystemdConflictSignal returns true when any line matches', () => {
    const detected = hasSystemdConflictSignal([
      'Starting gateway...',
      '[gateway] already running under systemd; waiting 5000ms before retrying startup',
      'Retrying...',
    ]);
    expect(detected).toBe(true);
  });

  it('hasSystemdConflictSignal returns false when no lines match', () => {
    const detected = hasSystemdConflictSignal(['Gateway ready', 'Listening on port 18789']);
    expect(detected).toBe(false);
  });

  it('getGatewayStartupRecoveryAction returns fail immediately on systemd conflict', () => {
    // The conflict must win even on the first attempt, and even though the
    // startup error would normally be classified as transient.
    const outcome = resolveAction([
      '[gateway] already running under systemd; waiting 5000ms before retrying startup',
    ]);
    expect(outcome).toBe('fail');
  });

  it('getGatewayStartupRecoveryAction still retries transient errors without systemd signal', () => {
    const outcome = resolveAction([]);
    expect(outcome).toBe('retry');
  });
});
});

Loading