Skip to content

Commit 991bc8f

Browse files
committed
fix(client): prevent race conditions when starting multiple sessions concurrently
1 parent 4246289 commit 991bc8f

File tree

4 files changed

+63
-52
lines changed

4 files changed

+63
-52
lines changed

nodejs/src/client.ts

Lines changed: 28 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,8 @@ export class CopilotClient {
232232
* Parse CLI URL into host and port
233233
* Supports formats: "host:port", "http://host:port", "https://host:port", or just "port"
234234
*/
235+
private startPromise: Promise<void> | null = null;
236+
235237
private parseCliUrl(url: string): { host: string; port: number } {
236238
// Remove protocol if present
237239
let cleanUrl = url.replace(/^https?:\/\//, "");
@@ -282,25 +284,34 @@ export class CopilotClient {
282284
return;
283285
}
284286

285-
this.state = "connecting";
287+
if (this.startPromise) {
288+
return this.startPromise;
289+
}
286290

287-
try {
288-
// Only start CLI server process if not connecting to external server
289-
if (!this.isExternalServer) {
290-
await this.startCLIServer();
291-
}
291+
this.startPromise = (async () => {
292+
this.state = "connecting";
293+
294+
try {
295+
// Only start CLI server process if not connecting to external server
296+
if (!this.isExternalServer) {
297+
await this.startCLIServer();
298+
}
292299

293-
// Connect to the server
294-
await this.connectToServer();
300+
// Connect to the server
301+
await this.connectToServer();
295302

296-
// Verify protocol version compatibility
297-
await this.verifyProtocolVersion();
303+
// Verify protocol version compatibility
304+
await this.verifyProtocolVersion();
298305

299-
this.state = "connected";
300-
} catch (error) {
301-
this.state = "error";
302-
throw error;
303-
}
306+
this.state = "connected";
307+
} catch (error) {
308+
this.state = "error";
309+
this.startPromise = null;
310+
throw error;
311+
}
312+
})();
313+
314+
return this.startPromise;
304315
}
305316

306317
/**
@@ -403,6 +414,7 @@ export class CopilotClient {
403414
}
404415

405416
this.state = "disconnected";
417+
this.startPromise = null;
406418
this.actualPort = null;
407419
this.stderrBuffer = "";
408420
this.processExitPromise = null;
@@ -475,6 +487,7 @@ export class CopilotClient {
475487
}
476488

477489
this.state = "disconnected";
490+
this.startPromise = null;
478491
this.actualPort = null;
479492
this.stderrBuffer = "";
480493
this.processExitPromise = null;

nodejs/test/e2e/session.test.ts

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -130,11 +130,7 @@ describe("Sessions", async () => {
130130
expect(functionNames).not.toContain("view");
131131
});
132132

133-
// TODO: This test shows there's a race condition inside client.ts. If createSession is called
134-
// concurrently and autoStart is on, it may start multiple child processes. This needs to be fixed.
135-
// Right now it manifests as being unable to delete the temp directories during afterAll even though
136-
// we stopped all the clients (one or more child processes were left orphaned).
137-
it.skip("should handle multiple concurrent sessions", async () => {
133+
it("should handle multiple concurrent sessions", async () => {
138134
const [s1, s2, s3] = await Promise.all([
139135
client.createSession({ onPermissionRequest: approveAll }),
140136
client.createSession({ onPermissionRequest: approveAll }),

python/copilot/client.py

Lines changed: 33 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ def __init__(self, options: CopilotClientOptions | None = None):
201201
self._process: subprocess.Popen | None = None
202202
self._client: JsonRpcClient | None = None
203203
self._state: ConnectionState = "disconnected"
204+
self._start_lock = asyncio.Lock()
204205
self._sessions: dict[str, CopilotSession] = {}
205206
self._sessions_lock = threading.Lock()
206207
self._models_cache: list[ModelInfo] | None = None
@@ -281,39 +282,40 @@ async def start(self) -> None:
281282
>>> await client.start()
282283
>>> # Now ready to create sessions
283284
"""
284-
if self._state == "connected":
285-
return
285+
async with self._start_lock:
286+
if self._state == "connected":
287+
return
286288

287-
self._state = "connecting"
289+
self._state = "connecting"
288290

289-
try:
290-
# Only start CLI server process if not connecting to external server
291-
if not self._is_external_server:
292-
await self._start_cli_server()
293-
294-
# Connect to the server
295-
await self._connect_to_server()
296-
297-
# Verify protocol version compatibility
298-
await self._verify_protocol_version()
299-
300-
self._state = "connected"
301-
except ProcessExitedError as e:
302-
# Process exited with error - reraise as RuntimeError with stderr
303-
self._state = "error"
304-
raise RuntimeError(str(e)) from None
305-
except Exception as e:
306-
self._state = "error"
307-
# Check if process exited and capture any remaining stderr
308-
if self._process and hasattr(self._process, "poll"):
309-
return_code = self._process.poll()
310-
if return_code is not None and self._client:
311-
stderr_output = self._client.get_stderr_output()
312-
if stderr_output:
313-
raise RuntimeError(
314-
f"CLI process exited with code {return_code}\nstderr: {stderr_output}"
315-
) from e
316-
raise
291+
try:
292+
# Only start CLI server process if not connecting to external server
293+
if not self._is_external_server:
294+
await self._start_cli_server()
295+
296+
# Connect to the server
297+
await self._connect_to_server()
298+
299+
# Verify protocol version compatibility
300+
await self._verify_protocol_version()
301+
302+
self._state = "connected"
303+
except ProcessExitedError as e:
304+
# Process exited with error - reraise as RuntimeError with stderr
305+
self._state = "error"
306+
raise RuntimeError(str(e)) from None
307+
except Exception as e:
308+
self._state = "error"
309+
# Check if process exited and capture any remaining stderr
310+
if self._process and hasattr(self._process, "poll"):
311+
return_code = self._process.poll()
312+
if return_code is not None and self._client:
313+
stderr_output = self._client.get_stderr_output()
314+
if stderr_output:
315+
raise RuntimeError(
316+
f"CLI process exited with code {return_code}\nstderr: {stderr_output}"
317+
) from e
318+
raise
317319

318320
async def stop(self) -> None:
319321
"""

python/e2e/test_session.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ async def test_should_create_a_session_with_excludedTools(self, ctx: E2ETestCont
127127
# is called concurrently and autoStart is on, it may start multiple child processes.
128128
# This needs to be fixed. Right now it manifests as being unable to delete the temp
129129
# directories during afterAll even though we stopped all the clients.
130-
@pytest.mark.skip(reason="Known race condition - see TypeScript test")
130+
# @pytest.mark.skip
131131
async def test_should_handle_multiple_concurrent_sessions(self, ctx: E2ETestContext):
132132
import asyncio
133133

0 commit comments

Comments
 (0)