Skip to content

Commit 64c606f

Browse files
stephentoub and Copilot authored
De-flake builtin_tools E2E tests with a longer send timeout (#1538)
The builtin_tools E2E test `should_capture_exit_code_in_output` flaked once on the Windows Rust CI job: `send_and_wait` hit its 60s default and the test panicked. The re-run passed. The failure clustered in a ~90s window where multiple unrelated tests were simultaneously slow, which points to transient Windows-runner contention (e2e runs 4-wide on a 4-vCPU runner, each test spawning a CLI + replay proxy + a real shell) rather than a deadlock or a genuinely slow command. Raise the per-send wait timeout for the builtin_tools E2E suites from 60s to 120s across all five SDKs (Rust, Python, Go, Node, .NET) so transient slowness no longer turns into a hard failure, while still failing fast on a real hang. Assertions, snapshots, Windows skips, and CI concurrency are unchanged. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 84a0106 commit 64c606f

5 files changed

Lines changed: 224 additions & 99 deletions

File tree

dotnet/test/E2E/BuiltinToolsE2ETests.cs

Lines changed: 14 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -16,14 +16,20 @@ namespace GitHub.Copilot.Test.E2E;
1616
public class BuiltinToolsE2ETests(E2ETestFixture fixture, ITestOutputHelper output)
1717
: E2ETestBase(fixture, "builtin_tools", output)
1818
{
19+
// Built-in tool tests spawn a real CLI subprocess and execute actual shell /
20+
// file tools. Under slow/concurrent CI (notably Windows) this agent loop can
21+
// briefly exceed the 60s SendAndWaitAsync default, so give it extra headroom
22+
// while still failing fast on a genuine hang.
23+
private static readonly TimeSpan SendTimeout = TimeSpan.FromSeconds(120);
24+
1925
[Fact]
2026
public async Task Should_Capture_Exit_Code_In_Output()
2127
{
2228
var session = await CreateSessionAsync();
2329
var msg = await session.SendAndWaitAsync(new MessageOptions
2430
{
2531
Prompt = "Run 'echo hello && echo world'. Tell me the exact output.",
26-
});
32+
}, SendTimeout);
2733
var content = msg?.Data.Content ?? string.Empty;
2834
Assert.Contains("hello", content);
2935
Assert.Contains("world", content);
@@ -44,7 +50,7 @@ public async Task Should_Capture_Stderr_Output()
4450
var msg = await session.SendAndWaitAsync(new MessageOptions
4551
{
4652
Prompt = "Run 'echo error_msg >&2; echo ok' and tell me what stderr said. Reply with just the stderr content.",
47-
});
53+
}, SendTimeout);
4854
Assert.Contains("error_msg", msg?.Data.Content ?? string.Empty);
4955
}
5056

@@ -56,7 +62,7 @@ public async Task Should_Read_File_With_Line_Range()
5662
var msg = await session.SendAndWaitAsync(new MessageOptions
5763
{
5864
Prompt = "Read lines 2 through 4 of the file 'lines.txt' in this directory. Tell me what those lines contain.",
59-
});
65+
}, SendTimeout);
6066
var content = msg?.Data.Content ?? string.Empty;
6167
Assert.Contains("line2", content);
6268
Assert.Contains("line4", content);
@@ -69,7 +75,7 @@ public async Task Should_Handle_Nonexistent_File_Gracefully()
6975
var msg = await session.SendAndWaitAsync(new MessageOptions
7076
{
7177
Prompt = "Try to read the file 'does_not_exist.txt'. If it doesn't exist, say 'FILE_NOT_FOUND'.",
72-
});
78+
}, SendTimeout);
7379
var content = (msg?.Data.Content ?? string.Empty).ToUpperInvariant();
7480
// Match any of the common phrasings for a missing-file response.
7581
Assert.True(
@@ -90,7 +96,7 @@ public async Task Should_Edit_A_File_Successfully()
9096
var msg = await session.SendAndWaitAsync(new MessageOptions
9197
{
9298
Prompt = "Edit the file 'edit_me.txt': replace 'Hello World' with 'Hi Universe'. Then read it back and tell me its contents.",
93-
});
99+
}, SendTimeout);
94100
Assert.Contains("Hi Universe", msg?.Data.Content ?? string.Empty);
95101
}
96102

@@ -101,7 +107,7 @@ public async Task Should_Create_A_New_File()
101107
var msg = await session.SendAndWaitAsync(new MessageOptions
102108
{
103109
Prompt = "Create a file called 'new_file.txt' with the content 'Created by test'. Then read it back to confirm.",
104-
});
110+
}, SendTimeout);
105111
Assert.Contains("Created by test", msg?.Data.Content ?? string.Empty);
106112
}
107113

@@ -113,7 +119,7 @@ public async Task Should_Search_For_Patterns_In_Files()
113119
var msg = await session.SendAndWaitAsync(new MessageOptions
114120
{
115121
Prompt = "Search for lines starting with 'ap' in the file 'data.txt'. Tell me which lines matched.",
116-
});
122+
}, SendTimeout);
117123
var content = msg?.Data.Content ?? string.Empty;
118124
Assert.Contains("apple", content);
119125
Assert.Contains("apricot", content);
@@ -130,7 +136,7 @@ public async Task Should_Find_Files_By_Pattern()
130136
var msg = await session.SendAndWaitAsync(new MessageOptions
131137
{
132138
Prompt = "Find all .ts files in this directory (recursively). List the filenames you found.",
133-
});
139+
}, SendTimeout);
134140
Assert.Contains("index.ts", msg?.Data.Content ?? string.Empty);
135141
}
136142
}

go/internal/e2e/builtin_tools_e2e_test.go

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,24 @@
11
package e2e
22

33
import (
4+
"context"
45
"os"
56
"path/filepath"
67
"runtime"
78
"strings"
89
"testing"
10+
"time"
911

1012
copilot "github.com/github/copilot-sdk/go"
1113
"github.com/github/copilot-sdk/go/internal/e2e/testharness"
1214
)
1315

16+
// Built-in tool tests spawn a real CLI subprocess and execute actual shell /
17+
// file tools. Under slow/concurrent CI (notably Windows) this agent loop can
18+
// briefly exceed the 60s SendAndWait default, so give it extra headroom while
19+
// still failing fast on a genuine hang.
20+
const sendTimeout = 120 * time.Second
21+
1422
func TestBuiltinToolsE2E(t *testing.T) {
1523
ctx := testharness.NewTestContext(t)
1624
client := ctx.NewClient()
@@ -27,7 +35,9 @@ func TestBuiltinToolsE2E(t *testing.T) {
2735
}
2836
t.Cleanup(func() { _ = session.Disconnect() })
2937

30-
msg, err := session.SendAndWait(t.Context(), copilot.MessageOptions{
38+
sendCtx, cancel := context.WithTimeout(t.Context(), sendTimeout)
39+
defer cancel()
40+
msg, err := session.SendAndWait(sendCtx, copilot.MessageOptions{
3141
Prompt: "Run 'echo hello && echo world'. Tell me the exact output.",
3242
})
3343
if err != nil {
@@ -55,7 +65,9 @@ func TestBuiltinToolsE2E(t *testing.T) {
5565
}
5666
t.Cleanup(func() { _ = session.Disconnect() })
5767

58-
msg, err := session.SendAndWait(t.Context(), copilot.MessageOptions{
68+
sendCtx, cancel := context.WithTimeout(t.Context(), sendTimeout)
69+
defer cancel()
70+
msg, err := session.SendAndWait(sendCtx, copilot.MessageOptions{
5971
Prompt: "Run 'echo error_msg >&2; echo ok' and tell me what stderr said. Reply with just the stderr content.",
6072
})
6173
if err != nil {
@@ -82,7 +94,9 @@ func TestBuiltinToolsE2E(t *testing.T) {
8294
}
8395
t.Cleanup(func() { _ = session.Disconnect() })
8496

85-
msg, err := session.SendAndWait(t.Context(), copilot.MessageOptions{
97+
sendCtx, cancel := context.WithTimeout(t.Context(), sendTimeout)
98+
defer cancel()
99+
msg, err := session.SendAndWait(sendCtx, copilot.MessageOptions{
86100
Prompt: "Read lines 2 through 4 of the file 'lines.txt' in this directory. Tell me what those lines contain.",
87101
})
88102
if err != nil {
@@ -106,7 +120,9 @@ func TestBuiltinToolsE2E(t *testing.T) {
106120
}
107121
t.Cleanup(func() { _ = session.Disconnect() })
108122

109-
msg, err := session.SendAndWait(t.Context(), copilot.MessageOptions{
123+
sendCtx, cancel := context.WithTimeout(t.Context(), sendTimeout)
124+
defer cancel()
125+
msg, err := session.SendAndWait(sendCtx, copilot.MessageOptions{
110126
Prompt: "Try to read the file 'does_not_exist.txt'. If it doesn't exist, say 'FILE_NOT_FOUND'.",
111127
})
112128
if err != nil {
@@ -139,7 +155,9 @@ func TestBuiltinToolsE2E(t *testing.T) {
139155
}
140156
t.Cleanup(func() { _ = session.Disconnect() })
141157

142-
msg, err := session.SendAndWait(t.Context(), copilot.MessageOptions{
158+
sendCtx, cancel := context.WithTimeout(t.Context(), sendTimeout)
159+
defer cancel()
160+
msg, err := session.SendAndWait(sendCtx, copilot.MessageOptions{
143161
Prompt: "Edit the file 'edit_me.txt': replace 'Hello World' with 'Hi Universe'. Then read it back and tell me its contents.",
144162
})
145163
if err != nil {
@@ -162,7 +180,9 @@ func TestBuiltinToolsE2E(t *testing.T) {
162180
}
163181
t.Cleanup(func() { _ = session.Disconnect() })
164182

165-
msg, err := session.SendAndWait(t.Context(), copilot.MessageOptions{
183+
sendCtx, cancel := context.WithTimeout(t.Context(), sendTimeout)
184+
defer cancel()
185+
msg, err := session.SendAndWait(sendCtx, copilot.MessageOptions{
166186
Prompt: "Create a file called 'new_file.txt' with the content 'Created by test'. Then read it back to confirm.",
167187
})
168188
if err != nil {
@@ -189,7 +209,9 @@ func TestBuiltinToolsE2E(t *testing.T) {
189209
}
190210
t.Cleanup(func() { _ = session.Disconnect() })
191211

192-
msg, err := session.SendAndWait(t.Context(), copilot.MessageOptions{
212+
sendCtx, cancel := context.WithTimeout(t.Context(), sendTimeout)
213+
defer cancel()
214+
msg, err := session.SendAndWait(sendCtx, copilot.MessageOptions{
193215
Prompt: "Search for lines starting with 'ap' in the file 'data.txt'. Tell me which lines matched.",
194216
})
195217
if err != nil {
@@ -223,7 +245,9 @@ func TestBuiltinToolsE2E(t *testing.T) {
223245
}
224246
t.Cleanup(func() { _ = session.Disconnect() })
225247

226-
msg, err := session.SendAndWait(t.Context(), copilot.MessageOptions{
248+
sendCtx, cancel := context.WithTimeout(t.Context(), sendTimeout)
249+
defer cancel()
250+
msg, err := session.SendAndWait(sendCtx, copilot.MessageOptions{
227251
Prompt: "Find all .ts files in this directory (recursively). List the filenames you found.",
228252
})
229253
if err != nil {

0 commit comments

Comments (0)