
Commit a2e2dde

Fix race condition in timeout handler causing activator crashes
Add synchronization for HTTP header map access during request timeouts. When a timeout occurs, capture a snapshot of the headers so that the inner handler cannot concurrently modify the header map after the timeout handler has returned.

Fixes #15850
1 parent 41fafd1 commit a2e2dde
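
For context, http.Header is a plain map[string][]string with no internal locking, so unsynchronized access from two goroutines can crash the whole process. The following standalone sketch (illustrative only, not code from this commit) reproduces the class of panic the activator was hitting:

package main

import (
	"net/http"
	"time"
)

func main() {
	h := make(http.Header) // http.Header is just map[string][]string

	// Stand-in for the inner handler, still mutating headers after
	// its request has timed out.
	go func() {
		for {
			h.Set("X-Test-Header", "value")
		}
	}()

	// Stand-in for the timeout path, touching the same map while
	// writing the 504 response.
	go func() {
		for {
			h.Set("Content-Type", "text/plain")
		}
	}()

	// Typically aborts with "fatal error: concurrent map writes"
	// almost immediately.
	time.Sleep(time.Second)
}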

File tree

pkg/http/handler/timeout.go
pkg/http/handler/timeout_test.go

2 files changed: +122 -1 lines changed
pkg/http/handler/timeout.go

Lines changed: 28 additions & 1 deletion

@@ -22,6 +22,7 @@ import (
 	"io"
 	"net"
 	"net/http"
+	"slices"
 	"sync"
 	"time"

@@ -169,6 +170,9 @@ type timeoutWriter struct {
 	mu            sync.Mutex
 	timedOut      bool
 	lastWriteTime time.Time
+	// headers is a snapshot of headers taken when timeout occurs
+	// to prevent concurrent map access
+	headers http.Header
 }
 
 var (

@@ -201,7 +205,23 @@ func (tw *timeoutWriter) Hijack() (net.Conn, *bufio.ReadWriter, error) {
 	return websocket.HijackIfPossible(tw.w)
 }
 
-func (tw *timeoutWriter) Header() http.Header { return tw.w.Header() }
+func (tw *timeoutWriter) Header() http.Header {
+	tw.mu.Lock()
+	timedOut := tw.timedOut
+	headers := tw.headers
+	tw.mu.Unlock()
+
+	if timedOut {
+		// Return the snapshot of headers taken at timeout to prevent
+		// concurrent modification of the header map
+		if headers == nil {
+			// If no headers were captured, return an empty map
+			return make(http.Header)
+		}
+		return headers
+	}
+	return tw.w.Header()
+}
 
 func (tw *timeoutWriter) Write(p []byte) (int, error) {
 	tw.mu.Lock()

@@ -279,6 +299,13 @@ func (tw *timeoutWriter) tryIdleTimeoutAndWriteError(curTime time.Time, idleTime
 }
 
 func (tw *timeoutWriter) timeoutAndWriteError(msg string) {
+	// Capture a snapshot of headers before marking as timed out
+	// to prevent concurrent access to the underlying header map
+	tw.headers = make(http.Header)
+	for k, v := range tw.w.Header() {
+		tw.headers[k] = slices.Clone(v)
+	}
+
 	tw.w.WriteHeader(http.StatusGatewayTimeout)
 	io.WriteString(tw.w, msg)

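A note on the snapshot loop above: copying the map alone would not be enough, because each http.Header value is a []string that the original map still references. The snapshot therefore clones every value slice. A minimal sketch of the difference (illustrative only, not code from this commit):

package main

import (
	"fmt"
	"net/http"
	"slices"
)

func main() {
	orig := http.Header{"X-A": {"1"}}

	shallow := make(http.Header, len(orig))
	deep := make(http.Header, len(orig))
	for k, v := range orig {
		shallow[k] = v            // still aliases orig's backing array
		deep[k] = slices.Clone(v) // independent copy, as in the fix
	}

	orig["X-A"][0] = "mutated"

	fmt.Println(shallow["X-A"][0]) // "mutated" -- shared storage
	fmt.Println(deep["X-A"][0])    // "1" -- isolated snapshot
}
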
pkg/http/handler/timeout_test.go

Lines changed: 94 additions & 0 deletions
@@ -22,6 +22,7 @@ import (
 	"net/http"
 	"net/http/httptest"
 	"sync"
+	"sync/atomic"
 	"testing"
 	"time"

@@ -626,6 +627,99 @@ func BenchmarkTimeoutHandler(b *testing.B) {
 	})
 }
 
+func TestTimeoutHandlerConcurrentHeaderAccess(t *testing.T) {
+	// This test verifies the fix for the race condition when requests time out.
+	// It simulates the scenario where the timeout handler completes while the
+	// inner handler is still trying to modify headers. The key is that this
+	// should not panic with a concurrent map access error.
+
+	var completedCount atomic.Int32
+	var panicCount int32
+	innerHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		// Simulate work that takes around the same time as the timeout
+		time.Sleep(55 * time.Millisecond)
+
+		// After potential context cancellation, try to access headers.
+		// This simulates what the error handler does.
+		if r.Context().Err() != nil {
+			// Try to modify headers - this should not cause a panic
+			// even if timeout has occurred
+			w.Header().Set("X-Test-Header", "value")
+			http.Error(w, "context canceled", http.StatusBadGateway)
+		} else {
+			// If no timeout, write normally
+			w.WriteHeader(http.StatusOK)
+		}
+		completedCount.Add(1)
+	})
+
+	timeoutHandler := NewTimeoutHandler(
+		innerHandler,
+		"timeout",
+		func(r *http.Request) (time.Duration, time.Duration, time.Duration) {
+			return 50 * time.Millisecond, 0, 0
+		},
+		zaptest.NewLogger(t).Sugar(),
+	)
+
+	// Run multiple concurrent requests to increase the chances of hitting the race
+	var wg sync.WaitGroup
+	var timeoutResponses atomic.Int32
+	var normalResponses atomic.Int32
+	for range 10 {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			defer func() {
+				if r := recover(); r != nil {
+					// Should not panic with concurrent map access
+					atomic.AddInt32(&panicCount, 1)
+					t.Errorf("Unexpected panic: %v", r)
+				}
+			}()
+
+			req, err := http.NewRequest(http.MethodGet, "/", nil)
+			if err != nil {
+				t.Error(err)
+				return
+			}
+
+			rec := httptest.NewRecorder()
+
+			// This should not panic with concurrent map access
+			timeoutHandler.ServeHTTP(rec, req)
+
+			// We may get either a timeout or a normal response depending on timing.
+			// The key is that we don't panic.
+			switch rec.Code {
+			case http.StatusGatewayTimeout:
+				timeoutResponses.Add(1)
+			case http.StatusOK:
+				normalResponses.Add(1)
+			default:
+				t.Errorf("Unexpected status code: %d", rec.Code)
+			}
+		}()
+	}
+
+	wg.Wait()
+
+	// Give a bit more time for any lingering goroutines to complete
+	time.Sleep(100 * time.Millisecond)
+
+	// Check that no panics occurred
+	if panicCount > 0 {
+		t.Errorf("Got %d panics, expected 0", panicCount)
+	}
+
+	// At least some requests should have timed out
+	if timeoutResponses.Load() == 0 {
+		t.Error("Expected at least some timeout responses")
+	}
+
+	t.Logf("Got %d timeout responses and %d normal responses", timeoutResponses.Load(), normalResponses.Load())
+}
+
 func StaticTimeoutFunc(timeout time.Duration, requestStart time.Duration, idle time.Duration) TimeoutFunc {
 	return func(req *http.Request) (time.Duration, time.Duration, time.Duration) {
 		return timeout, requestStart, idle

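Races of this kind are easiest to confirm under Go's race detector; something like go test -race -run TestTimeoutHandlerConcurrentHeaderAccess ./pkg/http/handler should exercise the new test with race checking enabled.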