-
Notifications
You must be signed in to change notification settings - Fork 7
Open
Description
Summary
Add benchmark tests to measure and track performance of critical SDK operations. Currently the SDK has 0 benchmark tests despite having 20,834 lines of functional test code.
Current State
- Test files: 20 files, 20,834 lines total
- Benchmark tests: 0
- Fuzz tests: 0 (separate issue)
The SDK has comprehensive functional testing but lacks performance baselines.
Proposed Benchmarks
1. JSON Parser Benchmarks (internal/parser/json_bench_test.go)
// BenchmarkParser_SimpleMessage measures ProcessLine throughput for a
// minimal single-line user message.
func BenchmarkParser_SimpleMessage(b *testing.B) {
	parser := NewParser(MaxBufferSize)
	line := `{"type":"user","content":"hello world"}`
	b.ReportAllocs() // acceptance criteria: every benchmark tracks allocations
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_, _ = parser.ProcessLine(line)
	}
}
// BenchmarkParser_ComplexMessage measures ProcessLine throughput for an
// assistant message carrying multiple content blocks (text + tool_use).
func BenchmarkParser_ComplexMessage(b *testing.B) {
	parser := NewParser(MaxBufferSize)
	line := `{"type":"assistant","content":[{"type":"text","text":"..."},{"type":"tool_use","id":"123","name":"read","input":{}}]}`
	b.ReportAllocs() // acceptance criteria: every benchmark tracks allocations
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_, _ = parser.ProcessLine(line)
	}
}
// BenchmarkParser_LargePayload exercises ProcessLine with a ~100KB
// tool_result payload to capture large-message allocation behavior.
func BenchmarkParser_LargePayload(b *testing.B) {
	parser := NewParser(MaxBufferSize)
	// Build a single tool_result line whose content field is 100KB of "x".
	payload := strings.Repeat("x", 100*1024)
	line := `{"type":"tool_result","tool_use_id":"123","content":"` + payload + `"}`
	b.ReportAllocs()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_, _ = parser.ProcessLine(line)
	}
}
func BenchmarkParser_StreamingChunks(b *testing.B) {
parser := NewParser(MaxBufferSize)
chunks := []string{
`{"type":"user","content":"`,
`hello world`,
`"}`,
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
for _, chunk := range chunks {
_, _ = parser.ProcessLine(chunk)
}
}
}2. Options Builder Benchmarks (options_bench_test.go)
// BenchmarkOptions_Minimal measures the cost of constructing Options with
// no functional options applied.
func BenchmarkOptions_Minimal(b *testing.B) {
	b.ReportAllocs() // acceptance criteria: every benchmark tracks allocations
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_, _ = NewOptions()
	}
}
// BenchmarkOptions_FullyConfigured measures Options construction with a
// representative full set of functional options applied.
func BenchmarkOptions_FullyConfigured(b *testing.B) {
	model := "claude-sonnet-4-5"
	prompt := "You are a helpful assistant"
	b.ReportAllocs() // acceptance criteria: every benchmark tracks allocations
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_, _ = NewOptions(
			WithModel(model),
			WithSystemPrompt(prompt),
			WithMaxThinkingTokens(16000),
			WithAllowedTools("Read", "Write", "Bash"),
			WithPermissionMode(PermissionModeAuto),
		)
	}
}
func BenchmarkOptions_MCPServers(b *testing.B) {
servers := map[string]MCPServerConfig{
"aws": {Command: "aws-mcp", Args: []string{}},
"db": {Command: "db-mcp", Args: []string{}},
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, _ = NewOptions(WithMCPServers(servers))
}
}3. Message Type Discrimination Benchmarks (internal/shared/message_bench_test.go)
// BenchmarkMessage_Unmarshal_User measures JSON decoding of a user message
// into its concrete struct type.
func BenchmarkMessage_Unmarshal_User(b *testing.B) {
	data := []byte(`{"type":"user","content":"test"}`)
	b.ReportAllocs() // acceptance criteria: every benchmark tracks allocations
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		var msg UserMessage
		_ = json.Unmarshal(data, &msg)
	}
}
// BenchmarkMessage_Unmarshal_Assistant measures JSON decoding of an
// assistant message (with a content-block array) into its struct type.
func BenchmarkMessage_Unmarshal_Assistant(b *testing.B) {
	data := []byte(`{"type":"assistant","content":[{"type":"text","text":"hello"}]}`)
	b.ReportAllocs() // acceptance criteria: every benchmark tracks allocations
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		var msg AssistantMessage
		_ = json.Unmarshal(data, &msg)
	}
}
func BenchmarkMessage_Discrimination(b *testing.B) {
messages := [][]byte{
[]byte(`{"type":"user","content":"test"}`),
[]byte(`{"type":"assistant","content":[]}`),
[]byte(`{"type":"system","subtype":"init"}`),
[]byte(`{"type":"result","subtype":"success"}`),
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
for _, data := range messages {
var raw map[string]any
_ = json.Unmarshal(data, &raw)
_ = raw["type"].(string)
}
}
}4. Control Protocol Benchmarks (internal/control/protocol_bench_test.go)
// BenchmarkProtocol_RequestIDGeneration measures the cost of generating a
// control-protocol request ID.
func BenchmarkProtocol_RequestIDGeneration(b *testing.B) {
	p := &Protocol{}
	b.ReportAllocs() // acceptance criteria: every benchmark tracks allocations
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_ = p.generateRequestID()
	}
}
func BenchmarkProtocol_MessageRouting(b *testing.B) {
p := setupProtocolForBenchmark(b)
msg := map[string]any{
"type": "control_response",
"response": map[string]any{
"subtype": "can_use_tool",
"request_id": "req_1_abc123",
},
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
_ = p.routeMessage(msg)
}
}5. CLI Command Building Benchmarks (internal/cli/discovery_bench_test.go)
// BenchmarkBuildCommand_Minimal measures CLI argv construction with
// zero-value options.
func BenchmarkBuildCommand_Minimal(b *testing.B) {
	opts := &shared.Options{}
	b.ReportAllocs() // acceptance criteria: every benchmark tracks allocations
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_ = BuildCommand("/usr/bin/claude", opts, false)
	}
}
func BenchmarkBuildCommand_FullOptions(b *testing.B) {
opts := createFullOptions()
b.ResetTimer()
for i := 0; i < b.N; i++ {
_ = BuildCommand("/usr/bin/claude", opts, false)
}
}Running Benchmarks
# Run all benchmarks
go test -bench=. -benchmem ./...
# Run specific package benchmarks
go test -bench=. -benchmem ./internal/parser/
# Run with memory profiling
go test -bench=. -benchmem -memprofile=mem.prof ./internal/parser/
# Compare benchmark runs
go test -bench=. -count=5 ./... | tee new.txt
benchstat old.txt new.txt
Acceptance Criteria
- Add *_bench_test.go files for parser, options, message, protocol, CLI
- Each benchmark uses b.ReportAllocs() for memory tracking
- Benchmarks cover hot paths: JSON parsing, message discrimination, options building
- Document baseline results in PR description
- Add benchmark run to CI (optional, for regression detection)
Priority
Low - Nice to have for performance optimization work, not blocking functionality.
Related
- Fuzz tests: Separate issue for parser fuzzing
- Go benchmarking: https://pkg.go.dev/testing#hdr-Benchmarks
Metadata
Metadata
Assignees
Labels
No labels