Skip to content

test: Add Benchmark Tests for Performance-Critical Paths #74

@severity1

Description

@severity1

Summary

Add benchmark tests to measure and track performance of critical SDK operations. Currently the SDK has 0 benchmark tests despite having 20,834 lines of functional test code.

Current State

  • Test files: 20 files, 20,834 lines total
  • Benchmark tests: 0
  • Fuzz tests: 0 (separate issue)

The SDK has comprehensive functional testing but lacks performance baselines.

Proposed Benchmarks

1. JSON Parser Benchmarks (internal/parser/json_bench_test.go)

// BenchmarkParser_SimpleMessage measures ProcessLine on a minimal single-line
// user message, reusing one parser for every iteration.
func BenchmarkParser_SimpleMessage(b *testing.B) {
    parser := NewParser(MaxBufferSize)
    line := `{"type":"user","content":"hello world"}`

    // Report allocs/op even when -benchmem is not passed on the command line.
    b.ReportAllocs()
    b.ResetTimer() // keep parser construction out of the timed region
    for i := 0; i < b.N; i++ {
        // NOTE(review): both results are discarded. If ProcessLine rejects this
        // payload the loop times the failure path — confirm it parses cleanly.
        _, _ = parser.ProcessLine(line)
    }
}

// BenchmarkParser_ComplexMessage measures ProcessLine on an assistant message
// whose content array holds a text block and a tool_use block.
func BenchmarkParser_ComplexMessage(b *testing.B) {
    parser := NewParser(MaxBufferSize)
    // Assistant message with multiple content blocks
    line := `{"type":"assistant","content":[{"type":"text","text":"..."},{"type":"tool_use","id":"123","name":"read","input":{}}]}`

    // Report allocs/op even when -benchmem is not passed on the command line.
    b.ReportAllocs()
    b.ResetTimer() // keep parser construction out of the timed region
    for i := 0; i < b.N; i++ {
        // NOTE(review): both results are discarded. If ProcessLine rejects this
        // payload the loop times the failure path — confirm it parses cleanly.
        _, _ = parser.ProcessLine(line)
    }
}

// BenchmarkParser_LargePayload measures ProcessLine on a tool_result message
// carrying a 100KB content string, reporting allocations and throughput.
func BenchmarkParser_LargePayload(b *testing.B) {
    parser := NewParser(MaxBufferSize)
    // Simulate large tool result
    content := strings.Repeat("x", 100*1024) // 100KB
    line := fmt.Sprintf(`{"type":"tool_result","tool_use_id":"123","content":"%s"}`, content)

    b.ReportAllocs()
    // Bytes handled per iteration; lets the runner print MB/s so results stay
    // comparable if the payload size is changed later.
    b.SetBytes(int64(len(line)))
    b.ResetTimer() // keep payload construction out of the timed region
    for i := 0; i < b.N; i++ {
        // NOTE(review): both results are discarded. If ProcessLine rejects this
        // payload the loop times the failure path — confirm it parses cleanly.
        _, _ = parser.ProcessLine(line)
    }
}

// BenchmarkParser_StreamingChunks feeds one JSON object to ProcessLine in three
// fragments per iteration. Concatenated, the fragments form
// {"type":"user","content":"hello world"}.
func BenchmarkParser_StreamingChunks(b *testing.B) {
    parser := NewParser(MaxBufferSize)
    chunks := []string{
        `{"type":"user","content":"`,
        `hello world`,
        `"}`,
    }

    // Report allocs/op even when -benchmem is not passed on the command line.
    b.ReportAllocs()
    b.ResetTimer() // keep parser construction out of the timed region
    for i := 0; i < b.N; i++ {
        // The same parser is reused, so whatever state it keeps between calls
        // carries over from one iteration to the next.
        // NOTE(review): presumably the parser buffers partial input until the
        // object is complete — confirm, otherwise this times three rejections.
        for _, chunk := range chunks {
            _, _ = parser.ProcessLine(chunk)
        }
    }
}

2. Options Builder Benchmarks (options_bench_test.go)

// BenchmarkOptions_Minimal measures NewOptions called with no option arguments.
func BenchmarkOptions_Minimal(b *testing.B) {
    // Report allocs/op even when -benchmem is not passed on the command line.
    b.ReportAllocs()
    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        // Both results are intentionally discarded; only the call cost matters.
        _, _ = NewOptions()
    }
}

// BenchmarkOptions_FullyConfigured measures NewOptions with five options
// applied: model, system prompt, max thinking tokens, allowed tools, and
// permission mode.
func BenchmarkOptions_FullyConfigured(b *testing.B) {
    model := "claude-sonnet-4-5"
    prompt := "You are a helpful assistant"

    // Report allocs/op even when -benchmem is not passed on the command line.
    b.ReportAllocs()
    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        // The With* option values are rebuilt on every iteration, so their
        // construction cost is part of the measurement.
        _, _ = NewOptions(
            WithModel(model),
            WithSystemPrompt(prompt),
            WithMaxThinkingTokens(16000),
            WithAllowedTools("Read", "Write", "Bash"),
            WithPermissionMode(PermissionModeAuto),
        )
    }
}

// BenchmarkOptions_MCPServers measures NewOptions with a two-entry MCP server
// map supplied through WithMCPServers.
func BenchmarkOptions_MCPServers(b *testing.B) {
    // The map is built once and shared by every iteration.
    servers := map[string]MCPServerConfig{
        "aws": {Command: "aws-mcp", Args: []string{}},
        "db":  {Command: "db-mcp", Args: []string{}},
    }

    // Report allocs/op even when -benchmem is not passed on the command line.
    b.ReportAllocs()
    b.ResetTimer() // keep map construction out of the timed region
    for i := 0; i < b.N; i++ {
        _, _ = NewOptions(WithMCPServers(servers))
    }
}

3. Message Type Discrimination Benchmarks (internal/shared/message_bench_test.go)

// BenchmarkMessage_Unmarshal_User measures json.Unmarshal of a small user
// message into a fresh UserMessage on each iteration.
func BenchmarkMessage_Unmarshal_User(b *testing.B) {
    data := []byte(`{"type":"user","content":"test"}`)

    // Report allocs/op even when -benchmem is not passed on the command line.
    b.ReportAllocs()
    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        var msg UserMessage
        // Fail loudly instead of silently benchmarking a decode error.
        if err := json.Unmarshal(data, &msg); err != nil {
            b.Fatalf("unmarshal user message: %v", err)
        }
    }
}

// BenchmarkMessage_Unmarshal_Assistant measures json.Unmarshal of an assistant
// message with one text content block into a fresh AssistantMessage on each
// iteration.
func BenchmarkMessage_Unmarshal_Assistant(b *testing.B) {
    data := []byte(`{"type":"assistant","content":[{"type":"text","text":"hello"}]}`)

    // Report allocs/op even when -benchmem is not passed on the command line.
    b.ReportAllocs()
    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        var msg AssistantMessage
        // Fail loudly instead of silently benchmarking a decode error.
        if err := json.Unmarshal(data, &msg); err != nil {
            b.Fatalf("unmarshal assistant message: %v", err)
        }
    }
}

// BenchmarkMessage_Discrimination measures the cost of decoding a message into
// a generic map and reading its "type" field. Each iteration processes four
// fixtures (user, assistant, system, result), so the reported ns/op covers four
// decodes.
func BenchmarkMessage_Discrimination(b *testing.B) {
    messages := [][]byte{
        []byte(`{"type":"user","content":"test"}`),
        []byte(`{"type":"assistant","content":[]}`),
        []byte(`{"type":"system","subtype":"init"}`),
        []byte(`{"type":"result","subtype":"success"}`),
    }

    // Report allocs/op even when -benchmem is not passed on the command line.
    b.ReportAllocs()
    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        for _, data := range messages {
            var raw map[string]any
            // Fail loudly instead of silently benchmarking a decode error.
            if err := json.Unmarshal(data, &raw); err != nil {
                b.Fatalf("unmarshal %s: %v", data, err)
            }
            // Comma-ok form: a missing or non-string "type" becomes a clear
            // benchmark failure rather than a runtime panic.
            if _, ok := raw["type"].(string); !ok {
                b.Fatalf("message %s has no string \"type\" field", data)
            }
        }
    }
}

4. Control Protocol Benchmarks (internal/control/protocol_bench_test.go)

// BenchmarkProtocol_RequestIDGeneration measures generateRequestID called
// repeatedly on a single zero-value Protocol.
func BenchmarkProtocol_RequestIDGeneration(b *testing.B) {
    // NOTE(review): assumes a zero-value Protocol is enough for
    // generateRequestID — confirm no constructor-initialised state is needed.
    p := &Protocol{}

    // Report allocs/op even when -benchmem is not passed on the command line.
    b.ReportAllocs()
    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        _ = p.generateRequestID()
    }
}

// BenchmarkProtocol_MessageRouting measures routeMessage on a control_response
// message, using a Protocol prepared by setupProtocolForBenchmark.
func BenchmarkProtocol_MessageRouting(b *testing.B) {
    p := setupProtocolForBenchmark(b)
    // The same message map is passed to every call.
    msg := map[string]any{
        "type": "control_response",
        "response": map[string]any{
            "subtype":    "can_use_tool",
            "request_id": "req_1_abc123",
        },
    }

    // Report allocs/op even when -benchmem is not passed on the command line.
    b.ReportAllocs()
    b.ResetTimer() // keep protocol setup and map construction out of the timing
    for i := 0; i < b.N; i++ {
        // NOTE(review): the result is discarded. If routeMessage reports an
        // unknown request_id this times the miss path — confirm the setup
        // helper registers "req_1_abc123".
        _ = p.routeMessage(msg)
    }
}

5. CLI Command Building Benchmarks (internal/cli/discovery_bench_test.go)

// BenchmarkBuildCommand_Minimal measures BuildCommand with a zero-value
// shared.Options, the path "/usr/bin/claude", and false as the third argument.
func BenchmarkBuildCommand_Minimal(b *testing.B) {
    opts := &shared.Options{}

    // Report allocs/op even when -benchmem is not passed on the command line.
    b.ReportAllocs()
    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        _ = BuildCommand("/usr/bin/claude", opts, false)
    }
}

// BenchmarkBuildCommand_FullOptions measures BuildCommand with the options
// returned by createFullOptions, the path "/usr/bin/claude", and false as the
// third argument.
func BenchmarkBuildCommand_FullOptions(b *testing.B) {
    // Built once; every iteration passes the same options value.
    opts := createFullOptions()

    // Report allocs/op even when -benchmem is not passed on the command line.
    b.ReportAllocs()
    b.ResetTimer() // keep options construction out of the timed region
    for i := 0; i < b.N; i++ {
        _ = BuildCommand("/usr/bin/claude", opts, false)
    }
}

Running Benchmarks

# Run all benchmarks (-run='^$' matches no tests, so only benchmarks execute)
go test -run='^$' -bench=. -benchmem ./...

# Run specific package benchmarks
go test -run='^$' -bench=. -benchmem ./internal/parser/

# Run with memory profiling (writes mem.prof; inspect with `go tool pprof mem.prof`)
go test -run='^$' -bench=. -benchmem -memprofile=mem.prof ./internal/parser/

# Compare benchmark runs (old.txt is the same command saved from the baseline
# commit; repeated samples give benchstat enough data to judge significance)
go test -run='^$' -bench=. -benchmem -count=10 ./... | tee new.txt
benchstat old.txt new.txt

Acceptance Criteria

  • Add *_bench_test.go files for parser, options, message, protocol, CLI
  • Each benchmark uses b.ReportAllocs() for memory tracking
  • Benchmarks cover hot paths: JSON parsing, message discrimination, options building, control-protocol routing, and CLI command building
  • Document baseline results in PR description
  • Add benchmark run to CI (optional, for regression detection)

Priority

Low - Nice to have for performance optimization work, not blocking functionality.

Related

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions