Widen the GitHub token regexes to an open-ended length ({36,}) so the new
~520-char stateless token format is masked by the log sanitizer and matched in
full by the security token patterns. Adds regression tests for both packages.

diff --git a/internal/logs/sanitizer.go b/internal/logs/sanitizer.go
index 282b511cc..f450866e8 100644
--- a/internal/logs/sanitizer.go
+++ b/internal/logs/sanitizer.go
@@ -39,9 +39,11 @@ func NewSecretSanitizer(core zapcore.Core) *SecretSanitizer {
 // registerDefaultPatterns registers patterns for common secret formats
 func (s *SecretSanitizer) registerDefaultPatterns() {
 	// GitHub tokens (ghp_, gho_, ghu_, ghs_, ghr_)
+	// Open-ended length ({36,}): the new stateless token format can be ~520 chars.
+	// A capped count never matches those: the closing \b cannot fall inside an alphanumeric run.
 	s.patterns = append(s.patterns, &secretPattern{
 		name:  "github_token",
-		regex: regexp.MustCompile(`\b(gh[poushr]_[A-Za-z0-9]{36,255})\b`),
+		regex: regexp.MustCompile(`\b(gh[poushr]_[A-Za-z0-9]{36,})\b`),
 		maskFunc: func(token string) string {
 			if len(token) <= 7 {
 				return "****"
diff --git a/internal/logs/sanitizer_test.go b/internal/logs/sanitizer_test.go
new file mode 100644
index 000000000..15db58ee3
--- /dev/null
+++ b/internal/logs/sanitizer_test.go
@@ -0,0 +1,50 @@
+package logs
+
+import (
+	"strings"
+	"sync"
+	"testing"
+)
+
+// newTestSanitizer builds a SecretSanitizer with the default patterns registered
+// but no wrapped core, suitable for exercising sanitizeString directly.
+func newTestSanitizer() *SecretSanitizer {
+	s := &SecretSanitizer{resolvedCache: &sync.Map{}}
+	s.registerDefaultPatterns()
+	return s
+}
+
+func TestSanitizer_GitHubTokens(t *testing.T) {
+	s := newTestSanitizer()
+
+	tests := []struct {
+		name  string
+		token string
+	}{
+		{"classic ghp_ (40 chars)", "ghp_1234567890abcdefghijABCDEFGHIJ123456"},
+		{"installation ghs_ (40 chars)", "ghs_1234567890abcdefghijABCDEFGHIJ123456"},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			out := s.sanitizeString("token=" + tt.token)
+			if strings.Contains(out, tt.token) {
+				t.Fatalf("token leaked unmasked: %q", out)
+			}
+		})
+	}
+}
+
+// TestSanitizer_LongStatelessGitHubToken verifies the new ~520-char stateless
+// GitHub token format is masked. The previous {36,255} upper bound left these
+// tokens unmasked because the alphanumeric run had no \b boundary within range.
+func TestSanitizer_LongStatelessGitHubToken(t *testing.T) {
+	s := newTestSanitizer()
+
+	const tail = 516 // total length 520 incl. "ghs_" prefix
+	token := "ghs_" + strings.Repeat("aB3", (tail/3)+1)[:tail]
+
+	out := s.sanitizeString("Authorization context token=" + token)
+	if strings.Contains(out, token) {
+		t.Fatalf("long stateless token leaked unmasked (len %d)", len(token))
+	}
+}
diff --git a/internal/security/patterns/tokens.go b/internal/security/patterns/tokens.go
index 995adb856..dc8cfcaad 100644
--- a/internal/security/patterns/tokens.go
+++ b/internal/security/patterns/tokens.go
@@ -42,8 +42,9 @@ func GetTokenPatterns() []*Pattern {
 func githubPATPattern() *Pattern {
 	// ghp_ = classic PAT, github_pat_ = fine-grained PAT
 	// Fine-grained format: github_pat__ (variable lengths)
+	// ghp_ length is open-ended ({36,}): GitHub's new stateless token format can be ~520 chars.
 	return NewPattern("github_pat").
-		WithRegex(`(?:ghp_[a-zA-Z0-9]{36}|github_pat_[a-zA-Z0-9]+_[a-zA-Z0-9]{30,})`).
+		WithRegex(`(?:ghp_[a-zA-Z0-9]{36,}|github_pat_[a-zA-Z0-9]+_[a-zA-Z0-9]{30,})`).
 		WithCategory(CategoryAPIToken).
 		WithSeverity(SeverityCritical).
 		WithDescription("GitHub Personal Access Token").
@@ -53,7 +54,7 @@ func githubPATPattern() *Pattern {
 // GitHub OAuth Token
 func githubOAuthPattern() *Pattern {
 	return NewPattern("github_oauth").
-		WithRegex(`gho_[a-zA-Z0-9]{36}`).
+		WithRegex(`gho_[a-zA-Z0-9]{36,}`).
 		WithCategory(CategoryAPIToken).
 		WithSeverity(SeverityHigh).
 		WithDescription("GitHub OAuth access token").
@@ -63,7 +64,7 @@ func githubOAuthPattern() *Pattern {
 // GitHub App Installation Token
 func githubAppPattern() *Pattern {
 	return NewPattern("github_app").
-		WithRegex(`ghs_[a-zA-Z0-9]{36}`).
+		WithRegex(`ghs_[a-zA-Z0-9]{36,}`).
 		WithCategory(CategoryAPIToken).
 		WithSeverity(SeverityHigh).
 		WithDescription("GitHub App installation access token").
@@ -73,7 +74,7 @@ func githubAppPattern() *Pattern {
 // GitHub App Refresh Token
 func githubRefreshPattern() *Pattern {
 	return NewPattern("github_refresh").
-		WithRegex(`ghr_[a-zA-Z0-9]{36}`).
+		WithRegex(`ghr_[a-zA-Z0-9]{36,}`).
 		WithCategory(CategoryAPIToken).
 		WithSeverity(SeverityHigh).
 		WithDescription("GitHub App refresh token").
diff --git a/internal/security/patterns/tokens_test.go b/internal/security/patterns/tokens_test.go
index eaf0f6e75..2f4f6206a 100644
--- a/internal/security/patterns/tokens_test.go
+++ b/internal/security/patterns/tokens_test.go
@@ -84,6 +84,42 @@ func TestGitHubTokenPatterns(t *testing.T) {
 	}
 }
 
+// TestGitHubTokenPatterns_LongStatelessFormat verifies the new long stateless
+// GitHub token format (~520 chars, e.g. ghs_ App installation tokens) is matched
+// in full. A fixed {36} length truncates the match and leaks the token tail.
+func TestGitHubTokenPatterns_LongStatelessFormat(t *testing.T) {
+	const tail = 516 // total length 520 incl. "ghs_"/"ghp_"/etc. prefix
+	body := strings.Repeat("aB3", (tail/3)+1)[:tail]
+
+	tests := []struct {
+		name        string
+		input       string
+		patternName string
+	}{
+		{"ghs_ stateless installation token", "ghs_" + body, "github_app"},
+		{"ghp_ long PAT", "ghp_" + body, "github_pat"},
+		{"gho_ long OAuth token", "gho_" + body, "github_oauth"},
+		{"ghr_ long refresh token", "ghr_" + body, "github_refresh"},
+	}
+
+	patterns := GetTokenPatterns()
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			pattern := findPatternByName(patterns, tt.patternName)
+			if pattern == nil {
+				t.Fatalf("%s pattern not found", tt.patternName)
+			}
+			matches := pattern.Match(tt.input)
+			assert.NotEmpty(t, matches, "expected match for long token: %s", tt.input)
+			if len(matches) > 0 {
+				assert.Equal(t, tt.input, matches[0],
+					"expected full token captured, got truncated match (len %d of %d)",
+					len(matches[0]), len(tt.input))
+			}
+		})
+	}
+}
+
 // Test GitLab Token patterns
 func TestGitLabTokenPatterns(t *testing.T) {
 	tests := []struct {