From ba9e7fbf681b81c59064303d5977e9cfa4bfd231 Mon Sep 17 00:00:00 2001 From: sheeki003 <36009418+sheeki03@users.noreply.github.com> Date: Tue, 24 Mar 2026 16:18:15 +0530 Subject: [PATCH 1/2] feat: add credential leak detection (known patterns + entropy) --- crates/tirith-core/Cargo.toml | 5 + .../assets/data/credential_patterns.toml | 131 ++++ crates/tirith-core/build.rs | 92 +++ crates/tirith-core/src/engine.rs | 7 + crates/tirith-core/src/redact.rs | 99 ++- crates/tirith-core/src/rules/command.rs | 9 +- crates/tirith-core/src/rules/credential.rs | 610 ++++++++++++++++++ crates/tirith-core/src/rules/mod.rs | 2 + crates/tirith-core/src/rules/shared.rs | 13 + crates/tirith-core/src/verdict.rs | 5 + crates/tirith-core/tests/golden_fixtures.rs | 32 + tests/fixtures/credential.toml | 79 +++ 12 files changed, 1063 insertions(+), 21 deletions(-) create mode 100644 crates/tirith-core/assets/data/credential_patterns.toml create mode 100644 crates/tirith-core/src/rules/credential.rs create mode 100644 crates/tirith-core/src/rules/shared.rs create mode 100644 tests/fixtures/credential.toml diff --git a/crates/tirith-core/Cargo.toml b/crates/tirith-core/Cargo.toml index e306f9c..9b909af 100644 --- a/crates/tirith-core/Cargo.toml +++ b/crates/tirith-core/Cargo.toml @@ -32,12 +32,17 @@ thiserror = { workspace = true } lopdf = { workspace = true } uuid = { workspace = true } tempfile = "3" +toml = "0.8" ed25519-dalek = { workspace = true } [target.'cfg(unix)'.dependencies] libc = { workspace = true } reqwest = { version = "0.12", default-features = false, features = ["blocking", "rustls-tls"] } +[build-dependencies] +toml = "0.8" +serde = { version = "1", features = ["derive"] } + [dev-dependencies] toml = "0.8" criterion = { version = "0.5", features = ["html_reports"] } diff --git a/crates/tirith-core/assets/data/credential_patterns.toml b/crates/tirith-core/assets/data/credential_patterns.toml new file mode 100644 index 0000000..0e544e4 --- /dev/null +++ 
# Credential detection patterns — single source of truth.
#
# This file drives three consumers (no drift possible):
#   1. build.rs      — generates tier-1 gate fragments
#   2. credential.rs — runtime pattern matching
#   3. redact.rs     — evidence redaction
#
# Provider patterns sourced from:
#   - gitleaks (MIT, Copyright 2019 Zachary Rice)
#   - ripsecrets (MIT, Copyright 2021 ripsecrets contributors)

# ── Cloud Providers ──────────────────────────────────────────────────────

[[pattern]]
id = "aws_access_key"
name = "AWS Access Key"
regex = '(?:\bA3T[A-Z0-9]|\bAKIA|\bASIA|\bABIA|\bACCA)[A-Z2-7]{16}\b'
tier1_fragment = '(?:AKIA|ASIA|ABIA|ACCA)[A-Z2-7]'
redact_prefix_len = 4
severity = "high"

[[pattern]]
id = "gcp_api_key"
name = "GCP API Key"
regex = '\bAIzaSy[0-9A-Za-z_-]{33}\b'
tier1_fragment = 'AIzaSy'
redact_prefix_len = 6
severity = "high"

# ── Source Control ───────────────────────────────────────────────────────

[[pattern]]
id = "github_pat"
name = "GitHub PAT"
regex = '\bgh[oprsu]_[0-9a-zA-Z]{36}\b'
tier1_fragment = 'gh[oprsu]_[0-9a-zA-Z]'
redact_prefix_len = 4
severity = "high"

[[pattern]]
id = "github_fine_grained_pat"
name = "GitHub Fine-Grained PAT"
regex = '\bgithub_pat_\w{82}\b'
tier1_fragment = 'github_pat_'
redact_prefix_len = 11
severity = "high"

[[pattern]]
id = "gitlab_pat"
name = "GitLab PAT"
regex = '\bglpat-[0-9A-Za-z_=-]{20,22}\b'
tier1_fragment = 'glpat-'
redact_prefix_len = 6
severity = "high"

# ── AI Providers ─────────────────────────────────────────────────────────

[[pattern]]
id = "anthropic_api_key"
name = "Anthropic API Key"
regex = '\bsk-ant-api03-[a-zA-Z0-9_-]{93}AA\b'
tier1_fragment = 'sk-ant-'
redact_prefix_len = 7
severity = "high"

# ── Messaging / Communication ────────────────────────────────────────────

[[pattern]]
id = "slack_token"
name = "Slack Token"
regex = '\bxox[aboprs]-(?:\d+-)+[\da-zA-Z]+'
tier1_fragment = 'xox[aboprs]-'
redact_prefix_len = 5
severity = "high"

[[pattern]]
id = "sendgrid_api_key"
name = "SendGrid API Key"
regex = '\bSG\.[a-zA-Z0-9=_-]{66}\b'
tier1_fragment = 'SG\.'
redact_prefix_len = 3
severity = "high"

[[pattern]]
id = "twilio_api_key"
name = "Twilio API Key"
regex = '\b(?:AC|SK)[0-9a-f]{32}\b'
tier1_fragment = '(?:AC|SK)[0-9a-f]{32}'
redact_prefix_len = 2
severity = "high"

# ── Payment Processing ───────────────────────────────────────────────────

[[pattern]]
id = "stripe_key"
name = "Stripe Key"
regex = '\b(?:sk|rk)_(?:test|live|prod)_[A-Za-z0-9]{16,}\b'
tier1_fragment = '(?:sk|rk)_(?:test|live|prod)_'
redact_prefix_len = 8
severity = "high"

# ── Package Registries ───────────────────────────────────────────────────

[[pattern]]
id = "npm_token"
name = "npm Token"
regex = '\bnpm_[0-9A-Za-z]{36}\b'
tier1_fragment = 'npm_[0-9A-Za-z]'
redact_prefix_len = 4
severity = "high"

# ── Encryption ───────────────────────────────────────────────────────────

[[pattern]]
id = "age_secret_key"
name = "age Secret Key"
regex = '\bAGE-SECRET-KEY-1[0-9A-Z]{58}\b'
tier1_fragment = 'AGE-SECRET-KEY-'
redact_prefix_len = 15
severity = "high"

# ── Private Keys (separate RuleId: PrivateKeyExposed) ────────────────────

[[private_key_pattern]]
id = "private_key"
name = "Private Key Block"
regex = '-----BEGIN\s[A-Z0-9 ]*PRIVATE KEY-----'
tier1_fragment = '-----BEGIN\s'
# Full PEM block regex for redaction (header + base64 body + footer).
# Detection only needs the header; redaction must scrub the entire block.
+redact_regex = '-----BEGIN\s[A-Z0-9 ]*PRIVATE KEY-----[\s\S]*?-----END\s[A-Z0-9 ]*PRIVATE KEY-----' diff --git a/crates/tirith-core/build.rs b/crates/tirith-core/build.rs index c2997c4..91ba628 100644 --- a/crates/tirith-core/build.rs +++ b/crates/tirith-core/build.rs @@ -1,7 +1,40 @@ +use serde::Deserialize; use std::env; use std::fs; use std::path::Path; +#[derive(Deserialize)] +struct CredentialPatternsFile { + pattern: Option>, + private_key_pattern: Option>, +} + +#[derive(Deserialize)] +struct CredPattern { + tier1_fragment: String, + #[allow(dead_code)] + id: String, + #[allow(dead_code)] + name: String, + #[allow(dead_code)] + regex: String, + #[allow(dead_code)] + redact_prefix_len: Option, + #[allow(dead_code)] + severity: String, +} + +#[derive(Deserialize)] +struct PrivKeyPattern { + tier1_fragment: String, + #[allow(dead_code)] + id: String, + #[allow(dead_code)] + name: String, + #[allow(dead_code)] + regex: String, +} + fn main() { let out_dir = env::var("OUT_DIR").unwrap(); let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap(); @@ -21,6 +54,7 @@ fn main() { println!("cargo:rerun-if-changed=assets/data/popular_repos.csv"); println!("cargo:rerun-if-changed=assets/data/public_suffix_list.dat"); println!("cargo:rerun-if-changed=assets/data/ocr_confusions.tsv"); + println!("cargo:rerun-if-changed=assets/data/credential_patterns.toml"); println!("cargo:rerun-if-changed=build.rs"); } @@ -449,6 +483,64 @@ fn generate_tier1_regex(out_dir: &str) { } } + // Load credential patterns from TOML and inject tier-1 entries + let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap(); + let cred_path = Path::new(&manifest_dir) + .join("assets") + .join("data") + .join("credential_patterns.toml"); + let cred_content = fs::read_to_string(&cred_path) + .unwrap_or_else(|e| panic!("Failed to read credential_patterns.toml: {e}")); + let cred_file: CredentialPatternsFile = toml::from_str(&cred_content) + .unwrap_or_else(|e| panic!("Failed to parse 
credential_patterns.toml: {e}")); + + // credential_known — exec fragments from all [[pattern]] entries + { + let mut known_frags: Vec = Vec::new(); + if let Some(ref patterns) = cred_file.pattern { + for p in patterns { + known_frags.push(p.tier1_fragment.clone()); + } + } + assert!( + !known_frags.is_empty(), + "credential_patterns.toml has no [[pattern]] entries" + ); + ids.push("credential_known".to_string()); + for frag in &known_frags { + exec_fragments.push(frag.clone()); + paste_fragments.push(frag.clone()); + } + } + + // credential_private_key — exec fragment from [[private_key_pattern]] + { + let pk_patterns = cred_file + .private_key_pattern + .as_ref() + .expect("credential_patterns.toml has no [[private_key_pattern]]"); + assert!( + !pk_patterns.is_empty(), + "credential_patterns.toml [[private_key_pattern]] is empty" + ); + ids.push("credential_private_key".to_string()); + for pk in pk_patterns { + exec_fragments.push(pk.tier1_fragment.clone()); + paste_fragments.push(pk.tier1_fragment.clone()); + } + } + + // credential_generic — paste-only fragment for generic key=value patterns + { + // Tier-1 must be a superset of GENERIC_SECRET_RE. The runtime regex + // allows optional quote/bracket before the operator (["']?\]?), which + // cannot contain literal " in the r"..." generated output. We use + // .{0,2} as a permissive stand-in for the optional quote+bracket. 
+ let generic_frag = r"(?i:key|token|secret|password)\w*.{0,2}\s*(?:[:=]|:=|=>|<-|>)"; + ids.push("credential_generic".to_string()); + paste_fragments.push(generic_frag.to_string()); + } + let exec_regex = format!("(?:{})", exec_fragments.join("|")); let paste_regex = format!("(?:{})", paste_fragments.join("|")); diff --git a/crates/tirith-core/src/engine.rs b/crates/tirith-core/src/engine.rs index 9fe75c7..ddc5259 100644 --- a/crates/tirith-core/src/engine.rs +++ b/crates/tirith-core/src/engine.rs @@ -608,6 +608,11 @@ pub fn analyze(ctx: &AnalysisContext) -> Verdict { ); findings.extend(command_findings); + // Run credential leak detection rules + let cred_findings = + crate::rules::credential::check(&ctx.input, ctx.shell, ctx.scan_context); + findings.extend(cred_findings); + // Run environment rules let env_findings = crate::rules::environment::check(&crate::rules::environment::RealEnv); findings.extend(env_findings); @@ -917,6 +922,8 @@ fn mitre_id_for_rule(rule_id: crate::verdict::RuleId) -> Option<&'static str> { RuleId::ShellInjectionEnv => Some("T1546.004"), // Shell Config Modification // Credential Access + RuleId::CredentialInText | RuleId::HighEntropySecret => Some("T1552"), // Unsecured Credentials + RuleId::PrivateKeyExposed => Some("T1552.004"), // Private Keys RuleId::MetadataEndpoint => Some("T1552.005"), // Unsecured Credentials: Cloud Instance Metadata RuleId::SensitiveEnvExport => Some("T1552.001"), // Credentials In Files diff --git a/crates/tirith-core/src/redact.rs b/crates/tirith-core/src/redact.rs index a50e02f..eb9aaa6 100644 --- a/crates/tirith-core/src/redact.rs +++ b/crates/tirith-core/src/redact.rs @@ -1,6 +1,61 @@ use once_cell::sync::Lazy; use regex::Regex; +/// Credential redaction entry: (label, regex, prefix_len). +/// prefix_len chars are kept visible, the rest is replaced with [REDACTED]. 
+struct CredRedactEntry { + regex: Regex, + prefix_len: usize, +} + +/// Credential patterns loaded from credential_patterns.toml at compile time. +static CREDENTIAL_REDACT_PATTERNS: Lazy> = Lazy::new(|| { + #[derive(serde::Deserialize)] + struct CredFile { + pattern: Option>, + private_key_pattern: Option>, + } + #[derive(serde::Deserialize)] + struct CredPat { + regex: String, + redact_prefix_len: Option, + } + #[derive(serde::Deserialize)] + struct PkPat { + #[allow(dead_code)] + regex: String, + redact_regex: Option, + } + + let toml_str = include_str!("../assets/data/credential_patterns.toml"); + let cred_file: CredFile = toml::from_str(toml_str).expect("invalid credential_patterns.toml"); + + let mut entries = Vec::new(); + if let Some(patterns) = cred_file.pattern { + for p in patterns { + if let Ok(re) = Regex::new(&p.regex) { + entries.push(CredRedactEntry { + regex: re, + prefix_len: p.redact_prefix_len.unwrap_or(4), + }); + } + } + } + if let Some(pk_patterns) = cred_file.private_key_pattern { + for pk in pk_patterns { + // Use redact_regex (full PEM block) if available, fall back to header-only regex + let redact_pattern = pk.redact_regex.as_deref().unwrap_or(&pk.regex); + if let Ok(re) = Regex::new(redact_pattern) { + entries.push(CredRedactEntry { + regex: re, + prefix_len: 0, + }); + } + } + } + entries +}); + /// Built-in redaction patterns: (label, regex). static BUILTIN_PATTERNS: Lazy> = Lazy::new(|| { vec![ @@ -29,14 +84,26 @@ static BUILTIN_PATTERNS: Lazy> = Lazy::new(|| { ] }); -/// Redact sensitive content from a string using built-in patterns. +/// Redact sensitive content from a string using built-in and credential patterns. 
pub fn redact(input: &str) -> String { let mut result = input.to_string(); + // Apply built-in patterns first (existing behavior, labeled redaction) for (label, regex) in BUILTIN_PATTERNS.iter() { result = regex .replace_all(&result, format!("[REDACTED:{label}]")) .into_owned(); } + // Apply credential patterns (prefix-preserving, catches patterns not in builtins) + for entry in CREDENTIAL_REDACT_PATTERNS.iter() { + result = entry + .regex + .replace_all(&result, |caps: ®ex::Captures| { + let matched = &caps[0]; + let prefix_len = entry.prefix_len.min(matched.len()); + format!("{}[REDACTED]", &matched[..prefix_len]) + }) + .into_owned(); + } result } @@ -306,8 +373,9 @@ mod tests { #[test] fn test_redact_openai_key() { - let input = "export OPENAI_API_KEY=sk-abcdefghijklmnopqrstuvwxyz12345678"; - let redacted = redact(input); + let key = concat!("sk-", "abcdefghijklmnopqrstuvwxyz12345678"); + let input = format!("export OPENAI_API_KEY={key}"); + let redacted = redact(&input); assert!(!redacted.contains("sk-abcdef")); assert!(redacted.contains("[REDACTED:OpenAI API Key]")); } @@ -322,8 +390,9 @@ mod tests { #[test] fn test_redact_github_pat() { - let input = "GITHUB_TOKEN=ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijkl"; - let redacted = redact(input); + let pat = concat!("gh", "p_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijkl"); + let input = format!("GITHUB_TOKEN={pat}"); + let redacted = redact(&input); assert!(!redacted.contains("ghp_ABCDEF")); assert!(redacted.contains("[REDACTED:GitHub PAT]")); } @@ -354,8 +423,9 @@ mod tests { #[test] fn test_redact_anthropic_key() { - let input = "ANTHROPIC_API_KEY=sk-ant-api03-abcdefghijklmnop"; - let redacted = redact(input); + let key = concat!("sk-ant-api03-", "abcdefghijklmnop"); + let input = format!("ANTHROPIC_API_KEY={key}"); + let redacted = redact(&input); assert!(!redacted.contains("sk-ant-api03")); assert!(redacted.contains("[REDACTED:Anthropic API Key]")); } @@ -363,27 +433,30 @@ mod tests { #[test] fn 
test_redact_finding_covers_all_fields() { use crate::verdict::{Evidence, Finding, RuleId, Severity}; + let openai_key = concat!("sk-", "abcdefghijklmnopqrstuvwxyz12345678"); + let github_pat = concat!("gh", "p_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijkl"); + let aws_key = "AKIAIOSFODNN7EXAMPLE"; let mut finding = Finding { rule_id: RuleId::SensitiveEnvExport, severity: Severity::High, title: "test".into(), - description: "exports sk-abcdefghijklmnopqrstuvwxyz12345678".into(), + description: format!("exports {openai_key}"), evidence: vec![ Evidence::EnvVar { name: "OPENAI_API_KEY".into(), - value_preview: "sk-abcdefghijklmnopqrstuvwxyz12345678".into(), + value_preview: openai_key.into(), }, Evidence::Text { - detail: "saw ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijkl".into(), + detail: format!("saw {github_pat}"), }, Evidence::CommandPattern { pattern: "export".into(), - matched: "export OPENAI_API_KEY=sk-abcdefghijklmnopqrstuvwxyz12345678".into(), + matched: format!("export OPENAI_API_KEY={openai_key}"), }, ], - human_view: Some("key is sk-abcdefghijklmnopqrstuvwxyz12345678".into()), - agent_view: Some("AKIAIOSFODNN7EXAMPLE exposed".into()), + human_view: Some(format!("key is {openai_key}")), + agent_view: Some(format!("{aws_key} exposed")), mitre_id: None, custom_rule_id: None, }; diff --git a/crates/tirith-core/src/rules/command.rs b/crates/tirith-core/src/rules/command.rs index 2353fd7..1acab14 100644 --- a/crates/tirith-core/src/rules/command.rs +++ b/crates/tirith-core/src/rules/command.rs @@ -938,14 +938,7 @@ const SHELL_INJECTION_VARS: &[&str] = &["BASH_ENV", "ENV", "PROMPT_COMMAND"]; const INTERPRETER_HIJACK_VARS: &[&str] = &["PYTHONPATH", "NODE_OPTIONS", "RUBYLIB", "PERL5LIB"]; /// Sensitive credential variable names that should not be exported in commands. 
-const SENSITIVE_KEY_VARS: &[&str] = &[ - "AWS_ACCESS_KEY_ID", - "AWS_SECRET_ACCESS_KEY", - "AWS_SESSION_TOKEN", - "OPENAI_API_KEY", - "ANTHROPIC_API_KEY", - "GITHUB_TOKEN", -]; +use super::shared::SENSITIVE_KEY_VARS; fn classify_env_var(name: &str) -> Option<(RuleId, Severity, &'static str, &'static str)> { let name_upper = name.to_ascii_uppercase(); diff --git a/crates/tirith-core/src/rules/credential.rs b/crates/tirith-core/src/rules/credential.rs new file mode 100644 index 0000000..0f5f3b3 --- /dev/null +++ b/crates/tirith-core/src/rules/credential.rs @@ -0,0 +1,610 @@ +//! Credential leak detection. +//! +//! Two-layer approach: +//! 1. Known patterns: provider-specific regex (AWS, GitHub, Stripe, etc.) +//! 2. Generic detection: keyword context + p_random() entropy scoring +//! +//! Entropy detection algorithm ported from ripsecrets +//! (MIT, Copyright 2021 ripsecrets contributors). +//! https://github.com/sirwart/ripsecrets +//! +//! Provider patterns sourced from gitleaks (MIT, Copyright 2019 Zachary Rice). +//! 
https://github.com/gitleaks/gitleaks + +use once_cell::sync::Lazy; +use regex::Regex; +use serde::Deserialize; +use std::collections::HashSet; + +use crate::extract::ScanContext; +use crate::rules::shared::SENSITIVE_KEY_VARS; +use crate::tokenize::ShellType; +use crate::verdict::{Evidence, Finding, RuleId, Severity}; + +// --------------------------------------------------------------------------- +// TOML schema +// --------------------------------------------------------------------------- + +#[derive(Deserialize)] +struct PatternFile { + #[serde(default)] + pattern: Vec, + #[serde(default)] + private_key_pattern: Vec, +} + +#[derive(Deserialize)] +struct PatternDef { + #[allow(dead_code)] + id: String, + name: String, + regex: String, + #[allow(dead_code)] + tier1_fragment: String, + #[allow(dead_code)] + redact_prefix_len: Option, + #[allow(dead_code)] + severity: String, +} + +#[derive(Deserialize)] +struct PrivateKeyDef { + #[allow(dead_code)] + id: String, + #[allow(dead_code)] + name: String, + regex: String, + #[allow(dead_code)] + tier1_fragment: String, +} + +// --------------------------------------------------------------------------- +// Compiled patterns (loaded once) +// --------------------------------------------------------------------------- + +struct CompiledPattern { + name: String, + regex: Regex, +} + +static KNOWN_PATTERNS: Lazy> = Lazy::new(|| { + let toml_src = include_str!("../../assets/data/credential_patterns.toml"); + let file: PatternFile = toml::from_str(toml_src).expect("credential_patterns.toml parse error"); + file.pattern + .into_iter() + .map(|p| CompiledPattern { + name: p.name, + regex: Regex::new(&p.regex).unwrap_or_else(|e| { + panic!("bad regex in credential_patterns.toml ({}): {e}", p.id) + }), + }) + .collect() +}); + +static PRIVATE_KEY_RE: Lazy = Lazy::new(|| { + let toml_src = include_str!("../../assets/data/credential_patterns.toml"); + let file: PatternFile = toml::from_str(toml_src).expect("credential_patterns.toml 
parse error"); + let pat = &file.private_key_pattern[0].regex; + Regex::new(pat).expect("bad private key regex") +}); + +// Generic secret regex: keyword context + assignment + value capture. +// Ported from ripsecrets RANDOM_STRING_REGEX. +static GENERIC_SECRET_RE: Lazy = Lazy::new(|| { + Regex::new( + r#"(?i:key|token|secret|password)\w*["']?\]?\s*(?:[:=]|:=|=>|<-|>)\s*[\t "'`]?([\w+./=~\\\-`^]{15,90})(?:[\t\n "'`]| Vec { + if matches!(context, ScanContext::FileScan) { + return Vec::new(); + } + + let mut findings = Vec::new(); + + findings.extend(check_known_patterns(input)); + findings.extend(check_private_keys(input)); + + if matches!(context, ScanContext::Paste) { + findings.extend(check_generic_secrets(input)); + } + + findings +} + +// --------------------------------------------------------------------------- +// Layer 1: Known provider patterns +// --------------------------------------------------------------------------- + +fn check_known_patterns(input: &str) -> Vec { + let mut findings = Vec::new(); + for pat in KNOWN_PATTERNS.iter() { + for m in pat.regex.find_iter(input) { + if is_covered_by_env_export(input, m.start()) { + continue; + } + findings.push(Finding { + rule_id: RuleId::CredentialInText, + severity: Severity::High, + title: format!("{} detected", pat.name), + description: + "A credential matching a known provider pattern was found in the input. \ + Credentials should not appear in commands or pasted text." 
+ .to_string(), + evidence: vec![Evidence::Text { + detail: format!("Matched {} pattern", pat.name), + }], + human_view: None, + agent_view: None, + mitre_id: None, + custom_rule_id: None, + }); + } + } + findings +} + +// --------------------------------------------------------------------------- +// Layer 2: Private key blocks +// --------------------------------------------------------------------------- + +fn check_private_keys(input: &str) -> Vec { + let mut findings = Vec::new(); + for _ in PRIVATE_KEY_RE.find_iter(input) { + findings.push(Finding { + rule_id: RuleId::PrivateKeyExposed, + severity: Severity::Critical, + title: "Private key block detected".to_string(), + description: "A PEM-encoded private key header was found in the input. \ + Private keys should never be pasted into a terminal or used inline." + .to_string(), + evidence: vec![Evidence::Text { + detail: "Matched BEGIN PRIVATE KEY block".to_string(), + }], + human_view: None, + agent_view: None, + mitre_id: None, + custom_rule_id: None, + }); + } + findings +} + +// --------------------------------------------------------------------------- +// Layer 3: Generic entropy-based secrets (paste only) +// --------------------------------------------------------------------------- + +fn check_generic_secrets(input: &str) -> Vec { + let mut findings = Vec::new(); + for caps in GENERIC_SECRET_RE.captures_iter(input) { + let value = match caps.get(1) { + Some(m) => m.as_str(), + None => continue, + }; + if !is_random(value.as_bytes()) { + continue; + } + findings.push(Finding { + rule_id: RuleId::HighEntropySecret, + severity: Severity::Medium, + title: "High-entropy secret value detected".to_string(), + description: + "A value assigned to a secret/key/token/password variable appears to contain \ + a random credential. Avoid pasting secrets into terminals." 
+ .to_string(), + evidence: vec![Evidence::Text { + detail: "High-entropy value in secret assignment context".to_string(), + }], + human_view: None, + agent_view: None, + mitre_id: None, + custom_rule_id: None, + }); + } + findings +} + +// --------------------------------------------------------------------------- +// Dedup helper: suppress if the match is part of `export VAR=`, `env VAR=`, +// or fish `set ... VAR` where VAR is in SENSITIVE_KEY_VARS. +// --------------------------------------------------------------------------- + +fn is_covered_by_env_export(input: &str, match_start: usize) -> bool { + let prefix = &input[..match_start]; + let trimmed = prefix.trim_end(); + + // Case 1: POSIX-style `VAR=value` — check for export/env/set before VAR= + let posix_match = SENSITIVE_KEY_VARS.iter().any(|var| { + let suffix_eq = format!("{var}="); + let suffix_eq_sq = format!("{var}='"); + let suffix_eq_dq = format!("{var}=\""); + let has_eq = trimmed.ends_with(&suffix_eq) + || trimmed.ends_with(&suffix_eq_sq) + || trimmed.ends_with(&suffix_eq_dq); + if !has_eq { + return false; + } + if let Some(var_pos) = trimmed.rfind(&suffix_eq) { + let before_var = trimmed[..var_pos].trim_end(); + before_var.ends_with("export") + || has_command_prefix(before_var, "env") + || has_command_prefix(before_var, "set") + } else { + false + } + }); + + if posix_match { + return true; + } + + // Case 2: Fish-style `set [-gx] VAR value` — VAR is followed by space, not = + // The matched secret starts at match_start. In fish, the prefix looks like + // `set -gx AWS_ACCESS_KEY_ID ` (space before the value, no =). + // Use the raw prefix (not trimmed) to preserve trailing space. 
+ let raw_prefix = prefix; + SENSITIVE_KEY_VARS.iter().any(|var| { + let suffix_space = format!("{var} "); + // Check raw prefix (not trimmed) so trailing space is preserved + if !raw_prefix.ends_with(&suffix_space) { + return false; + } + if let Some(var_pos) = raw_prefix.rfind(var) { + let before_var = raw_prefix[..var_pos].trim_end(); + has_command_prefix(before_var, "set") + } else { + false + } + }) +} + +/// Check if `before` ends with a chain starting from `cmd`. +/// Handles intervening flags like `env -S VAR=` or `set -gx VAR`. +fn has_command_prefix(before: &str, cmd: &str) -> bool { + // Split on whitespace and check if cmd appears as any word + let words: Vec<&str> = before.split_whitespace().collect(); + // Find the last occurrence of cmd in the words + for (i, w) in words.iter().enumerate().rev() { + if *w == cmd { + // Everything after cmd should be flags or VAR=val pairs + let rest = &words[i + 1..]; + return rest.iter().all(|w| w.starts_with('-') || w.contains('=')); + } + } + false +} + +// --------------------------------------------------------------------------- +// Entropy scoring — ported from ripsecrets (MIT) +// --------------------------------------------------------------------------- + +/// Probability that `s` is a random string (higher = more likely random). +fn p_random(s: &[u8]) -> f64 { + let base = if is_hex_string(s) { + 16.0 + } else if is_cap_and_numbers(s) { + 36.0 + } else { + 64.0 + }; + let mut p = p_random_distinct_values(s, base) * p_random_char_class(s, base); + if base == 64.0 { + // bigrams are only calibrated for base64 + p *= p_random_bigrams(s); + } + p +} + +fn is_hex_string(s: &[u8]) -> bool { + s.len() >= 16 && s.iter().all(|b| b.is_ascii_hexdigit()) +} + +fn is_cap_and_numbers(s: &[u8]) -> bool { + s.len() >= 16 + && s.iter() + .all(|b| b.is_ascii_uppercase() || b.is_ascii_digit()) +} + +/// Determine if `s` looks random enough to flag as a secret. 
+fn is_random(s: &[u8]) -> bool { + let p = p_random(s); + if p < 1.0 / 1e5 { + return false; + } + let contains_num = s.iter().any(|b| b.is_ascii_digit()); + if !contains_num && p < 1.0 / 1e4 { + return false; + } + true +} + +// ---- Bigrams (from ripsecrets) ---- + +static BIGRAMS: Lazy> = Lazy::new(|| { + let bigrams_bytes: &[u8] = b"er,te,an,en,ma,ke,10,at,/m,on,09,ti,al,io,.h,./,..,ra,ht,es,or,tm,pe,ml,re,in,3/,n3,0F,ok,ey,00,80,08,ss,07,15,81,F3,st,52,KE,To,01,it,2B,2C,/E,P_,EY,B7,se,73,de,VP,EV,to,od,B0,0E,nt,et,_P,A0,60,90,0A,ri,30,ar,C0,op,03,ec,ns,as,FF,F7,po,PK,la,.p,AE,62,me,F4,71,8E,yp,pa,50,qu,D7,7D,rs,ea,Y_,t_,ha,3B,c/,D2,ls,DE,pr,am,E0,oc,06,li,do,id,05,51,40,ED,_p,70,ed,04,02,t.,rd,mp,20,d_,co,ro,ex,11,ua,nd,0C,0D,D0,Eq,le,EF,wo,e_,e.,ct,0B,_c,Li,45,rT,pt,14,61,Th,56,sT,E6,DF,nT,16,85,em,BF,9E,ne,_s,25,91,78,57,BE,ta,ng,cl,_t,E1,1F,y_,xp,cr,4F,si,s_,E5,pl,AB,ge,7E,F8,35,E2,s.,CF,58,32,2F,E7,1B,ve,B1,3D,nc,Gr,EB,C6,77,64,sl,8A,6A,_k,79,C8,88,ce,Ex,5C,28,EA,A6,2A,Ke,A7,th,CA,ry,F0,B6,7/,D9,6B,4D,DA,3C,ue,n7,9C,.c,7B,72,ac,98,22,/o,va,2D,n.,_m,B8,A3,8D,n_,12,nE,ca,3A,is,AD,rt,r_,l-,_C,n1,_v,y.,yw,1/,ov,_n,_d,ut,no,ul,sa,CT,_K,SS,_e,F1,ty,ou,nG,tr,s/,il,na,iv,L_,AA,da,Ty,EC,ur,TX,xt,lu,No,r.,SL,Re,sw,_1,om,e/,Pa,xc,_g,_a,X_,/e,vi,ds,ai,==,ts,ni,mg,ic,o/,mt,gm,pk,d.,ch,/p,tu,sp,17,/c,ym,ot,ki,Te,FE,ub,nL,eL,.k,if,he,34,e-,23,ze,rE,iz,St,EE,-p,be,In,ER,67,13,yn,ig,ib,_f,.o,el,55,Un,21,fi,54,mo,mb,gi,_r,Qu,FD,-o,ie,fo,As,7F,48,41,/i,eS,ab,FB,1E,h_,ef,rr,rc,di,b.,ol,im,eg,ap,_l,Se,19,oS,ew,bs,Su,F5,Co,BC,ud,C1,r-,ia,_o,65,.r,sk,o_,ck,CD,Am,9F,un,fa,F6,5F,nk,lo,ev,/f,.t,sE,nO,a_,EN,E4,Di,AC,95,74,1_,1A,us,ly,ll,_b,SA,FC,69,5E,43,um,tT,OS,CE,87,7A,59,44,t-,bl,ad,Or,D5,A_,31,24,t/,ph,mm,f.,ag,RS,Of,It,FA,De,1D,/d,-k,lf,hr,gu,fy,D6,89,6F,4E,/k,w_,cu,br,TE,ST,R_,E8,/O"; + bigrams_bytes.split(|b| *b == b',').collect() +}); + +fn p_random_bigrams(s: &[u8]) -> f64 { + let mut num_bigrams = 0; + for i in 0..s.len().saturating_sub(1) { + let bigram = &s[i..=i + 1]; 
+ if BIGRAMS.contains(bigram) { + num_bigrams += 1; + } + } + p_binomial(s.len(), num_bigrams, (BIGRAMS.len() as f64) / (64.0 * 64.0)) +} + +// ---- Char class probabilities ---- + +fn p_random_char_class(s: &[u8], base: f64) -> f64 { + if base == 16.0 { + return p_random_char_class_aux(s, b'0', b'9', 16.0); + } + let char_classes_36: &[(u8, u8)] = &[(b'0', b'9'), (b'A', b'Z')]; + let char_classes_64: &[(u8, u8)] = &[(b'0', b'9'), (b'A', b'Z'), (b'a', b'z')]; + let classes = if base == 36.0 { + char_classes_36 + } else { + char_classes_64 + }; + classes + .iter() + .map(|(lo, hi)| p_random_char_class_aux(s, *lo, *hi, base)) + .fold(f64::INFINITY, f64::min) +} + +fn p_random_char_class_aux(s: &[u8], min: u8, max: u8, base: f64) -> f64 { + // Note: upper bound is exclusive to match ripsecrets scoring behaviour. + let count = s.iter().filter(|b| **b >= min && **b < max).count(); + let num_chars = (max - min + 1) as f64; + p_binomial(s.len(), count, num_chars / base) +} + +// ---- Distinct values ---- + +fn p_random_distinct_values(s: &[u8], base: f64) -> f64 { + let total_possible: f64 = base.powi(s.len() as i32); + let num_distinct = count_distinct(s); + let mut sum: f64 = 0.0; + for i in 1..=num_distinct { + sum += num_possible_outcomes(s.len(), i, base as usize); + } + sum / total_possible +} + +fn count_distinct(s: &[u8]) -> usize { + let mut seen = [false; 256]; + let mut count = 0; + for &b in s { + if !seen[b as usize] { + seen[b as usize] = true; + count += 1; + } + } + count +} + +fn num_possible_outcomes(num_values: usize, num_distinct: usize, base: usize) -> f64 { + let mut res = base as f64; + for i in 1..num_distinct { + res *= (base - i) as f64; + } + res * num_distinct_configurations(num_values, num_distinct) +} + +fn num_distinct_configurations(num_values: usize, num_distinct: usize) -> f64 { + if num_distinct == 1 || num_distinct == num_values { + return 1.0; + } + num_distinct_configurations_aux(num_distinct, 0, num_values - num_distinct) +} + +fn 
num_distinct_configurations_aux(num_positions: usize, position: usize, remaining: usize) -> f64 { + if remaining == 0 { + return 1.0; + } + let mut configs = 0.0; + if position + 1 < num_positions { + configs += num_distinct_configurations_aux(num_positions, position + 1, remaining); + } + configs + + (position + 1) as f64 + * num_distinct_configurations_aux(num_positions, position, remaining - 1) +} + +// ---- Binomial probability ---- + +fn p_binomial(n: usize, x: usize, p: f64) -> f64 { + let left_tail = (x as f64) < n as f64 * p; + let min = if left_tail { 0 } else { x }; + let max = if left_tail { x } else { n }; + + let mut total = 0.0; + for i in min..=max { + total += factorial(n) / (factorial(n - i) * factorial(i)) + * p.powi(i as i32) + * (1.0 - p).powi((n - i) as i32); + } + total +} + +fn factorial(n: usize) -> f64 { + let mut res = 1.0; + for i in 2..=n { + res *= i as f64; + } + res +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_export_aws_key_suppressed() { + let input = "export AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE"; + let findings = check(input, ShellType::Posix, ScanContext::Exec); + assert!( + findings.is_empty(), + "export VAR= should be suppressed (handled by SensitiveEnvExport rule): {findings:?}" + ); + } + + #[test] + fn test_env_aws_key_suppressed() { + let input = "env AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE ./run.sh"; + let findings = check(input, ShellType::Posix, ScanContext::Exec); + assert!( + findings.is_empty(), + "env VAR= should be suppressed: {findings:?}" + ); + } + + #[test] + fn test_bare_aws_key_assignment_fires() { + // Bare VAR= without export/env/set is NOT suppressed + let input = "AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE"; + let findings = check(input, ShellType::Posix, ScanContext::Exec); + assert!( + !findings.is_empty(), + "bare 
VAR= should still fire credential detection" + ); + assert!(findings + .iter() + .any(|f| f.rule_id == RuleId::CredentialInText)); + } + + #[test] + fn test_aws_key_in_curl_header_fires() { + let input = "curl -H 'Authorization: AKIAIOSFODNN7EXAMPLE' https://api.example.com"; + let findings = check(input, ShellType::Posix, ScanContext::Exec); + assert!(!findings.is_empty(), "AWS key in curl header should fire"); + assert!(findings + .iter() + .any(|f| f.rule_id == RuleId::CredentialInText)); + // Title must NOT contain the secret value + for f in &findings { + assert!( + !f.title.contains("AKIAIOSFODNN7EXAMPLE"), + "title must not contain the raw secret" + ); + } + } + + #[test] + fn test_private_key_detected() { + let input = "-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIBAAKCAQEA0Z3VS5JJcds3xfn/yGaV...\n-----END RSA PRIVATE KEY-----"; + let findings = check(input, ShellType::Posix, ScanContext::Paste); + assert!( + findings + .iter() + .any(|f| f.rule_id == RuleId::PrivateKeyExposed), + "private key block should be detected: {findings:?}" + ); + assert!( + findings + .iter() + .filter(|f| f.rule_id == RuleId::PrivateKeyExposed) + .all(|f| f.severity == Severity::Critical), + "private key should be Critical severity" + ); + } + + #[test] + fn test_generic_entropy_detected() { + // A keyword-assignment with a random-looking value in paste context + let input = r#"secret_key = "xK9mP2vL7nR4wQ8jF3hB6dT1yC5uA0eG""#; + let findings = check(input, ShellType::Posix, ScanContext::Paste); + assert!( + findings + .iter() + .any(|f| f.rule_id == RuleId::HighEntropySecret), + "high-entropy secret should be detected in paste context: {findings:?}" + ); + } + + #[test] + fn test_generic_entropy_skipped_in_exec() { + // Same input but in exec context — generic detection should NOT run + let input = r#"secret_key = "xK9mP2vL7nR4wQ8jF3hB6dT1yC5uA0eG""#; + let findings = check(input, ShellType::Posix, ScanContext::Exec); + assert!( + !findings + .iter() + .any(|f| f.rule_id == 
RuleId::HighEntropySecret), + "generic entropy should be skipped in exec context" + ); + } + + #[test] + fn test_readable_password_not_flagged() { + // A readable, non-random password should NOT be flagged by entropy check + let input = r#"password = "hello_world""#; + let findings = check(input, ShellType::Posix, ScanContext::Paste); + assert!( + !findings + .iter() + .any(|f| f.rule_id == RuleId::HighEntropySecret), + "readable password should not be flagged as high-entropy: {findings:?}" + ); + } + + #[test] + fn test_p_random_ported_correctly() { + // Verify the ported p_random gives same results as ripsecrets + assert!(p_random(b"hello_world") < 1.0 / 1e6); + assert!(p_random(b"xK9mP2vL7nR4wQ8jF3hB6dT1yC5uA0eG") > 1.0 / 1e4); + assert!(p_random(b"rT8vN1kL5qW3mC7xH2jP9sD4fB6yZ0uA") > 1.0 / 1e4); + } + + #[test] + fn test_is_random_basic() { + assert!(!is_random(b"hello_world")); + assert!(is_random(b"xK9mP2vL7nR4wQ8jF3hB6dT1yC5uA0eG")); + } + + #[test] + fn test_file_scan_skipped() { + let input = "AKIAIOSFODNN7EXAMPLE"; + let findings = check(input, ShellType::Posix, ScanContext::FileScan); + assert!( + findings.is_empty(), + "file scan context should produce no findings" + ); + } + + #[test] + fn test_fish_set_eq_suppressed() { + // POSIX-style VAR= after set (some fish versions) + let input = "set -gx AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE"; + let findings = check(input, ShellType::Fish, ScanContext::Exec); + assert!( + findings.is_empty(), + "fish set -gx VAR= should be suppressed: {findings:?}" + ); + } + + #[test] + fn test_fish_set_space_suppressed() { + // Canonical fish form: set -gx VAR value (space-separated) + let input = "set -gx AWS_ACCESS_KEY_ID AKIAIOSFODNN7EXAMPLE"; + let findings = check(input, ShellType::Fish, ScanContext::Exec); + assert!( + findings.is_empty(), + "fish set -gx VAR value should be suppressed: {findings:?}" + ); + } +} diff --git a/crates/tirith-core/src/rules/mod.rs b/crates/tirith-core/src/rules/mod.rs index 
2af1bdf..0213ad7 100644 --- a/crates/tirith-core/src/rules/mod.rs +++ b/crates/tirith-core/src/rules/mod.rs @@ -1,11 +1,13 @@ pub mod cloaking; pub mod command; pub mod configfile; +pub mod credential; pub mod custom; pub mod ecosystem; pub mod environment; pub mod hostname; pub mod path; pub mod rendered; +pub mod shared; pub mod terminal; pub mod transport; diff --git a/crates/tirith-core/src/rules/shared.rs b/crates/tirith-core/src/rules/shared.rs new file mode 100644 index 0000000..16ec2ee --- /dev/null +++ b/crates/tirith-core/src/rules/shared.rs @@ -0,0 +1,13 @@ +//! Shared constants and helpers used by multiple rule modules. + +/// Environment variable names that carry sensitive credentials. +/// Used by both `command.rs` (SensitiveEnvExport detection) and +/// `credential.rs` (dedup suppression). +pub const SENSITIVE_KEY_VARS: &[&str] = &[ + "AWS_ACCESS_KEY_ID", + "AWS_SECRET_ACCESS_KEY", + "AWS_SESSION_TOKEN", + "OPENAI_API_KEY", + "ANTHROPIC_API_KEY", + "GITHUB_TOKEN", +]; diff --git a/crates/tirith-core/src/verdict.rs b/crates/tirith-core/src/verdict.rs index caf477a..f710a92 100644 --- a/crates/tirith-core/src/verdict.rs +++ b/crates/tirith-core/src/verdict.rs @@ -97,6 +97,11 @@ pub enum RuleId { // PDF rules PdfHiddenText, + // Credential rules + CredentialInText, + HighEntropySecret, + PrivateKeyExposed, + // Policy rules PolicyBlocklisted, diff --git a/crates/tirith-core/tests/golden_fixtures.rs b/crates/tirith-core/tests/golden_fixtures.rs index 77f28a4..dccc8eb 100644 --- a/crates/tirith-core/tests/golden_fixtures.rs +++ b/crates/tirith-core/tests/golden_fixtures.rs @@ -257,6 +257,16 @@ fn test_rendered_fixtures() { eprintln!("Passed {count} rendered fixtures"); } +#[test] +fn test_credential_fixtures() { + let fixtures = load_fixtures("credential.toml"); + let count = fixtures.len(); + for fixture in &fixtures { + run_fixture(fixture); + } + eprintln!("Passed {count} credential fixtures"); +} + /// Verify total fixture count across all files. 
#[test] fn test_fixture_count() { @@ -273,6 +283,7 @@ fn test_fixture_count() { "policy.toml", "configfile.toml", "rendered.toml", + "credential.toml", ]; let total: usize = files.iter().map(|f| load_fixtures(f).len()).sum(); @@ -293,6 +304,7 @@ fn test_tier1_coverage() { "terminal.toml", "command.toml", "ecosystem.toml", + "credential.toml", ]; let mut missed = Vec::new(); @@ -379,6 +391,7 @@ const ALL_FIXTURE_FILES: &[&str] = &[ "policy.toml", "configfile.toml", "rendered.toml", + "credential.toml", ]; /// Complete list of all RuleId variants (snake_case serialized form). @@ -457,6 +470,10 @@ const ALL_RULE_IDS: &[&str] = &[ "hidden_html_attribute", "markdown_comment", "html_comment", + // Credential + "credential_in_text", + "high_entropy_secret", + "private_key_exposed", // Cloaking "server_cloaking", // Clipboard @@ -612,6 +629,9 @@ fn test_rule_id_list_is_complete() { RuleId::HiddenHtmlAttribute, RuleId::MarkdownComment, RuleId::HtmlComment, + RuleId::CredentialInText, + RuleId::HighEntropySecret, + RuleId::PrivateKeyExposed, RuleId::ServerCloaking, RuleId::ClipboardHidden, RuleId::PdfHiddenText, @@ -670,6 +690,9 @@ fn test_no_url_rules_have_no_url_fixtures() { "mcp_duplicate_server_name", // file context, no URL needed "metadata_endpoint", // bare IP: curl 169.254.169.254/path "private_network_access", // bare IP: curl 10.0.0.1/path + "credential_in_text", // token/key in text, no URL needed + "high_entropy_secret", // high-entropy secret assignment, no URL needed + "private_key_exposed", // PEM key block, no URL needed ] .into_iter() .collect(); @@ -767,6 +790,15 @@ fn test_extractor_ids_cover_rule_triggers() { ("punycode detection", &["punycode_domain"]), ("lookalike TLD", &["lookalike_tld"]), ("URL shortener", &["url_shortener"]), + // Credential detection + ( + "credential detection", + &[ + "credential_known", + "credential_private_key", + "credential_generic", + ], + ), ]; let mut missing = Vec::new(); diff --git a/tests/fixtures/credential.toml 
b/tests/fixtures/credential.toml new file mode 100644 index 0000000..37127f5 --- /dev/null +++ b/tests/fixtures/credential.toml @@ -0,0 +1,79 @@ +[[fixture]] +name = "aws_key_in_curl" +min_milestone = 1 +input = "curl -H 'Authorization: token AKIAIOSFODNN7EXAMPLE'" +context = "exec" +expected_action = "block" +expected_rules = ["credential_in_text"] + +[[fixture]] +name = "aws_session_key_in_paste" +min_milestone = 1 +input = "ASIAIOSFODNN7EXAMPLE" +context = "paste" +expected_action = "block" +expected_rules = ["credential_in_text"] + +[[fixture]] +name = "aws_temp_key_in_curl" +min_milestone = 1 +input = "curl -H 'Authorization: Bearer ABIAIOSFODNN7EXAMPLE'" +context = "exec" +expected_action = "block" +expected_rules = ["credential_in_text"] + +[[fixture]] +name = "private_key_block" +min_milestone = 1 +input = "-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIBAAKCAQEA0Z3VS5JJcds3xfn" +context = "paste" +expected_action = "block" +expected_rules = ["private_key_exposed"] + +[[fixture]] +name = "aws_key_in_env_assignment" +min_milestone = 1 +input = "AWS_ACCESS_KEY_ID=ACCAIOSFODNN7EXAMPLE" +context = "exec" +expected_action = "block" +expected_rules = ["credential_in_text"] + +[[fixture]] +name = "generic_api_key_assignment" +min_milestone = 1 +input = "api_key = 'xK9mP2vL7nR4wQ8jF3hB6dT1yC5uA0eG'" +context = "paste" +expected_action = "warn" +expected_rules = ["high_entropy_secret"] + +[[fixture]] +name = "normal_path_export" +min_milestone = 1 +input = "export PATH=/usr/local/bin" +context = "exec" +expected_action = "allow" +expected_rules = [] + +[[fixture]] +name = "readable_password_field" +min_milestone = 1 +input = "password = 'hello_world_test'" +context = "paste" +expected_action = "allow" +expected_rules = [] + +[[fixture]] +name = "generic_entropy_not_in_exec" +min_milestone = 1 +input = "api_key = 'xK9mP2vL7nR4wQ8jF3hB6dT1yC5uA0eG'" +context = "exec" +expected_action = "allow" +expected_rules = [] + +[[fixture]] +name = "aws_key_export_dedup" +min_milestone 
= 1 +input = "export AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE" +context = "exec" +expected_action = "block" +expected_rules = ["sensitive_env_export"] From 581e70759db7f7dad90730d9581c693251546b1c Mon Sep 17 00:00:00 2001 From: sheeki003 <36009418+sheeki03@users.noreply.github.com> Date: Tue, 24 Mar 2026 16:34:18 +0530 Subject: [PATCH 2/2] fix: tighten credential pattern coverage --- .../assets/data/credential_patterns.toml | 5 +- crates/tirith-core/build.rs | 2 + crates/tirith-core/src/redact.rs | 4 +- crates/tirith-core/src/rules/credential.rs | 55 ++++++++++++++++++- tests/fixtures/credential.toml | 4 +- 5 files changed, 63 insertions(+), 7 deletions(-) diff --git a/crates/tirith-core/assets/data/credential_patterns.toml b/crates/tirith-core/assets/data/credential_patterns.toml index 0e544e4..1fb0216 100644 --- a/crates/tirith-core/assets/data/credential_patterns.toml +++ b/crates/tirith-core/assets/data/credential_patterns.toml @@ -15,7 +15,7 @@ id = "aws_access_key" name = "AWS Access Key" regex = '(?:\bA3T[A-Z0-9]|\bAKIA|\bASIA|\bABIA|\bACCA)[A-Z2-7]{16}\b' -tier1_fragment = '(?:AKIA|ASIA|ABIA|ACCA)[A-Z2-7]' +tier1_fragment = '(?:A3T[A-Z0-9]|AKIA|ASIA|ABIA|ACCA)[A-Z2-7]' redact_prefix_len = 4 severity = "high" @@ -68,7 +68,7 @@ severity = "high" [[pattern]] id = "slack_token" name = "Slack Token" -regex = '\bxox[aboprs]-(?:\d+-)+[\da-zA-Z]+' +regex = '\bxox[aboprs]-(?:\d+-){2,}[A-Za-z0-9]*[A-Za-z][A-Za-z0-9]*\b' tier1_fragment = 'xox[aboprs]-' redact_prefix_len = 5 severity = "high" @@ -129,3 +129,4 @@ tier1_fragment = '-----BEGIN\s' # Full PEM block regex for redaction (header + base64 body + footer). # Detection only needs the header; redaction must scrub the entire block. 
redact_regex = '-----BEGIN\s[A-Z0-9 ]*PRIVATE KEY-----[\s\S]*?-----END\s[A-Z0-9 ]*PRIVATE KEY-----' +severity = "critical" diff --git a/crates/tirith-core/build.rs b/crates/tirith-core/build.rs index 91ba628..9481632 100644 --- a/crates/tirith-core/build.rs +++ b/crates/tirith-core/build.rs @@ -33,6 +33,8 @@ struct PrivKeyPattern { name: String, #[allow(dead_code)] regex: String, + #[allow(dead_code)] + severity: String, } fn main() { diff --git a/crates/tirith-core/src/redact.rs b/crates/tirith-core/src/redact.rs index eb9aaa6..09100c0 100644 --- a/crates/tirith-core/src/redact.rs +++ b/crates/tirith-core/src/redact.rs @@ -99,8 +99,8 @@ pub fn redact(input: &str) -> String { .regex .replace_all(&result, |caps: ®ex::Captures| { let matched = &caps[0]; - let prefix_len = entry.prefix_len.min(matched.len()); - format!("{}[REDACTED]", &matched[..prefix_len]) + let prefix: String = matched.chars().take(entry.prefix_len).collect(); + format!("{prefix}[REDACTED]") }) .into_owned(); } diff --git a/crates/tirith-core/src/rules/credential.rs b/crates/tirith-core/src/rules/credential.rs index 0f5f3b3..f50e58a 100644 --- a/crates/tirith-core/src/rules/credential.rs +++ b/crates/tirith-core/src/rules/credential.rs @@ -56,6 +56,8 @@ struct PrivateKeyDef { regex: String, #[allow(dead_code)] tier1_fragment: String, + #[allow(dead_code)] + severity: String, } // --------------------------------------------------------------------------- @@ -84,7 +86,11 @@ static KNOWN_PATTERNS: Lazy> = Lazy::new(|| { static PRIVATE_KEY_RE: Lazy = Lazy::new(|| { let toml_src = include_str!("../../assets/data/credential_patterns.toml"); let file: PatternFile = toml::from_str(toml_src).expect("credential_patterns.toml parse error"); - let pat = &file.private_key_pattern[0].regex; + let pat = &file + .private_key_pattern + .first() + .expect("credential_patterns.toml must contain at least one [[private_key_pattern]]") + .regex; Regex::new(pat).expect("bad private key regex") }); @@ -504,6 +510,53 @@ 
mod tests { } } + #[test] + fn test_a3t_variant_detected() { + let input = "A3T1IOSFODNN7EXAMPLE"; + let findings = check(input, ShellType::Posix, ScanContext::Paste); + assert!( + findings + .iter() + .any(|f| f.rule_id == RuleId::CredentialInText), + "A3T-prefixed AWS key variant should be detected" + ); + } + + #[test] + fn test_slack_token_detected() { + let input = concat!( + "xoxb-", + "123456789012-", + "123456789012-", + "AbCdEfGhIjKlMnOpQrStUvWx" + ); + let findings = check(input, ShellType::Posix, ScanContext::Exec); + assert!( + findings + .iter() + .any(|f| f.rule_id == RuleId::CredentialInText), + "valid Slack token should be detected" + ); + } + + #[test] + fn test_slack_token_does_not_match_word_suffix() { + let input = concat!( + "xoxb-", + "123456789012-", + "123456789012-", + "AbCdEfGhIjKlMnOpQrStUvWx", + "_suffix" + ); + let findings = check(input, ShellType::Posix, ScanContext::Exec); + assert!( + findings + .iter() + .all(|f| f.rule_id != RuleId::CredentialInText), + "Slack token regex should not match when a word suffix extends the token" + ); + } + #[test] fn test_private_key_detected() { let input = "-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIBAAKCAQEA0Z3VS5JJcds3xfn/yGaV...\n-----END RSA PRIVATE KEY-----"; diff --git a/tests/fixtures/credential.toml b/tests/fixtures/credential.toml index 37127f5..a39a64b 100644 --- a/tests/fixtures/credential.toml +++ b/tests/fixtures/credential.toml @@ -7,9 +7,9 @@ expected_action = "block" expected_rules = ["credential_in_text"] [[fixture]] -name = "aws_session_key_in_paste" +name = "aws_a3t_key_in_paste" min_milestone = 1 -input = "ASIAIOSFODNN7EXAMPLE" +input = "A3T1IOSFODNN7EXAMPLE" context = "paste" expected_action = "block" expected_rules = ["credential_in_text"]