diff --git a/src/feature_flags.rs b/src/feature_flags.rs index eabbaa332..7b9394371 100644 --- a/src/feature_flags.rs +++ b/src/feature_flags.rs @@ -58,6 +58,7 @@ define_feature_flags!( async_mode: async_mode, debug = false, release = false, git_hooks_enabled: git_hooks_enabled, debug = false, release = false, git_hooks_externally_managed: git_hooks_externally_managed, debug = false, release = false, + sharded_notes: sharded_notes, debug = false, release = false, // gated behind config ); impl FeatureFlags { @@ -127,6 +128,7 @@ mod tests { assert!(!flags.async_mode); assert!(!flags.git_hooks_enabled); assert!(!flags.git_hooks_externally_managed); + assert!(!flags.sharded_notes); } #[cfg(not(debug_assertions))] { @@ -136,6 +138,7 @@ mod tests { assert!(!flags.async_mode); assert!(!flags.git_hooks_enabled); assert!(!flags.git_hooks_externally_managed); + assert!(!flags.sharded_notes); } } @@ -245,6 +248,7 @@ mod tests { async_mode: true, git_hooks_enabled: false, git_hooks_externally_managed: false, + sharded_notes: false, }; let serialized = serde_json::to_string(&flags).unwrap(); @@ -254,6 +258,7 @@ mod tests { assert!(serialized.contains("async_mode")); assert!(serialized.contains("git_hooks_enabled")); assert!(serialized.contains("git_hooks_externally_managed")); + assert!(serialized.contains("sharded_notes")); } #[test] @@ -265,6 +270,7 @@ mod tests { async_mode: true, git_hooks_enabled: true, git_hooks_externally_managed: false, + sharded_notes: true, }; let cloned = flags.clone(); assert_eq!(cloned.rewrite_stash, flags.rewrite_stash); @@ -276,6 +282,7 @@ mod tests { cloned.git_hooks_externally_managed, flags.git_hooks_externally_managed ); + assert_eq!(cloned.sharded_notes, flags.sharded_notes); } #[test] diff --git a/src/git/refs.rs b/src/git/refs.rs index 12199ff95..060e62031 100644 --- a/src/git/refs.rs +++ b/src/git/refs.rs @@ -10,22 +10,83 @@ use std::collections::{HashMap, HashSet}; pub const AI_AUTHORSHIP_REFNAME: &str = "ai"; pub const AI_AUTHORSHIP_PUSH_REFSPEC: &str = "refs/notes/ai:refs/notes/ai"; +// Sharded notes: 256 independent refs keyed by first 2 hex chars of the annotated +// commit SHA. This dramatically reduces push contention on busy monorepos. +// Uses "ai-s" prefix (not "ai/" which conflicts with "ai" as a loose ref file). +pub const AI_SHARDED_NOTES_PREFIX: &str = "refs/notes/ai-s/"; + +/// Return the shard suffix (first 2 hex chars) for a given commit SHA. +/// Panics if commit_sha is shorter than 2 characters (should never happen with real SHAs). +pub fn shard_for_commit(commit_sha: &str) -> &str { + debug_assert!( + commit_sha.len() >= 2, + "shard_for_commit called with SHA shorter than 2 chars: {:?}", + commit_sha + ); + if commit_sha.len() < 2 { + return "00"; + } + &commit_sha[..2] +} + +/// Return the full sharded ref (e.g. "refs/notes/ai-s/ab") for a commit. +fn sharded_ref_for_commit(commit_sha: &str) -> String { + format!( + "{}{}", + AI_SHARDED_NOTES_PREFIX, + shard_for_commit(commit_sha) + ) +} + +/// Return the --ref= argument value for the shard of a commit (e.g. "ai-s/ab"). +fn sharded_refname_for_commit(commit_sha: &str) -> String { + format!("ai-s/{}", shard_for_commit(commit_sha)) +} + +fn sharded_notes_enabled() -> bool { + if crate::daemon::daemon_process_active() { + let config = crate::config::Config::fresh(); + config.get_feature_flags().sharded_notes + } else { + crate::config::Config::get() + .get_feature_flags() + .sharded_notes + } +} + pub fn notes_add( repo: &Repository, commit_sha: &str, note_content: &str, ) -> Result<(), GitAiError> { + // Always write to the legacy ref (backward compat for old clients) let mut args = repo.global_args_for_exec(); args.push("notes".to_string()); args.push("--ref=ai".to_string()); args.push("add".to_string()); - args.push("-f".to_string()); // Always force overwrite + args.push("-f".to_string()); args.push("-F".to_string()); - args.push("-".to_string()); // Read note content from stdin + args.push("-".to_string()); args.push(commit_sha.to_string()); - - // Use stdin to provide the note content to avoid command line length limits exec_git_stdin(&args, note_content.as_bytes())?; + + // Dual-write to shard ref when sharded notes are enabled + if sharded_notes_enabled() { + let shard_ref = sharded_refname_for_commit(commit_sha); + let mut args = repo.global_args_for_exec(); + args.push("notes".to_string()); + args.push(format!("--ref={}", shard_ref)); + args.push("add".to_string()); + args.push("-f".to_string()); + args.push("-F".to_string()); + args.push("-".to_string()); + args.push(commit_sha.to_string()); + // Best-effort: shard write failure should not block the operation + if let Err(e) = exec_git_stdin(&args, note_content.as_bytes()) { + debug_log(&format!("shard notes_add failed for {}: {}", shard_ref, e)); + } + } + crate::authorship::git_ai_hooks::post_notes_updated_single(repo, commit_sha, note_content); Ok(()) } @@ -46,6 +107,18 @@ fn fanout_note_pathspec_for_commit(commit_sha: &str) -> String { format!("refs/notes/ai:{}", notes_path_for_object(commit_sha)) } +fn sharded_flat_note_pathspec_for_commit(commit_sha: &str) -> String { + format!("{}:{}", sharded_ref_for_commit(commit_sha), commit_sha) +} + +fn sharded_fanout_note_pathspec_for_commit(commit_sha: &str) -> String { + format!( + "{}:{}", + sharded_ref_for_commit(commit_sha), + notes_path_for_object(commit_sha) + ) +} + fn parse_batch_check_blob_oid(line: &str) -> Option { let parts: Vec<&str> = line.split_whitespace().collect(); let oid = parts.first().copied().unwrap_or_default(); @@ -135,6 +208,7 @@ fn batch_read_blob_contents( /// Resolve authorship note blob OIDs for a set of commits using one batched cat-file call. /// /// Returns a map of commit SHA -> note blob SHA for commits that currently have notes. +/// When sharded notes are enabled, checks shard refs first and falls back to the legacy ref. pub fn note_blob_oids_for_commits( repo: &Repository, commit_shas: &[String], @@ -143,14 +217,22 @@ pub fn note_blob_oids_for_commits( return Ok(HashMap::new()); } + let sharded = sharded_notes_enabled(); + let mut args = repo.global_args_for_exec(); args.push("cat-file".to_string()); args.push("--batch-check".to_string()); + // When sharded: 4 lines per commit (shard flat, shard fanout, legacy flat, legacy fanout) + // When not sharded: 2 lines per commit (legacy flat, legacy fanout) let mut stdin_data = String::new(); for commit_sha in commit_shas { - // Notes can be stored with either flat paths () or fanout paths (/). - // Query both forms so this works regardless of repository note fanout state. + if sharded { + stdin_data.push_str(&sharded_flat_note_pathspec_for_commit(commit_sha)); + stdin_data.push('\n'); + stdin_data.push_str(&sharded_fanout_note_pathspec_for_commit(commit_sha)); + stdin_data.push('\n'); + } stdin_data.push_str(&flat_note_pathspec_for_commit(commit_sha)); stdin_data.push('\n'); stdin_data.push_str(&fanout_note_pathspec_for_commit(commit_sha)); @@ -163,6 +245,20 @@ pub fn note_blob_oids_for_commits( let mut result = HashMap::new(); for commit_sha in commit_shas { + if sharded { + let shard_flat = lines.next().unwrap_or_default(); + let shard_fanout = lines.next().unwrap_or_default(); + if let Some(oid) = parse_batch_check_blob_oid(shard_flat) + .or_else(|| parse_batch_check_blob_oid(shard_fanout)) + { + // Consume legacy lines but don't use them + let _ = lines.next(); + let _ = lines.next(); + result.insert(commit_sha.clone(), oid); + continue; + } + } + let Some(flat_line) = lines.next() else { break; }; @@ -178,23 +274,56 @@ pub fn note_blob_oids_for_commits( Ok(result) } -pub fn notes_add_batch(repo: &Repository, entries: &[(String, String)]) -> Result<(), GitAiError> { - if entries.is_empty() { - return Ok(()); - } - +/// Resolve the current tip of a notes ref, returning None if the ref doesn't exist. +fn resolve_notes_tip(repo: &Repository, notes_ref: &str) -> Result, GitAiError> { let mut args = repo.global_args_for_exec(); args.push("rev-parse".to_string()); args.push("--verify".to_string()); - args.push("refs/notes/ai".to_string()); - let existing_notes_tip = match exec_git(&args) { - Ok(output) => Some(String::from_utf8(output.stdout)?.trim().to_string()), + args.push(notes_ref.to_string()); + match exec_git(&args) { + Ok(output) => Ok(Some(String::from_utf8(output.stdout)?.trim().to_string())), Err(GitAiError::GitCliError { code: Some(128), .. }) - | Err(GitAiError::GitCliError { code: Some(1), .. }) => None, - Err(e) => return Err(e), - }; + | Err(GitAiError::GitCliError { code: Some(1), .. }) => Ok(None), + Err(e) => Err(e), + } +} + +/// Append a fast-import commit block for a notes ref to `script`. +/// `entries` are (mark_index, commit_sha) pairs — marks must already be defined as blobs. +fn append_notes_commit_block( + script: &mut Vec, + notes_ref: &str, + existing_tip: Option<&str>, + entries: &[(usize, &str)], + now: u64, +) { + script.extend_from_slice(format!("commit {}\n", notes_ref).as_bytes()); + script.extend_from_slice(format!("committer git-ai {} +0000\n", now).as_bytes()); + script.extend_from_slice(b"data 0\n"); + if let Some(tip) = existing_tip { + script.extend_from_slice(format!("from {}\n", tip).as_bytes()); + } + + for (mark_idx, commit_sha) in entries { + let fanout_path = notes_path_for_object(commit_sha); + let flat_path = *commit_sha; + if flat_path != fanout_path { + script.extend_from_slice(format!("D {}\n", flat_path).as_bytes()); + } + script.extend_from_slice(format!("D {}\n", fanout_path).as_bytes()); + script.extend_from_slice(format!("M 100644 :{} {}\n", mark_idx, fanout_path).as_bytes()); + } + script.extend_from_slice(b"\n"); +} + +pub fn notes_add_batch(repo: &Repository, entries: &[(String, String)]) -> Result<(), GitAiError> { + if entries.is_empty() { + return Ok(()); + } + + let existing_notes_tip = resolve_notes_tip(repo, "refs/notes/ai")?; let mut deduped_entries: Vec<(String, String)> = Vec::new(); let mut seen = HashSet::new(); @@ -210,8 +339,25 @@ pub fn notes_add_batch(repo: &Repository, entries: &[(String, String)]) -> Resul .map_err(|e| GitAiError::Generic(format!("System clock before epoch: {}", e)))? .as_secs(); + let sharded = sharded_notes_enabled(); + + // Resolve shard tips upfront if sharding is enabled + let shard_tips: HashMap> = if sharded { + let mut tips = HashMap::new(); + for (commit_sha, _) in &deduped_entries { + let shard_ref = sharded_ref_for_commit(commit_sha); + if !tips.contains_key(&shard_ref) { + tips.insert(shard_ref.clone(), resolve_notes_tip(repo, &shard_ref)?); + } + } + tips + } else { + HashMap::new() + }; + let mut script = Vec::::new(); + // Emit blob definitions with marks for (idx, (_commit_sha, note_content)) in deduped_entries.iter().enumerate() { script.extend_from_slice(b"blob\n"); script.extend_from_slice(format!("mark :{}\n", idx + 1).as_bytes()); @@ -220,23 +366,42 @@ pub fn notes_add_batch(repo: &Repository, entries: &[(String, String)]) -> Resul script.extend_from_slice(b"\n"); } - script.extend_from_slice(b"commit refs/notes/ai\n"); - script.extend_from_slice(format!("committer git-ai {} +0000\n", now).as_bytes()); - script.extend_from_slice(b"data 0\n"); - if let Some(existing_tip) = existing_notes_tip { - script.extend_from_slice(format!("from {}\n", existing_tip).as_bytes()); - } + // Legacy commit block (always written) + let legacy_entries: Vec<(usize, &str)> = deduped_entries + .iter() + .enumerate() + .map(|(idx, (sha, _))| (idx + 1, sha.as_str())) + .collect(); + append_notes_commit_block( + &mut script, + "refs/notes/ai", + existing_notes_tip.as_deref(), + &legacy_entries, + now, + ); + + // Shard commit blocks (one per unique shard, when enabled) + if sharded { + // Group entries by shard + let mut shard_entries: HashMap> = HashMap::new(); + for (idx, (commit_sha, _)) in deduped_entries.iter().enumerate() { + let shard_ref = sharded_ref_for_commit(commit_sha); + shard_entries + .entry(shard_ref) + .or_default() + .push((idx + 1, commit_sha.as_str())); + } - for (idx, (commit_sha, _note_content)) in deduped_entries.iter().enumerate() { - let fanout_path = notes_path_for_object(commit_sha); - let flat_path = commit_sha.clone(); - if flat_path != fanout_path { - script.extend_from_slice(format!("D {}\n", flat_path).as_bytes()); + // Sort shard keys for deterministic output + let mut shard_keys: Vec<&String> = shard_entries.keys().collect(); + shard_keys.sort(); + + for shard_ref in shard_keys { + let entries = &shard_entries[shard_ref]; + let tip = shard_tips.get(shard_ref).and_then(|t| t.as_deref()); + append_notes_commit_block(&mut script, shard_ref, tip, entries, now); } - script.extend_from_slice(format!("D {}\n", fanout_path).as_bytes()); - script.extend_from_slice(format!("M 100644 :{} {}\n", idx + 1, fanout_path).as_bytes()); } - script.extend_from_slice(b"\n"); let mut fast_import_args = repo.global_args_for_exec(); fast_import_args.push("fast-import".to_string()); @@ -247,6 +412,32 @@ pub fn notes_add_batch(repo: &Repository, entries: &[(String, String)]) -> Resul Ok(()) } +/// Append a fast-import commit block that references existing blob OIDs (no marks needed). +fn append_notes_blob_commit_block( + script: &mut Vec, + notes_ref: &str, + existing_tip: Option<&str>, + entries: &[(&str, &str)], // (commit_sha, blob_oid) + now: u64, +) { + script.extend_from_slice(format!("commit {}\n", notes_ref).as_bytes()); + script.extend_from_slice(format!("committer git-ai {} +0000\n", now).as_bytes()); + script.extend_from_slice(b"data 0\n"); + if let Some(tip) = existing_tip { + script.extend_from_slice(format!("from {}\n", tip).as_bytes()); + } + + for (commit_sha, blob_oid) in entries { + let fanout_path = notes_path_for_object(commit_sha); + if *commit_sha != fanout_path { + script.extend_from_slice(format!("D {}\n", commit_sha).as_bytes()); + } + script.extend_from_slice(format!("D {}\n", fanout_path).as_bytes()); + script.extend_from_slice(format!("M 100644 {} {}\n", blob_oid, fanout_path).as_bytes()); + } + script.extend_from_slice(b"\n"); +} + /// Batch-attach existing note blobs to commits without rewriting blob contents. /// /// Each entry is (commit_sha, existing_note_blob_oid). @@ -259,18 +450,7 @@ pub fn notes_add_blob_batch( return Ok(()); } - let mut args = repo.global_args_for_exec(); - args.push("rev-parse".to_string()); - args.push("--verify".to_string()); - args.push("refs/notes/ai".to_string()); - let existing_notes_tip = match exec_git(&args) { - Ok(output) => Some(String::from_utf8(output.stdout)?.trim().to_string()), - Err(GitAiError::GitCliError { - code: Some(128), .. - }) - | Err(GitAiError::GitCliError { code: Some(1), .. }) => None, - Err(e) => return Err(e), - }; + let existing_notes_tip = resolve_notes_tip(repo, "refs/notes/ai")?; let mut deduped_entries: Vec<(String, String)> = Vec::new(); let mut seen = HashSet::new(); @@ -286,24 +466,57 @@ pub fn notes_add_blob_batch( .map_err(|e| GitAiError::Generic(format!("System clock before epoch: {}", e)))? .as_secs(); + let sharded = sharded_notes_enabled(); + + // Resolve shard tips upfront + let shard_tips: HashMap> = if sharded { + let mut tips = HashMap::new(); + for (commit_sha, _) in &deduped_entries { + let shard_ref = sharded_ref_for_commit(commit_sha); + if !tips.contains_key(&shard_ref) { + tips.insert(shard_ref.clone(), resolve_notes_tip(repo, &shard_ref)?); + } + } + tips + } else { + HashMap::new() + }; + let mut script = Vec::::new(); - script.extend_from_slice(b"commit refs/notes/ai\n"); - script.extend_from_slice(format!("committer git-ai {} +0000\n", now).as_bytes()); - script.extend_from_slice(b"data 0\n"); - if let Some(existing_tip) = existing_notes_tip { - script.extend_from_slice(format!("from {}\n", existing_tip).as_bytes()); - } - for (commit_sha, blob_oid) in &deduped_entries { - let fanout_path = notes_path_for_object(commit_sha); - let flat_path = commit_sha.clone(); - if flat_path != fanout_path { - script.extend_from_slice(format!("D {}\n", flat_path).as_bytes()); + // Legacy commit block + let legacy_entries: Vec<(&str, &str)> = deduped_entries + .iter() + .map(|(sha, oid)| (sha.as_str(), oid.as_str())) + .collect(); + append_notes_blob_commit_block( + &mut script, + "refs/notes/ai", + existing_notes_tip.as_deref(), + &legacy_entries, + now, + ); + + // Shard commit blocks + if sharded { + let mut shard_entries: HashMap> = HashMap::new(); + for (commit_sha, blob_oid) in &deduped_entries { + let shard_ref = sharded_ref_for_commit(commit_sha); + shard_entries + .entry(shard_ref) + .or_default() + .push((commit_sha.as_str(), blob_oid.as_str())); + } + + let mut shard_keys: Vec<&String> = shard_entries.keys().collect(); + shard_keys.sort(); + + for shard_ref in shard_keys { + let entries = &shard_entries[shard_ref]; + let tip = shard_tips.get(shard_ref).and_then(|t| t.as_deref()); + append_notes_blob_commit_block(&mut script, shard_ref, tip, entries, now); } - script.extend_from_slice(format!("D {}\n", fanout_path).as_bytes()); - script.extend_from_slice(format!("M 100644 {} {}\n", blob_oid, fanout_path).as_bytes()); } - script.extend_from_slice(b"\n"); let mut fast_import_args = repo.global_args_for_exec(); fast_import_args.push("fast-import".to_string()); @@ -436,7 +649,28 @@ pub fn get_commits_with_notes_from_list( } // Show an authorship note and return its JSON content if found, or None if it doesn't exist. +// When sharded notes are enabled, checks the shard ref first, then falls back to legacy. pub fn show_authorship_note(repo: &Repository, commit_sha: &str) -> Option { + if sharded_notes_enabled() { + let shard_ref = sharded_refname_for_commit(commit_sha); + let mut args = repo.global_args_for_exec(); + args.push("notes".to_string()); + args.push(format!("--ref={}", shard_ref)); + args.push("show".to_string()); + args.push(commit_sha.to_string()); + + if let Ok(output) = exec_git(&args) { + let result = String::from_utf8(output.stdout) + .ok() + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()); + if result.is_some() { + return result; + } + } + } + + // Fall back to legacy ref let mut args = repo.global_args_for_exec(); args.push("notes".to_string()); args.push("--ref=ai".to_string()); @@ -737,7 +971,55 @@ pub fn grep_ai_notes(repo: &Repository, pattern: &str) -> Result, Gi #[cfg(test)] mod tests { use super::*; + use crate::feature_flags::FeatureFlags; use crate::git::test_utils::TmpRepo; + use serial_test::serial; + + struct TestFeatureFlagsGuard; + + impl TestFeatureFlagsGuard { + fn new() -> Self { + crate::config::Config::clear_test_feature_flags(); + Self + } + } + + impl Drop for TestFeatureFlagsGuard { + fn drop(&mut self) { + crate::config::Config::clear_test_feature_flags(); + } + } + + fn set_test_sharded_notes(enabled: bool) { + let flags = FeatureFlags { + sharded_notes: enabled, + ..FeatureFlags::default() + }; + crate::config::Config::set_test_feature_flags(flags); + } + + fn create_commit_without_authorship_note( + tmp_repo: &TmpRepo, + filename: &str, + contents: &str, + message: &str, + ) -> String { + tmp_repo + .write_file(filename, contents, true) + .expect("write file for manual commit"); + + let mut args = tmp_repo.gitai_repo().global_args_for_exec(); + args.extend_from_slice(&["add".to_string(), ".".to_string()]); + exec_git(&args).expect("stage files"); + + let mut args = tmp_repo.gitai_repo().global_args_for_exec(); + args.extend_from_slice(&["commit".to_string(), "-m".to_string(), message.to_string()]); + exec_git(&args).expect("create manual commit"); + + tmp_repo + .get_head_commit_sha() + .expect("get manual commit sha") + } #[test] fn test_parse_batch_check_blob_oid_accepts_sha1_and_sha256() { @@ -793,6 +1075,118 @@ mod tests { assert!(non_existent_content.is_none()); } + #[test] + #[serial] + fn test_notes_add_respects_sharded_notes_flag_for_writes() { + let _guard = TestFeatureFlagsGuard::new(); + let tmp_repo = TmpRepo::new().expect("Failed to create tmp repo"); + set_test_sharded_notes(false); + + tmp_repo + .commit_with_message("Initial commit") + .expect("Failed to create initial commit"); + let commit_a = tmp_repo + .get_head_commit_sha() + .expect("Failed to get first commit SHA"); + + set_test_sharded_notes(false); + notes_add( + tmp_repo.gitai_repo(), + &commit_a, + "{\"mode\":\"legacy-only\"}", + ) + .expect("add non-sharded note"); + + let shard_ref_a = format!("refs/notes/ai-s/{}", &commit_a[..2]); + assert!( + !ref_exists(tmp_repo.gitai_repo(), &shard_ref_a), + "default-off write should not create shard refs" + ); + + tmp_repo + .write_file("second.txt", "second\n", true) + .expect("write second file"); + tmp_repo + .commit_with_message("Second commit") + .expect("create second commit"); + let commit_b = tmp_repo + .get_head_commit_sha() + .expect("Failed to get second commit SHA"); + + set_test_sharded_notes(true); + notes_add(tmp_repo.gitai_repo(), &commit_b, "{\"mode\":\"sharded\"}") + .expect("add sharded note"); + + let shard_ref_b = format!("refs/notes/ai-s/{}", &commit_b[..2]); + assert!( + ref_exists(tmp_repo.gitai_repo(), &shard_ref_b), + "enabling the flag should create the shard ref" + ); + } + + #[test] + #[serial] + fn test_shard_reads_are_ignored_when_flag_is_disabled() { + let _guard = TestFeatureFlagsGuard::new(); + let tmp_repo = TmpRepo::new().expect("Failed to create tmp repo"); + set_test_sharded_notes(false); + let commit_sha = create_commit_without_authorship_note( + &tmp_repo, + "shard-only.txt", + "shard only\n", + "Manual commit without authorship note", + ); + let shard_ref = format!("ai-s/{}", &commit_sha[..2]); + + let mut args = tmp_repo.gitai_repo().global_args_for_exec(); + args.extend_from_slice(&[ + "notes".to_string(), + format!("--ref={}", shard_ref), + "add".to_string(), + "-f".to_string(), + "-m".to_string(), + "{\"note\":\"shard-only\"}".to_string(), + commit_sha.clone(), + ]); + exec_git(&args).expect("add shard-only note"); + + set_test_sharded_notes(false); + assert!( + show_authorship_note(tmp_repo.gitai_repo(), &commit_sha).is_none(), + "default-off reads should ignore shard-only notes" + ); + assert!( + note_blob_oids_for_commits(tmp_repo.gitai_repo(), std::slice::from_ref(&commit_sha)) + .expect("resolve note blob oids") + .is_empty(), + "default-off blob lookup should ignore shard-only notes" + ); + assert!( + commits_with_authorship_notes(tmp_repo.gitai_repo(), std::slice::from_ref(&commit_sha)) + .expect("resolve commit note presence") + .is_empty(), + "default-off note existence checks should ignore shard-only notes" + ); + + set_test_sharded_notes(true); + assert_eq!( + show_authorship_note(tmp_repo.gitai_repo(), &commit_sha).as_deref(), + Some("{\"note\":\"shard-only\"}") + ); + assert!( + note_blob_oids_for_commits(tmp_repo.gitai_repo(), std::slice::from_ref(&commit_sha)) + .expect("resolve note blob oids with sharding") + .contains_key(&commit_sha), + "enabled shard reads should discover shard-only notes" + ); + assert!( + commits_with_authorship_notes(tmp_repo.gitai_repo(), std::slice::from_ref(&commit_sha)) + .expect("resolve commit note presence with sharding") + .contains(&commit_sha), + "enabled note existence checks should discover shard-only notes" + ); + } + #[test] fn test_notes_add_batch_writes_multiple_notes() { let tmp_repo = TmpRepo::new().expect("Failed to create tmp repo"); diff --git a/src/git/sync_authorship.rs b/src/git/sync_authorship.rs index c8258ee27..27d837a4d 100644 --- a/src/git/sync_authorship.rs +++ b/src/git/sync_authorship.rs @@ -1,10 +1,13 @@ use crate::git::refs::{ - AI_AUTHORSHIP_PUSH_REFSPEC, copy_ref, fallback_merge_notes_ours, merge_notes_from_ref, - ref_exists, tracking_ref_for_remote, + AI_AUTHORSHIP_PUSH_REFSPEC, AI_SHARDED_NOTES_PREFIX, copy_ref, fallback_merge_notes_ours, + merge_notes_from_ref, ref_exists, tracking_ref_for_remote, }; use crate::{ error::GitAiError, - git::{cli_parser::ParsedGitInvocation, repository::exec_git}, + git::{ + cli_parser::ParsedGitInvocation, + repository::{exec_git, exec_git_stdin}, + }, utils::debug_log, }; @@ -20,6 +23,224 @@ fn disabled_hooks_config() -> &'static str { "core.hooksPath=/dev/null" } +fn sharded_notes_enabled() -> bool { + if crate::daemon::daemon_process_active() { + let config = crate::config::Config::fresh(); + config.get_feature_flags().sharded_notes + } else { + crate::config::Config::get() + .get_feature_flags() + .sharded_notes + } +} + +/// Tracking ref prefix for sharded notes from a specific remote. +/// e.g. "refs/notes/ai-s-remote/origin/ab" +fn shard_tracking_ref_prefix(remote_name: &str) -> String { + format!( + "refs/notes/ai-s-remote/{}/", + sanitize_remote_name_for_ref(remote_name) + ) +} + +/// Sanitize a remote name for use in a ref path (same logic as refs.rs). +fn sanitize_remote_name_for_ref(remote: &str) -> String { + remote + .chars() + .map(|c| { + if c.is_alphanumeric() || c == '-' || c == '_' { + c + } else { + '_' + } + }) + .collect() +} + +/// Resolved shard ref pair: tracking ref name, local ref name, and their resolved OIDs. +struct ShardRefPair { + tracking_ref: String, + tracking_oid: String, + local_shard_ref: String, + local_oid: Option, // None means local doesn't exist yet +} + +/// Discover shard tracking refs and resolve both tracking and local OIDs in a single +/// `for-each-ref` + `cat-file --batch-check` round-trip pair (2 git processes total, +/// regardless of shard count). +fn resolve_shard_ref_pairs( + repository: &Repository, + remote_name: &str, +) -> Result, GitAiError> { + let prefix = shard_tracking_ref_prefix(remote_name); + + // 1. List tracking refs with their OIDs in one call + let mut args = repository.global_args_for_exec(); + args.push("for-each-ref".to_string()); + args.push("--format=%(objectname) %(refname)".to_string()); + args.push(prefix.clone()); + + let output = match exec_git(&args) { + Ok(o) => o, + Err(_) => return Ok(Vec::new()), + }; + let stdout = String::from_utf8(output.stdout) + .map_err(|_| GitAiError::Generic("bad utf8".to_string()))?; + + // Each entry: (tracking_oid, tracking_refname, local_shard_ref) + let tracking_refs: Vec<(String, String, String)> = stdout + .lines() + .filter(|l| !l.is_empty()) + .filter_map(|line| { + let (oid, refname) = line.split_once(' ')?; + let shard = refname.strip_prefix(&prefix)?; + let local_ref = format!("{}{}", AI_SHARDED_NOTES_PREFIX, shard); + Some((oid.to_string(), refname.to_string(), local_ref)) + }) + .collect(); + + if tracking_refs.is_empty() { + return Ok(Vec::new()); + } + + // 2. Batch-resolve all local shard ref OIDs in one cat-file call + let mut batch_args = repository.global_args_for_exec(); + batch_args.push("cat-file".to_string()); + batch_args.push("--batch-check".to_string()); + + let stdin_data: String = tracking_refs + .iter() + .map(|(_, _, local_ref)| local_ref.as_str()) + .collect::>() + .join("\n") + + "\n"; + + let batch_output = exec_git_stdin(&batch_args, stdin_data.as_bytes())?; + let batch_stdout = String::from_utf8(batch_output.stdout) + .map_err(|_| GitAiError::Generic("bad utf8".to_string()))?; + + let pairs: Vec = tracking_refs + .into_iter() + .zip(batch_stdout.lines()) + .map( + |((tracking_oid, tracking_ref, local_shard_ref), batch_line)| { + // cat-file --batch-check output: " commit " or " missing" + let local_oid = if batch_line.contains("missing") { + None + } else { + batch_line.split_whitespace().next().map(|s| s.to_string()) + }; + ShardRefPair { + tracking_ref, + tracking_oid, + local_shard_ref, + local_oid, + } + }, + ) + .collect(); + + Ok(pairs) +} + +/// Merge all shard tracking refs into their corresponding local shard refs. +/// +/// Optimized to minimize git process invocations: +/// - 2 processes to discover and resolve all shard pairs (for-each-ref + cat-file) +/// - Skips shards where tracking == local (no change) — zero cost +/// - Batches all "copy" operations (local doesn't exist) into one update-ref --stdin call +/// - Only runs `git notes merge` for genuinely diverged shards (typically 1-2 per fetch) +fn merge_shard_tracking_refs(repository: &Repository, remote_name: &str) { + let pairs = match resolve_shard_ref_pairs(repository, remote_name) { + Ok(p) => p, + Err(e) => { + debug_log(&format!("failed to resolve shard ref pairs: {}", e)); + return; + } + }; + + if pairs.is_empty() { + return; + } + + // Partition into: copies (local doesn't exist) and merges (both exist, different OIDs) + let mut copies: Vec<(&str, &str)> = Vec::new(); // (local_ref, tracking_oid) + let mut merges: Vec<(&str, &str)> = Vec::new(); // (local_ref, tracking_ref) + + for pair in &pairs { + match &pair.local_oid { + None => { + // Local doesn't exist — copy tracking OID + copies.push((&pair.local_shard_ref, &pair.tracking_oid)); + } + Some(local_oid) if local_oid == &pair.tracking_oid => { + // Already in sync — skip + } + Some(_) => { + // Both exist, different — need merge + merges.push((&pair.local_shard_ref, &pair.tracking_ref)); + } + } + } + + debug_log(&format!( + "shard merge: {} unchanged, {} copies, {} merges", + pairs.len() - copies.len() - merges.len(), + copies.len(), + merges.len(), + )); + + // Batch all copies into one update-ref --stdin call + if !copies.is_empty() { + let mut args = repository.global_args_for_exec(); + args.push("update-ref".to_string()); + args.push("--stdin".to_string()); + + let mut stdin_data = String::new(); + for (local_ref, tracking_oid) in &copies { + // "create \n" — sets ref to oid, fails if ref already exists + // (safe here because we confirmed local doesn't exist) + stdin_data.push_str(&format!("create {} {}\n", local_ref, tracking_oid)); + } + + if let Err(e) = exec_git_stdin(&args, stdin_data.as_bytes()) { + debug_log(&format!("batch shard copy failed: {}", e)); + // Fall back to individual copies + for (local_ref, _tracking_oid) in &copies { + if let Some(pair) = pairs.iter().find(|p| p.local_shard_ref == **local_ref) + && let Err(e) = copy_ref(repository, &pair.tracking_ref, local_ref) + { + debug_log(&format!( + "shard copy failed for {} <- {}: {}", + local_ref, pair.tracking_ref, e + )); + } + } + } + } + + // Merges: these are genuinely diverged shards — run notes merge per shard. + // Typically only 1-2 shards per fetch, so this is acceptable. + for (local_ref, tracking_ref) in &merges { + let shard_name = local_ref.strip_prefix("refs/notes/").unwrap_or(local_ref); + let mut args = repository.global_args_for_exec(); + args.push("notes".to_string()); + args.push(format!("--ref={}", shard_name)); + args.push("merge".to_string()); + args.push("-s".to_string()); + args.push("ours".to_string()); + args.push("--quiet".to_string()); + args.push(tracking_ref.to_string()); + + if let Err(e) = exec_git(&args) { + debug_log(&format!( + "shard merge failed for {} <- {}: {}", + local_ref, tracking_ref, e + )); + } + } +} + /// Result of checking for authorship notes on a remote #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum NotesExistence { @@ -76,29 +297,25 @@ pub fn fetch_authorship_notes( repository: &Repository, remote_name: &str, ) -> Result { - // Generate tracking ref for this remote let tracking_ref = tracking_ref_for_remote(remote_name); + let sharded = sharded_notes_enabled(); debug_log(&format!( "fetching authorship notes for remote '{}' to tracking ref '{}'", remote_name, tracking_ref )); - // Fetch notes to tracking ref with explicit refspec. - // If the remote does not have refs/notes/ai yet, treat that as NotFound. + // Fetch legacy ref let fetch_refspec = format!("+refs/notes/ai:{}", tracking_ref); - - // Build the internal authorship fetch with explicit flags and disabled hooks. - // IMPORTANT: use repository.global_args_for_exec() to ensure -C flag is present for bare repos. let fetch_authorship = build_authorship_fetch_args( repository.global_args_for_exec(), remote_name, - &fetch_refspec, + &[fetch_refspec.as_str()], ); debug_log(&format!("fetch command: {:?}", fetch_authorship)); - match exec_git(&fetch_authorship) { + let legacy_found = match exec_git(&fetch_authorship) { Ok(output) => { debug_log(&format!( "fetch stdout: '{}'", @@ -108,26 +325,55 @@ pub fn fetch_authorship_notes( "fetch stderr: '{}'", String::from_utf8_lossy(&output.stderr) )); + true } Err(e) => { if is_missing_remote_notes_ref_error(&e) { debug_log(&format!( - "no authorship notes found on remote '{}', nothing to sync", + "no legacy authorship notes found on remote '{}'", remote_name )); - return Ok(NotesExistence::NotFound); + false + } else { + debug_log(&format!("authorship fetch failed: {}", e)); + return Err(e); + } + } + }; + + // Fetch shard refs separately so a missing legacy ref doesn't block shard fetching + let mut shards_found = false; + if sharded { + let shard_prefix = shard_tracking_ref_prefix(remote_name); + let shard_refspec = format!("+{}*:{}*", AI_SHARDED_NOTES_PREFIX, shard_prefix); + let shard_fetch = build_authorship_fetch_args( + repository.global_args_for_exec(), + remote_name, + &[shard_refspec.as_str()], + ); + + debug_log(&format!("shard fetch command: {:?}", shard_fetch)); + + match exec_git(&shard_fetch) { + Ok(_) => { + shards_found = true; + } + Err(e) => { + // Wildcard refspecs that match nothing don't error, so this is a real failure + debug_log(&format!("shard fetch failed: {}", e)); } - debug_log(&format!("authorship fetch failed: {}", e)); - return Err(e); } } - // After successful fetch, merge the tracking ref into refs/notes/ai + if !legacy_found && !shards_found { + return Ok(NotesExistence::NotFound); + } + + // Merge the legacy tracking ref into refs/notes/ai let local_notes_ref = "refs/notes/ai"; if crate::git::refs::ref_exists(repository, &tracking_ref) { if crate::git::refs::ref_exists(repository, local_notes_ref) { - // Both exist - merge them debug_log(&format!( "merging authorship notes from {} into {}", tracking_ref, local_notes_ref @@ -140,14 +386,12 @@ pub fn fetch_authorship_notes( } } } else { - // Only tracking ref exists - copy it to local debug_log(&format!( "initializing {} from tracking ref {}", local_notes_ref, tracking_ref )); if let Err(e) = copy_ref(repository, &tracking_ref, local_notes_ref) { debug_log(&format!("notes copy failed: {}", e)); - // Don't fail on copy errors, just log and continue } } } else { @@ -157,6 +401,11 @@ pub fn fetch_authorship_notes( )); } + // Merge shard tracking refs + if sharded { + merge_shard_tracking_refs(repository, remote_name); + } + Ok(NotesExistence::Found) } @@ -193,7 +442,19 @@ pub fn push_authorship_notes(repository: &Repository, remote_name: &str) -> Resu fetch_and_merge_tracking_notes(repository, remote_name); // Push notes without force (requires fast-forward) - let push_args = build_authorship_push_args(repository.global_args_for_exec(), remote_name); + let sharded = sharded_notes_enabled(); + let shard_push_refspec = + format!("{}*:{}*", AI_SHARDED_NOTES_PREFIX, AI_SHARDED_NOTES_PREFIX); + let mut push_refspecs: Vec<&str> = vec![AI_AUTHORSHIP_PUSH_REFSPEC]; + if sharded { + push_refspecs.push(&shard_push_refspec); + } + + let push_args = build_authorship_push_args( + repository.global_args_for_exec(), + remote_name, + &push_refspecs, + ); debug_log(&format!( "pushing authorship refs (no force): {:?}", @@ -220,53 +481,63 @@ pub fn push_authorship_notes(repository: &Repository, remote_name: &str) -> Resu } /// Fetch remote notes into a tracking ref and merge into local refs/notes/ai. +/// Fetches legacy and shard refs separately so a missing ref doesn't block the other. fn fetch_and_merge_tracking_notes(repository: &Repository, remote_name: &str) { + let sharded = sharded_notes_enabled(); let tracking_ref = tracking_ref_for_remote(remote_name); - let fetch_refspec = format!("+refs/notes/ai:{}", tracking_ref); - let fetch_args = build_authorship_fetch_args( + // Fetch legacy ref (best-effort) + let fetch_refspec = format!("+refs/notes/ai:{}", tracking_ref); + let legacy_fetch = build_authorship_fetch_args( repository.global_args_for_exec(), remote_name, - &fetch_refspec, + &[fetch_refspec.as_str()], ); - debug_log(&format!("pre-push authorship fetch: {:?}", &fetch_args)); - - // Fetch is best-effort; if it fails (e.g., no remote notes yet), continue - if exec_git(&fetch_args).is_err() { - return; - } - - let local_notes_ref = "refs/notes/ai"; + debug_log(&format!("pre-push authorship fetch: {:?}", &legacy_fetch)); - if !ref_exists(repository, &tracking_ref) { - return; - } + if exec_git(&legacy_fetch).is_ok() { + let local_notes_ref = "refs/notes/ai"; - if !ref_exists(repository, local_notes_ref) { - // Only tracking ref exists - copy it to local - debug_log(&format!( - "pre-push: initializing {} from {}", - local_notes_ref, tracking_ref - )); - if let Err(e) = copy_ref(repository, &tracking_ref, local_notes_ref) { - debug_log(&format!("pre-push notes copy failed: {}", e)); + if ref_exists(repository, &tracking_ref) { + if ref_exists(repository, local_notes_ref) { + debug_log(&format!( + "pre-push: merging {} into {}", + tracking_ref, local_notes_ref + )); + if let Err(e) = merge_notes_from_ref(repository, &tracking_ref) { + debug_log(&format!("pre-push notes merge failed: {}", e)); + // Fallback: manually merge notes when git notes merge crashes + if let Err(e2) = fallback_merge_notes_ours(repository, &tracking_ref) { + debug_log(&format!("pre-push fallback merge also failed: {}", e2)); + } + } + } else { + debug_log(&format!( + "pre-push: initializing {} from {}", + local_notes_ref, tracking_ref + )); + if let Err(e) = copy_ref(repository, &tracking_ref, local_notes_ref) { + debug_log(&format!("pre-push notes copy failed: {}", e)); + } + } } - return; } - // Both exist - merge them - debug_log(&format!( - "pre-push: merging {} into {}", - tracking_ref, local_notes_ref - )); - if let Err(e) = merge_notes_from_ref(repository, &tracking_ref) { - debug_log(&format!("pre-push notes merge failed: {}", e)); - // Fallback: manually merge notes when git notes merge crashes - // (e.g., due to corrupted/mixed-fanout notes trees, or git bugs - // with fanout-level mismatches on older git versions like macOS) - if let Err(e2) = fallback_merge_notes_ours(repository, &tracking_ref) { - debug_log(&format!("pre-push fallback merge also failed: {}", e2)); + // Fetch shard refs separately (best-effort) + if sharded { + let shard_prefix = shard_tracking_ref_prefix(remote_name); + let shard_fetch_refspec = format!("+{}*:{}*", AI_SHARDED_NOTES_PREFIX, shard_prefix); + let shard_fetch = build_authorship_fetch_args( + repository.global_args_for_exec(), + remote_name, + &[shard_fetch_refspec.as_str()], + ); + + debug_log(&format!("pre-push shard fetch: {:?}", &shard_fetch)); + + if exec_git(&shard_fetch).is_ok() { + merge_shard_tracking_refs(repository, remote_name); } } } @@ -338,7 +609,7 @@ fn with_disabled_hooks(mut args: Vec) -> Vec { fn build_authorship_fetch_args( global_args: Vec, remote_name: &str, - fetch_refspec: &str, + fetch_refspecs: &[&str], ) -> Vec { let mut args = with_disabled_hooks(global_args); args.push("fetch".to_string()); @@ -348,11 +619,17 @@ fn build_authorship_fetch_args( args.push("--no-write-commit-graph".to_string()); args.push("--no-auto-maintenance".to_string()); args.push(remote_name.to_string()); - args.push(fetch_refspec.to_string()); + for refspec in fetch_refspecs { + args.push(refspec.to_string()); + } args } -fn build_authorship_push_args(global_args: Vec, remote_name: &str) -> Vec { +fn build_authorship_push_args( + global_args: Vec, + remote_name: &str, + push_refspecs: &[&str], +) -> Vec { let mut args = with_disabled_hooks(global_args); args.push("push".to_string()); args.push("--quiet".to_string()); @@ -360,7 +637,9 @@ fn build_authorship_push_args(global_args: Vec, remote_name: &str) -> Ve args.push("--no-verify".to_string()); args.push("--no-signed".to_string()); args.push(remote_name.to_string()); - args.push(AI_AUTHORSHIP_PUSH_REFSPEC.to_string()); + for refspec in push_refspecs { + args.push(refspec.to_string()); + } args } @@ -374,7 +653,7 @@ mod tests { let args = build_authorship_fetch_args( vec!["-C".to_string(), "/tmp/repo".to_string()], "origin", - "+refs/notes/ai:refs/notes/ai-remote/origin", + &["+refs/notes/ai:refs/notes/ai-remote/origin"], ); assert!( @@ -387,8 +666,11 @@ mod tests { #[test] fn authorship_push_args_always_disable_hooks() { let disabled_hooks = disabled_hooks_config(); - let args = - build_authorship_push_args(vec!["-C".to_string(), "/tmp/repo".to_string()], "origin"); + let args = build_authorship_push_args( + vec!["-C".to_string(), "/tmp/repo".to_string()], + "origin", + &[AI_AUTHORSHIP_PUSH_REFSPEC], + ); assert!( args.windows(2) diff --git a/tests/integration/internal_machine_commands.rs b/tests/integration/internal_machine_commands.rs index 91b621371..dc7983759 100644 --- a/tests/integration/internal_machine_commands.rs +++ b/tests/integration/internal_machine_commands.rs @@ -2,8 +2,40 @@ use crate::repos::test_repo::{TestRepo, real_git_executable}; use serde_json::json; use std::fs; use std::io::Write; +use std::path::Path; use std::process::{Command, Stdio}; +fn set_sharded_notes(repo: &mut TestRepo, enabled: bool) { + repo.patch_git_ai_config(|patch| { + let mut flags = match patch.feature_flags.take() { + Some(existing) if existing.is_object() => existing, + _ => json!({}), + }; + let flags_obj = flags + .as_object_mut() + .expect("feature_flags value should be an object"); + flags_obj.insert("sharded_notes".to_string(), json!(enabled)); + patch.feature_flags = Some(flags); + }); +} + +fn enable_sharded_notes(repo: &mut TestRepo) { + set_sharded_notes(repo, true); +} + +fn disable_sharded_notes(repo: &mut TestRepo) { + set_sharded_notes(repo, false); +} + +fn git_ref_exists(repo_path: &Path, ref_name: &str) -> bool { + Command::new("git") + .args(["-C", repo_path.to_str().unwrap()]) + .args(["show-ref", "--verify", "--quiet", ref_name]) + .status() + .expect("git should run") + .success() +} + #[test] fn test_effective_ignore_patterns_internal_command_json() { let repo = TestRepo::new(); @@ -408,3 +440,309 @@ fn test_push_authorship_notes_retries_on_concurrent_push() { "upstream should have note from concurrent pusher" ); } + +/// With sharded_notes enabled, push-authorship-notes writes both legacy refs/notes/ai +/// AND shard refs (refs/notes/ai-s/XX) to the upstream. +#[test] +fn test_sharded_notes_dual_write_on_push() { + let (mut mirror, upstream) = TestRepo::new_with_remote(); + enable_sharded_notes(&mut mirror); + + fs::write(mirror.path().join("shard_test.txt"), "sharded notes test\n") + .expect("should write file"); + let commit = mirror + .stage_all_and_commit("add shard test file") + .expect("commit should succeed"); + + let request = json!({ "remote_name": "origin" }).to_string(); + + let push_output = mirror + .git_ai(&["push-authorship-notes", "--json", &request]) + .expect("push should succeed with sharded notes"); + let push_json: serde_json::Value = + serde_json::from_str(push_output.trim()).expect("valid JSON"); + assert_eq!(push_json["ok"], true); + + // Verify legacy ref exists on upstream + let legacy_note = git_plumbing( + upstream.path(), + &["notes", "--ref=ai", "show", &commit.commit_sha], + None, + ); + assert!( + !legacy_note.trim().is_empty(), + "legacy notes ref should have the note on upstream" + ); + + // Verify shard ref exists on upstream + let shard_suffix = &commit.commit_sha[..2]; + let shard_ref = format!("ai-s/{}", shard_suffix); + let shard_note = git_plumbing( + upstream.path(), + &[ + "notes", + &format!("--ref={}", shard_ref), + "show", + &commit.commit_sha, + ], + None, + ); + assert!( + !shard_note.trim().is_empty(), + "shard ref {} should have the note on upstream", + shard_ref + ); + + // Both should have identical content + assert_eq!(legacy_note.trim(), shard_note.trim()); +} + +/// A non-sharded client writes notes. A sharded client can read them via union-read +/// (falls back to legacy ref when shard ref doesn't exist). +#[test] +fn test_sharded_notes_union_read_legacy_fallback() { + let (mut mirror, _upstream) = TestRepo::new_with_remote(); + + fs::write(mirror.path().join("legacy.txt"), "legacy only write\n").expect("should write file"); + let commit = mirror + .stage_all_and_commit("legacy commit") + .expect("commit should succeed"); + + // Push WITHOUT sharded notes (legacy-only write) + let request = json!({ "remote_name": "origin" }).to_string(); + mirror + .git_ai(&["push-authorship-notes", "--json", &request]) + .expect("legacy push should succeed"); + + // Verify legacy note exists locally + let legacy_note = git_plumbing( + mirror.path(), + &["notes", "--ref=ai", "show", &commit.commit_sha], + None, + ); + assert!(!legacy_note.trim().is_empty(), "legacy note should exist"); + + // Verify NO shard ref exists + let shard_suffix = &commit.commit_sha[..2]; + let shard_ref_full = format!("refs/notes/ai-s/{}", shard_suffix); + assert!( + !git_ref_exists(mirror.path(), &shard_ref_full), + "shard ref should NOT exist after legacy-only write" + ); + + // Now do a blame-analysis with sharded notes enabled — it should still find the note + // via legacy fallback + let blame_request = json!({ + "file_path": "legacy.txt", + "options": { + "line_ranges": [[1, 1]], + "return_human_authors_as_human": true, + "split_hunks_by_ai_author": false + } + }) + .to_string(); + + enable_sharded_notes(&mut mirror); + let blame_output = mirror + .git_ai(&["blame-analysis", "--json", &blame_request]) + .expect("blame with sharded notes should succeed"); + let blame_json: serde_json::Value = + serde_json::from_str(blame_output.trim()).expect("valid blame JSON"); + + // The blame should find the note (via legacy fallback) and report line authors + let line_authors = blame_json["line_authors"] + .as_object() + .expect("line_authors should be present"); + assert!( + !line_authors.is_empty(), + "blame should find authorship via legacy fallback" + ); +} + +/// Mixed-team scenario: one clone pushes with sharding, another without. +/// Both should be able to read each other's notes after fetch. +#[test] +fn test_sharded_notes_mixed_team_interop() { + let (mut mirror, upstream) = TestRepo::new_with_remote(); + enable_sharded_notes(&mut mirror); + + // Create a second clone of the same upstream + let base = std::env::temp_dir(); + let clone2_path = base.join(format!("sharded-clone2-{}", std::process::id())); + let _ = fs::remove_dir_all(&clone2_path); + let clone_output = Command::new("git") + .args([ + "clone", + upstream.path().to_str().unwrap(), + clone2_path.to_str().unwrap(), + ]) + .output() + .expect("clone should succeed"); + assert!(clone_output.status.success(), "clone2 should succeed"); + + // Mirror (sharded client) creates a commit and pushes notes + fs::write( + mirror.path().join("from_sharded.txt"), + "from sharded client\n", + ) + .expect("write file"); + let sharded_commit = mirror + .stage_all_and_commit("sharded client commit") + .expect("commit should succeed"); + mirror + .git_og(&["push", "origin", "HEAD"]) + .expect("push code should succeed"); + + let request = json!({ "remote_name": "origin" }).to_string(); + mirror + .git_ai(&["push-authorship-notes", "--json", &request]) + .expect("sharded push should succeed"); + + // Clone2 (non-sharded client) fetches and reads the note via legacy ref + let _ = Command::new("git") + .args([ + "-C", + clone2_path.to_str().unwrap(), + "pull", + "origin", + "main", + ]) + .output(); + let _ = git_plumbing( + &clone2_path, + &["fetch", "origin", "+refs/notes/ai:refs/notes/ai"], + None, + ); + let note_content = git_plumbing( + &clone2_path, + &["notes", "--ref=ai", "show", &sharded_commit.commit_sha], + None, + ); + assert!( + !note_content.trim().is_empty(), + "non-sharded client should read notes via legacy ref" + ); + + // Clean up + let _ = fs::remove_dir_all(&clone2_path); +} + +#[test] +fn test_non_sharded_push_does_not_push_existing_shard_refs() { + let (mut mirror, upstream) = TestRepo::new_with_remote(); + enable_sharded_notes(&mut mirror); + + fs::write( + mirror.path().join("local-shard-only.txt"), + "local shard should stay local when flag is off\n", + ) + .expect("should write file"); + let commit = mirror + .stage_all_and_commit("create local shard ref") + .expect("commit should succeed"); + mirror + .git_og(&["push", "origin", "HEAD"]) + .expect("push code should succeed"); + + let shard_ref_full = format!("refs/notes/ai-s/{}", &commit.commit_sha[..2]); + assert!( + git_ref_exists(mirror.path(), &shard_ref_full), + "local shard ref should exist before disabling the flag" + ); + assert!( + !git_ref_exists(upstream.path(), &shard_ref_full), + "upstream should not have the shard ref before notes are pushed" + ); + + disable_sharded_notes(&mut mirror); + + let request = json!({ "remote_name": "origin" }).to_string(); + let push_output = mirror + .git_ai(&["push-authorship-notes", "--json", &request]) + .expect("legacy-only push should succeed"); + let push_json: serde_json::Value = + serde_json::from_str(push_output.trim()).expect("valid JSON"); + assert_eq!(push_json["ok"], true); + + let legacy_note = git_plumbing( + upstream.path(), + &["notes", "--ref=ai", "show", &commit.commit_sha], + None, + ); + assert!( + !legacy_note.trim().is_empty(), + "legacy note should still be pushed with the flag disabled" + ); + assert!( + !git_ref_exists(upstream.path(), &shard_ref_full), + "default-off push should not propagate shard refs that already exist locally" + ); +} + +#[test] +fn test_non_sharded_fetch_does_not_fetch_shards_from_upstream() { + let (mut mirror, upstream) = TestRepo::new_with_remote(); + enable_sharded_notes(&mut mirror); + + fs::write( + mirror.path().join("remote-shard.txt"), + "remote shard should stay remote when flag is off\n", + ) + .expect("should write file"); + let commit = mirror + .stage_all_and_commit("create upstream shard ref") + .expect("commit should succeed"); + mirror + .git_og(&["push", "origin", "HEAD"]) + .expect("push code should succeed"); + + let request = json!({ "remote_name": "origin" }).to_string(); + mirror + .git_ai(&["push-authorship-notes", "--json", &request]) + .expect("sharded push should succeed"); + + let shard_ref_full = format!("refs/notes/ai-s/{}", &commit.commit_sha[..2]); + assert!( + git_ref_exists(upstream.path(), &shard_ref_full), + "upstream should have the shard ref before the non-sharded fetch" + ); + + let base = std::env::temp_dir(); + let clone2_path = base.join(format!("non-sharded-fetch-clone-{}", std::process::id())); + let _ = fs::remove_dir_all(&clone2_path); + + let clone_output = Command::new("git") + .args([ + "clone", + upstream.path().to_str().unwrap(), + clone2_path.to_str().unwrap(), + ]) + .output() + .expect("clone should succeed"); + assert!(clone_output.status.success(), "clone2 should succeed"); + + let clone2 = TestRepo::new_at_path_with_mode(&clone2_path, mirror.mode()); + let fetch_output = clone2 + .git_ai(&["fetch-authorship-notes", "--json", &request]) + .expect("legacy-only fetch should succeed"); + let fetch_json: serde_json::Value = + serde_json::from_str(fetch_output.trim()).expect("valid JSON"); + assert_eq!(fetch_json["notes_existence"], "found"); + + let legacy_note = git_plumbing( + clone2.path(), + &["notes", "--ref=ai", "show", &commit.commit_sha], + None, + ); + assert!( + !legacy_note.trim().is_empty(), + "legacy note should still be fetched with the flag disabled" + ); + assert!( + !git_ref_exists(clone2.path(), &shard_ref_full), + "default-off fetch should not materialize shard refs locally" + ); + + drop(clone2); + let _ = fs::remove_dir_all(&clone2_path); +} diff --git a/tests/integration/performance.rs b/tests/integration/performance.rs index 1cae21891..2b8d13ef7 100644 --- a/tests/integration/performance.rs +++ b/tests/integration/performance.rs @@ -23,6 +23,7 @@ fn setup() { async_mode: false, git_hooks_enabled: false, git_hooks_externally_managed: false, + sharded_notes: false, }; git_ai::config::Config::set_test_feature_flags(test_flags.clone());