Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 27 additions & 27 deletions crates/cli/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::path::Path;
use std::path::PathBuf;

use bob_core::{drv, graph, overrides, resolve, scheduler, ArtifactCache, Backend};
use bob_core::{drv, graph, resolve, scheduler, tracked_set, ArtifactCache, Backend};
use clap::{Args, Parser, Subcommand};

/// Registered language backends, tried in order for `resolve_attr` /
Expand Down Expand Up @@ -265,25 +265,36 @@ fn cmd_build(args: BuildArgs) {
// Realize any missing source tarballs / build inputs
g.realize_inputs().expect("realizing inputs");

// Per-unit source overrides with cascading invalidation; see
// overrides::cascade for the algorithm. Each backend supplies own-source
// hashes for the workspace units it recognises.
// Each backend supplies own-source hashes for the workspace units it
// recognises. `tracked` is the closure of those under unit_deps; tracked
// units use early-cutoff (output-hash) cache keys resolved at ready-time
// in the scheduler. Everything else stays on plain drv-path keys.
let mut own = std::collections::HashMap::new();
for b in BACKENDS {
own.extend(b.workspace_unit_hashes(&repo_root, &g));
}
let overrides = overrides::cascade(&g, own);
let tracked = tracked_set(&g, &own);
eprintln!(
" \x1b[2mTracking {} workspace unit(s) for source changes\x1b[0m",
overrides.len()
tracked.len()
);

eprintln!(
"\x1b[1m Compiling\x1b[0m {} units ({} jobs)",
g.unit_count(),
jobs
);

let result = scheduler::run_parallel(
&g, &cache, jobs, BACKENDS, &repo_root, &own, &tracked, &drv_paths,
);

if dump_keys {
// Tracked units' keys are only known post-scheduler (early cutoff
// resolves them at ready-time), so --dump-keys now implies a full
// run. Acceptable for a hidden bench-harness flag.
for (drv, node) in &g.nodes {
let key = match overrides.get(drv) {
Some(ov) => ArtifactCache::cache_key_with_source(drv, &ov.source_hash),
None => ArtifactCache::cache_key(drv),
};
let key = result.keys.get(drv).cloned().unwrap_or_default();
let name = BACKENDS
.iter()
.find(|b| b.is_unit(drv, &node.drv, &repo_root))
Expand All @@ -294,29 +305,18 @@ fn cmd_build(args: BuildArgs) {
return;
}

eprintln!(
"\x1b[1m Compiling\x1b[0m {} units ({} jobs)",
g.unit_count(),
jobs
);

let result = scheduler::run_parallel(
&g, &cache, jobs, BACKENDS, &repo_root, &overrides, &drv_paths,
);

// Result symlinks + --print-out-paths, one per (root, output) following
// nix-build's naming: <prefix>[-<n>][-<output>], with `-<n>` omitted for
// the first root and `-<output>` omitted for `out`. Unlike before, lib-only
// roots get a link too (`result-lib`), so callers can locate the artifact
// without a second `--dump-keys` round-trip.
for (idx, r) in resolve_results.iter().enumerate() {
let artifact = match overrides.get(&r.drv_path) {
Some(ov) => cache.artifact_dir_by_key(&ArtifactCache::cache_key_with_source(
&r.drv_path,
&ov.source_hash,
)),
None => cache.artifact_dir(&r.drv_path),
};
let key = result
.keys
.get(&r.drv_path)
.cloned()
.unwrap_or_else(|| ArtifactCache::cache_key(&r.drv_path));
let artifact = cache.artifact_dir_by_key(&key);
if !artifact.exists() {
// Build failed or aborted before commit; skip silently, the
// failure summary already reported it.
Expand Down
25 changes: 24 additions & 1 deletion crates/core/src/backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,14 +109,37 @@ pub trait Backend: Send + Sync {
/// versions (errexit vs `eval`'d phases).
fn output_populated(&self, tmp: &Path, drv: &Derivation) -> bool;

// ── pipelining (optional) ──────────────────────────────────────────────
// ── pipelining + early-cutoff propagation (optional) ────────────────────

/// Pipelining policy for this backend, if it has one.
///
/// The default `None` means every edge is done-gated: the backend never
/// emits a mid-build signal.
fn pipeline(&self) -> Option<&dyn PipelinePolicy> {
None
}

/// Whether building this unit requires the *full* (committed) output of its
/// tracked deps, as opposed to just their early-signal artifact.
///
/// The answer decides which propagated hash the unit's eff-key reads, and
/// therefore whether tracked→this edges may early-gate. The default `true`
/// (done-gate everything) is the conservative choice for backends without a
/// separable interface artifact.
///
/// Rust: a pure `lib`/`rlib` crate compiles against deps' rmeta only; any
/// crate that links (cdylib/staticlib/bin) or loads (proc-macro) reads
/// deps' rlib/.so and so needs the done hash.
fn needs_dep_done_output(&self, _drv: &Derivation) -> bool {
true
}

/// Hash of the early-signal artifact found in `_early_dir`.
///
/// Called from the scheduler's `__META_READY__` callback with the directory
/// the wrapper signalled (e.g. the rmeta dir). The returned value is the
/// `early_propagated` hash that dependents whose `needs_dep_done_output` is
/// `false` key on.
///
/// The default `None` means the done hash is used for both purposes, which
/// is the behaviour for backends that don't pipeline.
fn early_hash(&self, _early_dir: &Path) -> Option<String> {
None
}

// ── internal subcommands ───────────────────────────────────────────────

/// Handle `bob __<x> …` re-entries from wrapper shims. If `cmd` belongs
Expand Down
88 changes: 87 additions & 1 deletion crates/core/src/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,58 @@ impl ArtifactCache {
Ok(CacheLock(f))
}

/// Path of the `.out-hash` early-cutoff sidecar belonging to the artifact
/// stored under `eff_key`.
///
/// An artifact can carry two sidecars: `.out-hash` (full-artifact hash,
/// written on commit) and `.early-hash` (interface-artifact hash such as
/// rmeta, written at `__META_READY__`). They are read back on cache-hit so
/// that dependents key on this unit's *output* rather than its inputs. A
/// unit without an early signal has only `.out-hash`; a dependent that
/// finds no `.early-hash` falls back to `.out-hash`.
pub fn out_hash_path(&self, eff_key: &str) -> PathBuf {
    let artifact_dir = self.artifact_dir_by_key(eff_key);
    artifact_dir.join(".out-hash")
}
/// Path of the `.early-hash` sidecar (hash of the interface artifact, e.g.
/// rmeta) inside the artifact directory for `eff_key`.
pub fn early_hash_path(&self, eff_key: &str) -> PathBuf {
    let artifact_dir = self.artifact_dir_by_key(eff_key);
    artifact_dir.join(".early-hash")
}

/// Content hash of the tree rooted at `dir`, returned as 32 hex characters
/// (a truncated blake3 digest).
///
/// Entries are visited per directory in `file_name` order, so the result
/// does not depend on `read_dir` ordering or on mtimes. Each entry feeds the
/// hasher a self-delimiting record:
///
/// * its path relative to `dir`, NUL-terminated;
/// * a one-byte kind tag (`d` directory, `l` symlink, `f` anything else);
/// * for symlinks, the NUL-terminated target string (so a relinked
///   `lib<name>.so` pointing at a new hashed filename still moves the hash);
/// * for files, the fixed-width blake3 digest of the contents.
///
/// The tags and fixed-width file digests keep the stream unambiguous: raw
/// file bytes can no longer be mistaken for a following entry's path, and an
/// empty directory no longer hashes like an empty file of the same name.
///
/// Names starting with `.` are skipped at every level (our own sidecars and
/// stderr/stdout capture files). I/O errors are tolerated on a best-effort
/// basis: an unreadable directory contributes no children and an unreadable
/// file hashes as if it were empty.
///
/// Used for the early-cutoff propagated hash; cheap on rlib/rmeta-sized
/// outputs and still fine for cc libs (a few MB).
///
/// NOTE(review): the framing changes the digest relative to the unframed
/// scheme, so previously written `.out-hash`/`.early-hash` sidecars will not
/// match — presumably that only costs one rebuild; confirm before release.
pub fn hash_tree(dir: &Path) -> String {
    fn walk(h: &mut blake3::Hasher, base: &Path, dir: &Path) {
        // Kind tags: one byte each, written right after the path record.
        const TAG_DIR: &[u8] = b"d";
        const TAG_LINK: &[u8] = b"l";
        const TAG_FILE: &[u8] = b"f";

        let Ok(rd) = std::fs::read_dir(dir) else {
            return;
        };
        let mut entries: Vec<_> = rd.flatten().collect();
        entries.sort_by_key(|e| e.file_name());

        for e in entries {
            // Skip our own sidecar and stderr/stdout capture files.
            if e.file_name().to_string_lossy().starts_with('.') {
                continue;
            }
            let Ok(ft) = e.file_type() else { continue };

            let p = e.path();
            let rel = p.strip_prefix(base).unwrap_or(&p);
            h.update(rel.as_os_str().as_encoded_bytes());
            h.update(b"\0");

            if ft.is_dir() {
                h.update(TAG_DIR);
                // Children carry the directory prefix in their own relative
                // path, so no explicit end-of-directory marker is needed.
                walk(h, base, &p);
            } else if ft.is_symlink() {
                h.update(TAG_LINK);
                if let Ok(target) = std::fs::read_link(&p) {
                    h.update(target.as_os_str().as_encoded_bytes());
                }
                h.update(b"\0");
            } else {
                h.update(TAG_FILE);
                // Hash the contents separately and fold in the 32-byte
                // digest: a fixed-width field needs no length prefix and the
                // file is still streamed rather than loaded into memory.
                let mut content = blake3::Hasher::new();
                if let Ok(mut f) = std::fs::File::open(&p) {
                    let _ = std::io::copy(&mut f, &mut content);
                }
                h.update(content.finalize().as_bytes());
            }
        }
    }

    let mut h = blake3::Hasher::new();
    walk(&mut h, dir, dir);
    h.finalize().to_hex()[..32].to_string()
}

/// Persistent per-unit incremental-compilation state. Unlike
/// `artifact_dir` (replaced on each build), this persists across builds
/// so the backend's compiler can reuse work (`-C incremental`, `GOCACHE`,
Expand All @@ -127,12 +179,46 @@ mod tests {
use std::fs;

/// Create a fresh, empty scratch directory under the system temp dir and
/// return its path. Any leftover directory of the same name is removed
/// first. Panics if the directory cannot be created.
fn tempdir() -> PathBuf {
    // Tests run in parallel threads within one process; pid alone collides
    // (cache_key_stable's remove_dir_all races hash_tree_content_addressed's
    // create_dir_all). Per-call counter + pid is unique within and across
    // processes.
    static N: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);
    // Relaxed suffices: the counter only has to hand out distinct values, it
    // does not order any other memory access.
    let n = N.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    let dir = std::env::temp_dir().join(format!("bob-test-{}-{n}", std::process::id()));
    let _ = fs::remove_dir_all(&dir);
    fs::create_dir_all(&dir).expect("creating test scratch directory");
    dir
}

/// `hash_tree` must depend only on tree content: rewriting identical bytes
/// keeps the hash, while changed bytes, an added entry, or a retargeted
/// symlink each move it. Dot-files never contribute.
#[test]
fn hash_tree_content_addressed() {
    let root = tempdir();
    let lib_dir = root.join("out/lib");
    let archive = lib_dir.join("libfoo.a");
    let link = lib_dir.join("libfoo.so");

    fs::create_dir_all(&lib_dir).unwrap();
    fs::write(&archive, b"v1").unwrap();
    // Sidecar-style dot-file: present on disk but excluded from the hash.
    fs::write(root.join(".out-hash"), b"ignored").unwrap();
    let baseline = ArtifactCache::hash_tree(&root);

    // Rewriting the same bytes only refreshes the mtime; the hash holds.
    fs::write(&archive, b"v1").unwrap();
    assert_eq!(baseline, ArtifactCache::hash_tree(&root));

    // Different bytes → different hash.
    fs::write(&archive, b"v2").unwrap();
    assert_ne!(baseline, ArtifactCache::hash_tree(&root));

    // Restore the content, then add a symlink: a new entry moves the hash.
    fs::write(&archive, b"v1").unwrap();
    std::os::unix::fs::symlink("libfoo.a", &link).unwrap();
    let with_link = ArtifactCache::hash_tree(&root);
    assert_ne!(baseline, with_link);

    // Same link name, different target: the target string is hashed too.
    fs::remove_file(&link).unwrap();
    std::os::unix::fs::symlink("other", &link).unwrap();
    assert_ne!(with_link, ArtifactCache::hash_tree(&root));

    let _ = fs::remove_dir_all(&root);
}

#[test]
fn cache_key_stable() {
let cache = ArtifactCache::from_path(tempdir());
Expand Down
32 changes: 17 additions & 15 deletions crates/core/src/executor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,25 +21,21 @@ use crate::cache::ArtifactCache;
use crate::drv::Derivation;
use crate::rewrite::PathRewriter;

/// Override for a unit's cache key (and optionally its source) when reusing
/// a cached drv whose inputs have effectively changed.
/// Per-unit build-time override: the live source directory (if this unit's
/// own source is tracked) and the resolved early-cutoff cache key.
///
/// `source_hash` is the *effective* hash: it incorporates this unit's own
/// source content AND the effective hashes of all its workspace deps. This
/// cascades invalidation through the DAG without changing drv paths, so a
/// change to a workspace unit's source produces a new key for that unit and
/// every downstream workspace unit, while external (registry) deps — which
/// never sit downstream of workspace units — keep their plain
/// `blake3(drv_path)` key.
/// `eff_key` is computed by the scheduler at ready-time from this unit's
/// own-source hash plus its tracked deps' *output* hashes (see
/// `overrides::eff_hash`); it is `None` for untracked units, which use the
/// plain `blake3(drv_path)` key.
#[derive(Clone, Debug)]
pub struct SourceOverride {
/// Local source directory to use instead of the store path in the drv.
/// `None` when only the cache key changes (i.e., this unit's own source
/// is unchanged but a dep's effective hash differs) — currently every
/// overridden unit is a workspace unit so this is always `Some`.
/// `None` for tracked units whose own source isn't being overridden
/// (i.e. the unit is only tracked because a dep is).
pub src_path: Option<PathBuf>,
/// Effective source hash, mixed into the cache key.
pub source_hash: String,
/// Resolved composite cache key for this unit.
pub eff_key: String,
}

/// Result of executing a single unit build.
Expand Down Expand Up @@ -71,7 +67,7 @@ pub fn build_unit(
..
} = ctx;
let effective_key = match src_override {
Some(ov) => ArtifactCache::cache_key_with_source(drv_path, &ov.source_hash),
Some(ov) => ov.eff_key.clone(),
None => ArtifactCache::cache_key(drv_path),
};
let unit_name = backend.unit_name(drv).into_owned();
Expand Down Expand Up @@ -270,6 +266,12 @@ exit $rc
let _ = std::fs::remove_dir_all(&dest);
return Err(format!("committing {unit_name}: {e}"));
}
// Early-cutoff sidecar: hash the committed outputs (not rmeta/ — that's
// an internal pipelining artifact) so dependents key on our output
// content, not our inputs. Written under the dest (artifacts/<key>/)
// so a future cache-hit at this key can read it without rebuilding.
let out_hash = ArtifactCache::hash_tree(&dest);
let _ = std::fs::write(cache.out_hash_path(&effective_key), &out_hash);
// Signal full completion for any wrapper polling on us (consumers that
// need the linkable artifact, not just the early metadata).
let _ = std::fs::write(tmp.join("done"), b"");
Expand Down
2 changes: 1 addition & 1 deletion crates/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@ pub use cache::ArtifactCache;
pub use drv::Derivation;
pub use executor::SourceOverride;
pub use graph::{BuildGraph, UnitNode};
pub use overrides::OwnHash;
pub use overrides::{tracked_set, OwnHash};
Loading