Skip to content

Commit 6d0ac56

Browse files
Improve support for sparse cache.
With the addition of a notes-backed cache we will have the possibility of avoiding a "cold start" by transferring just the notes to another repo. To keep the amount of extra data small, the notes cache is kept sparse: not all commits have an entry, and the entries are also split into shards by sequence number, as old entries are unlikely to be relevant. The old traversal logic did not perform very well with a sparse cache. In particular, "find_known" proved to be a bottleneck. So the traversal is now revised to work better in the sparse-cache case. Change: cache-sparse-shards
1 parent 3cf249c commit 6d0ac56

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+312
-116
lines changed

josh-core/src/cache.rs

Lines changed: 99 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,20 @@ use std::sync::{LazyLock, RwLock};
99
pub(crate) const CACHE_VERSION: u64 = 24;
1010

1111
pub trait CacheBackend: Send + Sync {
12-
fn read(&self, filter: filter::Filter, from: git2::Oid) -> JoshResult<Option<git2::Oid>>;
13-
14-
fn write(&self, filter: filter::Filter, from: git2::Oid, to: git2::Oid) -> JoshResult<()>;
12+
fn read(
13+
&self,
14+
filter: filter::Filter,
15+
from: git2::Oid,
16+
sequence_number: u128,
17+
) -> JoshResult<Option<git2::Oid>>;
18+
19+
fn write(
20+
&self,
21+
filter: filter::Filter,
22+
from: git2::Oid,
23+
to: git2::Oid,
24+
sequence_number: u128,
25+
) -> JoshResult<()>;
1526
}
1627

1728
pub trait FilterHook {
@@ -323,6 +334,11 @@ impl Transaction {
323334
}
324335

325336
pub fn insert(&self, filter: filter::Filter, from: git2::Oid, to: git2::Oid, store: bool) {
337+
let sequence_number = if filter != filter::sequence_number() {
338+
compute_sequence_number(self, from).unwrap()
339+
} else {
340+
0
341+
};
326342
let mut t2 = self.t2.borrow_mut();
327343
t2.commit_map
328344
.entry(filter.id())
@@ -334,14 +350,13 @@ impl Transaction {
334350
// the history length by a very large factor.
335351
if store || from.as_bytes()[0] == 0 {
336352
t2.cache
337-
.write_all(filter, from, to)
353+
.write_all(filter, from, to, sequence_number)
338354
.expect("Failed to write cache");
339355
}
340356
}
341357

342358
pub fn get_missing(&self) -> Vec<(filter::Filter, git2::Oid)> {
343359
let mut missing = self.t2.borrow().missing.clone();
344-
missing.sort_by_key(|(f, i)| (filter::nesting(*f), *f, *i));
345360
missing.dedup();
346361
missing.retain(|(f, i)| !self.known(*f, *i));
347362
self.t2.borrow_mut().missing = missing.clone();
@@ -358,7 +373,9 @@ impl Transaction {
358373
} else {
359374
let mut t2 = self.t2.borrow_mut();
360375
t2.misses += 1;
361-
t2.missing.push((filter, from));
376+
if !t2.missing.contains(&(filter, from)) {
377+
t2.missing.insert(0, (filter, from));
378+
}
362379
None
363380
}
364381
}
@@ -367,6 +384,11 @@ impl Transaction {
367384
if filter == filter::nop() {
368385
return Some(from);
369386
}
387+
let sequence_number = if filter != filter::sequence_number() {
388+
compute_sequence_number(self, from).unwrap()
389+
} else {
390+
0
391+
};
370392
let t2 = self.t2.borrow_mut();
371393
if let Some(m) = t2.commit_map.get(&filter.id()) {
372394
if let Some(oid) = m.get(&from).cloned() {
@@ -376,7 +398,7 @@ impl Transaction {
376398

377399
let oid = t2
378400
.cache
379-
.read_propagate(filter, from)
401+
.read_propagate(filter, from, sequence_number)
380402
.expect("Failed to read from cache backend");
381403

382404
let oid = if let Some(oid) = oid { Some(oid) } else { None };
@@ -385,6 +407,9 @@ impl Transaction {
385407
if oid == git2::Oid::zero() {
386408
return Some(oid);
387409
}
410+
if filter == filter::sequence_number() {
411+
return Some(oid);
412+
}
388413

389414
if self.repo.odb().unwrap().exists(oid) {
390415
// Only report an object as cached if it exists in the object database.
@@ -396,3 +421,70 @@ impl Transaction {
396421
None
397422
}
398423
}
424+
425+
/// Encode a `u128` into a 20-byte git OID (SHA-1 sized).
426+
/// The high 4 bytes of the OID are zero; the low 16 bytes
427+
/// contain the big-endian integer.
428+
pub fn oid_from_u128(n: u128) -> git2::Oid {
429+
let mut bytes = [0u8; 20];
430+
// place the 16 integer bytes at the end (big-endian)
431+
bytes[20 - 16..].copy_from_slice(&n.to_be_bytes());
432+
// Safe: length is exactly 20
433+
git2::Oid::from_bytes(&bytes).expect("20-byte OID construction cannot fail")
434+
}
435+
436+
/// Decode a `u128` previously encoded by `oid_from_u128`.
437+
pub fn u128_from_oid(oid: git2::Oid) -> u128 {
438+
let b = oid.as_bytes();
439+
let mut n = [0u8; 16];
440+
n.copy_from_slice(&b[20 - 16..]); // take the last 16 bytes
441+
u128::from_be_bytes(n)
442+
}
443+
444+
pub fn compute_sequence_number(
445+
transaction: &cache::Transaction,
446+
input: git2::Oid,
447+
) -> JoshResult<u128> {
448+
if let Some(count) = transaction.get(filter::sequence_number(), input) {
449+
return Ok(u128_from_oid(count));
450+
}
451+
452+
let commit = transaction.repo().find_commit(input)?;
453+
if let Some(p) = commit.parent_ids().next() {
454+
if let Some(count) = transaction.get(filter::sequence_number(), p) {
455+
let pc = u128_from_oid(count);
456+
transaction.insert(
457+
filter::sequence_number(),
458+
input,
459+
oid_from_u128(pc + 1),
460+
true,
461+
);
462+
return Ok(pc + 1);
463+
}
464+
}
465+
466+
let mut walk = transaction.repo().revwalk()?;
467+
walk.set_sorting(git2::Sort::REVERSE | git2::Sort::TOPOLOGICAL)?;
468+
walk.push(input)?;
469+
470+
for c in walk {
471+
let commit = transaction.repo().find_commit(c?)?;
472+
let pc = if let Some(p) = commit.parent_ids().next() {
473+
compute_sequence_number(transaction, p)?
474+
} else {
475+
0
476+
};
477+
478+
transaction.insert(
479+
filter::sequence_number(),
480+
commit.id(),
481+
oid_from_u128(pc + 1),
482+
true,
483+
);
484+
}
485+
if let Some(count) = transaction.get(filter::sequence_number(), input) {
486+
Ok(u128_from_oid(count))
487+
} else {
488+
Err(josh_error("missing sequence_number"))
489+
}
490+
}

josh-core/src/cache_notes.rs

Lines changed: 43 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use crate::JoshResult;
22
use crate::cache::{CACHE_VERSION, CacheBackend};
3+
use crate::filter;
34
use crate::filter::Filter;
45

56
pub struct NotesCacheBackend {
@@ -15,24 +16,43 @@ impl NotesCacheBackend {
1516
}
1617
}
1718

18-
fn is_note_eligible(oid: git2::Oid) -> bool {
19-
oid.as_bytes()[0] == 0
19+
fn is_note_eligible(repo: &git2::Repository, oid: git2::Oid, sequence_number: u128) -> bool {
20+
let parent_count = if let Ok(c) = repo.find_commit(oid) {
21+
c.parent_ids().count()
22+
} else {
23+
return false;
24+
};
25+
26+
sequence_number % 100 == 0 || parent_count != 1
2027
}
2128

22-
fn note_path(key: git2::Oid) -> String {
23-
format!("refs/josh/{}/{}", CACHE_VERSION, key)
29+
fn note_path(key: git2::Oid, sequence_number: u128) -> String {
30+
format!(
31+
"refs/josh/{}/{}/{}",
32+
CACHE_VERSION,
33+
sequence_number / 10000,
34+
key,
35+
)
2436
}
2537

2638
impl CacheBackend for NotesCacheBackend {
27-
fn read(&self, filter: Filter, from: git2::Oid) -> JoshResult<Option<git2::Oid>> {
39+
fn read(
40+
&self,
41+
filter: Filter,
42+
from: git2::Oid,
43+
sequence_number: u128,
44+
) -> JoshResult<Option<git2::Oid>> {
45+
if filter == filter::sequence_number() {
46+
return Ok(None);
47+
}
2848
let repo = self.repo.lock()?;
29-
let key = crate::filter::as_tree(&repo, filter)?;
30-
31-
if !is_note_eligible(from) {
49+
if !is_note_eligible(&repo, from, sequence_number) {
3250
return Ok(None);
3351
}
3452

35-
if let Ok(note) = repo.find_note(Some(&note_path(key)), from) {
53+
let key = crate::filter::as_tree(&*repo, filter)?;
54+
55+
if let Ok(note) = repo.find_note(Some(&note_path(key, sequence_number)), from) {
3656
let message = note.message().unwrap_or("");
3757
let result = git2::Oid::from_str(message)?;
3858

@@ -42,20 +62,29 @@ impl CacheBackend for NotesCacheBackend {
4262
}
4363
}
4464

45-
fn write(&self, filter: Filter, from: git2::Oid, to: git2::Oid) -> JoshResult<()> {
46-
let repo = self.repo.lock()?;
47-
let key = crate::filter::as_tree(&repo, filter)?;
65+
fn write(
66+
&self,
67+
filter: Filter,
68+
from: git2::Oid,
69+
to: git2::Oid,
70+
sequence_number: u128,
71+
) -> JoshResult<()> {
72+
if filter == filter::sequence_number() {
73+
return Ok(());
74+
}
4875

49-
if !is_note_eligible(from) {
76+
let repo = self.repo.lock()?;
77+
if !is_note_eligible(&*repo, from, sequence_number) {
5078
return Ok(());
5179
}
5280

81+
let key = crate::filter::as_tree(&*repo, filter)?;
5382
let signature = crate::cache::josh_commit_signature()?;
5483

5584
repo.note(
5685
&signature,
5786
&signature,
58-
Some(&note_path(key)),
87+
Some(&note_path(key, sequence_number)),
5988
from,
6089
&to.to_string(),
6190
true,

josh-core/src/cache_sled.rs

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,12 @@ fn insert_sled_tree(filter: Filter) -> sled::Tree {
8080
}
8181

8282
impl CacheBackend for SledCacheBackend {
83-
fn read(&self, filter: Filter, from: git2::Oid) -> JoshResult<Option<git2::Oid>> {
83+
fn read(
84+
&self,
85+
filter: Filter,
86+
from: git2::Oid,
87+
_sequence_number: u128,
88+
) -> JoshResult<Option<git2::Oid>> {
8489
let mut trees = self.trees.lock()?;
8590
let tree = trees
8691
.entry(filter.id())
@@ -94,7 +99,13 @@ impl CacheBackend for SledCacheBackend {
9499
}
95100
}
96101

97-
fn write(&self, filter: Filter, from: git2::Oid, to: git2::Oid) -> JoshResult<()> {
102+
fn write(
103+
&self,
104+
filter: Filter,
105+
from: git2::Oid,
106+
to: git2::Oid,
107+
_sequence_number: u128,
108+
) -> JoshResult<()> {
98109
let mut trees = self.trees.lock()?;
99110
let tree = trees
100111
.entry(filter.id())

josh-core/src/cache_stack.rs

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,10 @@ impl CacheStack {
3333
filter: filter::Filter,
3434
from: git2::Oid,
3535
to: git2::Oid,
36+
sequence_number: u128,
3637
) -> JoshResult<()> {
3738
for backend in &self.backends {
38-
backend.write(filter, from, to)?;
39+
backend.write(filter, from, to, sequence_number)?;
3940
}
4041

4142
Ok(())
@@ -51,16 +52,19 @@ impl CacheStack {
5152
&self,
5253
filter: filter::Filter,
5354
from: git2::Oid,
55+
sequence_number: u128,
5456
) -> JoshResult<Option<git2::Oid>> {
5557
let values = self
5658
.backends
5759
.iter()
5860
.enumerate()
59-
.find_map(|(index, backend)| match backend.read(filter, from) {
60-
Ok(None) => None,
61-
Ok(Some(oid)) => Some(Ok((index, oid))),
62-
Err(e) => Some(Err(e)),
63-
});
61+
.find_map(
62+
|(index, backend)| match backend.read(filter, from, sequence_number) {
63+
Ok(None) => None,
64+
Ok(Some(oid)) => Some(Ok((index, oid))),
65+
Err(e) => Some(Err(e)),
66+
},
67+
);
6468

6569
let (index, oid) = match values {
6670
// None of the backends had the value
@@ -74,7 +78,7 @@ impl CacheStack {
7478
self.backends
7579
.iter()
7680
.take(index)
77-
.try_for_each(|backend| backend.write(filter, from, oid))?;
81+
.try_for_each(|backend| backend.write(filter, from, oid, sequence_number))?;
7882

7983
Ok(Some(oid))
8084
}

0 commit comments

Comments
 (0)