Skip to content

Commit 1869525

Browse files
pks-tgitster
authored andcommitted
refs/reftable: wire up support for exclude patterns
Exclude patterns can be used by reference backends to skip over blocks of references that are uninteresting to the caller. Reference backends do not have to wire up support for them, and all callers are expected to behave as if the backend didn't support them. In fact, the only backend that supports exclude patterns right now is the "packed" backend. Exclude patterns can be quite an important performance optimization in repositories that have loads of references. The patterns are set up in case "transfer.hideRefs" and friends are configured during a fetch, so handling these patterns becomes important once there are lots of hidden refs in a served repository. Now that we have properly re-seekable reftable iterators we can also wire up support for these patterns in the "reftable" backend. Doing so is conceptually simple: once we hit a reference whose prefix matches the current exclude pattern we re-seek the iterator to the first reference that doesn't match the pattern anymore. This schema only works for trivial patterns that do not have any globbing characters in them, but this restriction also applies do the "packed" backend. This makes t1419 work with the "reftable" backend with some slight modifications. Of course it also speeds up listing of references with hidden refs. The following benchmark prints one reference with 1 million hidden references: Benchmark 1: HEAD~ Time (mean ± σ): 93.3 ms ± 2.1 ms [User: 90.3 ms, System: 2.5 ms] Range (min … max): 89.8 ms … 97.2 ms 33 runs Benchmark 2: HEAD Time (mean ± σ): 4.2 ms ± 0.6 ms [User: 2.2 ms, System: 1.8 ms] Range (min … max): 3.1 ms … 8.1 ms 765 runs Summary HEAD ran 22.15 ± 3.19 times faster than HEAD~ Signed-off-by: Patrick Steinhardt <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 0a148a8 commit 1869525

File tree

4 files changed

+176
-12
lines changed

4 files changed

+176
-12
lines changed

refs/reftable-backend.c

Lines changed: 130 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "../reftable/reftable-iterator.h"
2222
#include "../setup.h"
2323
#include "../strmap.h"
24+
#include "../trace2.h"
2425
#include "parse.h"
2526
#include "refs-internal.h"
2627

@@ -447,10 +448,81 @@ struct reftable_ref_iterator {
447448

448449
const char *prefix;
449450
size_t prefix_len;
451+
char **exclude_patterns;
452+
size_t exclude_patterns_index;
453+
size_t exclude_patterns_strlen;
450454
unsigned int flags;
451455
int err;
452456
};
453457

458+
/*
459+
* Handle exclude patterns. Returns either `1`, which tells the caller that the
460+
* current reference shall not be shown. Or `0`, which indicates that it should
461+
* be shown.
462+
*/
463+
static int should_exclude_current_ref(struct reftable_ref_iterator *iter)
464+
{
465+
while (iter->exclude_patterns[iter->exclude_patterns_index]) {
466+
const char *pattern = iter->exclude_patterns[iter->exclude_patterns_index];
467+
char *ref_after_pattern;
468+
int cmp;
469+
470+
/*
471+
* Lazily cache the pattern length so that we don't have to
472+
* recompute it every time this function is called.
473+
*/
474+
if (!iter->exclude_patterns_strlen)
475+
iter->exclude_patterns_strlen = strlen(pattern);
476+
477+
/*
478+
* When the reference name is lexicographically bigger than the
479+
* current exclude pattern we know that it won't ever match any
480+
* of the following references, either. We thus advance to the
481+
* next pattern and re-check whether it matches.
482+
*
483+
* Otherwise, if it's smaller, then we do not have a match and
484+
* thus want to show the current reference.
485+
*/
486+
cmp = strncmp(iter->ref.refname, pattern,
487+
iter->exclude_patterns_strlen);
488+
if (cmp > 0) {
489+
iter->exclude_patterns_index++;
490+
iter->exclude_patterns_strlen = 0;
491+
continue;
492+
}
493+
if (cmp < 0)
494+
return 0;
495+
496+
/*
497+
* The reference shares a prefix with the exclude pattern and
498+
* shall thus be omitted. We skip all references that match the
499+
* pattern by seeking to the first reference after the block of
500+
* matches.
501+
*
502+
* This is done by appending the highest possible character to
503+
* the pattern. Consequently, all references that have the
504+
* pattern as prefix and whose suffix starts with anything in
505+
* the range [0x00, 0xfe] are skipped. And given that 0xff is a
506+
* non-printable character that shouldn't ever be in a ref name,
507+
* we'd not yield any such record, either.
508+
*
509+
* Note that the seeked-to reference may also be excluded. This
510+
* is not handled here though, but the caller is expected to
511+
* loop and re-verify the next reference for us.
512+
*/
513+
ref_after_pattern = xstrfmt("%s%c", pattern, 0xff);
514+
iter->err = reftable_iterator_seek_ref(&iter->iter, ref_after_pattern);
515+
iter->exclude_patterns_index++;
516+
iter->exclude_patterns_strlen = 0;
517+
trace2_counter_add(TRACE2_COUNTER_ID_REFTABLE_RESEEKS, 1);
518+
519+
free(ref_after_pattern);
520+
return 1;
521+
}
522+
523+
return 0;
524+
}
525+
454526
static int reftable_ref_iterator_advance(struct ref_iterator *ref_iterator)
455527
{
456528
struct reftable_ref_iterator *iter =
@@ -481,6 +553,9 @@ static int reftable_ref_iterator_advance(struct ref_iterator *ref_iterator)
481553
break;
482554
}
483555

556+
if (iter->exclude_patterns && should_exclude_current_ref(iter))
557+
continue;
558+
484559
if (iter->flags & DO_FOR_EACH_PER_WORKTREE_ONLY &&
485560
parse_worktree_ref(iter->ref.refname, NULL, NULL, NULL) !=
486561
REF_WORKTREE_CURRENT)
@@ -570,6 +645,11 @@ static int reftable_ref_iterator_abort(struct ref_iterator *ref_iterator)
570645
(struct reftable_ref_iterator *)ref_iterator;
571646
reftable_ref_record_release(&iter->ref);
572647
reftable_iterator_destroy(&iter->iter);
648+
if (iter->exclude_patterns) {
649+
for (size_t i = 0; iter->exclude_patterns[i]; i++)
650+
free(iter->exclude_patterns[i]);
651+
free(iter->exclude_patterns);
652+
}
573653
free(iter);
574654
return ITER_DONE;
575655
}
@@ -580,9 +660,53 @@ static struct ref_iterator_vtable reftable_ref_iterator_vtable = {
580660
.abort = reftable_ref_iterator_abort
581661
};
582662

663+
static int qsort_strcmp(const void *va, const void *vb)
664+
{
665+
const char *a = *(const char **)va;
666+
const char *b = *(const char **)vb;
667+
return strcmp(a, b);
668+
}
669+
670+
static char **filter_exclude_patterns(const char **exclude_patterns)
671+
{
672+
size_t filtered_size = 0, filtered_alloc = 0;
673+
char **filtered = NULL;
674+
675+
if (!exclude_patterns)
676+
return NULL;
677+
678+
for (size_t i = 0; ; i++) {
679+
const char *exclude_pattern = exclude_patterns[i];
680+
int has_glob = 0;
681+
682+
if (!exclude_pattern)
683+
break;
684+
685+
for (const char *p = exclude_pattern; *p; p++) {
686+
has_glob = is_glob_special(*p);
687+
if (has_glob)
688+
break;
689+
}
690+
if (has_glob)
691+
continue;
692+
693+
ALLOC_GROW(filtered, filtered_size + 1, filtered_alloc);
694+
filtered[filtered_size++] = xstrdup(exclude_pattern);
695+
}
696+
697+
if (filtered_size) {
698+
QSORT(filtered, filtered_size, qsort_strcmp);
699+
ALLOC_GROW(filtered, filtered_size + 1, filtered_alloc);
700+
filtered[filtered_size++] = NULL;
701+
}
702+
703+
return filtered;
704+
}
705+
583706
static struct reftable_ref_iterator *ref_iterator_for_stack(struct reftable_ref_store *refs,
584707
struct reftable_stack *stack,
585708
const char *prefix,
709+
const char **exclude_patterns,
586710
int flags)
587711
{
588712
struct reftable_ref_iterator *iter;
@@ -595,6 +719,7 @@ static struct reftable_ref_iterator *ref_iterator_for_stack(struct reftable_ref_
595719
iter->base.oid = &iter->oid;
596720
iter->flags = flags;
597721
iter->refs = refs;
722+
iter->exclude_patterns = filter_exclude_patterns(exclude_patterns);
598723

599724
ret = refs->err;
600725
if (ret)
@@ -616,7 +741,7 @@ static struct reftable_ref_iterator *ref_iterator_for_stack(struct reftable_ref_
616741

617742
static struct ref_iterator *reftable_be_iterator_begin(struct ref_store *ref_store,
618743
const char *prefix,
619-
const char **exclude_patterns UNUSED,
744+
const char **exclude_patterns,
620745
unsigned int flags)
621746
{
622747
struct reftable_ref_iterator *main_iter, *worktree_iter;
@@ -627,7 +752,8 @@ static struct ref_iterator *reftable_be_iterator_begin(struct ref_store *ref_sto
627752
required_flags |= REF_STORE_ODB;
628753
refs = reftable_be_downcast(ref_store, required_flags, "ref_iterator_begin");
629754

630-
main_iter = ref_iterator_for_stack(refs, refs->main_stack, prefix, flags);
755+
main_iter = ref_iterator_for_stack(refs, refs->main_stack, prefix,
756+
exclude_patterns, flags);
631757

632758
/*
633759
* The worktree stack is only set when we're in an actual worktree
@@ -641,7 +767,8 @@ static struct ref_iterator *reftable_be_iterator_begin(struct ref_store *ref_sto
641767
* Otherwise we merge both the common and the per-worktree refs into a
642768
* single iterator.
643769
*/
644-
worktree_iter = ref_iterator_for_stack(refs, refs->worktree_stack, prefix, flags);
770+
worktree_iter = ref_iterator_for_stack(refs, refs->worktree_stack, prefix,
771+
exclude_patterns, flags);
645772
return merge_ref_iterator_begin(&worktree_iter->base, &main_iter->base,
646773
ref_iterator_select, NULL);
647774
}

t/t1419-exclude-refs.sh

Lines changed: 40 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,6 @@ export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
88
TEST_PASSES_SANITIZE_LEAK=true
99
. ./test-lib.sh
1010

11-
if test_have_prereq !REFFILES
12-
then
13-
skip_all='skipping `git for-each-ref --exclude` tests; need files backend'
14-
test_done
15-
fi
16-
1711
for_each_ref__exclude () {
1812
GIT_TRACE2_PERF=1 test-tool ref-store main \
1913
for-each-ref--exclude "$@" >actual.raw
@@ -28,7 +22,14 @@ assert_jumps () {
2822
local nr="$1"
2923
local trace="$2"
3024

31-
grep -q "name:jumps_made value:$nr$" $trace
25+
case "$GIT_DEFAULT_REF_FORMAT" in
26+
files)
27+
grep -q "name:jumps_made value:$nr$" $trace;;
28+
reftable)
29+
grep -q "name:reseeks_made value:$nr$" $trace;;
30+
*)
31+
BUG "unhandled ref format $GIT_DEFAULT_REF_FORMAT";;
32+
esac
3233
}
3334

3435
assert_no_jumps () {
@@ -89,7 +90,14 @@ test_expect_success 'adjacent, non-overlapping excluded regions' '
8990
for_each_ref refs/heads/foo refs/heads/quux >expect &&
9091
9192
test_cmp expect actual &&
92-
assert_jumps 1 perf
93+
case "$GIT_DEFAULT_REF_FORMAT" in
94+
files)
95+
assert_jumps 1 perf;;
96+
reftable)
97+
assert_jumps 2 perf;;
98+
*)
99+
BUG "unhandled ref format $GIT_DEFAULT_REF_FORMAT";;
100+
esac
93101
'
94102

95103
test_expect_success 'overlapping excluded regions' '
@@ -106,7 +114,30 @@ test_expect_success 'several overlapping excluded regions' '
106114
for_each_ref refs/heads/quux >expect &&
107115
108116
test_cmp expect actual &&
109-
assert_jumps 1 perf
117+
case "$GIT_DEFAULT_REF_FORMAT" in
118+
files)
119+
assert_jumps 1 perf;;
120+
reftable)
121+
assert_jumps 3 perf;;
122+
*)
123+
BUG "unhandled ref format $GIT_DEFAULT_REF_FORMAT";;
124+
esac
125+
'
126+
127+
test_expect_success 'unordered excludes' '
128+
for_each_ref__exclude refs/heads \
129+
refs/heads/foo refs/heads/baz >actual 2>perf &&
130+
for_each_ref refs/heads/bar refs/heads/quux >expect &&
131+
132+
test_cmp expect actual &&
133+
case "$GIT_DEFAULT_REF_FORMAT" in
134+
files)
135+
assert_jumps 1 perf;;
136+
reftable)
137+
assert_jumps 2 perf;;
138+
*)
139+
BUG "unhandled ref format $GIT_DEFAULT_REF_FORMAT";;
140+
esac
110141
'
111142

112143
test_expect_success 'non-matching excluded section' '

trace2.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -554,6 +554,7 @@ enum trace2_counter_id {
554554
TRACE2_COUNTER_ID_TEST2, /* emits summary and thread events */
555555

556556
TRACE2_COUNTER_ID_PACKED_REFS_JUMPS, /* counts number of jumps */
557+
TRACE2_COUNTER_ID_REFTABLE_RESEEKS, /* counts number of re-seeks */
557558

558559
/* counts number of fsyncs */
559560
TRACE2_COUNTER_ID_FSYNC_WRITEOUT_ONLY,

trace2/tr2_ctr.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,11 @@ static struct tr2_counter_metadata tr2_counter_metadata[TRACE2_NUMBER_OF_COUNTER
3131
.name = "jumps_made",
3232
.want_per_thread_events = 0,
3333
},
34+
[TRACE2_COUNTER_ID_REFTABLE_RESEEKS] = {
35+
.category = "reftable",
36+
.name = "reseeks_made",
37+
.want_per_thread_events = 0,
38+
},
3439
[TRACE2_COUNTER_ID_FSYNC_WRITEOUT_ONLY] = {
3540
.category = "fsync",
3641
.name = "writeout-only",

0 commit comments

Comments
 (0)