Skip to content

Commit 0a8cb87

Browse files
authored
Fix REF_DELTA chain bug in 'git index-pack' (#745)
This is an early version of work already under review upstream: gitgitgadget#1906
2 parents 13a0d99 + 638c49a commit 0a8cb87

8 files changed

+237
-39
lines changed

Makefile

+1
Original file line numberDiff line numberDiff line change
@@ -827,6 +827,7 @@ TEST_BUILTINS_OBJS += test-mergesort.o
827827
TEST_BUILTINS_OBJS += test-mktemp.o
828828
TEST_BUILTINS_OBJS += test-name-hash.o
829829
TEST_BUILTINS_OBJS += test-online-cpus.o
830+
TEST_BUILTINS_OBJS += test-pack-deltas.o
830831
TEST_BUILTINS_OBJS += test-pack-mtimes.o
831832
TEST_BUILTINS_OBJS += test-parse-options.o
832833
TEST_BUILTINS_OBJS += test-parse-pathspec-file.o

builtin/index-pack.c

+32-26
Original file line numberDiff line numberDiff line change
@@ -1107,8 +1107,8 @@ static void *threaded_second_pass(void *data)
11071107
set_thread_data(data);
11081108
for (;;) {
11091109
struct base_data *parent = NULL;
1110-
struct object_entry *child_obj;
1111-
struct base_data *child;
1110+
struct object_entry *child_obj = NULL;
1111+
struct base_data *child = NULL;
11121112

11131113
counter_lock();
11141114
display_progress(progress, nr_resolved_deltas);
@@ -1135,15 +1135,18 @@ static void *threaded_second_pass(void *data)
11351135
parent = list_first_entry(&work_head, struct base_data,
11361136
list);
11371137

1138-
if (parent->ref_first <= parent->ref_last) {
1138+
while (parent->ref_first <= parent->ref_last) {
11391139
int offset = ref_deltas[parent->ref_first++].obj_no;
11401140
child_obj = objects + offset;
1141-
if (child_obj->real_type != OBJ_REF_DELTA)
1142-
die("REF_DELTA at offset %"PRIuMAX" already resolved (duplicate base %s?)",
1143-
(uintmax_t) child_obj->idx.offset,
1144-
oid_to_hex(&parent->obj->idx.oid));
1141+
if (child_obj->real_type != OBJ_REF_DELTA) {
1142+
child_obj = NULL;
1143+
continue;
1144+
}
11451145
child_obj->real_type = parent->obj->real_type;
1146-
} else {
1146+
break;
1147+
}
1148+
1149+
if (!child_obj && parent->ofs_first <= parent->ofs_last) {
11471150
child_obj = objects +
11481151
ofs_deltas[parent->ofs_first++].obj_no;
11491152
assert(child_obj->real_type == OBJ_OFS_DELTA);
@@ -1176,29 +1179,32 @@ static void *threaded_second_pass(void *data)
11761179
}
11771180
work_unlock();
11781181

1179-
if (parent) {
1180-
child = resolve_delta(child_obj, parent);
1181-
if (!child->children_remaining)
1182-
FREE_AND_NULL(child->data);
1183-
} else {
1184-
child = make_base(child_obj, NULL);
1185-
if (child->children_remaining) {
1186-
/*
1187-
* Since this child has its own delta children,
1188-
* we will need this data in the future.
1189-
* Inflate now so that future iterations will
1190-
* have access to this object's data while
1191-
* outside the work mutex.
1192-
*/
1193-
child->data = get_data_from_pack(child_obj);
1194-
child->size = child_obj->size;
1182+
if (child_obj) {
1183+
if (parent) {
1184+
child = resolve_delta(child_obj, parent);
1185+
if (!child->children_remaining)
1186+
FREE_AND_NULL(child->data);
1187+
} else{
1188+
child = make_base(child_obj, NULL);
1189+
if (child->children_remaining) {
1190+
/*
1191+
* Since this child has its own delta children,
1192+
* we will need this data in the future.
1193+
* Inflate now so that future iterations will
1194+
* have access to this object's data while
1195+
* outside the work mutex.
1196+
*/
1197+
child->data = get_data_from_pack(child_obj);
1198+
child->size = child_obj->size;
1199+
}
11951200
}
11961201
}
11971202

11981203
work_lock();
11991204
if (parent)
12001205
parent->retain_data--;
1201-
if (child->data) {
1206+
1207+
if (child && child->data) {
12021208
/*
12031209
* This child has its own children, so add it to
12041210
* work_head.
@@ -1207,7 +1213,7 @@ static void *threaded_second_pass(void *data)
12071213
base_cache_used += child->size;
12081214
prune_base_data(NULL);
12091215
free_base_data(child);
1210-
} else {
1216+
} else if (child) {
12111217
/*
12121218
* This child does not have its own children. It may be
12131219
* the last descendant of its ancestors; free those

ci/install-dependencies.sh

+24-11
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ begin_group "Install dependencies"
99

1010
P4WHENCE=https://cdist2.perforce.com/perforce/r23.2
1111
LFSWHENCE=https://github.com/github/git-lfs/releases/download/v$LINUX_GIT_LFS_VERSION
12-
JGITWHENCE=https://repo.eclipse.org/content/groups/releases//org/eclipse/jgit/org.eclipse.jgit.pgm/6.8.0.202311291450-r/org.eclipse.jgit.pgm-6.8.0.202311291450-r.sh
12+
JGITWHENCE=https://repo1.maven.org/maven2/org/eclipse/jgit/org.eclipse.jgit.pgm/6.8.0.202311291450-r/org.eclipse.jgit.pgm-6.8.0.202311291450-r.sh
1313

1414
# Make sudo a no-op and execute the command directly when running as root.
1515
# While using sudo would be fine on most platforms when we are root already,
@@ -31,7 +31,7 @@ alpine-*)
3131
;;
3232
fedora-*|almalinux-*)
3333
dnf -yq update >/dev/null &&
34-
dnf -yq install shadow-utils sudo make gcc findutils diffutils perl python3 gettext zlib-devel expat-devel openssl-devel curl-devel pcre2-devel >/dev/null
34+
dnf -yq install shadow-utils sudo make gcc findutils diffutils perl python3 gawk gettext zlib-devel expat-devel openssl-devel curl-devel pcre2-devel >/dev/null
3535
;;
3636
ubuntu-*|i386/ubuntu-*|debian-*)
3737
# Required so that apt doesn't wait for user input on certain packages.
@@ -66,16 +66,29 @@ ubuntu-*|i386/ubuntu-*|debian-*)
6666
mkdir --parents "$CUSTOM_PATH"
6767

6868
wget --quiet --directory-prefix="$CUSTOM_PATH" \
69-
"$P4WHENCE/bin.linux26x86_64/p4d" "$P4WHENCE/bin.linux26x86_64/p4"
70-
chmod a+x "$CUSTOM_PATH/p4d" "$CUSTOM_PATH/p4"
71-
72-
wget --quiet "$LFSWHENCE/git-lfs-linux-amd64-$LINUX_GIT_LFS_VERSION.tar.gz"
69+
"$P4WHENCE/bin.linux26x86_64/p4d" \
70+
"$P4WHENCE/bin.linux26x86_64/p4" &&
71+
chmod a+x "$CUSTOM_PATH/p4d" "$CUSTOM_PATH/p4" || {
72+
rm -f "$CUSTOM_PATH/p4"
73+
rm -f "$CUSTOM_PATH/p4d"
74+
echo >&2 "P4 download (optional) failed"
75+
}
76+
77+
wget --quiet \
78+
"$LFSWHENCE/git-lfs-linux-amd64-$LINUX_GIT_LFS_VERSION.tar.gz" &&
7379
tar -xzf "git-lfs-linux-amd64-$LINUX_GIT_LFS_VERSION.tar.gz" \
74-
-C "$CUSTOM_PATH" --strip-components=1 "git-lfs-$LINUX_GIT_LFS_VERSION/git-lfs"
75-
rm "git-lfs-linux-amd64-$LINUX_GIT_LFS_VERSION.tar.gz"
76-
77-
wget --quiet "$JGITWHENCE" --output-document="$CUSTOM_PATH/jgit"
78-
chmod a+x "$CUSTOM_PATH/jgit"
80+
-C "$CUSTOM_PATH" --strip-components=1 \
81+
"git-lfs-$LINUX_GIT_LFS_VERSION/git-lfs" &&
82+
rm "git-lfs-linux-amd64-$LINUX_GIT_LFS_VERSION.tar.gz" || {
83+
rm -f "$CUSTOM_PATH/git-lfs"
84+
echo >&2 "LFS download (optional) failed"
85+
}
86+
87+
wget --quiet "$JGITWHENCE" --output-document="$CUSTOM_PATH/jgit" &&
88+
chmod a+x "$CUSTOM_PATH/jgit" || {
89+
rm -f "$CUSTOM_PATH/jgit"
90+
echo >&2 "JGit download (optional) failed"
91+
}
7992
;;
8093
esac
8194
;;

t/helper/meson.build

+1
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ test_tool_sources = [
3737
'test-mktemp.c',
3838
'test-name-hash.c',
3939
'test-online-cpus.c',
40+
'test-pack-deltas.c',
4041
'test-pack-mtimes.c',
4142
'test-parse-options.c',
4243
'test-parse-pathspec-file.c',

t/helper/test-pack-deltas.c

+143
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
#define USE_THE_REPOSITORY_VARIABLE
2+
3+
#include "test-tool.h"
4+
#include "git-compat-util.h"
5+
#include "delta.h"
6+
#include "git-zlib.h"
7+
#include "hash.h"
8+
#include "hex.h"
9+
#include "pack.h"
10+
#include "pack-objects.h"
11+
#include "parse.h"
12+
#include "setup.h"
13+
#include "strbuf.h"
14+
#include "string-list.h"
15+
16+
static const char usage_str[] = "test-tool pack-deltas <nr_entries>";
17+
18+
static unsigned long do_compress(void **pptr, unsigned long size)
19+
{
20+
git_zstream stream;
21+
void *in, *out;
22+
unsigned long maxsize;
23+
24+
git_deflate_init(&stream, 1);
25+
maxsize = git_deflate_bound(&stream, size);
26+
27+
in = *pptr;
28+
out = xmalloc(maxsize);
29+
*pptr = out;
30+
31+
stream.next_in = in;
32+
stream.avail_in = size;
33+
stream.next_out = out;
34+
stream.avail_out = maxsize;
35+
while (git_deflate(&stream, Z_FINISH) == Z_OK)
36+
; /* nothing */
37+
git_deflate_end(&stream);
38+
39+
free(in);
40+
return stream.total_out;
41+
}
42+
43+
static void write_ref_delta(struct hashfile *f,
44+
struct object_id *oid,
45+
struct object_id *base)
46+
{
47+
unsigned char header[MAX_PACK_OBJECT_HEADER];
48+
unsigned long size, base_size, delta_size, compressed_size, hdrlen;
49+
enum object_type type;
50+
void *base_buf, *delta_buf;
51+
void *buf = repo_read_object_file(the_repository,
52+
oid, &type,
53+
&size);
54+
55+
if (!buf)
56+
die("unable to read %s", oid_to_hex(oid));
57+
58+
base_buf = repo_read_object_file(the_repository,
59+
base, &type,
60+
&base_size);
61+
62+
if (!base_buf)
63+
die("unable to read %s", oid_to_hex(base));
64+
65+
delta_buf = diff_delta(base_buf, base_size,
66+
buf, size, &delta_size, 0);
67+
68+
compressed_size = do_compress(&delta_buf, delta_size);
69+
70+
hdrlen = encode_in_pack_object_header(header, sizeof(header),
71+
OBJ_REF_DELTA, delta_size);
72+
hashwrite(f, header, hdrlen);
73+
hashwrite(f, base->hash, the_repository->hash_algo->rawsz);
74+
hashwrite(f, delta_buf, compressed_size);
75+
76+
free(buf);
77+
free(base_buf);
78+
free(delta_buf);
79+
}
80+
81+
int cmd__pack_deltas(int argc, const char **argv)
82+
{
83+
unsigned long n;
84+
struct hashfile *f;
85+
struct strbuf line = STRBUF_INIT;
86+
87+
if (argc != 2) {
88+
usage(usage_str);
89+
return -1;
90+
}
91+
92+
if (!git_parse_ulong(argv[1], &n) || n != (uint32_t)n)
93+
die("invalid number of objects: %s", argv[1]);
94+
95+
setup_git_directory();
96+
97+
f = hashfd(1, "<stdout>");
98+
write_pack_header(f, n);
99+
100+
/* Read each line from stdin into 'line' */
101+
while (strbuf_getline_lf(&line, stdin) != EOF) {
102+
const char *type_str, *content_oid_str, *base_oid_str = NULL;
103+
struct object_id content_oid, base_oid;
104+
struct string_list items = STRING_LIST_INIT_NODUP;
105+
/*
106+
* Tokenize into two or three parts:
107+
* 1. REF_DELTA, OFS_DELTA, or FULL.
108+
* 2. The object ID for the content object.
109+
* 3. The object ID for the base object (optional).
110+
*/
111+
if (string_list_split_in_place(&items, line.buf, " ", 3) < 0)
112+
die("invalid input format: %s", line.buf);
113+
114+
if (items.nr < 2)
115+
die("invalid input format: %s", line.buf);
116+
117+
type_str = items.items[0].string;
118+
content_oid_str = items.items[1].string;
119+
120+
if (get_oid_hex(content_oid_str, &content_oid))
121+
die("invalid object: %s", content_oid_str);
122+
if (items.nr >= 3) {
123+
base_oid_str = items.items[2].string;
124+
if (get_oid_hex(base_oid_str, &base_oid))
125+
die("invalid object: %s", base_oid_str);
126+
}
127+
string_list_clear(&items, 0);
128+
129+
if (!strcmp(type_str, "REF_DELTA"))
130+
write_ref_delta(f, &content_oid, &base_oid);
131+
else if (!strcmp(type_str, "OFS_DELTA"))
132+
die("OFS_DELTA not implemented");
133+
else if (!strcmp(type_str, "FULL"))
134+
die("FULL not implemented");
135+
else
136+
die("unknown pack type: %s", type_str);
137+
}
138+
139+
finalize_hashfile(f, NULL, FSYNC_COMPONENT_PACK,
140+
CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
141+
strbuf_release(&line);
142+
return 0;
143+
}

t/helper/test-tool.c

+1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ static struct test_cmd cmds[] = {
4747
{ "mktemp", cmd__mktemp },
4848
{ "name-hash", cmd__name_hash },
4949
{ "online-cpus", cmd__online_cpus },
50+
{ "pack-deltas", cmd__pack_deltas },
5051
{ "pack-mtimes", cmd__pack_mtimes },
5152
{ "parse-options", cmd__parse_options },
5253
{ "parse-options-flags", cmd__parse_options_flags },

t/helper/test-tool.h

+1
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ int cmd__mergesort(int argc, const char **argv);
4040
int cmd__mktemp(int argc, const char **argv);
4141
int cmd__name_hash(int argc, const char **argv);
4242
int cmd__online_cpus(int argc, const char **argv);
43+
int cmd__pack_deltas(int argc, const char **argv);
4344
int cmd__pack_mtimes(int argc, const char **argv);
4445
int cmd__parse_options(int argc, const char **argv);
4546
int cmd__parse_options_flags(int argc, const char **argv);

t/t5309-pack-delta-cycles.sh

+34-2
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,10 @@ test_expect_success 'index-pack detects REF_DELTA cycles' '
6060
test_expect_success 'failover to an object in another pack' '
6161
clear_packs &&
6262
git index-pack --stdin <ab.pack &&
63-
test_must_fail git index-pack --stdin --fix-thin <cycle.pack
63+
64+
# This cycle does not fail since the existence of A & B in
65+
# the repo allows us to resolve the cycle.
66+
git index-pack --stdin --fix-thin <cycle.pack
6467
'
6568

6669
test_expect_success 'failover to a duplicate object in the same pack' '
@@ -72,7 +75,36 @@ test_expect_success 'failover to a duplicate object in the same pack' '
7275
pack_obj $A
7376
} >recoverable.pack &&
7477
pack_trailer recoverable.pack &&
75-
test_must_fail git index-pack --fix-thin --stdin <recoverable.pack
78+
79+
# This cycle does not fail since the existence of a full copy
80+
# of A in the pack allows us to resolve the cycle.
81+
git index-pack --fix-thin --stdin <recoverable.pack
82+
'
83+
84+
test_expect_success 'index-pack works with thin pack A->B->C with B on disk' '
85+
git init server &&
86+
(
87+
cd server &&
88+
test_commit_bulk 4
89+
) &&
90+
91+
A=$(git -C server rev-parse HEAD^{tree}) &&
92+
B=$(git -C server rev-parse HEAD~1^{tree}) &&
93+
C=$(git -C server rev-parse HEAD~2^{tree}) &&
94+
git -C server reset --hard HEAD~1 &&
95+
96+
cat >in <<-EOF &&
97+
REF_DELTA $A $B
98+
REF_DELTA $B $C
99+
EOF
100+
101+
test-tool -C server pack-deltas 2 <in >thin.pack &&
102+
103+
git clone "file://$(pwd)/server" client &&
104+
(
105+
cd client &&
106+
git index-pack --fix-thin --stdin <../thin.pack
107+
)
76108
'
77109

78110
test_done

0 commit comments

Comments
 (0)