Skip to content

Commit 6dbc416

Browse files
committed
Merge branch 'ds/fix-thin-fix'
"git index-pack --fix-thin" used to abort to prevent a cycle in delta chains from forming in a corner case even when there is no such cycle. * ds/fix-thin-fix: index-pack: allow revisiting REF_DELTA chains t5309: create failing test for 'git index-pack' test-tool: add pack-deltas helper
2 parents a9d67d6 + 98f8854 commit 6dbc416

File tree

7 files changed

+216
-28
lines changed

7 files changed

+216
-28
lines changed

Makefile

+1
Original file line numberDiff line numberDiff line change
@@ -819,6 +819,7 @@ TEST_BUILTINS_OBJS += test-mergesort.o
819819
TEST_BUILTINS_OBJS += test-mktemp.o
820820
TEST_BUILTINS_OBJS += test-name-hash.o
821821
TEST_BUILTINS_OBJS += test-online-cpus.o
822+
TEST_BUILTINS_OBJS += test-pack-deltas.o
822823
TEST_BUILTINS_OBJS += test-pack-mtimes.o
823824
TEST_BUILTINS_OBJS += test-parse-options.o
824825
TEST_BUILTINS_OBJS += test-parse-pathspec-file.o

builtin/index-pack.c

+32-26
Original file line numberDiff line numberDiff line change
@@ -1108,8 +1108,8 @@ static void *threaded_second_pass(void *data)
11081108
set_thread_data(data);
11091109
for (;;) {
11101110
struct base_data *parent = NULL;
1111-
struct object_entry *child_obj;
1112-
struct base_data *child;
1111+
struct object_entry *child_obj = NULL;
1112+
struct base_data *child = NULL;
11131113

11141114
counter_lock();
11151115
display_progress(progress, nr_resolved_deltas);
@@ -1136,15 +1136,18 @@ static void *threaded_second_pass(void *data)
11361136
parent = list_first_entry(&work_head, struct base_data,
11371137
list);
11381138

1139-
if (parent->ref_first <= parent->ref_last) {
1139+
while (parent->ref_first <= parent->ref_last) {
11401140
int offset = ref_deltas[parent->ref_first++].obj_no;
11411141
child_obj = objects + offset;
1142-
if (child_obj->real_type != OBJ_REF_DELTA)
1143-
die("REF_DELTA at offset %"PRIuMAX" already resolved (duplicate base %s?)",
1144-
(uintmax_t) child_obj->idx.offset,
1145-
oid_to_hex(&parent->obj->idx.oid));
1142+
if (child_obj->real_type != OBJ_REF_DELTA) {
1143+
child_obj = NULL;
1144+
continue;
1145+
}
11461146
child_obj->real_type = parent->obj->real_type;
1147-
} else {
1147+
break;
1148+
}
1149+
1150+
if (!child_obj && parent->ofs_first <= parent->ofs_last) {
11481151
child_obj = objects +
11491152
ofs_deltas[parent->ofs_first++].obj_no;
11501153
assert(child_obj->real_type == OBJ_OFS_DELTA);
@@ -1177,29 +1180,32 @@ static void *threaded_second_pass(void *data)
11771180
}
11781181
work_unlock();
11791182

1180-
if (parent) {
1181-
child = resolve_delta(child_obj, parent);
1182-
if (!child->children_remaining)
1183-
FREE_AND_NULL(child->data);
1184-
} else {
1185-
child = make_base(child_obj, NULL);
1186-
if (child->children_remaining) {
1187-
/*
1188-
* Since this child has its own delta children,
1189-
* we will need this data in the future.
1190-
* Inflate now so that future iterations will
1191-
* have access to this object's data while
1192-
* outside the work mutex.
1193-
*/
1194-
child->data = get_data_from_pack(child_obj);
1195-
child->size = child_obj->size;
1183+
if (child_obj) {
1184+
if (parent) {
1185+
child = resolve_delta(child_obj, parent);
1186+
if (!child->children_remaining)
1187+
FREE_AND_NULL(child->data);
1188+
} else{
1189+
child = make_base(child_obj, NULL);
1190+
if (child->children_remaining) {
1191+
/*
1192+
* Since this child has its own delta children,
1193+
* we will need this data in the future.
1194+
* Inflate now so that future iterations will
1195+
* have access to this object's data while
1196+
* outside the work mutex.
1197+
*/
1198+
child->data = get_data_from_pack(child_obj);
1199+
child->size = child_obj->size;
1200+
}
11961201
}
11971202
}
11981203

11991204
work_lock();
12001205
if (parent)
12011206
parent->retain_data--;
1202-
if (child->data) {
1207+
1208+
if (child && child->data) {
12031209
/*
12041210
* This child has its own children, so add it to
12051211
* work_head.
@@ -1208,7 +1214,7 @@ static void *threaded_second_pass(void *data)
12081214
base_cache_used += child->size;
12091215
prune_base_data(NULL);
12101216
free_base_data(child);
1211-
} else {
1217+
} else if (child) {
12121218
/*
12131219
* This child does not have its own children. It may be
12141220
* the last descendant of its ancestors; free those

t/helper/meson.build

+1
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ test_tool_sources = [
3636
'test-mktemp.c',
3737
'test-name-hash.c',
3838
'test-online-cpus.c',
39+
'test-pack-deltas.c',
3940
'test-pack-mtimes.c',
4041
'test-parse-options.c',
4142
'test-parse-pathspec-file.c',

t/helper/test-pack-deltas.c

+148
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
#define USE_THE_REPOSITORY_VARIABLE
2+
3+
#include "test-tool.h"
4+
#include "git-compat-util.h"
5+
#include "delta.h"
6+
#include "git-zlib.h"
7+
#include "hash.h"
8+
#include "hex.h"
9+
#include "pack.h"
10+
#include "pack-objects.h"
11+
#include "parse-options.h"
12+
#include "setup.h"
13+
#include "strbuf.h"
14+
#include "string-list.h"
15+
16+
static const char *usage_str[] = {
17+
"test-tool pack-deltas --num-objects <num-objects>",
18+
NULL
19+
};
20+
21+
static unsigned long do_compress(void **pptr, unsigned long size)
22+
{
23+
git_zstream stream;
24+
void *in, *out;
25+
unsigned long maxsize;
26+
27+
git_deflate_init(&stream, 1);
28+
maxsize = git_deflate_bound(&stream, size);
29+
30+
in = *pptr;
31+
out = xmalloc(maxsize);
32+
*pptr = out;
33+
34+
stream.next_in = in;
35+
stream.avail_in = size;
36+
stream.next_out = out;
37+
stream.avail_out = maxsize;
38+
while (git_deflate(&stream, Z_FINISH) == Z_OK)
39+
; /* nothing */
40+
git_deflate_end(&stream);
41+
42+
free(in);
43+
return stream.total_out;
44+
}
45+
46+
static void write_ref_delta(struct hashfile *f,
47+
struct object_id *oid,
48+
struct object_id *base)
49+
{
50+
unsigned char header[MAX_PACK_OBJECT_HEADER];
51+
unsigned long size, base_size, delta_size, compressed_size, hdrlen;
52+
enum object_type type;
53+
void *base_buf, *delta_buf;
54+
void *buf = repo_read_object_file(the_repository,
55+
oid, &type,
56+
&size);
57+
58+
if (!buf)
59+
die("unable to read %s", oid_to_hex(oid));
60+
61+
base_buf = repo_read_object_file(the_repository,
62+
base, &type,
63+
&base_size);
64+
65+
if (!base_buf)
66+
die("unable to read %s", oid_to_hex(base));
67+
68+
delta_buf = diff_delta(base_buf, base_size,
69+
buf, size, &delta_size, 0);
70+
71+
compressed_size = do_compress(&delta_buf, delta_size);
72+
73+
hdrlen = encode_in_pack_object_header(header, sizeof(header),
74+
OBJ_REF_DELTA, delta_size);
75+
hashwrite(f, header, hdrlen);
76+
hashwrite(f, base->hash, the_repository->hash_algo->rawsz);
77+
hashwrite(f, delta_buf, compressed_size);
78+
79+
free(buf);
80+
free(base_buf);
81+
free(delta_buf);
82+
}
83+
84+
int cmd__pack_deltas(int argc, const char **argv)
85+
{
86+
int num_objects = -1;
87+
struct hashfile *f;
88+
struct strbuf line = STRBUF_INIT;
89+
struct option options[] = {
90+
OPT_INTEGER('n', "num-objects", &num_objects, N_("the number of objects to write")),
91+
OPT_END()
92+
};
93+
94+
argc = parse_options(argc, argv, NULL,
95+
options, usage_str, 0);
96+
97+
if (argc || num_objects < 0)
98+
usage_with_options(usage_str, options);
99+
100+
setup_git_directory();
101+
102+
f = hashfd(the_repository->hash_algo, 1, "<stdout>");
103+
write_pack_header(f, num_objects);
104+
105+
/* Read each line from stdin into 'line' */
106+
while (strbuf_getline_lf(&line, stdin) != EOF) {
107+
const char *type_str, *content_oid_str, *base_oid_str = NULL;
108+
struct object_id content_oid, base_oid;
109+
struct string_list items = STRING_LIST_INIT_NODUP;
110+
/*
111+
* Tokenize into two or three parts:
112+
* 1. REF_DELTA, OFS_DELTA, or FULL.
113+
* 2. The object ID for the content object.
114+
* 3. The object ID for the base object (optional).
115+
*/
116+
if (string_list_split_in_place(&items, line.buf, " ", 3) < 0)
117+
die("invalid input format: %s", line.buf);
118+
119+
if (items.nr < 2)
120+
die("invalid input format: %s", line.buf);
121+
122+
type_str = items.items[0].string;
123+
content_oid_str = items.items[1].string;
124+
125+
if (get_oid_hex(content_oid_str, &content_oid))
126+
die("invalid object: %s", content_oid_str);
127+
if (items.nr >= 3) {
128+
base_oid_str = items.items[2].string;
129+
if (get_oid_hex(base_oid_str, &base_oid))
130+
die("invalid object: %s", base_oid_str);
131+
}
132+
string_list_clear(&items, 0);
133+
134+
if (!strcmp(type_str, "REF_DELTA"))
135+
write_ref_delta(f, &content_oid, &base_oid);
136+
else if (!strcmp(type_str, "OFS_DELTA"))
137+
die("OFS_DELTA not implemented");
138+
else if (!strcmp(type_str, "FULL"))
139+
die("FULL not implemented");
140+
else
141+
die("unknown pack type: %s", type_str);
142+
}
143+
144+
finalize_hashfile(f, NULL, FSYNC_COMPONENT_PACK,
145+
CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
146+
strbuf_release(&line);
147+
return 0;
148+
}

t/helper/test-tool.c

+1
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ static struct test_cmd cmds[] = {
4646
{ "mktemp", cmd__mktemp },
4747
{ "name-hash", cmd__name_hash },
4848
{ "online-cpus", cmd__online_cpus },
49+
{ "pack-deltas", cmd__pack_deltas },
4950
{ "pack-mtimes", cmd__pack_mtimes },
5051
{ "parse-options", cmd__parse_options },
5152
{ "parse-options-flags", cmd__parse_options_flags },

t/helper/test-tool.h

+1
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ int cmd__mergesort(int argc, const char **argv);
3939
int cmd__mktemp(int argc, const char **argv);
4040
int cmd__name_hash(int argc, const char **argv);
4141
int cmd__online_cpus(int argc, const char **argv);
42+
int cmd__pack_deltas(int argc, const char **argv);
4243
int cmd__pack_mtimes(int argc, const char **argv);
4344
int cmd__parse_options(int argc, const char **argv);
4445
int cmd__parse_options_flags(int argc, const char **argv);

t/t5309-pack-delta-cycles.sh

+32-2
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,10 @@ test_expect_success 'index-pack detects REF_DELTA cycles' '
6060
test_expect_success 'failover to an object in another pack' '
6161
clear_packs &&
6262
git index-pack --stdin <ab.pack &&
63-
test_must_fail git index-pack --stdin --fix-thin <cycle.pack
63+
64+
# This cycle does not fail since the existence of A & B in
65+
# the repo allows us to resolve the cycle.
66+
git index-pack --stdin --fix-thin <cycle.pack
6467
'
6568

6669
test_expect_success 'failover to a duplicate object in the same pack' '
@@ -72,7 +75,34 @@ test_expect_success 'failover to a duplicate object in the same pack' '
7275
pack_obj $A
7376
} >recoverable.pack &&
7477
pack_trailer recoverable.pack &&
75-
test_must_fail git index-pack --fix-thin --stdin <recoverable.pack
78+
79+
# This cycle does not fail since the existence of a full copy
80+
# of A in the pack allows us to resolve the cycle.
81+
git index-pack --fix-thin --stdin <recoverable.pack
82+
'
83+
84+
test_expect_success 'index-pack works with thin pack A->B->C with B on disk' '
85+
git init server &&
86+
(
87+
cd server &&
88+
test_commit_bulk 4
89+
) &&
90+
91+
A=$(git -C server rev-parse HEAD^{tree}) &&
92+
B=$(git -C server rev-parse HEAD~1^{tree}) &&
93+
C=$(git -C server rev-parse HEAD~2^{tree}) &&
94+
git -C server reset --hard HEAD~1 &&
95+
96+
test-tool -C server pack-deltas --num-objects=2 >thin.pack <<-EOF &&
97+
REF_DELTA $A $B
98+
REF_DELTA $B $C
99+
EOF
100+
101+
git clone "file://$(pwd)/server" client &&
102+
(
103+
cd client &&
104+
git index-pack --fix-thin --stdin <../thin.pack
105+
)
76106
'
77107

78108
test_done

0 commit comments

Comments
 (0)