diff --git a/Makefile b/Makefile index 13f9062a056944..c4d21ccd3d1b6b 100644 --- a/Makefile +++ b/Makefile @@ -821,6 +821,7 @@ TEST_BUILTINS_OBJS += test-mergesort.o TEST_BUILTINS_OBJS += test-mktemp.o TEST_BUILTINS_OBJS += test-name-hash.o TEST_BUILTINS_OBJS += test-online-cpus.o +TEST_BUILTINS_OBJS += test-pack-deltas.o TEST_BUILTINS_OBJS += test-pack-mtimes.o TEST_BUILTINS_OBJS += test-parse-options.o TEST_BUILTINS_OBJS += test-parse-pathspec-file.o diff --git a/builtin/index-pack.c b/builtin/index-pack.c index de127c0ff13a28..dbe79701fb8b6f 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1109,8 +1109,8 @@ static void *threaded_second_pass(void *data) set_thread_data(data); for (;;) { struct base_data *parent = NULL; - struct object_entry *child_obj; - struct base_data *child; + struct object_entry *child_obj = NULL; + struct base_data *child = NULL; counter_lock(); display_progress(progress, nr_resolved_deltas); @@ -1137,15 +1137,18 @@ static void *threaded_second_pass(void *data) parent = list_first_entry(&work_head, struct base_data, list); - if (parent->ref_first <= parent->ref_last) { + while (parent->ref_first <= parent->ref_last) { int offset = ref_deltas[parent->ref_first++].obj_no; child_obj = objects + offset; - if (child_obj->real_type != OBJ_REF_DELTA) - die("REF_DELTA at offset %"PRIuMAX" already resolved (duplicate base %s?)", - (uintmax_t) child_obj->idx.offset, - oid_to_hex(&parent->obj->idx.oid)); + if (child_obj->real_type != OBJ_REF_DELTA) { + child_obj = NULL; + continue; + } child_obj->real_type = parent->obj->real_type; - } else { + break; + } + + if (!child_obj && parent->ofs_first <= parent->ofs_last) { child_obj = objects + ofs_deltas[parent->ofs_first++].obj_no; assert(child_obj->real_type == OBJ_OFS_DELTA); @@ -1178,29 +1181,32 @@ static void *threaded_second_pass(void *data) } work_unlock(); - if (parent) { - child = resolve_delta(child_obj, parent); - if (!child->children_remaining) - FREE_AND_NULL(child->data); - } else { - child = make_base(child_obj, NULL); - if (child->children_remaining) { - /* - * Since this child has its own delta children, - * we will need this data in the future. - * Inflate now so that future iterations will - * have access to this object's data while - * outside the work mutex. - */ - child->data = get_data_from_pack(child_obj); - child->size = child_obj->size; + if (child_obj) { + if (parent) { + child = resolve_delta(child_obj, parent); + if (!child->children_remaining) + FREE_AND_NULL(child->data); + } else{ + child = make_base(child_obj, NULL); + if (child->children_remaining) { + /* + * Since this child has its own delta children, + * we will need this data in the future. + * Inflate now so that future iterations will + * have access to this object's data while + * outside the work mutex. + */ + child->data = get_data_from_pack(child_obj); + child->size = child_obj->size; + } } } work_lock(); if (parent) parent->retain_data--; - if (child->data) { + + if (child && child->data) { /* * This child has its own children, so add it to * work_head. @@ -1209,7 +1215,7 @@ static void *threaded_second_pass(void *data) base_cache_used += child->size; prune_base_data(NULL); free_base_data(child); - } else { + } else if (child) { /* * This child does not have its own children. It may be * the last descendant of its ancestors; free those diff --git a/t/helper/meson.build b/t/helper/meson.build index d2cabaa2bcfcc9..d4e8b26df8d6de 100644 --- a/t/helper/meson.build +++ b/t/helper/meson.build @@ -36,6 +36,7 @@ test_tool_sources = [ 'test-mktemp.c', 'test-name-hash.c', 'test-online-cpus.c', + 'test-pack-deltas.c', 'test-pack-mtimes.c', 'test-parse-options.c', 'test-parse-pathspec-file.c', diff --git a/t/helper/test-pack-deltas.c b/t/helper/test-pack-deltas.c new file mode 100644 index 00000000000000..4caa024b1ebe73 --- /dev/null +++ b/t/helper/test-pack-deltas.c @@ -0,0 +1,148 @@ +#define USE_THE_REPOSITORY_VARIABLE + +#include "test-tool.h" +#include "git-compat-util.h" +#include "delta.h" +#include "git-zlib.h" +#include "hash.h" +#include "hex.h" +#include "pack.h" +#include "pack-objects.h" +#include "parse-options.h" +#include "setup.h" +#include "strbuf.h" +#include "string-list.h" + +static const char *usage_str[] = { + "test-tool pack-deltas --num-objects ", + NULL +}; + +static unsigned long do_compress(void **pptr, unsigned long size) +{ + git_zstream stream; + void *in, *out; + unsigned long maxsize; + + git_deflate_init(&stream, 1); + maxsize = git_deflate_bound(&stream, size); + + in = *pptr; + out = xmalloc(maxsize); + *pptr = out; + + stream.next_in = in; + stream.avail_in = size; + stream.next_out = out; + stream.avail_out = maxsize; + while (git_deflate(&stream, Z_FINISH) == Z_OK) + ; /* nothing */ + git_deflate_end(&stream); + + free(in); + return stream.total_out; +} + +static void write_ref_delta(struct hashfile *f, + struct object_id *oid, + struct object_id *base) +{ + unsigned char header[MAX_PACK_OBJECT_HEADER]; + unsigned long size, base_size, delta_size, compressed_size, hdrlen; + enum object_type type; + void *base_buf, *delta_buf; + void *buf = repo_read_object_file(the_repository, + oid, &type, + &size); + + if (!buf) + die("unable to read %s", oid_to_hex(oid)); + + base_buf = repo_read_object_file(the_repository, + base, &type, + &base_size); + + if (!base_buf) + die("unable to read %s", oid_to_hex(base)); + + delta_buf = diff_delta(base_buf, base_size, + buf, size, &delta_size, 0); + + compressed_size = do_compress(&delta_buf, delta_size); + + hdrlen = encode_in_pack_object_header(header, sizeof(header), + OBJ_REF_DELTA, delta_size); + hashwrite(f, header, hdrlen); + hashwrite(f, base->hash, the_repository->hash_algo->rawsz); + hashwrite(f, delta_buf, compressed_size); + + free(buf); + free(base_buf); + free(delta_buf); +} + +int cmd__pack_deltas(int argc, const char **argv) +{ + int num_objects = -1; + struct hashfile *f; + struct strbuf line = STRBUF_INIT; + struct option options[] = { + OPT_INTEGER('n', "num-objects", &num_objects, N_("the number of objects to write")), + OPT_END() + }; + + argc = parse_options(argc, argv, NULL, + options, usage_str, 0); + + if (argc || num_objects < 0) + usage_with_options(usage_str, options); + + setup_git_directory(); + + f = hashfd(the_repository->hash_algo, 1, ""); + write_pack_header(f, num_objects); + + /* Read each line from stdin into 'line' */ + while (strbuf_getline_lf(&line, stdin) != EOF) { + const char *type_str, *content_oid_str, *base_oid_str = NULL; + struct object_id content_oid, base_oid; + struct string_list items = STRING_LIST_INIT_NODUP; + /* + * Tokenize into two or three parts: + * 1. REF_DELTA, OFS_DELTA, or FULL. + * 2. The object ID for the content object. + * 3. The object ID for the base object (optional). + */ + if (string_list_split_in_place(&items, line.buf, " ", 3) < 0) + die("invalid input format: %s", line.buf); + + if (items.nr < 2) + die("invalid input format: %s", line.buf); + + type_str = items.items[0].string; + content_oid_str = items.items[1].string; + + if (get_oid_hex(content_oid_str, &content_oid)) + die("invalid object: %s", content_oid_str); + if (items.nr >= 3) { + base_oid_str = items.items[2].string; + if (get_oid_hex(base_oid_str, &base_oid)) + die("invalid object: %s", base_oid_str); + } + string_list_clear(&items, 0); + + if (!strcmp(type_str, "REF_DELTA")) + write_ref_delta(f, &content_oid, &base_oid); + else if (!strcmp(type_str, "OFS_DELTA")) + die("OFS_DELTA not implemented"); + else if (!strcmp(type_str, "FULL")) + die("FULL not implemented"); + else + die("unknown pack type: %s", type_str); + } + + finalize_hashfile(f, NULL, FSYNC_COMPONENT_PACK, + CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE); + strbuf_release(&line); + return 0; +} diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c index 50dc4dac4ed625..74812ed86d385a 100644 --- a/t/helper/test-tool.c +++ b/t/helper/test-tool.c @@ -46,6 +46,7 @@ static struct test_cmd cmds[] = { { "mktemp", cmd__mktemp }, { "name-hash", cmd__name_hash }, { "online-cpus", cmd__online_cpus }, + { "pack-deltas", cmd__pack_deltas }, { "pack-mtimes", cmd__pack_mtimes }, { "parse-options", cmd__parse_options }, { "parse-options-flags", cmd__parse_options_flags }, diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h index 6d62a5b53d9596..2571a3ccfe8991 100644 --- a/t/helper/test-tool.h +++ b/t/helper/test-tool.h @@ -39,6 +39,7 @@ int cmd__mergesort(int argc, const char **argv); int cmd__mktemp(int argc, const char **argv); int cmd__name_hash(int argc, const char **argv); int cmd__online_cpus(int argc, const char **argv); +int cmd__pack_deltas(int argc, const char **argv); int cmd__pack_mtimes(int argc, const char **argv); int cmd__parse_options(int argc, const char **argv); int cmd__parse_options_flags(int argc, const char **argv); diff --git a/t/t5309-pack-delta-cycles.sh b/t/t5309-pack-delta-cycles.sh index 60fc710bacb20e..6b03675d91b5e1 100755 --- a/t/t5309-pack-delta-cycles.sh +++ b/t/t5309-pack-delta-cycles.sh @@ -60,7 +60,10 @@ test_expect_success 'index-pack detects REF_DELTA cycles' ' test_expect_success 'failover to an object in another pack' ' clear_packs && git index-pack --stdin recoverable.pack && pack_trailer recoverable.pack && - test_must_fail git index-pack --fix-thin --stdin B->C with B on disk' ' + git init server && + ( + cd server && + test_commit_bulk 4 + ) && + + A=$(git -C server rev-parse HEAD^{tree}) && + B=$(git -C server rev-parse HEAD~1^{tree}) && + C=$(git -C server rev-parse HEAD~2^{tree}) && + git -C server reset --hard HEAD~1 && + + test-tool -C server pack-deltas --num-objects=2 >thin.pack <<-EOF && + REF_DELTA $A $B + REF_DELTA $B $C + EOF + + git clone "file://$(pwd)/server" client && + ( + cd client && + git index-pack --fix-thin --stdin <../thin.pack + ) ' test_done