Message ID | 36c00613d9a6ad4fc768e15b9ec23f9af520338a.1638750965.git.gitgitgadget@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | ns/tmp-objdir: add support for temporary writable databases | expand |
"Neeraj Singh via GitGitGadget" <gitgitgadget@gmail.com> writes: > diff --git a/builtin/prune.c b/builtin/prune.c > index 485c9a3c56f..c2bcdc07db4 100644 > --- a/builtin/prune.c > +++ b/builtin/prune.c > @@ -26,10 +26,22 @@ static int prune_tmp_file(const char *fullpath) > return error("Could not stat '%s'", fullpath); > if (st.st_mtime > expire) > return 0; > - if (show_only || verbose) > - printf("Removing stale temporary file %s\n", fullpath); > - if (!show_only) > - unlink_or_warn(fullpath); > + if (S_ISDIR(st.st_mode)) { Because the updated tmp_objdir_create() always uses "tmp_objdir-" as the common prefix (instead of "incoming-" that we used to use, prune_cruft() will call this function not just for temporary files for loose objects, but also for directories. So a new code to do an equivalent of "rm -fr" is added here. OK. > + if (show_only || verbose) > + printf("Removing stale temporary directory %s\n", fullpath); > + if (!show_only) { > + struct strbuf remove_dir_buf = STRBUF_INIT; > + > + strbuf_addstr(&remove_dir_buf, fullpath); > + remove_dir_recursively(&remove_dir_buf, 0); > + strbuf_release(&remove_dir_buf); > + } > + } else { > + if (show_only || verbose) > + printf("Removing stale temporary file %s\n", fullpath); > + if (!show_only) > + unlink_or_warn(fullpath); > + } > return 0; > } > > diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c > index 49b846d9605..8815e24cde5 100644 > --- a/builtin/receive-pack.c > +++ b/builtin/receive-pack.c > @@ -2213,7 +2213,7 @@ static const char *unpack(int err_fd, struct shallow_info *si) > strvec_push(&child.args, alt_shallow_file); > } > > - tmp_objdir = tmp_objdir_create(); > + tmp_objdir = tmp_objdir_create("incoming"); > if (!tmp_objdir) { > if (err_fd > 0) > close(err_fd); > diff --git a/environment.c b/environment.c > index 9da7f3c1a19..342400fcaad 100644 > --- a/environment.c > +++ b/environment.c > @@ -17,6 +17,7 @@ > #include "commit.h" > #include "strvec.h" > #include "object-store.h" > +#include "tmp-objdir.h" > #include "chdir-notify.h" > #include "shallow.h" > > @@ -331,10 +332,14 @@ static void update_relative_gitdir(const char *name, > void *data) > { > char *path = reparent_relative_path(old_cwd, new_cwd, get_git_dir()); > + struct tmp_objdir *tmp_objdir = tmp_objdir_unapply_primary_odb(); > trace_printf_key(&trace_setup_key, > "setup: move $GIT_DIR to '%s'", > path); > + > set_git_dir_1(path); If a blank line needs to be added, have it between the variable declarations and the first statement (i.e. before the above call to "trace_printf_key()"). > + if (tmp_objdir) > + tmp_objdir_reapply_primary_odb(tmp_objdir, old_cwd, new_cwd); > free(path); > } This is called during set_git_dir(), which happens fairly early in the set-up sequence. I wonder if there is a real use case that creates a tmp-objdir that early in the process to require this unapply-reapply sequence. > @@ -1809,8 +1846,11 @@ int hash_object_file(const struct git_hash_algo *algo, const void *buf, > /* Finalize a file on disk, and close it. */ > static void close_loose_object(int fd) > { > - if (fsync_object_files) > - fsync_or_die(fd, "loose object file"); > + if (!the_repository->objects->odb->will_destroy) { > + if (fsync_object_files) > + fsync_or_die(fd, "loose object file"); OK, so we omit fsync because these newly created loose objects may not survive and instead get discarded. Presumably when we migrate them to the real object store, we'll make sure they hit the disk platter in some other way? ... goes and cheats by reading ahead ... Ahh, ok, new objects created in a temporary object store that is marked with the will_destroy bit is not allowed to migrate to the real object store, so there is no point to fsync them. set_temporary_primary_odb() and tmp_objdir_replace_primary_odb() can mark the temporary one to be throw-away, but unfortunately there is no caller in this step, so it is a bit hard to see when a throw-away object store is useful. I guess remerge-diff wants to do tentative merges that create new objects in a throw-away object directory, because it is logically a read-only operation. > diff --git a/tmp-objdir.c b/tmp-objdir.c > index b8d880e3626..3d38eeab66b 100644 > --- a/tmp-objdir.c > +++ b/tmp-objdir.c > @@ -1,5 +1,6 @@ > #include "cache.h" > #include "tmp-objdir.h" > +#include "chdir-notify.h" > #include "dir.h" > #include "sigchain.h" > #include "string-list.h" > @@ -11,6 +12,8 @@ > struct tmp_objdir { > struct strbuf path; > struct strvec env; > + struct object_directory *prev_odb; > + int will_destroy; The other one was a one-bit unsigned bitfield, but this is a full integer. I somehow think that the other one can and should be a full integer, too---it's not like there are tons of bits need to be stored in the structure or we will have tons of instances of the structure that storing many bits compactly matters. Thanks.
On Sun, Dec 05, 2021 at 11:43:07PM -0800, Junio C Hamano wrote: > "Neeraj Singh via GitGitGadget" <gitgitgadget@gmail.com> writes: > > > @@ -331,10 +332,14 @@ static void update_relative_gitdir(const char *name, > > void *data) > > { > > char *path = reparent_relative_path(old_cwd, new_cwd, get_git_dir()); > > + struct tmp_objdir *tmp_objdir = tmp_objdir_unapply_primary_odb(); > > trace_printf_key(&trace_setup_key, > > "setup: move $GIT_DIR to '%s'", > > path); > > + > > set_git_dir_1(path); > > If a blank line needs to be added, have it between the variable > declarations and the first statement (i.e. before the above call to > "trace_printf_key()"). > Will fix. > > + if (tmp_objdir) > > + tmp_objdir_reapply_primary_odb(tmp_objdir, old_cwd, new_cwd); > > free(path); > > } > > This is called during set_git_dir(), which happens fairly early in > the set-up sequence. I wonder if there is a real use case that > creates a tmp-objdir that early in the process to require this > unapply-reapply sequence. > The lack of this code was causing a failure, I believe in t2107-update-index-basic.sh: "--refresh triggers late setup_work_tree". This problem came up after applying: https://lore.kernel.org/git/4a40fd4a29a468b9ce320bc7b22f19e5a526fad6.1637020263.git.gitgitgadget@gmail.com/ I thought it would be best to fix this in the tmp-objdir code so that callers could plug/unplug bulk checkin without any subtle surprises. > > @@ -1809,8 +1846,11 @@ int hash_object_file(const struct git_hash_algo *algo, const void *buf, > > /* Finalize a file on disk, and close it. */ > > static void close_loose_object(int fd) > > { > > - if (fsync_object_files) > > - fsync_or_die(fd, "loose object file"); > > + if (!the_repository->objects->odb->will_destroy) { > > + if (fsync_object_files) > > + fsync_or_die(fd, "loose object file"); > > OK, so we omit fsync because these newly created loose objects may > not survive and instead get discarded. Presumably when we migrate > them to the real object store, we'll make sure they hit the disk > platter in some other way? > > ... goes and cheats by reading ahead ... > > Ahh, ok, new objects created in a temporary object store that is > marked with the will_destroy bit is not allowed to migrate to the > real object store, so there is no point to fsync them. > > set_temporary_primary_odb() and tmp_objdir_replace_primary_odb() can > mark the temporary one to be throw-away, but unfortunately there is > no caller in this step, so it is a bit hard to see when a throw-away > object store is useful. I guess remerge-diff wants to do tentative > merges that create new objects in a throw-away object directory, > because it is logically a read-only operation. > Yes, this code is there exactly for remerge-diff and anyone doing something similar in the future. > > diff --git a/tmp-objdir.c b/tmp-objdir.c > > index b8d880e3626..3d38eeab66b 100644 > > --- a/tmp-objdir.c > > +++ b/tmp-objdir.c > > @@ -1,5 +1,6 @@ > > #include "cache.h" > > #include "tmp-objdir.h" > > +#include "chdir-notify.h" > > #include "dir.h" > > #include "sigchain.h" > > #include "string-list.h" > > @@ -11,6 +12,8 @@ > > struct tmp_objdir { > > struct strbuf path; > > struct strvec env; > > + struct object_directory *prev_odb; > > + int will_destroy; > > The other one was a one-bit unsigned bitfield, but this is a full > integer. I somehow think that the other one can and should be a > full integer, too---it's not like there are tons of bits need to be > stored in the structure or we will have tons of instances of the > structure that storing many bits compactly matters. > The principle I was trying to follow here is that the only flag in a structure might as well be a full integer, but when we have two or more it might be worth combining them into a single machine word. Given that these are not highly replicated structures, you're right that's it's not a big benefit. I'll switch everything to an int and call it good. Given that this patch series introduces functions with no users, are you going to hold off on putting this into 'next' until another next-worthy patch series is ready? I've already reworked the batch mode stuff on Github, but I'll need to do a lot more testing before sending it to the list. Thanks, Neeraj
Neeraj Singh <nksingh85@gmail.com> writes: >> > + if (tmp_objdir) >> > + tmp_objdir_reapply_primary_odb(tmp_objdir, old_cwd, new_cwd); >> > free(path); >> > } >> >> This is called during set_git_dir(), which happens fairly early in >> the set-up sequence. I wonder if there is a real use case that >> creates a tmp-objdir that early in the process to require this >> unapply-reapply sequence. >> > > The lack of this code was causing a failure, I believe in > t2107-update-index-basic.sh: "--refresh triggers late setup_work_tree". > > This problem came up after applying: https://lore.kernel.org/git/4a40fd4a29a468b9ce320bc7b22f19e5a526fad6.1637020263.git.gitgitgadget@gmail.com/ > > I thought it would be best to fix this in the tmp-objdir code so that > callers could plug/unplug bulk checkin without any subtle surprises. OK, I think that is fine. As a slightly-related tangent that is outside the topic, I think we should revisit "update-index", which is one of the oldest plumbing commands with its own quirks. I do not offhand see why it needs to sprinkle this many setup_work_tree() calls everywhere. Having an index to work on means we must have a working tree to update and/or refresh from. We should be able to get away with the NEED_WORK_TREE bit in the git.c::commands[] table for this command. If this were a more recent command, I may suspect that there were valid reasons like "in this particular mode, update-index must work inside a bare repository" to force us to take this unusual program structure, but because this is probably a lot older than NEED_WORK_TREE bit, I would not be surprised if the answer were "nobody noticed the ugliness so far". > Given that this patch series introduces functions with no users, are you > going to hold off on putting this into 'next' until another next-worthy > patch series is ready? Even without any existing callers, as long as we see Reviewed-by: by Elijah, who we know will have to build on top of this series, I think this can and should go to 'next'. Thanks.
diff --git a/builtin/prune.c b/builtin/prune.c index 485c9a3c56f..c2bcdc07db4 100644 --- a/builtin/prune.c +++ b/builtin/prune.c @@ -26,10 +26,22 @@ static int prune_tmp_file(const char *fullpath) return error("Could not stat '%s'", fullpath); if (st.st_mtime > expire) return 0; - if (show_only || verbose) - printf("Removing stale temporary file %s\n", fullpath); - if (!show_only) - unlink_or_warn(fullpath); + if (S_ISDIR(st.st_mode)) { + if (show_only || verbose) + printf("Removing stale temporary directory %s\n", fullpath); + if (!show_only) { + struct strbuf remove_dir_buf = STRBUF_INIT; + + strbuf_addstr(&remove_dir_buf, fullpath); + remove_dir_recursively(&remove_dir_buf, 0); + strbuf_release(&remove_dir_buf); + } + } else { + if (show_only || verbose) + printf("Removing stale temporary file %s\n", fullpath); + if (!show_only) + unlink_or_warn(fullpath); + } return 0; } diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index 49b846d9605..8815e24cde5 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -2213,7 +2213,7 @@ static const char *unpack(int err_fd, struct shallow_info *si) strvec_push(&child.args, alt_shallow_file); } - tmp_objdir = tmp_objdir_create(); + tmp_objdir = tmp_objdir_create("incoming"); if (!tmp_objdir) { if (err_fd > 0) close(err_fd); diff --git a/environment.c b/environment.c index 9da7f3c1a19..342400fcaad 100644 --- a/environment.c +++ b/environment.c @@ -17,6 +17,7 @@ #include "commit.h" #include "strvec.h" #include "object-store.h" +#include "tmp-objdir.h" #include "chdir-notify.h" #include "shallow.h" @@ -331,10 +332,14 @@ static void update_relative_gitdir(const char *name, void *data) { char *path = reparent_relative_path(old_cwd, new_cwd, get_git_dir()); + struct tmp_objdir *tmp_objdir = tmp_objdir_unapply_primary_odb(); trace_printf_key(&trace_setup_key, "setup: move $GIT_DIR to '%s'", path); + set_git_dir_1(path); + if (tmp_objdir) + tmp_objdir_reapply_primary_odb(tmp_objdir, old_cwd, new_cwd); free(path); } diff --git a/object-file.c b/object-file.c index c3d866a287e..0b6a61aeaff 100644 --- a/object-file.c +++ b/object-file.c @@ -683,6 +683,43 @@ void add_to_alternates_memory(const char *reference) '\n', NULL, 0); } +struct object_directory *set_temporary_primary_odb(const char *dir, int will_destroy) +{ + struct object_directory *new_odb; + + /* + * Make sure alternates are initialized, or else our entry may be + * overwritten when they are. + */ + prepare_alt_odb(the_repository); + + /* + * Make a new primary odb and link the old primary ODB in as an + * alternate + */ + new_odb = xcalloc(1, sizeof(*new_odb)); + new_odb->path = xstrdup(dir); + new_odb->will_destroy = will_destroy; + new_odb->next = the_repository->objects->odb; + the_repository->objects->odb = new_odb; + return new_odb->next; +} + +void restore_primary_odb(struct object_directory *restore_odb, const char *old_path) +{ + struct object_directory *cur_odb = the_repository->objects->odb; + + if (strcmp(old_path, cur_odb->path)) + BUG("expected %s as primary object store; found %s", + old_path, cur_odb->path); + + if (cur_odb->next != restore_odb) + BUG("we expect the old primary object store to be the first alternate"); + + the_repository->objects->odb = restore_odb; + free_object_directory(cur_odb); +} + /* * Compute the exact path an alternate is at and returns it. In case of * error NULL is returned and the human readable error is added to `err` @@ -1809,8 +1846,11 @@ int hash_object_file(const struct git_hash_algo *algo, const void *buf, /* Finalize a file on disk, and close it. */ static void close_loose_object(int fd) { - if (fsync_object_files) - fsync_or_die(fd, "loose object file"); + if (!the_repository->objects->odb->will_destroy) { + if (fsync_object_files) + fsync_or_die(fd, "loose object file"); + } + if (close(fd) != 0) die_errno(_("error when closing loose object file")); } diff --git a/object-store.h b/object-store.h index 952efb6a4be..82cf13f1054 100644 --- a/object-store.h +++ b/object-store.h @@ -27,6 +27,11 @@ struct object_directory { uint32_t loose_objects_subdir_seen[8]; /* 256 bits */ struct oidtree *loose_objects_cache; + /* + * This object store is ephemeral, so there is no need to fsync. + */ + unsigned int will_destroy : 1; + /* * Path to the alternative object store. If this is a relative path, * it is relative to the current working directory. @@ -58,6 +63,17 @@ void add_to_alternates_file(const char *dir); */ void add_to_alternates_memory(const char *dir); +/* + * Replace the current writable object directory with the specified temporary + * object directory; returns the former primary object directory. + */ +struct object_directory *set_temporary_primary_odb(const char *dir, int will_destroy); + +/* + * Restore a previous ODB replaced by set_temporary_main_odb. + */ +void restore_primary_odb(struct object_directory *restore_odb, const char *old_path); + /* * Populate and return the loose object cache array corresponding to the * given object ID. @@ -68,6 +84,9 @@ struct oidtree *odb_loose_cache(struct object_directory *odb, /* Empty the loose object cache for the specified object directory. */ void odb_clear_loose_cache(struct object_directory *odb); +/* Clear and free the specified object directory */ +void free_object_directory(struct object_directory *odb); + struct packed_git { struct hashmap_entry packmap_ent; struct packed_git *next; diff --git a/object.c b/object.c index 23a24e678a8..048f96a260e 100644 --- a/object.c +++ b/object.c @@ -513,7 +513,7 @@ struct raw_object_store *raw_object_store_new(void) return o; } -static void free_object_directory(struct object_directory *odb) +void free_object_directory(struct object_directory *odb) { free(odb->path); odb_clear_loose_cache(odb); diff --git a/tmp-objdir.c b/tmp-objdir.c index b8d880e3626..3d38eeab66b 100644 --- a/tmp-objdir.c +++ b/tmp-objdir.c @@ -1,5 +1,6 @@ #include "cache.h" #include "tmp-objdir.h" +#include "chdir-notify.h" #include "dir.h" #include "sigchain.h" #include "string-list.h" @@ -11,6 +12,8 @@ struct tmp_objdir { struct strbuf path; struct strvec env; + struct object_directory *prev_odb; + int will_destroy; }; /* @@ -38,6 +41,9 @@ static int tmp_objdir_destroy_1(struct tmp_objdir *t, int on_signal) if (t == the_tmp_objdir) the_tmp_objdir = NULL; + if (!on_signal && t->prev_odb) + restore_primary_odb(t->prev_odb, t->path.buf); + /* * This may use malloc via strbuf_grow(), but we should * have pre-grown t->path sufficiently so that this @@ -52,6 +58,7 @@ static int tmp_objdir_destroy_1(struct tmp_objdir *t, int on_signal) */ if (!on_signal) tmp_objdir_free(t); + return err; } @@ -121,7 +128,7 @@ static int setup_tmp_objdir(const char *root) return ret; } -struct tmp_objdir *tmp_objdir_create(void) +struct tmp_objdir *tmp_objdir_create(const char *prefix) { static int installed_handlers; struct tmp_objdir *t; @@ -129,11 +136,16 @@ struct tmp_objdir *tmp_objdir_create(void) if (the_tmp_objdir) BUG("only one tmp_objdir can be used at a time"); - t = xmalloc(sizeof(*t)); + t = xcalloc(1, sizeof(*t)); strbuf_init(&t->path, 0); strvec_init(&t->env); - strbuf_addf(&t->path, "%s/incoming-XXXXXX", get_object_directory()); + /* + * Use a string starting with tmp_ so that the builtin/prune.c code + * can recognize any stale objdirs left behind by a crash and delete + * them. + */ + strbuf_addf(&t->path, "%s/tmp_objdir-%s-XXXXXX", get_object_directory(), prefix); /* * Grow the strbuf beyond any filename we expect to be placed in it. @@ -269,6 +281,13 @@ int tmp_objdir_migrate(struct tmp_objdir *t) if (!t) return 0; + if (t->prev_odb) { + if (the_repository->objects->odb->will_destroy) + BUG("migrating an ODB that was marked for destruction"); + restore_primary_odb(t->prev_odb, t->path.buf); + t->prev_odb = NULL; + } + strbuf_addbuf(&src, &t->path); strbuf_addstr(&dst, get_object_directory()); @@ -292,3 +311,33 @@ void tmp_objdir_add_as_alternate(const struct tmp_objdir *t) { add_to_alternates_memory(t->path.buf); } + +void tmp_objdir_replace_primary_odb(struct tmp_objdir *t, int will_destroy) +{ + if (t->prev_odb) + BUG("the primary object database is already replaced"); + t->prev_odb = set_temporary_primary_odb(t->path.buf, will_destroy); + t->will_destroy = will_destroy; +} + +struct tmp_objdir *tmp_objdir_unapply_primary_odb(void) +{ + if (!the_tmp_objdir || !the_tmp_objdir->prev_odb) + return NULL; + + restore_primary_odb(the_tmp_objdir->prev_odb, the_tmp_objdir->path.buf); + the_tmp_objdir->prev_odb = NULL; + return the_tmp_objdir; +} + +void tmp_objdir_reapply_primary_odb(struct tmp_objdir *t, const char *old_cwd, + const char *new_cwd) +{ + char *path; + + path = reparent_relative_path(old_cwd, new_cwd, t->path.buf); + strbuf_reset(&t->path); + strbuf_addstr(&t->path, path); + free(path); + tmp_objdir_replace_primary_odb(t, t->will_destroy); +} diff --git a/tmp-objdir.h b/tmp-objdir.h index b1e45b4c75d..a3145051f25 100644 --- a/tmp-objdir.h +++ b/tmp-objdir.h @@ -10,7 +10,7 @@ * * Example: * - * struct tmp_objdir *t = tmp_objdir_create(); + * struct tmp_objdir *t = tmp_objdir_create("incoming"); * if (!run_command_v_opt_cd_env(cmd, 0, NULL, tmp_objdir_env(t)) && * !tmp_objdir_migrate(t)) * printf("success!\n"); @@ -22,9 +22,10 @@ struct tmp_objdir; /* - * Create a new temporary object directory; returns NULL on failure. + * Create a new temporary object directory with the specified prefix; + * returns NULL on failure. */ -struct tmp_objdir *tmp_objdir_create(void); +struct tmp_objdir *tmp_objdir_create(const char *prefix); /* * Return a list of environment strings, suitable for use with @@ -51,4 +52,26 @@ int tmp_objdir_destroy(struct tmp_objdir *); */ void tmp_objdir_add_as_alternate(const struct tmp_objdir *); +/* + * Replaces the main object store in the current process with the temporary + * object directory and makes the former main object store an alternate. + * If will_destroy is nonzero, the object directory may not be migrated. + */ +void tmp_objdir_replace_primary_odb(struct tmp_objdir *, int will_destroy); + +/* + * If the primary object database was replaced by a temporary object directory, + * restore it to its original value while keeping the directory contents around. + * Returns NULL if the primary object database was not replaced. + */ +struct tmp_objdir *tmp_objdir_unapply_primary_odb(void); + +/* + * Reapplies the former primary temporary object database, after protentially + * changing its relative path. + */ +void tmp_objdir_reapply_primary_odb(struct tmp_objdir *, const char *old_cwd, + const char *new_cwd); + + #endif /* TMP_OBJDIR_H */