Message ID | deece9eb70e9750bb8350946679b521e59139fe2.1638224692.git.me@ttaylorr.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | cruft packs | expand |
On 11/29/2021 5:25 PM, Taylor Blau wrote:> @@ -168,6 +168,9 @@ struct packing_data { > /* delta islands */ > unsigned int *tree_depth; > unsigned char *layer; > + > + /* cruft packs */ > + uint32_t *cruft_mtime; This comment is a bit terse. Perhaps... /* Used when writing cruft packs. */ > +static inline uint32_t oe_cruft_mtime(struct packing_data *pack, > + struct object_entry *e) > +{ > + if (!pack->cruft_mtime) > + return 0; > + return pack->cruft_mtime[e - pack->objects]; > +} When writing a pack, it appears that the cruft_mtime array maps to objects in pack-order, not idx-order, correct? That might be worth mentioning in the struct definition because it differs from the .mtimes file. > +static void write_mtimes_objects(struct hashfile *f, > + struct packing_data *to_pack, > + struct pack_idx_entry **objects, > + uint32_t nr_objects) > +{ > + uint32_t i; > + for (i = 0; i < nr_objects; i++) { > + struct object_entry *e = (struct object_entry*)objects[i]; > + hashwrite_be32(f, oe_cruft_mtime(to_pack, e)); > + } The name "objects" here confused me at first, thinking it corresponded to the objects member of 'struct packing_data', but that is being handled by the fact that 'objects' is actually a lex-sorted list of pack_idx_entry pointers (and they happen to also point to 'struct object_entry' values because the 'struct pack_idx_entry' is the first member. So this is (very densely) handling the translation from pack-order to lex-order through the double pointer 'objects'. I'm not sure if there is a way to make it more clear or if every reader will need to do the same mental gymnastics I had to do. > +} > + > +static void write_mtimes_trailer(struct hashfile *f, const unsigned char *hash) > +{ > + hashwrite(f, hash, the_hash_algo->rawsz); > +} > + > +static const char *write_mtimes_file(const char *mtimes_name, > + struct packing_data *to_pack, > + struct pack_idx_entry **objects, > + uint32_t nr_objects, > + const unsigned char *hash) > +{ > + struct hashfile *f; > + int fd; > + > + if (!to_pack) > + BUG("cannot call write_mtimes_file with NULL packing_data"); > + > + if (!mtimes_name) { > + struct strbuf tmp_file = STRBUF_INIT; > + fd = odb_mkstemp(&tmp_file, "pack/tmp_mtimes_XXXXXX"); > + mtimes_name = strbuf_detach(&tmp_file, NULL); > + } else { > + unlink(mtimes_name); > + fd = xopen(mtimes_name, O_CREAT|O_EXCL|O_WRONLY, 0600); > + } > + f = hashfd(fd, mtimes_name); > + > + write_mtimes_header(f); > + write_mtimes_objects(f, to_pack, objects, nr_objects); > + write_mtimes_trailer(f, hash); > + > + if (mtimes_name && adjust_shared_perm(mtimes_name) < 0) > + die(_("failed to make %s readable"), mtimes_name); What could cause 'mtimes_name' to be NULL here? It seems that it would be initialized in the "if (!mtimes_name)" block above. > + > + finalize_hashfile(f, NULL, > + CSUM_HASH_IN_STREAM | CSUM_CLOSE | CSUM_FSYNC); > + > + return mtimes_name; Note that you return the name here... > + if (pack_idx_opts->flags & WRITE_MTIMES) { > + mtimes_tmp_name = write_mtimes_file(NULL, to_pack, written_list, > + nr_written, > + hash); > + if (adjust_shared_perm(mtimes_tmp_name)) > + die_errno("unable to make temporary mtimes file readable"); ...and then adjust the perms again. I think that this adjustment is redundant, because it already happened within the write_mtimes_file() method. > + } > + > rename_tmp_packfile(name_buffer, pack_tmp_name, "pack"); > if (rev_tmp_name) > rename_tmp_packfile(name_buffer, rev_tmp_name, "rev"); > + if (mtimes_tmp_name) > + rename_tmp_packfile(name_buffer, mtimes_tmp_name, "mtimes"); And then it is finally renamed here, if it had a temporary name to start. Thanks, -Stolee
On Thu, Dec 02, 2021 at 10:36:16AM -0500, Derrick Stolee wrote: > On 11/29/2021 5:25 PM, Taylor Blau wrote:> @@ -168,6 +168,9 @@ struct packing_data { > > /* delta islands */ > > unsigned int *tree_depth; > > unsigned char *layer; > > + > > + /* cruft packs */ > > + uint32_t *cruft_mtime; > > This comment is a bit terse. Perhaps... > > /* Used when writing cruft packs. */ Sure; here I was imitating the terseness of the "delta islands" comment a few lines above. But I don't mind changing it here. > > +static inline uint32_t oe_cruft_mtime(struct packing_data *pack, > > + struct object_entry *e) > > +{ > > + if (!pack->cruft_mtime) > > + return 0; > > + return pack->cruft_mtime[e - pack->objects]; > > +} > > When writing a pack, it appears that the cruft_mtime array > maps to objects in pack-order, not idx-order, correct? That > might be worth mentioning in the struct definition because > it differs from the .mtimes file. Great observation and suggestion, thank you! The comment that I ultimately settled on is: /* * Used when writing cruft packs. * * Object mtimes are stored in pack order when writing, but * written out in lexicographic (index) order. */ uint32_t *cruft_mtime; > > +static void write_mtimes_objects(struct hashfile *f, > > + struct packing_data *to_pack, > > + struct pack_idx_entry **objects, > > + uint32_t nr_objects) > > +{ > > + uint32_t i; > > + for (i = 0; i < nr_objects; i++) { > > + struct object_entry *e = (struct object_entry*)objects[i]; > > + hashwrite_be32(f, oe_cruft_mtime(to_pack, e)); > > + } > > The name "objects" here confused me at first, thinking it > corresponded to the objects member of 'struct packing_data', but > that is being handled by the fact that 'objects' is actually a > lex-sorted list of pack_idx_entry pointers (and they happen to > also point to 'struct object_entry' values because the 'struct > pack_idx_entry' is the first member. > > So this is (very densely) handling the translation from pack-order > to lex-order through the double pointer 'objects'. I'm not sure if > there is a way to make it more clear or if every reader will need > to do the same mental gymnastics I had to do. Exactly, and sorry that I didn't point this out more clearly. It's been long enough since I wrote this code that I can sympathize with the mental gymnastics required ;). > > +} > > + > > +static void write_mtimes_trailer(struct hashfile *f, const unsigned char *hash) > > +{ > > + hashwrite(f, hash, the_hash_algo->rawsz); > > +} > > + > > +static const char *write_mtimes_file(const char *mtimes_name, > > + struct packing_data *to_pack, > > + struct pack_idx_entry **objects, > > + uint32_t nr_objects, > > + const unsigned char *hash) > > +{ > > + struct hashfile *f; > > + int fd; > > + > > + if (!to_pack) > > + BUG("cannot call write_mtimes_file with NULL packing_data"); > > + > > + if (!mtimes_name) { > > + struct strbuf tmp_file = STRBUF_INIT; > > + fd = odb_mkstemp(&tmp_file, "pack/tmp_mtimes_XXXXXX"); > > + mtimes_name = strbuf_detach(&tmp_file, NULL); > > + } else { > > + unlink(mtimes_name); > > + fd = xopen(mtimes_name, O_CREAT|O_EXCL|O_WRONLY, 0600); > > + } > > + f = hashfd(fd, mtimes_name); > > + > > + write_mtimes_header(f); > > + write_mtimes_objects(f, to_pack, objects, nr_objects); > > + write_mtimes_trailer(f, hash); > > + > > + if (mtimes_name && adjust_shared_perm(mtimes_name) < 0) > > + die(_("failed to make %s readable"), mtimes_name); > > What could cause 'mtimes_name' to be NULL here? It seems that it would > be initialized in the "if (!mtimes_name)" block above. You're right, it's impossible for it to be NULL here. I'll remove the redundant side of the &&-expression here. > > + > > + finalize_hashfile(f, NULL, > > + CSUM_HASH_IN_STREAM | CSUM_CLOSE | CSUM_FSYNC); > > + > > + return mtimes_name; > > Note that you return the name here... > > > + if (pack_idx_opts->flags & WRITE_MTIMES) { > > + mtimes_tmp_name = write_mtimes_file(NULL, to_pack, written_list, > > + nr_written, > > + hash); > > + if (adjust_shared_perm(mtimes_tmp_name)) > > + die_errno("unable to make temporary mtimes file readable"); > > ...and then adjust the perms again. I think that this adjustment is > redundant, because it already happened within the write_mtimes_file() > method. Yep, thanks. I'll clean it up here to just call adjust_shared_perm() witin write_mtimes_file(). Thanks, Taylor
diff --git a/pack-objects.c b/pack-objects.c index fe2a4eace9..272e8d4517 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -170,6 +170,9 @@ struct object_entry *packlist_alloc(struct packing_data *pdata, if (pdata->layer) REALLOC_ARRAY(pdata->layer, pdata->nr_alloc); + + if (pdata->cruft_mtime) + REALLOC_ARRAY(pdata->cruft_mtime, pdata->nr_alloc); } new_entry = pdata->objects + pdata->nr_objects++; @@ -198,6 +201,9 @@ struct object_entry *packlist_alloc(struct packing_data *pdata, if (pdata->layer) pdata->layer[pdata->nr_objects - 1] = 0; + if (pdata->cruft_mtime) + pdata->cruft_mtime[pdata->nr_objects - 1] = 0; + return new_entry; } diff --git a/pack-objects.h b/pack-objects.h index dca2351ef9..f17119de26 100644 --- a/pack-objects.h +++ b/pack-objects.h @@ -168,6 +168,9 @@ struct packing_data { /* delta islands */ unsigned int *tree_depth; unsigned char *layer; + + /* cruft packs */ + uint32_t *cruft_mtime; }; void prepare_packing_data(struct repository *r, struct packing_data *pdata); @@ -289,4 +292,21 @@ static inline void oe_set_layer(struct packing_data *pack, pack->layer[e - pack->objects] = layer; } +static inline uint32_t oe_cruft_mtime(struct packing_data *pack, + struct object_entry *e) +{ + if (!pack->cruft_mtime) + return 0; + return pack->cruft_mtime[e - pack->objects]; +} + +static inline void oe_set_cruft_mtime(struct packing_data *pack, + struct object_entry *e, + uint32_t mtime) +{ + if (!pack->cruft_mtime) + CALLOC_ARRAY(pack->cruft_mtime, pack->nr_alloc); + pack->cruft_mtime[e - pack->objects] = mtime; +} + #endif diff --git a/pack-write.c b/pack-write.c index ff305b404c..8c3efda2c3 100644 --- a/pack-write.c +++ b/pack-write.c @@ -3,6 +3,10 @@ #include "csum-file.h" #include "remote.h" #include "chunk-format.h" +#include "pack-mtimes.h" +#include "oidmap.h" +#include "chunk-format.h" +#include "pack-objects.h" void reset_pack_idx_option(struct pack_idx_option *opts) { @@ -276,6 +280,65 @@ const char *write_rev_file_order(const char *rev_name, return rev_name; } +static void write_mtimes_header(struct hashfile *f) +{ + hashwrite_be32(f, MTIMES_SIGNATURE); + hashwrite_be32(f, MTIMES_VERSION); + hashwrite_be32(f, oid_version(the_hash_algo)); +} + +static void write_mtimes_objects(struct hashfile *f, + struct packing_data *to_pack, + struct pack_idx_entry **objects, + uint32_t nr_objects) +{ + uint32_t i; + for (i = 0; i < nr_objects; i++) { + struct object_entry *e = (struct object_entry*)objects[i]; + hashwrite_be32(f, oe_cruft_mtime(to_pack, e)); + } +} + +static void write_mtimes_trailer(struct hashfile *f, const unsigned char *hash) +{ + hashwrite(f, hash, the_hash_algo->rawsz); +} + +static const char *write_mtimes_file(const char *mtimes_name, + struct packing_data *to_pack, + struct pack_idx_entry **objects, + uint32_t nr_objects, + const unsigned char *hash) +{ + struct hashfile *f; + int fd; + + if (!to_pack) + BUG("cannot call write_mtimes_file with NULL packing_data"); + + if (!mtimes_name) { + struct strbuf tmp_file = STRBUF_INIT; + fd = odb_mkstemp(&tmp_file, "pack/tmp_mtimes_XXXXXX"); + mtimes_name = strbuf_detach(&tmp_file, NULL); + } else { + unlink(mtimes_name); + fd = xopen(mtimes_name, O_CREAT|O_EXCL|O_WRONLY, 0600); + } + f = hashfd(fd, mtimes_name); + + write_mtimes_header(f); + write_mtimes_objects(f, to_pack, objects, nr_objects); + write_mtimes_trailer(f, hash); + + if (mtimes_name && adjust_shared_perm(mtimes_name) < 0) + die(_("failed to make %s readable"), mtimes_name); + + finalize_hashfile(f, NULL, + CSUM_HASH_IN_STREAM | CSUM_CLOSE | CSUM_FSYNC); + + return mtimes_name; +} + off_t write_pack_header(struct hashfile *f, uint32_t nr_entries) { struct pack_header hdr; @@ -478,6 +541,7 @@ void stage_tmp_packfiles(struct strbuf *name_buffer, char **idx_tmp_name) { const char *rev_tmp_name = NULL; + const char *mtimes_tmp_name = NULL; if (adjust_shared_perm(pack_tmp_name)) die_errno("unable to make temporary pack file readable"); @@ -490,9 +554,19 @@ void stage_tmp_packfiles(struct strbuf *name_buffer, rev_tmp_name = write_rev_file(NULL, written_list, nr_written, hash, pack_idx_opts->flags); + if (pack_idx_opts->flags & WRITE_MTIMES) { + mtimes_tmp_name = write_mtimes_file(NULL, to_pack, written_list, + nr_written, + hash); + if (adjust_shared_perm(mtimes_tmp_name)) + die_errno("unable to make temporary mtimes file readable"); + } + rename_tmp_packfile(name_buffer, pack_tmp_name, "pack"); if (rev_tmp_name) rename_tmp_packfile(name_buffer, rev_tmp_name, "rev"); + if (mtimes_tmp_name) + rename_tmp_packfile(name_buffer, mtimes_tmp_name, "mtimes"); } void write_promisor_file(const char *promisor_name, struct ref **sought, int nr_sought) diff --git a/pack.h b/pack.h index fd27cfdfd7..01d385903a 100644 --- a/pack.h +++ b/pack.h @@ -44,6 +44,7 @@ struct pack_idx_option { #define WRITE_IDX_STRICT 02 #define WRITE_REV 04 #define WRITE_REV_VERIFY 010 +#define WRITE_MTIMES 020 uint32_t version; uint32_t off32_limit;
Now that the `.mtimes` format is defined, supplement the pack-write API to be able to conditionally write an `.mtimes` file along with a pack by setting an additional flag and passing an oidmap that contains the timestamps corresponding to each object in the pack. Signed-off-by: Taylor Blau <me@ttaylorr.com> --- pack-objects.c | 6 ++++ pack-objects.h | 20 ++++++++++++++ pack-write.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++ pack.h | 1 + 4 files changed, 101 insertions(+)