diff mbox series

[05/17] pack-mtimes: support writing pack .mtimes files

Message ID deece9eb70e9750bb8350946679b521e59139fe2.1638224692.git.me@ttaylorr.com (mailing list archive)
State New, archived
Headers show
Series cruft packs | expand

Commit Message

Taylor Blau Nov. 29, 2021, 10:25 p.m. UTC
Now that the `.mtimes` format is defined, supplement the pack-write API
to be able to conditionally write an `.mtimes` file along with a pack by
setting an additional flag and passing an oidmap that contains the
timestamps corresponding to each object in the pack.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 pack-objects.c |  6 ++++
 pack-objects.h | 20 ++++++++++++++
 pack-write.c   | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++
 pack.h         |  1 +
 4 files changed, 101 insertions(+)

Comments

Derrick Stolee Dec. 2, 2021, 3:36 p.m. UTC | #1
On 11/29/2021 5:25 PM, Taylor Blau wrote:> @@ -168,6 +168,9 @@ struct packing_data {
>  	/* delta islands */
>  	unsigned int *tree_depth;
>  	unsigned char *layer;
> +
> +	/* cruft packs */
> +	uint32_t *cruft_mtime;

This comment is a bit terse. Perhaps...

	/* Used when writing cruft packs. */

> +static inline uint32_t oe_cruft_mtime(struct packing_data *pack,
> +				      struct object_entry *e)
> +{
> +	if (!pack->cruft_mtime)
> +		return 0;
> +	return pack->cruft_mtime[e - pack->objects];
> +}

When writing a pack, it appears that the cruft_mtime array
maps to objects in pack-order, not idx-order, correct? That
might be worth mentioning in the struct definition because
it differs from the .mtimes file.

> +static void write_mtimes_objects(struct hashfile *f,
> +				 struct packing_data *to_pack,
> +				 struct pack_idx_entry **objects,
> +				 uint32_t nr_objects)
> +{
> +	uint32_t i;
> +	for (i = 0; i < nr_objects; i++) {
> +		struct object_entry *e = (struct object_entry*)objects[i];
> +		hashwrite_be32(f, oe_cruft_mtime(to_pack, e));
> +	}

The name "objects" here confused me at first, thinking it
corresponded to the objects member of 'struct packing_data', but
that is being handled by the fact that 'objects' is actually a
lex-sorted list of pack_idx_entry pointers (and they happen to
also point to 'struct object_entry' values because the 'struct
pack_idx_entry' is the first member.

So this is (very densely) handling the translation from pack-order
to lex-order through the double pointer 'objects'. I'm not sure if
there is a way to make it more clear or if every reader will need
to do the same mental gymnastics I had to do.

> +}
> +
> +static void write_mtimes_trailer(struct hashfile *f, const unsigned char *hash)
> +{
> +	hashwrite(f, hash, the_hash_algo->rawsz);
> +}
> +
> +static const char *write_mtimes_file(const char *mtimes_name,
> +				     struct packing_data *to_pack,
> +				     struct pack_idx_entry **objects,
> +				     uint32_t nr_objects,
> +				     const unsigned char *hash)
> +{
> +	struct hashfile *f;
> +	int fd;
> +
> +	if (!to_pack)
> +		BUG("cannot call write_mtimes_file with NULL packing_data");
> +
> +	if (!mtimes_name) {
> +		struct strbuf tmp_file = STRBUF_INIT;
> +		fd = odb_mkstemp(&tmp_file, "pack/tmp_mtimes_XXXXXX");
> +		mtimes_name = strbuf_detach(&tmp_file, NULL);
> +	} else {
> +		unlink(mtimes_name);
> +		fd = xopen(mtimes_name, O_CREAT|O_EXCL|O_WRONLY, 0600);
> +	}
> +	f = hashfd(fd, mtimes_name);
> +
> +	write_mtimes_header(f);
> +	write_mtimes_objects(f, to_pack, objects, nr_objects);
> +	write_mtimes_trailer(f, hash);
> +
> +	if (mtimes_name && adjust_shared_perm(mtimes_name) < 0)
> +		die(_("failed to make %s readable"), mtimes_name);

What could cause 'mtimes_name' to be NULL here? It seems that it would
be initialized in the "if (!mtimes_name)" block above.

> +
> +	finalize_hashfile(f, NULL,
> +			  CSUM_HASH_IN_STREAM | CSUM_CLOSE | CSUM_FSYNC);
> +
> +	return mtimes_name;

Note that you return the name here...

> +	if (pack_idx_opts->flags & WRITE_MTIMES) {
> +		mtimes_tmp_name = write_mtimes_file(NULL, to_pack, written_list,
> +						    nr_written,
> +						    hash);
> +		if (adjust_shared_perm(mtimes_tmp_name))
> +			die_errno("unable to make temporary mtimes file readable");

...and then adjust the perms again. I think that this adjustment is
redundant, because it already happened within the write_mtimes_file()
method.

> +	}
> +
>  	rename_tmp_packfile(name_buffer, pack_tmp_name, "pack");
>  	if (rev_tmp_name)
>  		rename_tmp_packfile(name_buffer, rev_tmp_name, "rev");
> +	if (mtimes_tmp_name)
> +		rename_tmp_packfile(name_buffer, mtimes_tmp_name, "mtimes");

And then it is finally renamed here, if it had a temporary name to
start.

Thanks,
-Stolee
Taylor Blau Dec. 3, 2021, 11:04 p.m. UTC | #2
On Thu, Dec 02, 2021 at 10:36:16AM -0500, Derrick Stolee wrote:
> On 11/29/2021 5:25 PM, Taylor Blau wrote:> @@ -168,6 +168,9 @@ struct packing_data {
> >  	/* delta islands */
> >  	unsigned int *tree_depth;
> >  	unsigned char *layer;
> > +
> > +	/* cruft packs */
> > +	uint32_t *cruft_mtime;
>
> This comment is a bit terse. Perhaps...
>
> 	/* Used when writing cruft packs. */

Sure; here I was imitating the terseness of the "delta islands" comment
a few lines above. But I don't mind changing it here.

> > +static inline uint32_t oe_cruft_mtime(struct packing_data *pack,
> > +				      struct object_entry *e)
> > +{
> > +	if (!pack->cruft_mtime)
> > +		return 0;
> > +	return pack->cruft_mtime[e - pack->objects];
> > +}
>
> When writing a pack, it appears that the cruft_mtime array
> maps to objects in pack-order, not idx-order, correct? That
> might be worth mentioning in the struct definition because
> it differs from the .mtimes file.

Great observation and suggestion, thank you! The comment that I
ultimately settled on is:

  /*
   * Used when writing cruft packs.
   *
   * Object mtimes  are stored in pack order when writing, but
   * written out in lexicographic (index) order.
   */
   uint32_t *cruft_mtime;

> > +static void write_mtimes_objects(struct hashfile *f,
> > +				 struct packing_data *to_pack,
> > +				 struct pack_idx_entry **objects,
> > +				 uint32_t nr_objects)
> > +{
> > +	uint32_t i;
> > +	for (i = 0; i < nr_objects; i++) {
> > +		struct object_entry *e = (struct object_entry*)objects[i];
> > +		hashwrite_be32(f, oe_cruft_mtime(to_pack, e));
> > +	}
>
> The name "objects" here confused me at first, thinking it
> corresponded to the objects member of 'struct packing_data', but
> that is being handled by the fact that 'objects' is actually a
> lex-sorted list of pack_idx_entry pointers (and they happen to
> also point to 'struct object_entry' values because the 'struct
> pack_idx_entry' is the first member.
>
> So this is (very densely) handling the translation from pack-order
> to lex-order through the double pointer 'objects'. I'm not sure if
> there is a way to make it more clear or if every reader will need
> to do the same mental gymnastics I had to do.

Exactly, and sorry that I didn't point this out more clearly. It's been
long enough since I wrote this code that I can sympathize with the
mental gymnastics required ;).

> > +}
> > +
> > +static void write_mtimes_trailer(struct hashfile *f, const unsigned char *hash)
> > +{
> > +	hashwrite(f, hash, the_hash_algo->rawsz);
> > +}
> > +
> > +static const char *write_mtimes_file(const char *mtimes_name,
> > +				     struct packing_data *to_pack,
> > +				     struct pack_idx_entry **objects,
> > +				     uint32_t nr_objects,
> > +				     const unsigned char *hash)
> > +{
> > +	struct hashfile *f;
> > +	int fd;
> > +
> > +	if (!to_pack)
> > +		BUG("cannot call write_mtimes_file with NULL packing_data");
> > +
> > +	if (!mtimes_name) {
> > +		struct strbuf tmp_file = STRBUF_INIT;
> > +		fd = odb_mkstemp(&tmp_file, "pack/tmp_mtimes_XXXXXX");
> > +		mtimes_name = strbuf_detach(&tmp_file, NULL);
> > +	} else {
> > +		unlink(mtimes_name);
> > +		fd = xopen(mtimes_name, O_CREAT|O_EXCL|O_WRONLY, 0600);
> > +	}
> > +	f = hashfd(fd, mtimes_name);
> > +
> > +	write_mtimes_header(f);
> > +	write_mtimes_objects(f, to_pack, objects, nr_objects);
> > +	write_mtimes_trailer(f, hash);
> > +
> > +	if (mtimes_name && adjust_shared_perm(mtimes_name) < 0)
> > +		die(_("failed to make %s readable"), mtimes_name);
>
> What could cause 'mtimes_name' to be NULL here? It seems that it would
> be initialized in the "if (!mtimes_name)" block above.

You're right, it's impossible for it to be NULL here. I'll remove the
redundant side of the &&-expression here.

> > +
> > +	finalize_hashfile(f, NULL,
> > +			  CSUM_HASH_IN_STREAM | CSUM_CLOSE | CSUM_FSYNC);
> > +
> > +	return mtimes_name;
>
> Note that you return the name here...
>
> > +	if (pack_idx_opts->flags & WRITE_MTIMES) {
> > +		mtimes_tmp_name = write_mtimes_file(NULL, to_pack, written_list,
> > +						    nr_written,
> > +						    hash);
> > +		if (adjust_shared_perm(mtimes_tmp_name))
> > +			die_errno("unable to make temporary mtimes file readable");
>
> ...and then adjust the perms again. I think that this adjustment is
> redundant, because it already happened within the write_mtimes_file()
> method.

Yep, thanks. I'll clean it up here to just call adjust_shared_perm()
witin write_mtimes_file().

Thanks,
Taylor
diff mbox series

Patch

diff --git a/pack-objects.c b/pack-objects.c
index fe2a4eace9..272e8d4517 100644
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -170,6 +170,9 @@  struct object_entry *packlist_alloc(struct packing_data *pdata,
 
 		if (pdata->layer)
 			REALLOC_ARRAY(pdata->layer, pdata->nr_alloc);
+
+		if (pdata->cruft_mtime)
+			REALLOC_ARRAY(pdata->cruft_mtime, pdata->nr_alloc);
 	}
 
 	new_entry = pdata->objects + pdata->nr_objects++;
@@ -198,6 +201,9 @@  struct object_entry *packlist_alloc(struct packing_data *pdata,
 	if (pdata->layer)
 		pdata->layer[pdata->nr_objects - 1] = 0;
 
+	if (pdata->cruft_mtime)
+		pdata->cruft_mtime[pdata->nr_objects - 1] = 0;
+
 	return new_entry;
 }
 
diff --git a/pack-objects.h b/pack-objects.h
index dca2351ef9..f17119de26 100644
--- a/pack-objects.h
+++ b/pack-objects.h
@@ -168,6 +168,9 @@  struct packing_data {
 	/* delta islands */
 	unsigned int *tree_depth;
 	unsigned char *layer;
+
+	/* cruft packs */
+	uint32_t *cruft_mtime;
 };
 
 void prepare_packing_data(struct repository *r, struct packing_data *pdata);
@@ -289,4 +292,21 @@  static inline void oe_set_layer(struct packing_data *pack,
 	pack->layer[e - pack->objects] = layer;
 }
 
+static inline uint32_t oe_cruft_mtime(struct packing_data *pack,
+				      struct object_entry *e)
+{
+	if (!pack->cruft_mtime)
+		return 0;
+	return pack->cruft_mtime[e - pack->objects];
+}
+
+static inline void oe_set_cruft_mtime(struct packing_data *pack,
+				      struct object_entry *e,
+				      uint32_t mtime)
+{
+	if (!pack->cruft_mtime)
+		CALLOC_ARRAY(pack->cruft_mtime, pack->nr_alloc);
+	pack->cruft_mtime[e - pack->objects] = mtime;
+}
+
 #endif
diff --git a/pack-write.c b/pack-write.c
index ff305b404c..8c3efda2c3 100644
--- a/pack-write.c
+++ b/pack-write.c
@@ -3,6 +3,10 @@ 
 #include "csum-file.h"
 #include "remote.h"
 #include "chunk-format.h"
+#include "pack-mtimes.h"
+#include "oidmap.h"
+#include "chunk-format.h"
+#include "pack-objects.h"
 
 void reset_pack_idx_option(struct pack_idx_option *opts)
 {
@@ -276,6 +280,65 @@  const char *write_rev_file_order(const char *rev_name,
 	return rev_name;
 }
 
+static void write_mtimes_header(struct hashfile *f)
+{
+	hashwrite_be32(f, MTIMES_SIGNATURE);
+	hashwrite_be32(f, MTIMES_VERSION);
+	hashwrite_be32(f, oid_version(the_hash_algo));
+}
+
+static void write_mtimes_objects(struct hashfile *f,
+				 struct packing_data *to_pack,
+				 struct pack_idx_entry **objects,
+				 uint32_t nr_objects)
+{
+	uint32_t i;
+	for (i = 0; i < nr_objects; i++) {
+		struct object_entry *e = (struct object_entry*)objects[i];
+		hashwrite_be32(f, oe_cruft_mtime(to_pack, e));
+	}
+}
+
+static void write_mtimes_trailer(struct hashfile *f, const unsigned char *hash)
+{
+	hashwrite(f, hash, the_hash_algo->rawsz);
+}
+
+static const char *write_mtimes_file(const char *mtimes_name,
+				     struct packing_data *to_pack,
+				     struct pack_idx_entry **objects,
+				     uint32_t nr_objects,
+				     const unsigned char *hash)
+{
+	struct hashfile *f;
+	int fd;
+
+	if (!to_pack)
+		BUG("cannot call write_mtimes_file with NULL packing_data");
+
+	if (!mtimes_name) {
+		struct strbuf tmp_file = STRBUF_INIT;
+		fd = odb_mkstemp(&tmp_file, "pack/tmp_mtimes_XXXXXX");
+		mtimes_name = strbuf_detach(&tmp_file, NULL);
+	} else {
+		unlink(mtimes_name);
+		fd = xopen(mtimes_name, O_CREAT|O_EXCL|O_WRONLY, 0600);
+	}
+	f = hashfd(fd, mtimes_name);
+
+	write_mtimes_header(f);
+	write_mtimes_objects(f, to_pack, objects, nr_objects);
+	write_mtimes_trailer(f, hash);
+
+	if (mtimes_name && adjust_shared_perm(mtimes_name) < 0)
+		die(_("failed to make %s readable"), mtimes_name);
+
+	finalize_hashfile(f, NULL,
+			  CSUM_HASH_IN_STREAM | CSUM_CLOSE | CSUM_FSYNC);
+
+	return mtimes_name;
+}
+
 off_t write_pack_header(struct hashfile *f, uint32_t nr_entries)
 {
 	struct pack_header hdr;
@@ -478,6 +541,7 @@  void stage_tmp_packfiles(struct strbuf *name_buffer,
 			 char **idx_tmp_name)
 {
 	const char *rev_tmp_name = NULL;
+	const char *mtimes_tmp_name = NULL;
 
 	if (adjust_shared_perm(pack_tmp_name))
 		die_errno("unable to make temporary pack file readable");
@@ -490,9 +554,19 @@  void stage_tmp_packfiles(struct strbuf *name_buffer,
 	rev_tmp_name = write_rev_file(NULL, written_list, nr_written, hash,
 				      pack_idx_opts->flags);
 
+	if (pack_idx_opts->flags & WRITE_MTIMES) {
+		mtimes_tmp_name = write_mtimes_file(NULL, to_pack, written_list,
+						    nr_written,
+						    hash);
+		if (adjust_shared_perm(mtimes_tmp_name))
+			die_errno("unable to make temporary mtimes file readable");
+	}
+
 	rename_tmp_packfile(name_buffer, pack_tmp_name, "pack");
 	if (rev_tmp_name)
 		rename_tmp_packfile(name_buffer, rev_tmp_name, "rev");
+	if (mtimes_tmp_name)
+		rename_tmp_packfile(name_buffer, mtimes_tmp_name, "mtimes");
 }
 
 void write_promisor_file(const char *promisor_name, struct ref **sought, int nr_sought)
diff --git a/pack.h b/pack.h
index fd27cfdfd7..01d385903a 100644
--- a/pack.h
+++ b/pack.h
@@ -44,6 +44,7 @@  struct pack_idx_option {
 #define WRITE_IDX_STRICT 02
 #define WRITE_REV 04
 #define WRITE_REV_VERIFY 010
+#define WRITE_MTIMES 020
 
 	uint32_t version;
 	uint32_t off32_limit;