diff mbox series

[v6,3/6] object-file.c: refactor write_loose_object() to reuse in stream version

Message ID 20211217112629.12334-4-chiyutianyi@gmail.com (mailing list archive)
State Superseded
Headers show
Series unpack large blobs in stream | expand

Commit Message

Han Xin Dec. 17, 2021, 11:26 a.m. UTC
From: Han Xin <hanxin.hx@alibaba-inc.com>

We used to call "get_data()" in "unpack_non_delta_entry()" to read the
entire contents of a blob object, no matter how big it is. This
implementation may consume all the memory and cause OOM.

This can be improved by feeding data to "stream_loose_object()" in
stream instead of read into the whole buf.

As this new method "stream_loose_object()" has many similarities with
"write_loose_object()", we split up "write_loose_object()" into some
steps:
 1. Figuring out a path for the (temp) object file.
 2. Creating the tempfile.
 3. Setting up zlib and write header.
 4. Write object data and handle errors.
 5. Optionally, do someting after write, maybe force a loose object if
"mtime".

Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Han Xin <hanxin.hx@alibaba-inc.com>
---
 object-file.c | 98 +++++++++++++++++++++++++++++++++------------------
 1 file changed, 63 insertions(+), 35 deletions(-)
diff mbox series

Patch

diff --git a/object-file.c b/object-file.c
index 95fcd5435d..dd29e5372e 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1751,6 +1751,25 @@  static void write_object_file_prepare(const struct git_hash_algo *algo,
 	algo->final_oid_fn(oid, &c);
 }
 
+/*
+ * Move the just written object with proper mtime into its final resting place.
+ */
+static int finalize_object_file_with_mtime(const char *tmpfile,
+					   const char *filename,
+					   time_t mtime,
+					   unsigned flags)
+{
+	struct utimbuf utb;
+
+	if (mtime) {
+		utb.actime = mtime;
+		utb.modtime = mtime;
+		if (utime(tmpfile, &utb) < 0 && !(flags & HASH_SILENT))
+			warning_errno(_("failed utime() on %s"), tmpfile);
+	}
+	return finalize_object_file(tmpfile, filename);
+}
+
 /*
  * Move the just written object into its final resting place.
  */
@@ -1836,7 +1855,8 @@  static inline int directory_size(const char *filename)
  * We want to avoid cross-directory filename renames, because those
  * can have problems on various filesystems (FAT, NFS, Coda).
  */
-static int create_tmpfile(struct strbuf *tmp, const char *filename)
+static int create_tmpfile(struct strbuf *tmp, const char *filename,
+			  unsigned flags)
 {
 	int fd, dirlen = directory_size(filename);
 
@@ -1844,7 +1864,9 @@  static int create_tmpfile(struct strbuf *tmp, const char *filename)
 	strbuf_add(tmp, filename, dirlen);
 	strbuf_addstr(tmp, "tmp_obj_XXXXXX");
 	fd = git_mkstemp_mode(tmp->buf, 0444);
-	if (fd < 0 && dirlen && errno == ENOENT) {
+	do {
+		if (fd >= 0 || !dirlen || errno != ENOENT)
+			break;
 		/*
 		 * Make sure the directory exists; note that the contents
 		 * of the buffer are undefined after mkstemp returns an
@@ -1854,17 +1876,48 @@  static int create_tmpfile(struct strbuf *tmp, const char *filename)
 		strbuf_reset(tmp);
 		strbuf_add(tmp, filename, dirlen - 1);
 		if (mkdir(tmp->buf, 0777) && errno != EEXIST)
-			return -1;
+			break;
 		if (adjust_shared_perm(tmp->buf))
-			return -1;
+			break;
 
 		/* Try again */
 		strbuf_addstr(tmp, "/tmp_obj_XXXXXX");
 		fd = git_mkstemp_mode(tmp->buf, 0444);
+	} while (0);
+
+	if (fd < 0 && !(flags & HASH_SILENT)) {
+		if (errno == EACCES)
+			return error(_("insufficient permission for adding an "
+				       "object to repository database %s"),
+				     get_object_directory());
+		else
+			return error_errno(_("unable to create temporary file"));
 	}
+
 	return fd;
 }
 
+static void setup_stream_and_header(git_zstream *stream,
+				    unsigned char *compressed,
+				    unsigned long compressed_size,
+				    git_hash_ctx *c,
+				    char *hdr,
+				    int hdrlen)
+{
+	/* Set it up */
+	git_deflate_init(stream, zlib_compression_level);
+	stream->next_out = compressed;
+	stream->avail_out = compressed_size;
+	the_hash_algo->init_fn(c);
+
+	/* First header.. */
+	stream->next_in = (unsigned char *)hdr;
+	stream->avail_in = hdrlen;
+	while (git_deflate(stream, 0) == Z_OK)
+		; /* nothing */
+	the_hash_algo->update_fn(c, hdr, hdrlen);
+}
+
 static int write_loose_object(const struct object_id *oid, char *hdr,
 			      int hdrlen, const void *buf, unsigned long len,
 			      time_t mtime, unsigned flags)
@@ -1879,31 +1932,15 @@  static int write_loose_object(const struct object_id *oid, char *hdr,
 
 	loose_object_path(the_repository, &filename, oid);
 
-	fd = create_tmpfile(&tmp_file, filename.buf);
+	fd = create_tmpfile(&tmp_file, filename.buf, flags);
 	if (fd < 0) {
-		if (flags & HASH_SILENT)
-			ret = -1;
-		else if (errno == EACCES)
-			ret = error(_("insufficient permission for adding an "
-				      "object to repository database %s"),
-				    get_object_directory());
-		else
-			ret = error_errno(_("unable to create temporary file"));
+		ret = -1;
 		goto cleanup;
 	}
 
-	/* Set it up */
-	git_deflate_init(&stream, zlib_compression_level);
-	stream.next_out = compressed;
-	stream.avail_out = sizeof(compressed);
-	the_hash_algo->init_fn(&c);
-
-	/* First header.. */
-	stream.next_in = (unsigned char *)hdr;
-	stream.avail_in = hdrlen;
-	while (git_deflate(&stream, 0) == Z_OK)
-		; /* nothing */
-	the_hash_algo->update_fn(&c, hdr, hdrlen);
+	/* Set it up and write header */
+	setup_stream_and_header(&stream, compressed, sizeof(compressed),
+				&c, hdr, hdrlen);
 
 	/* Then the data itself.. */
 	stream.next_in = (void *)buf;
@@ -1932,16 +1969,7 @@  static int write_loose_object(const struct object_id *oid, char *hdr,
 
 	close_loose_object(fd);
 
-	if (mtime) {
-		struct utimbuf utb;
-		utb.actime = mtime;
-		utb.modtime = mtime;
-		if (utime(tmp_file.buf, &utb) < 0 &&
-		    !(flags & HASH_SILENT))
-			warning_errno(_("failed utime() on %s"), tmp_file.buf);
-	}
-
-	ret = finalize_object_file(tmp_file.buf, filename.buf);
+	ret = finalize_object_file_with_mtime(tmp_file.buf, filename.buf, mtime, flags);
 cleanup:
 	strbuf_release(&filename);
 	strbuf_release(&tmp_file);