Message ID | 3ffd3a001f742713eb0fde8508f876ff95103d82.1635454237.git.gitgitgadget@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | Allow clean/smudge filters to handle huge files in the LLP64 data model | expand |
On Thu, Oct 28, 2021 at 1:50 PM Matt Cooper via GitGitGadget <gitgitgadget@gmail.com> wrote: > diff --git a/entry.c b/entry.c > index 711ee0693c7..4cb3942dbdc 100644 > --- a/entry.c > +++ b/entry.c > @@ -82,11 +82,13 @@ static int create_file(const char *path, unsigned int mode) > return open(path, O_WRONLY | O_CREAT | O_EXCL, mode); > } > > -void *read_blob_entry(const struct cache_entry *ce, unsigned long *size) > +void *read_blob_entry(const struct cache_entry *ce, size_t *size) > { > enum object_type type; > - void *blob_data = read_object_file(&ce->oid, &type, size); > + unsigned long ul; > + void *blob_data = read_object_file(&ce->oid, &type, &ul); > > + *size = ul; Considering this unsigned long variable is obviously holding an incorrect value, might be worth adding a "TODO" comment here, mentioning it should be changed to a real size_t (probably after release, since it is obvious that change is too intrusive to need this as a kludge for now). Carlo
Hi Carlo, On Thu, 28 Oct 2021, Carlo Arenas wrote: > On Thu, Oct 28, 2021 at 1:50 PM Matt Cooper via GitGitGadget > <gitgitgadget@gmail.com> wrote: > > diff --git a/entry.c b/entry.c > > index 711ee0693c7..4cb3942dbdc 100644 > > --- a/entry.c > > +++ b/entry.c > > @@ -82,11 +82,13 @@ static int create_file(const char *path, unsigned int mode) > > return open(path, O_WRONLY | O_CREAT | O_EXCL, mode); > > } > > > > -void *read_blob_entry(const struct cache_entry *ce, unsigned long *size) > > +void *read_blob_entry(const struct cache_entry *ce, size_t *size) > > { > > enum object_type type; > > - void *blob_data = read_object_file(&ce->oid, &type, size); > > + unsigned long ul; > > + void *blob_data = read_object_file(&ce->oid, &type, &ul); > > > > + *size = ul; > > Considering this unsigned long variable is obviously holding an > incorrect value, might be worth adding a "TODO" comment here, > mentioning it should be changed to a real size_t (probably after > release, since it is obvious that change is too intrusive to need this > as a kludge for now). I do not think it would be a good idea to introduce a `TODO` comment here. Why _here_, of all places? Just because we use `size_t` correctly for a bit of the function? No, that `TODO` would easily be forgotten when somebody tackles the big `unsigned long` -> `size_t` project. I therefore do not even want to add it. We know about this problem. No need for a code comment that is prone to become stale. Ciao, Dscho
diff --git a/entry.c b/entry.c index 711ee0693c7..4cb3942dbdc 100644 --- a/entry.c +++ b/entry.c @@ -82,11 +82,13 @@ static int create_file(const char *path, unsigned int mode) return open(path, O_WRONLY | O_CREAT | O_EXCL, mode); } -void *read_blob_entry(const struct cache_entry *ce, unsigned long *size) +void *read_blob_entry(const struct cache_entry *ce, size_t *size) { enum object_type type; - void *blob_data = read_object_file(&ce->oid, &type, size); + unsigned long ul; + void *blob_data = read_object_file(&ce->oid, &type, &ul); + *size = ul; if (blob_data) { if (type == OBJ_BLOB) return blob_data; @@ -270,7 +272,7 @@ static int write_entry(struct cache_entry *ce, char *path, struct conv_attrs *ca int fd, ret, fstat_done = 0; char *new_blob; struct strbuf buf = STRBUF_INIT; - unsigned long size; + size_t size; ssize_t wrote; size_t newsize = 0; struct stat st; diff --git a/entry.h b/entry.h index b8c0e170dc7..61ee8c17604 100644 --- a/entry.h +++ b/entry.h @@ -51,7 +51,7 @@ int finish_delayed_checkout(struct checkout *state, int *nr_checkouts); */ void unlink_entry(const struct cache_entry *ce); -void *read_blob_entry(const struct cache_entry *ce, unsigned long *size); +void *read_blob_entry(const struct cache_entry *ce, size_t *size); int fstat_checkout_output(int fd, const struct checkout *state, struct stat *st); void update_ce_after_write(const struct checkout *state, struct cache_entry *ce, struct stat *st); diff --git a/parallel-checkout.c b/parallel-checkout.c index 6b1af32bb3d..b6f4a25642e 100644 --- a/parallel-checkout.c +++ b/parallel-checkout.c @@ -261,7 +261,7 @@ static int write_pc_item_to_fd(struct parallel_checkout_item *pc_item, int fd, struct stream_filter *filter; struct strbuf buf = STRBUF_INIT; char *blob; - unsigned long size; + size_t size; ssize_t wrote; /* Sanity check */ diff --git a/t/t1051-large-conversion.sh b/t/t1051-large-conversion.sh index 7c1a2845005..5ba03d02682 100755 --- a/t/t1051-large-conversion.sh +++ b/t/t1051-large-conversion.sh @@ -85,7 +85,7 @@ test_expect_success 'ident converts on output' ' # This smudge filter prepends 5GB of zeros to the file it checks out. This # ensures that smudging doesn't mangle large files on 64-bit Windows. -test_expect_failure EXPENSIVE,!LONG_IS_64BIT 'files over 4GB convert on output' ' +test_expect_success EXPENSIVE,!LONG_IS_64BIT 'files over 4GB convert on output' ' test_commit test small "a small file" && test_config filter.makelarge.smudge \ "test-tool genzeros $((5*1024*1024*1024)) && cat" &&