Message ID | 45662cf582ab7c8b1c32f55c9a34f4d73a28b71d.1652210824.git.gitgitgadget@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | scalar: implement the subcommand "diagnose" | expand |
"Johannes Schindelin via GitGitGadget" <gitgitgadget@gmail.com> writes: > @@ -514,14 +522,38 @@ static int add_file_cb(const struct option *opt, const char *arg, int unset) > if (!arg) > return -1; > > - path = prefix_filename(args->prefix, arg); > - item = string_list_append_nodup(&args->extra_files, path); > - item->util = info = xmalloc(sizeof(*info)); > + info = xmalloc(sizeof(*info)); > info->base = xstrdup_or_null(base); > - if (stat(path, &info->stat)) > - die(_("File not found: %s"), path); > - if (!S_ISREG(info->stat.st_mode)) > - die(_("Not a regular file: %s"), path); > + > + if (!strcmp(opt->long_name, "add-file")) { > + path = prefix_filename(args->prefix, arg); > + if (stat(path, &info->stat)) > + die(_("File not found: %s"), path); > + if (!S_ISREG(info->stat.st_mode)) > + die(_("Not a regular file: %s"), path); > + info->content = NULL; /* read the file later */ > + } else { This pretends that this new one will stay to be the only other option that uses the same callback in the future. To be more defensive, it should do } else if (!strcmp(opt->long_name, "...")) { and end the if/else if/else cascade with } else { BUG("add_file_cb called for unknown option"); } > + const char *colon = strchr(arg, ':'); > + char *p; > + > + if (!colon) > + die(_("missing colon: '%s'"), arg); > + > + p = xstrndup(arg, colon - arg); > + if (!args->prefix) > + path = p; > + else { > + path = prefix_filename(args->prefix, p); > + free(p); > + } > + memset(&info->stat, 0, sizeof(info->stat)); > + info->stat.st_mode = S_IFREG | 0644; I can sympathize with the desire to omit the mode bits because it may not be useful for the immediate purpose of "scalar diagnose" where the extracting end won't care what the file's permission bits are, but by letting this "mode is hardcoded" thing squat here would later make it more work when other people want to add an option that truely lets the caller add a "vitual" file, in response to end-user complaints that they cannot use the existing one to add an exectuable file, for example. I do not care too much about the pathname limitation that does not allow a colon in it, simply because it is unusual enough, but I am not sure about hardcoded permission bits. If we did "--add-virtual-file=<path>:0644:<contents>" instead from day one, it certainly adds a few more lines of logic to this patch, and the calling "scalar diagnose" may have to pass a few more bytes, but I suspect that such a change would help the project in the longer run. Thanks.
On May 10, 2022 5:48 PM, Junio C Hamano wrote: >"Johannes Schindelin via GitGitGadget" <gitgitgadget@gmail.com> >writes: > >> @@ -514,14 +522,38 @@ static int add_file_cb(const struct option *opt, const >char *arg, int unset) >> if (!arg) >> return -1; >> >> - path = prefix_filename(args->prefix, arg); >> - item = string_list_append_nodup(&args->extra_files, path); >> - item->util = info = xmalloc(sizeof(*info)); >> + info = xmalloc(sizeof(*info)); >> info->base = xstrdup_or_null(base); >> - if (stat(path, &info->stat)) >> - die(_("File not found: %s"), path); >> - if (!S_ISREG(info->stat.st_mode)) >> - die(_("Not a regular file: %s"), path); >> + >> + if (!strcmp(opt->long_name, "add-file")) { >> + path = prefix_filename(args->prefix, arg); >> + if (stat(path, &info->stat)) >> + die(_("File not found: %s"), path); >> + if (!S_ISREG(info->stat.st_mode)) >> + die(_("Not a regular file: %s"), path); >> + info->content = NULL; /* read the file later */ >> + } else { > >This pretends that this new one will stay to be the only other option that uses the >same callback in the future. To be more defensive, it should do > > } else if (!strcmp(opt->long_name, "...")) { > >and end the if/else if/else cascade with > > } else { > BUG("add_file_cb called for unknown option"); > } > >> + const char *colon = strchr(arg, ':'); >> + char *p; >> + >> + if (!colon) >> + die(_("missing colon: '%s'"), arg); >> + >> + p = xstrndup(arg, colon - arg); >> + if (!args->prefix) >> + path = p; >> + else { >> + path = prefix_filename(args->prefix, p); >> + free(p); >> + } >> + memset(&info->stat, 0, sizeof(info->stat)); >> + info->stat.st_mode = S_IFREG | 0644; > >I can sympathize with the desire to omit the mode bits because it may not be >useful for the immediate purpose of "scalar diagnose" >where the extracting end won't care what the file's permission bits are, but by >letting this "mode is hardcoded" thing squat here would later make it more work >when other people want to add an option that truely lets the caller add a "vitual" >file, in response to end-user complaints that they cannot use the existing one to >add an exectuable file, for example. I do not care too much about the pathname >limitation that does not allow a colon in it, simply because it is unusual enough, but >I am not sure about hardcoded permission bits. > >If we did "--add-virtual-file=<path>:0644:<contents>" instead from day one, it >certainly adds a few more lines of logic to this patch, and the calling "scalar >diagnose" may have to pass a few more bytes, but I suspect that such a change >would help the project in the longer run. Would not core.filemode=false somewhat simulate this? The consumer-client would not care/do anything with the mode anyway. Or am I missing something? --Randall
<rsbecker@nexbridge.com> writes: >>If we did "--add-virtual-file=<path>:0644:<contents>" instead from day one, it >>certainly adds a few more lines of logic to this patch, and the calling "scalar >>diagnose" may have to pass a few more bytes, but I suspect that such a change >>would help the project in the longer run. > > Would not core.filemode=false somewhat simulate this? The > consumer-client would not care/do anything with the mode > anyway. Or am I missing something? Or I must be missing something. This is part of "git archive" where its output is a tarball (or a zipfile) in which each entry knows its permission bits (or at least, if it is executable). Running "tar xf" or "unzip" on the receiving end of the output of this command should set the executable bit (and other permission bits) correctly I would certainly hope, so it does matter, no? I did say "scalar diagnose" may not care. But a patch to "git archive" will affect other people, and among them there would be people who say "gee, now I can add a handful of files from the command line with their contents, without actually having them in throw-away untracked files, when running 'git archive'. That's handy!", try it out and get disappointed by their inability to create executable files that way. And obviously I care more about "git archive" than "scalar diagnose". I very welcome to enhance the former to support the need for the latter. I do not see a good reason to stop at a half-feature added to the former, even that added half is enough to satisfy the latter, when the other half is not all that hard to add, and it is reasonably expected that users other than "scalar diagnose" would naturally want the other half, too.
Am 11.05.22 um 01:21 schrieb Junio C Hamano: > <rsbecker@nexbridge.com> writes: > >>> If we did "--add-virtual-file=<path>:0644:<contents>" instead from day one, it >>> certainly adds a few more lines of logic to this patch, and the calling "scalar >>> diagnose" may have to pass a few more bytes, but I suspect that such a change >>> would help the project in the longer run. > I did say "scalar diagnose" may not care. But a patch to "git > archive" will affect other people, and among them there would be > people who say "gee, now I can add a handful of files from the > command line with their contents, without actually having them in > throw-away untracked files, when running 'git archive'. That's > handy!", try it out and get disappointed by their inability to > create executable files that way. Which might motivate them to contribute a patch to add that feature. Give them a chance! :) > And obviously I care more about > "git archive" than "scalar diagnose". I very welcome to enhance the > former to support the need for the latter. I do not see a good > reason to stop at a half-feature added to the former, even that > added half is enough to satisfy the latter, when the other half is > not all that hard to add, and it is reasonably expected that users > other than "scalar diagnose" would naturally want the other half, > too. FWIW, I'd already be satisfied by a convincing outline of a way towards a complete solution to accept the partial feature, just to be sure we don't paint ourselves into a corner. But I'm bad at both strategy and saying no, so that's that. Regarding file modes: We only effectively support the executable bit, so an additional option --add-virtual-executable-file=<path>:<contents> would suffice. It would also prevent the false impression that arbitrary file modes can be used ("I said 0123 and got 0644, bug!"). And it would not even be the longest Git option.. René
René Scharfe <l.s.r@web.de> writes: > Am 11.05.22 um 01:21 schrieb Junio C Hamano: >> <rsbecker@nexbridge.com> writes: >> >>>> If we did "--add-virtual-file=<path>:0644:<contents>" instead from day one, it >>>> certainly adds a few more lines of logic to this patch, and the calling "scalar >>>> diagnose" may have to pass a few more bytes, but I suspect that such a change >>>> would help the project in the longer run. > >> I did say "scalar diagnose" may not care. But a patch to "git >> archive" will affect other people, and among them there would be >> people who say "gee, now I can add a handful of files from the >> command line with their contents, without actually having them in >> throw-away untracked files, when running 'git archive'. That's >> handy!", try it out and get disappointed by their inability to >> create executable files that way. > > Which might motivate them to contribute a patch to add that feature. > Give them a chance! :) Yes, but there is no way to reuse the same option in a backward compatible way to later add the mode information, and that is why we want to be careful before a half-feature squats on an option. > FWIW, I'd already be satisfied by a convincing outline of a way towards > a complete solution to accept the partial feature, just to be sure we > don't paint ourselves into a corner. Exactly. As you say, an extra and separate option can be used. I do not know if that is a workaround because we didn't design the first option to take an additional option, or a welcome feature. > Regarding file modes: We only effectively support the executable bit, > so an additional option --add-virtual-executable-file=<path>:<contents> > would suffice. While I do not think we want to support more than one "is it executable or not?" bit, I am not so sure about what the current code does, though, for these "not from a tree, but added as extra files" entries. If you add an extra file from an on-disk untracked file, the add_file_cb() callback picks up the full st.st_mode for the file, and write_archive_entries() in its loop over args->extra_files pass the full info->stat.st_mode down to write_entry(), which is used by archive-tar.c::write_tar_entry() to obtain mode bits pretty much as-is. For tracked paths, we probably are normalizing the blobs between 0644 and 0755 way before the values are passed as "mode" parameter to the write_entry() functions, but for these extra files, there is no such massaging. So, I am OK with --add-virtual-executable=<path>:<contents> (but the point still stands that the way the code in the patch squats in the codepath makes it necessary to first refator it before it can happen) as a separate option. We may want to massage the mode bit we grab from these extra files, if we were to go that route, though. Thanks.
Am 11.05.22 um 21:27 schrieb Junio C Hamano: > René Scharfe <l.s.r@web.de> writes: > >> Regarding file modes: We only effectively support the executable bit, >> so an additional option --add-virtual-executable-file=<path>:<contents> >> would suffice. > > While I do not think we want to support more than one "is it > executable or not?" bit, I am not so sure about what the current > code does, though, for these "not from a tree, but added as extra > files" entries. > > If you add an extra file from an on-disk untracked file, the > add_file_cb() callback picks up the full st.st_mode for the file, > and write_archive_entries() in its loop over args->extra_files pass > the full info->stat.st_mode down to write_entry(), which is used by > archive-tar.c::write_tar_entry() to obtain mode bits pretty much > as-is. Good point. write_tar_entry() actually normalizes the permission bits and applies tar.umask (0002 by default): if (S_ISDIR(mode) || S_ISGITLINK(mode)) { *header.typeflag = TYPEFLAG_DIR; mode = (mode | 0777) & ~tar_umask; } else if (S_ISLNK(mode)) { *header.typeflag = TYPEFLAG_LNK; mode |= 0777; } else if (S_ISREG(mode)) { *header.typeflag = TYPEFLAG_REG; mode = (mode | ((mode & 0100) ? 0777 : 0666)) & ~tar_umask; But write_zip_entry() only normalizes (drops) the permission bits of non-executable files: attr2 = S_ISLNK(mode) ? ((mode | 0777) << 16) : (mode & 0111) ? ((mode) << 16) : 0; if (S_ISLNK(mode) || (mode & 0111)) creator_version = 0x0317; attr2 corresponds to the field "external file attributes" mentioned in the ZIP format specification, APPNOTE.TXT. It's interpreted based on the "version made by" (creator_version here); that 0x03 part above means "UNIX". The default is MS-DOS (FAT filesystem), with effectivly no support for file permissions. So we currently leak permission bits of executable files into ZIP archives, but not tar files. :-| Normalizing those to 0755 would be more consistent. > For tracked paths, we probably are normalizing the blobs > between 0644 and 0755 way before the values are passed as "mode" > parameter to the write_entry() functions, but for these extra files, > there is no such massaging. Right, mode values from read_tree() pass through canon_mode(), so only untracked files (those appended with --add-file) are affected by the leakage mentioned above. René
René Scharfe <l.s.r@web.de> writes: > Good point. write_tar_entry() actually normalizes the permission bits > and applies tar.umask (0002 by default): > > if (S_ISDIR(mode) || S_ISGITLINK(mode)) { > *header.typeflag = TYPEFLAG_DIR; > mode = (mode | 0777) & ~tar_umask; > } else if (S_ISLNK(mode)) { > *header.typeflag = TYPEFLAG_LNK; > mode |= 0777; > } else if (S_ISREG(mode)) { > *header.typeflag = TYPEFLAG_REG; > mode = (mode | ((mode & 0100) ? 0777 : 0666)) & ~tar_umask; Yeah, this side seems to care only about u+x bit, so "add-executable" as a separate option would fly we.. > But write_zip_entry() only normalizes (drops) the permission bits of > non-executable files: > > attr2 = S_ISLNK(mode) ? ((mode | 0777) << 16) : > (mode & 0111) ? ((mode) << 16) : 0; > if (S_ISLNK(mode) || (mode & 0111)) > creator_version = 0x0317; > > attr2 corresponds to the field "external file attributes" mentioned in > the ZIP format specification, APPNOTE.TXT. It's interpreted based on > the "version made by" (creator_version here); that 0x03 part above > means "UNIX". The default is MS-DOS (FAT filesystem), with effectivly > no support for file permissions. > > So we currently leak permission bits of executable files into ZIP > archives, but not tar files. :-| Normalizing those to 0755 would be > more consistent. Yup. >> For tracked paths, we probably are normalizing the blobs >> between 0644 and 0755 way before the values are passed as "mode" >> parameter to the write_entry() functions, but for these extra files, >> there is no such massaging. > > Right, mode values from read_tree() pass through canon_mode(), so only > untracked files (those appended with --add-file) are affected by the > leakage mentioned above. Thanks for sanity-checking.
Junio C Hamano <gitster@pobox.com> writes: >> So we currently leak permission bits of executable files into ZIP >> archives, but not tar files. :-| Normalizing those to 0755 would be >> more consistent. Today, I was scanning the "What's cooking" draft and saw too many topics that are marked with "Expecting a reroll". It turns out that this "mode bits" thing will not be a blocker to make us wait for a reroll of the topic, so let's handle it separately, before we forget, as an independent fix outside the series under discussion. Thanks. --- >8 --- Subject: [PATCH] archive: do not let on-disk mode leak to zip archives When the "--add-file" option is used to add the contents from an untracked file to the archive, the permission mode bits for these files are sent to the archive-backend specific "write_entry()" method as-is. We normalize the mode bits for tracked files way before we pass them to the write_entry() method; we should do the same here. This is not strictly needed for "tar" archive-backend, as it has its own code to further clean them up, but "zip" archive-backend is not so well prepared. Signed-off-by: Junio C Hamano <gitster@pobox.com> --- archive.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/archive.c b/archive.c index e29d0e00f6..12a08af531 100644 --- a/archive.c +++ b/archive.c @@ -342,7 +342,7 @@ int write_archive_entries(struct archiver_args *args, else err = write_entry(args, &fake_oid, path_in_archive.buf, path_in_archive.len, - info->stat.st_mode, + canon_mode(info->stat.st_mode), content.buf, content.len); if (err) break;
Am 12.05.22 um 23:31 schrieb Junio C Hamano: > Junio C Hamano <gitster@pobox.com> writes: > >>> So we currently leak permission bits of executable files into ZIP >>> archives, but not tar files. :-| Normalizing those to 0755 would be >>> more consistent. > > Today, I was scanning the "What's cooking" draft and saw too many > topics that are marked with "Expecting a reroll". It turns out that > this "mode bits" thing will not be a blocker to make us wait for a > reroll of the topic, so let's handle it separately, before we > forget, as an independent fix outside the series under discussion. > > Thanks. > > --- >8 --- > Subject: [PATCH] archive: do not let on-disk mode leak to zip archives > > When the "--add-file" option is used to add the contents from an > untracked file to the archive, the permission mode bits for these > files are sent to the archive-backend specific "write_entry()" > method as-is. We normalize the mode bits for tracked files way > before we pass them to the write_entry() method; we should do the > same here. > > This is not strictly needed for "tar" archive-backend, as it has its > own code to further clean them up, but "zip" archive-backend is not > so well prepared. > > Signed-off-by: Junio C Hamano <gitster@pobox.com> > --- > archive.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/archive.c b/archive.c > index e29d0e00f6..12a08af531 100644 > --- a/archive.c > +++ b/archive.c > @@ -342,7 +342,7 @@ int write_archive_entries(struct archiver_args *args, > else > err = write_entry(args, &fake_oid, path_in_archive.buf, > path_in_archive.len, > - info->stat.st_mode, > + canon_mode(info->stat.st_mode), > content.buf, content.len); > if (err) > break; Looks good to me, thank you! René
diff --git a/Documentation/git-archive.txt b/Documentation/git-archive.txt index bc4e76a7834..a0edc9167b2 100644 --- a/Documentation/git-archive.txt +++ b/Documentation/git-archive.txt @@ -61,6 +61,17 @@ OPTIONS by concatenating the value for `--prefix` (if any) and the basename of <file>. +--add-file-with-content=<path>:<content>:: + Add the specified contents to the archive. Can be repeated to add + multiple files. The path of the file in the archive is built + by concatenating the value for `--prefix` (if any) and the + basename of <file>. ++ +The `<path>` cannot contain any colon, the file mode is limited to +a regular file, and the option may be subject to platform-dependent +command-line limits. For non-trivial cases, write an untracked file +and use `--add-file` instead. + --worktree-attributes:: Look for attributes in .gitattributes files in the working tree as well (see <<ATTRIBUTES>>). diff --git a/archive.c b/archive.c index a3bbb091256..d798624cd5f 100644 --- a/archive.c +++ b/archive.c @@ -263,6 +263,7 @@ static int queue_or_write_archive_entry(const struct object_id *oid, struct extra_file_info { char *base; struct stat stat; + void *content; }; int write_archive_entries(struct archiver_args *args, @@ -337,7 +338,13 @@ int write_archive_entries(struct archiver_args *args, strbuf_addstr(&path_in_archive, basename(path)); strbuf_reset(&content); - if (strbuf_read_file(&content, path, info->stat.st_size) < 0) + if (info->content) + err = write_entry(args, &fake_oid, path_in_archive.buf, + path_in_archive.len, + info->stat.st_mode, + info->content, info->stat.st_size); + else if (strbuf_read_file(&content, path, + info->stat.st_size) < 0) err = error_errno(_("could not read '%s'"), path); else err = write_entry(args, &fake_oid, path_in_archive.buf, @@ -493,6 +500,7 @@ static void extra_file_info_clear(void *util, const char *str) { struct extra_file_info *info = util; free(info->base); + free(info->content); free(info); } @@ -514,14 +522,38 @@ static int add_file_cb(const struct option *opt, const char *arg, int unset) if (!arg) return -1; - path = prefix_filename(args->prefix, arg); - item = string_list_append_nodup(&args->extra_files, path); - item->util = info = xmalloc(sizeof(*info)); + info = xmalloc(sizeof(*info)); info->base = xstrdup_or_null(base); - if (stat(path, &info->stat)) - die(_("File not found: %s"), path); - if (!S_ISREG(info->stat.st_mode)) - die(_("Not a regular file: %s"), path); + + if (!strcmp(opt->long_name, "add-file")) { + path = prefix_filename(args->prefix, arg); + if (stat(path, &info->stat)) + die(_("File not found: %s"), path); + if (!S_ISREG(info->stat.st_mode)) + die(_("Not a regular file: %s"), path); + info->content = NULL; /* read the file later */ + } else { + const char *colon = strchr(arg, ':'); + char *p; + + if (!colon) + die(_("missing colon: '%s'"), arg); + + p = xstrndup(arg, colon - arg); + if (!args->prefix) + path = p; + else { + path = prefix_filename(args->prefix, p); + free(p); + } + memset(&info->stat, 0, sizeof(info->stat)); + info->stat.st_mode = S_IFREG | 0644; + info->content = xstrdup(colon + 1); + info->stat.st_size = strlen(info->content); + } + item = string_list_append_nodup(&args->extra_files, path); + item->util = info; + return 0; } @@ -554,6 +586,9 @@ static int parse_archive_args(int argc, const char **argv, { OPTION_CALLBACK, 0, "add-file", args, N_("file"), N_("add untracked file to archive"), 0, add_file_cb, (intptr_t)&base }, + { OPTION_CALLBACK, 0, "add-file-with-content", args, + N_("path:content"), N_("add untracked file to archive"), 0, + add_file_cb, (intptr_t)&base }, OPT_STRING('o', "output", &output, N_("file"), N_("write the archive to this file")), OPT_BOOL(0, "worktree-attributes", &worktree_attributes, diff --git a/t/t5003-archive-zip.sh b/t/t5003-archive-zip.sh index 1e6d18b140e..8ff1257f1a0 100755 --- a/t/t5003-archive-zip.sh +++ b/t/t5003-archive-zip.sh @@ -206,6 +206,18 @@ test_expect_success 'git archive --format=zip --add-file' ' check_zip with_untracked check_added with_untracked untracked untracked +test_expect_success UNZIP 'git archive --format=zip --add-file-with-content' ' + git archive --format=zip >with_file_with_content.zip \ + --add-file-with-content=hello:world $EMPTY_TREE && + test_when_finished "rm -rf tmp-unpack" && + mkdir tmp-unpack && ( + cd tmp-unpack && + "$GIT_UNZIP" ../with_file_with_content.zip && + test_path_is_file hello && + test world = $(cat hello) + ) +' + test_expect_success 'git archive --format=zip --add-file twice' ' echo untracked >untracked && git archive --format=zip --prefix=one/ --add-file=untracked \