Message ID | 49ff3c1f2b32b16df2b4216aa016d715b6de46bc.1644187146.git.gitgitgadget@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | scalar: implement the subcommand "diagnose" | expand |
Am 06.02.22 um 23:39 schrieb Johannes Schindelin via GitGitGadget: > From: Johannes Schindelin <johannes.schindelin@gmx.de> > > With the `--add-file-with-content=<path>:<content>` option, `git > archive` now supports use cases where relatively trivial files need to > be added that do not exist on disk. > > Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> > --- > Documentation/git-archive.txt | 11 ++++++++ > archive.c | 51 +++++++++++++++++++++++++++++------ > t/t5003-archive-zip.sh | 12 +++++++++ > 3 files changed, 66 insertions(+), 8 deletions(-) > > diff --git a/Documentation/git-archive.txt b/Documentation/git-archive.txt > index bc4e76a7834..1b52a0a65a1 100644 > --- a/Documentation/git-archive.txt > +++ b/Documentation/git-archive.txt > @@ -61,6 +61,17 @@ OPTIONS > by concatenating the value for `--prefix` (if any) and the > basename of <file>. > > +--add-file-with-content=<path>:<content>:: > + Add the specified contents to the archive. Can be repeated to add > + multiple files. The path of the file in the archive is built > + by concatenating the value for `--prefix` (if any) and the > + basename of <file>. > ++ > +The `<path>` cannot contain any colon, the file mode is limited to > +a regular file, and the option may be subject platform-dependent s/subject/& to/ > +command-line limits. For non-trivial cases, write an untracked file > +and use `--add-file` instead. > + We could use that option in Git's own Makefile to add the file named "version", which contains $GIT_VERSION. Hmm, but it also contains a terminating newline, which would be a bit tricky (but not impossible) to add. Would it make sense to add one automatically if it's missing (e.g. with strbuf_complete_line)? Not sure. > --worktree-attributes:: > Look for attributes in .gitattributes files in the working tree > as well (see <<ATTRIBUTES>>). > diff --git a/archive.c b/archive.c > index a3bbb091256..172efd690c3 100644 > --- a/archive.c > +++ b/archive.c > @@ -263,6 +263,7 @@ static int queue_or_write_archive_entry(const struct object_id *oid, > struct extra_file_info { > char *base; > struct stat stat; > + void *content; > }; > > int write_archive_entries(struct archiver_args *args, > @@ -337,7 +338,13 @@ int write_archive_entries(struct archiver_args *args, > strbuf_addstr(&path_in_archive, basename(path)); > > strbuf_reset(&content); > - if (strbuf_read_file(&content, path, info->stat.st_size) < 0) > + if (info->content) > + err = write_entry(args, &fake_oid, path_in_archive.buf, > + path_in_archive.len, > + info->stat.st_mode, > + info->content, info->stat.st_size); > + else if (strbuf_read_file(&content, path, > + info->stat.st_size) < 0) > err = error_errno(_("could not read '%s'"), path); > else > err = write_entry(args, &fake_oid, path_in_archive.buf, > @@ -493,6 +500,7 @@ static void extra_file_info_clear(void *util, const char *str) > { > struct extra_file_info *info = util; > free(info->base); > + free(info->content); > free(info); > } > > @@ -514,14 +522,38 @@ static int add_file_cb(const struct option *opt, const char *arg, int unset) > if (!arg) > return -1; > > - path = prefix_filename(args->prefix, arg); > - item = string_list_append_nodup(&args->extra_files, path); > - item->util = info = xmalloc(sizeof(*info)); > + info = xmalloc(sizeof(*info)); > info->base = xstrdup_or_null(base); > - if (stat(path, &info->stat)) > - die(_("File not found: %s"), path); > - if (!S_ISREG(info->stat.st_mode)) > - die(_("Not a regular file: %s"), path); > + > + if (strcmp(opt->long_name, "add-file-with-content")) { Equivalent to: if (!strcmp(opt->long_name, "add-file")) { I mention that because the inequality check confused me a bit at first. > + path = prefix_filename(args->prefix, arg); > + if (stat(path, &info->stat)) > + die(_("File not found: %s"), path); > + if (!S_ISREG(info->stat.st_mode)) > + die(_("Not a regular file: %s"), path); > + info->content = NULL; /* read the file later */ > + } else { > + const char *colon = strchr(arg, ':'); > + char *p; > + > + if (!colon) > + die(_("missing colon: '%s'"), arg); > + > + p = xstrndup(arg, colon - arg); > + if (!args->prefix) > + path = p; > + else { > + path = prefix_filename(args->prefix, p); > + free(p); > + } > + memset(&info->stat, 0, sizeof(info->stat)); > + info->stat.st_mode = S_IFREG | 0644; > + info->content = xstrdup(colon + 1); > + info->stat.st_size = strlen(info->content); > + } > + item = string_list_append_nodup(&args->extra_files, path); > + item->util = info; > + > return 0; > } > > @@ -554,6 +586,9 @@ static int parse_archive_args(int argc, const char **argv, > { OPTION_CALLBACK, 0, "add-file", args, N_("file"), > N_("add untracked file to archive"), 0, add_file_cb, > (intptr_t)&base }, > + { OPTION_CALLBACK, 0, "add-file-with-content", args, > + N_("file"), N_("add untracked file to archive"), 0, ^^^^ "<file>" seems wrong, because there is no actual file. It should rather be "<name>:<content>" for the virtual one, right? > + add_file_cb, (intptr_t)&base }, > OPT_STRING('o', "output", &output, N_("file"), > N_("write the archive to this file")), > OPT_BOOL(0, "worktree-attributes", &worktree_attributes, > diff --git a/t/t5003-archive-zip.sh b/t/t5003-archive-zip.sh > index 1e6d18b140e..8ff1257f1a0 100755 > --- a/t/t5003-archive-zip.sh > +++ b/t/t5003-archive-zip.sh > @@ -206,6 +206,18 @@ test_expect_success 'git archive --format=zip --add-file' ' > check_zip with_untracked > check_added with_untracked untracked untracked > > +test_expect_success UNZIP 'git archive --format=zip --add-file-with-content' ' > + git archive --format=zip >with_file_with_content.zip \ > + --add-file-with-content=hello:world $EMPTY_TREE && > + test_when_finished "rm -rf tmp-unpack" && > + mkdir tmp-unpack && ( > + cd tmp-unpack && > + "$GIT_UNZIP" ../with_file_with_content.zip && > + test_path_is_file hello && > + test world = $(cat hello) > + ) > +' > + > test_expect_success 'git archive --format=zip --add-file twice' ' > echo untracked >untracked && > git archive --format=zip --prefix=one/ --add-file=untracked \
René Scharfe <l.s.r@web.de> writes: > We could use that option in Git's own Makefile to add the file named > "version", which contains $GIT_VERSION. Hmm, but it also contains a > terminating newline, which would be a bit tricky (but not impossible) to > add. Would it make sense to add one automatically if it's missing (e.g. > with strbuf_complete_line)? Not sure. I do not think it is a good UI to give raw file content from the command line, which will be usable only for trivial, even single liner files, and forces people to learn two parallel option, one for trivial ones and the other for contents with meaningful size. "--add-blob=<path>:<blob-object-name>" may be another option, useful when you have done "hash-object -w" already, and can be used to add single-liner, or an entire novel. In any case, "--add-file=<file>", which we already have, would be more appropriate feature to use to record our "version" file, so there is no need to change our Makefile for it.
Hi René, On Mon, 7 Feb 2022, René Scharfe wrote: > Am 06.02.22 um 23:39 schrieb Johannes Schindelin via GitGitGadget: > > From: Johannes Schindelin <johannes.schindelin@gmx.de> > > > > With the `--add-file-with-content=<path>:<content>` option, `git > > archive` now supports use cases where relatively trivial files need to > > be added that do not exist on disk. > > > > Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> > > --- > > Documentation/git-archive.txt | 11 ++++++++ > > archive.c | 51 +++++++++++++++++++++++++++++------ > > t/t5003-archive-zip.sh | 12 +++++++++ > > 3 files changed, 66 insertions(+), 8 deletions(-) > > > > diff --git a/Documentation/git-archive.txt b/Documentation/git-archive.txt > > index bc4e76a7834..1b52a0a65a1 100644 > > --- a/Documentation/git-archive.txt > > +++ b/Documentation/git-archive.txt > > @@ -61,6 +61,17 @@ OPTIONS > > by concatenating the value for `--prefix` (if any) and the > > basename of <file>. > > > > +--add-file-with-content=<path>:<content>:: > > + Add the specified contents to the archive. Can be repeated to add > > + multiple files. The path of the file in the archive is built > > + by concatenating the value for `--prefix` (if any) and the > > + basename of <file>. > > ++ > > +The `<path>` cannot contain any colon, the file mode is limited to > > +a regular file, and the option may be subject platform-dependent > > s/subject/& to/ Thanks. > > +command-line limits. For non-trivial cases, write an untracked file > > +and use `--add-file` instead. > > + > > We could use that option in Git's own Makefile to add the file named > "version", which contains $GIT_VERSION. We could do that, that opportunity is a side effect of this patch series. > Hmm, but it also contains a terminating newline, which would be a bit > tricky (but not impossible) to add. Would it make sense to add one > automatically if it's missing (e.g. with strbuf_complete_line)? Not > sure. It is really easy: LF=' ' git archive --add-file-with-content=version:"$GIT_VERSION$LF" ... (That's shell script, in the Makefile it would need those `\` continuations.) > > --worktree-attributes:: > > Look for attributes in .gitattributes files in the working tree > > as well (see <<ATTRIBUTES>>). > > diff --git a/archive.c b/archive.c > > index a3bbb091256..172efd690c3 100644 > > --- a/archive.c > > +++ b/archive.c > > @@ -263,6 +263,7 @@ static int queue_or_write_archive_entry(const struct object_id *oid, > > struct extra_file_info { > > char *base; > > struct stat stat; > > + void *content; > > }; > > > > int write_archive_entries(struct archiver_args *args, > > @@ -337,7 +338,13 @@ int write_archive_entries(struct archiver_args *args, > > strbuf_addstr(&path_in_archive, basename(path)); > > > > strbuf_reset(&content); > > - if (strbuf_read_file(&content, path, info->stat.st_size) < 0) > > + if (info->content) > > + err = write_entry(args, &fake_oid, path_in_archive.buf, > > + path_in_archive.len, > > + info->stat.st_mode, > > + info->content, info->stat.st_size); > > + else if (strbuf_read_file(&content, path, > > + info->stat.st_size) < 0) > > err = error_errno(_("could not read '%s'"), path); > > else > > err = write_entry(args, &fake_oid, path_in_archive.buf, > > @@ -493,6 +500,7 @@ static void extra_file_info_clear(void *util, const char *str) > > { > > struct extra_file_info *info = util; > > free(info->base); > > + free(info->content); > > free(info); > > } > > > > @@ -514,14 +522,38 @@ static int add_file_cb(const struct option *opt, const char *arg, int unset) > > if (!arg) > > return -1; > > > > - path = prefix_filename(args->prefix, arg); > > - item = string_list_append_nodup(&args->extra_files, path); > > - item->util = info = xmalloc(sizeof(*info)); > > + info = xmalloc(sizeof(*info)); > > info->base = xstrdup_or_null(base); > > - if (stat(path, &info->stat)) > > - die(_("File not found: %s"), path); > > - if (!S_ISREG(info->stat.st_mode)) > > - die(_("Not a regular file: %s"), path); > > + > > + if (strcmp(opt->long_name, "add-file-with-content")) { > > Equivalent to: > > if (!strcmp(opt->long_name, "add-file")) { > > I mention that because the inequality check confused me a bit at first. Good point. For some reason I thought it would be clearer to handle everything but `--add-file-with-content` here, but that "everything but" is only `--add-file`, so I sowed more confusion. Sorry about that. > > > + path = prefix_filename(args->prefix, arg); > > + if (stat(path, &info->stat)) > > + die(_("File not found: %s"), path); > > + if (!S_ISREG(info->stat.st_mode)) > > + die(_("Not a regular file: %s"), path); > > + info->content = NULL; /* read the file later */ > > + } else { > > + const char *colon = strchr(arg, ':'); > > + char *p; > > + > > + if (!colon) > > + die(_("missing colon: '%s'"), arg); > > + > > + p = xstrndup(arg, colon - arg); > > + if (!args->prefix) > > + path = p; > > + else { > > + path = prefix_filename(args->prefix, p); > > + free(p); > > + } > > + memset(&info->stat, 0, sizeof(info->stat)); > > + info->stat.st_mode = S_IFREG | 0644; > > + info->content = xstrdup(colon + 1); > > + info->stat.st_size = strlen(info->content); > > + } > > + item = string_list_append_nodup(&args->extra_files, path); > > + item->util = info; > > + > > return 0; > > } > > > > @@ -554,6 +586,9 @@ static int parse_archive_args(int argc, const char **argv, > > { OPTION_CALLBACK, 0, "add-file", args, N_("file"), > > N_("add untracked file to archive"), 0, add_file_cb, > > (intptr_t)&base }, > > + { OPTION_CALLBACK, 0, "add-file-with-content", args, > > + N_("file"), N_("add untracked file to archive"), 0, > ^^^^ > "<file>" seems wrong, because there is no actual file. It should rather > be "<name>:<content>" for the virtual one, right? Or `<path>:<content>`. Yes. Again, thank you for your clear and helpful review, Dscho
Hi Junio, On Mon, 7 Feb 2022, Junio C Hamano wrote: > René Scharfe <l.s.r@web.de> writes: > > > We could use that option in Git's own Makefile to add the file named > > "version", which contains $GIT_VERSION. Hmm, but it also contains a > > terminating newline, which would be a bit tricky (but not impossible) to > > add. Would it make sense to add one automatically if it's missing (e.g. > > with strbuf_complete_line)? Not sure. > > I do not think it is a good UI to give raw file content from the > command line, which will be usable only for trivial, even single > liner files, and forces people to learn two parallel option, one > for trivial ones and the other for contents with meaningful size. Nevertheless, it is still the most elegant way that I can think of to generate a diagnostic `.zip` file without messing up the very things that are to be diagnosed: the repository and the worktree. > "--add-blob=<path>:<blob-object-name>" may be another option, useful > when you have done "hash-object -w" already, and can be used to add > single-liner, or an entire novel. This would mess with the repository. Granted, it is unlikely that adding a tiny blob will all of a sudden work around a bug that the user wanted to report, but less big mutations have been known to subtly change a bug's manifested symptoms. So I really do not want to do that, not in `scalar diagnose. > In any case, "--add-file=<file>", which we already have, would be > more appropriate feature to use to record our "version" file, so > there is no need to change our Makefile for it. Same here. It is bad enough that `scalar diagnose` has to create a directory in the current enlistment. Let's not make the situation even worse. The most elegant solution would have been that streaming `--add-file` mode suggested by René, I think, but that's too involved to implement just to benefit `scalar diagnose`. It's not like we can simply stream the contents via `stdin`, as there are more than one "virtual" file we need to add to that `.zip` file. Ciao, Dscho
Johannes Schindelin <Johannes.Schindelin@gmx.de> writes: >> > We could use that option in Git's own Makefile to add the file named >> > "version", which contains $GIT_VERSION. Hmm, but it also contains a >> > terminating newline, which would be a bit tricky (but not impossible) to >> > add. Would it make sense to add one automatically if it's missing (e.g. >> > with strbuf_complete_line)? Not sure. >> >> I do not think it is a good UI to give raw file content from the >> command line, which will be usable only for trivial, even single >> liner files, and forces people to learn two parallel option, one >> for trivial ones and the other for contents with meaningful size. > > Nevertheless, it is still the most elegant way that I can think of to > generate a diagnostic `.zip` file without messing up the very things that > are to be diagnosed: the repository and the worktree. Puzzled. Are you feeding contents of a .zip file from the command line? I was mostly worried about busting command line argument limit by trying to feed too many bytes, as the ceiling is fairly low on some platforms. Another worry was that when <contents> can have arbitrary bytes, with --opt=<path>:<contents> syntax, the input becomes ambiguous (i.e. "which colon is the <path> separator?"), without some way to escape a colon in the payload. For a single-liner, --add-file-with-contents=<path>:<contents> would be an OK way, and my comment was not a strong objection against this new option existing. It was primarily an objection against changing the way to add the 'version' file in our "make dist" procedure to use it anyway. But now I think about it more, I am becoming less happy about it existing in the first place. This will throw another monkey wrench to Konstantin's plan [*] to make "git archive" output verifiable with the signature on original Git objects, but it is not a new problem ;-) [Reference] * https://lore.kernel.org/git/20220207213449.ljqjhdx4f45a3lx5@meerkat.local/
Am 08.02.22 um 18:44 schrieb Junio C Hamano: > Johannes Schindelin <Johannes.Schindelin@gmx.de> writes: > >>>> We could use that option in Git's own Makefile to add the file named >>>> "version", which contains $GIT_VERSION. Hmm, but it also contains a >>>> terminating newline, which would be a bit tricky (but not impossible) to >>>> add. Would it make sense to add one automatically if it's missing (e.g. >>>> with strbuf_complete_line)? Not sure. >>> >>> I do not think it is a good UI to give raw file content from the >>> command line, which will be usable only for trivial, even single >>> liner files, and forces people to learn two parallel option, one >>> for trivial ones and the other for contents with meaningful size. >> >> Nevertheless, it is still the most elegant way that I can think of to >> generate a diagnostic `.zip` file without messing up the very things that >> are to be diagnosed: the repository and the worktree. > > Puzzled. Are you feeding contents of a .zip file from the command > line? Kind of. Command line arguments are built and handed to write_archive() in-process. It's done by patch 3 and extended by 5 and 6. The number of files is relatively low and they aren't huge, right? Staging their content in the object database would be messy, but $TMPDIR might be able to take them with a low impact. Unless the problem to diagnose is that this directory is full -- but you don't need a fancy report for that. :) Currently there is no easy way to write a temporary file with a chosen name. diff.c would benefit from such a thing when running an external diff program; currently it adds a random prefix. git archive --add-file also uses the filename (and discards the directory part). The patch below adds a function to create temporary files with a chosen name. Perhaps it would be useful here as well, instead of the new option? > I was mostly worried about busting command line argument limit by > trying to feed too many bytes, as the ceiling is fairly low on some > platforms. Command line length limits don't apply to the way scalar uses the new option. > Another worry was that when <contents> can have > arbitrary bytes, with --opt=<path>:<contents> syntax, the input > becomes ambiguous (i.e. "which colon is the <path> separator?"), > without some way to escape a colon in the payload. The first colon is the separator here. > For a single-liner, --add-file-with-contents=<path>:<contents> would > be an OK way, and my comment was not a strong objection against this > new option existing. It was primarily an objection against changing > the way to add the 'version' file in our "make dist" procedure to > use it anyway. > > But now I think about it more, I am becoming less happy about it > existing in the first place. > > This will throw another monkey wrench to Konstantin's plan [*] to > make "git archive" output verifiable with the signature on original > Git objects, but it is not a new problem ;-) > > > [Reference] > > * https://lore.kernel.org/git/20220207213449.ljqjhdx4f45a3lx5@meerkat.local/ I don't see the conflict: If an untracked file is added to an archive using --add-file, --add-file-with-content, or ZIP or tar then we'd *want* the verification against a signed commit or tag to fail, no? A different signature would be required for the non-tracked parts. René --- >8 --- Subject: [PATCH] tempfile: add mks_tempfile_dt() Add a function to create a temporary file with a certain name in a temporary directory created using mkdtemp(3). Its result is more sightly than the paths created by mks_tempfile_ts(), which include a random prefix. That's useful for files passed to a program that displays their name, e.g. an external diff tool. Signed-off-by: René Scharfe <l.s.r@web.de> --- tempfile.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ tempfile.h | 13 +++++++++++ 2 files changed, 76 insertions(+) diff --git a/tempfile.c b/tempfile.c index 94aa18f3f7..2024c82691 100644 --- a/tempfile.c +++ b/tempfile.c @@ -56,6 +56,20 @@ static VOLATILE_LIST_HEAD(tempfile_list); +static void remove_template_directory(struct tempfile *tempfile, + int in_signal_handler) +{ + if (tempfile->directorylen > 0 && + tempfile->directorylen < tempfile->filename.len && + tempfile->filename.buf[tempfile->directorylen] == '/') { + strbuf_setlen(&tempfile->filename, tempfile->directorylen); + if (in_signal_handler) + rmdir(tempfile->filename.buf); + else + rmdir_or_warn(tempfile->filename.buf); + } +} + static void remove_tempfiles(int in_signal_handler) { pid_t me = getpid(); @@ -74,6 +88,7 @@ static void remove_tempfiles(int in_signal_handler) unlink(p->filename.buf); else unlink_or_warn(p->filename.buf); + remove_template_directory(p, in_signal_handler); p->active = 0; } @@ -100,6 +115,7 @@ static struct tempfile *new_tempfile(void) tempfile->owner = 0; INIT_LIST_HEAD(&tempfile->list); strbuf_init(&tempfile->filename, 0); + tempfile->directorylen = 0; return tempfile; } @@ -198,6 +214,52 @@ struct tempfile *mks_tempfile_tsm(const char *filename_template, int suffixlen, return tempfile; } +struct tempfile *mks_tempfile_dt(const char *directory_template, + const char *filename) +{ + struct tempfile *tempfile; + const char *tmpdir; + struct strbuf sb = STRBUF_INIT; + int fd; + size_t directorylen; + + if (!ends_with(directory_template, "XXXXXX")) { + errno = EINVAL; + return NULL; + } + + tmpdir = getenv("TMPDIR"); + if (!tmpdir) + tmpdir = "/tmp"; + + strbuf_addf(&sb, "%s/%s", tmpdir, directory_template); + directorylen = sb.len; + if (!mkdtemp(sb.buf)) { + int orig_errno = errno; + strbuf_release(&sb); + errno = orig_errno; + return NULL; + } + + strbuf_addf(&sb, "/%s", filename); + fd = open(sb.buf, O_CREAT | O_EXCL | O_RDWR, 0600); + if (fd < 0) { + int orig_errno = errno; + strbuf_setlen(&sb, directorylen); + rmdir(sb.buf); + strbuf_release(&sb); + errno = orig_errno; + return NULL; + } + + tempfile = new_tempfile(); + strbuf_swap(&tempfile->filename, &sb); + tempfile->directorylen = directorylen; + tempfile->fd = fd; + activate_tempfile(tempfile); + return tempfile; +} + struct tempfile *xmks_tempfile_m(const char *filename_template, int mode) { struct tempfile *tempfile; @@ -316,6 +378,7 @@ void delete_tempfile(struct tempfile **tempfile_p) close_tempfile_gently(tempfile); unlink_or_warn(tempfile->filename.buf); + remove_template_directory(tempfile, 0); deactivate_tempfile(tempfile); *tempfile_p = NULL; } diff --git a/tempfile.h b/tempfile.h index 4de3bc77d2..d7804a214a 100644 --- a/tempfile.h +++ b/tempfile.h @@ -82,6 +82,7 @@ struct tempfile { FILE *volatile fp; volatile pid_t owner; struct strbuf filename; + size_t directorylen; }; /* @@ -198,6 +199,18 @@ static inline struct tempfile *xmks_tempfile(const char *filename_template) return xmks_tempfile_m(filename_template, 0600); } +/* + * Attempt to create a temporary directory in $TMPDIR and to create and + * open a file in that new directory. Derive the directory name from the + * template in the manner of mkdtemp(). Arrange for directory and file + * to be deleted if the program exits before they are deleted + * explicitly. On success return a tempfile whose "filename" member + * contains the full path of the file and its "fd" member is open for + * writing the file. On error return NULL and set errno appropriately. + */ +struct tempfile *mks_tempfile_dt(const char *directory_template, + const char *filename); + /* * Associate a stdio stream with the temporary file (which must still * be open). Return `NULL` (*without* deleting the file) on error. The -- 2.35.1
René Scharfe <l.s.r@web.de> writes: >>> Nevertheless, it is still the most elegant way that I can think of to >>> generate a diagnostic `.zip` file without messing up the very things that >>> are to be diagnosed: the repository and the worktree. >> >> Puzzled. Are you feeding contents of a .zip file from the command >> line? > > Kind of. Command line arguments are built and handed to write_archive() > in-process. It's done by patch 3 and extended by 5 and 6. I meant to ask if this is doing git archive --store-contents-at-path="report.zip:$(cat diag.zip)" as I misunderstood what 'the diagnostic .zip file' referred to. That was a reference to the output of the "git archive" command. > The number of files is relatively low and they aren't huge, right? As long as it is expected to fit on the command line, that's fine. But if the question is "it is OK to add a new option with known limitation", then it should be stated a bit differently. "We add this option for use cases where we handle only small number of one-liner files", and it is OK. We may however want to do something imilar to what we do to the "-m '<message>'" option used by "git commit" and "git merge", i.e. add the final LF when it is missing to make it a complete line, to hint the fact that this is meant to add a small number of single liner files. >> Another worry was that when <contents> can have >> arbitrary bytes, with --opt=<path>:<contents> syntax, the input >> becomes ambiguous (i.e. "which colon is the <path> separator?"), >> without some way to escape a colon in the payload. > > The first colon is the separator here. Meaning you cannot have a colon in the path, which is not exactly pleasing limitation. I know you may not be able to do so on Windows or CIFS mounted on non-Windows, but we do not limit ourselves to portable filename character set (POSIX.1 3.282), either. >> This will throw another monkey wrench to Konstantin's plan [*] to >> make "git archive" output verifiable with the signature on original >> Git objects, but it is not a new problem ;-) >> >> >> [Reference] >> >> * https://lore.kernel.org/git/20220207213449.ljqjhdx4f45a3lx5@meerkat.local/ > > I don't see the conflict: If an untracked file is added to an archive > using --add-file, --add-file-with-content, or ZIP or tar then we'd > *want* the verification against a signed commit or tag to fail, no? A > different signature would be required for the non-tracked parts. Yes, which is exactly how this (and existing --add-file) makes Konstantin's plan much less useful. Thanks.
Am 09.02.22 um 23:48 schrieb Junio C Hamano: > René Scharfe <l.s.r@web.de> writes: > >> The number of files is relatively low and they aren't huge, right? > > As long as it is expected to fit on the command line, that's fine. > But if the question is "it is OK to add a new option with known > limitation", then it should be stated a bit differently. I asked this question to find out if writing the files to $TMPDIR and adding them with --add-file instead of with --add-file-with-content would be feasible in patches 3 to 6. git archive would not have to be changed in that case. >>> This will throw another monkey wrench to Konstantin's plan [*] to >>> make "git archive" output verifiable with the signature on original >>> Git objects, but it is not a new problem ;-) >>> >>> >>> [Reference] >>> >>> * https://lore.kernel.org/git/20220207213449.ljqjhdx4f45a3lx5@meerkat.local/ >> >> I don't see the conflict: If an untracked file is added to an archive >> using --add-file, --add-file-with-content, or ZIP or tar then we'd >> *want* the verification against a signed commit or tag to fail, no? A >> different signature would be required for the non-tracked parts. > > Yes, which is exactly how this (and existing --add-file) makes > Konstantin's plan much less useful. People added untracked files to archives before --add-file existed. --add-file-with-content could be used to add the .GIT_ARCHIVE_SIG file. Additional untracked files would need a manifest to specify which files are (not) covered by the signed commit/tag. Or the .GIT_ARCHIVE_SIG files could be added just after the signed files as a rule, before any other untracked files, as some kind of a separator. Just listing untracked files and verifying the others might still be useful. Warning about untracked files shadowing tracked ones would be very useful. Some equivalent to the .GIT_ARCHIVE_SIG file containing a signature of the untracked files could optionally be added at the end to allow full verification -- but would require signing at archive creation time. René
René Scharfe <l.s.r@web.de> writes: >> Yes, which is exactly how this (and existing --add-file) makes >> Konstantin's plan much less useful. > People added untracked files to archives before --add-file existed. > > --add-file-with-content could be used to add the .GIT_ARCHIVE_SIG file. > > Additional untracked files would need a manifest to specify which files > are (not) covered by the signed commit/tag. Or the .GIT_ARCHIVE_SIG > files could be added just after the signed files as a rule, before any > other untracked files, as some kind of a separator. Or if people do not _exclude_ tracked files from the archive, then the verifier who has a tarball and a Git tree object can consult the tree object to see which ones are added untracked cruft. > Just listing untracked files and verifying the others might still be > useful. Warning about untracked files shadowing tracked ones would be > very useful. Yup. > Some equivalent to the .GIT_ARCHIVE_SIG file containing a signature of > the untracked files could optionally be added at the end to allow full > verification -- but would require signing at archive creation time. Yeah, and at that point, it is not much more convenient than just signing the whole archive (sans the SIG part, obviously), which is what people have always done ;-)
Am 10.02.22 um 20:23 schrieb Junio C Hamano: > René Scharfe <l.s.r@web.de> writes: > >>> Yes, which is exactly how this (and existing --add-file) makes >>> Konstantin's plan much less useful. A harder obstacle to verification would be end-of-line conversion. Retrying a failed signature check after applying convert_to_git() might work, but not for files that have mixed line endings in the repository and end up being homogenized during checkout (and thus archiving). >> People added untracked files to archives before --add-file existed. >> >> --add-file-with-content could be used to add the .GIT_ARCHIVE_SIG file. >> >> Additional untracked files would need a manifest to specify which files >> are (not) covered by the signed commit/tag. Or the .GIT_ARCHIVE_SIG >> files could be added just after the signed files as a rule, before any >> other untracked files, as some kind of a separator. > > Or if people do not _exclude_ tracked files from the archive, then > the verifier who has a tarball and a Git tree object can consult the > tree object to see which ones are added untracked cruft. True, but if you have the tree objects then you probably also have the blobs and don't need the archive? Or is this some kind of sparse checkout scenario? >> Some equivalent to the .GIT_ARCHIVE_SIG file containing a signature of >> the untracked files could optionally be added at the end to allow full >> verification -- but would require signing at archive creation time. > > Yeah, and at that point, it is not much more convenient than just > signing the whole archive (sans the SIG part, obviously), which is > what people have always done ;-) Indeed. René
René Scharfe <l.s.r@web.de> writes: >> Or if people do not _exclude_ tracked files from the archive, then >> the verifier who has a tarball and a Git tree object can consult the >> tree object to see which ones are added untracked cruft. > > True, but if you have the tree objects then you probably also have the > blobs and don't need the archive? Or is this some kind of sparse > checkout scenario? My phrasing was too loose. This is a "how to verify a distro tarball" (without having a copy of the project repository, but with some common tools like "git") scenario. The verifier has a tarball. In addition, the verifier knows the object name of the Git tree object the tarball was taken from, and somehow trusts that the object name is genuine. We can do either "untar + git-add . && git write-tree" or its equivalent to see how the contents hashes to the expected tree (or not). How the verifier trusts the object name is out of scope (it may come from a copy of a signed tag object and a copy of the commit object that the tag points at and the contents of signed tag object, with its known format, would allow you to write a stand alone tool to verify the PGP signature). Line-end normalization and smudge filter rules may get in the way, if we truly did "untar" to the filesystem, but I thought "git archive" didn't do smudge conversion and core.crlf handling when creating the archive?
Am 11.02.22 um 22:27 schrieb Junio C Hamano: > René Scharfe <l.s.r@web.de> writes: > >>> Or if people do not _exclude_ tracked files from the archive, then >>> the verifier who has a tarball and a Git tree object can consult the >>> tree object to see which ones are added untracked cruft. >> >> True, but if you have the tree objects then you probably also have the >> blobs and don't need the archive? Or is this some kind of sparse >> checkout scenario? > > My phrasing was too loose. This is a "how to verify a distro > tarball" (without having a copy of the project repository, but with > some common tools like "git") scenario. > > The verifier has a tarball. In addition, the verifier knows the > object name of the Git tree object the tarball was taken from, and > somehow trusts that the object name is genuine. We can do either > "untar + git-add . && git write-tree" or its equivalent to see how > the contents hashes to the expected tree (or not). > > How the verifier trusts the object name is out of scope (it may come > from a copy of a signed tag object and a copy of the commit object > that the tag points at and the contents of signed tag object, with > its known format, would allow you to write a stand alone tool to > verify the PGP signature). Right, but the tree hash does not directly allow to see which objects are tracked or not. This information is necessary to reconstruct the signed tree. (Having tracked files first, then the signature file and then untracked files in the archive would be an easy way to transmit it.) > Line-end normalization and smudge filter rules may get in the way, > if we truly did "untar" to the filesystem, but I thought "git > archive" didn't do smudge conversion and core.crlf handling when > creating the archive? git archive uses convert_to_working_tree() to archive the same file contents as tar or zip would. René
René Scharfe <l.s.r@web.de> writes: >> The verifier has a tarball. In addition, the verifier knows the >> object name of the Git tree object the tarball was taken from, and >> somehow trusts that the object name is genuine. We can do either >> "untar + git-add . && git write-tree" or its equivalent to see how >> the contents hashes to the expected tree (or not). > ... > Right, but the tree hash does not directly allow to see which objects > are tracked or not. Ah, of course---it was silly of me to overlook this obvious fact X-<. So we do need some extra "manifest" to declare what's untracked etc., if we allow --add-file etc. to munge the tree when creating a tarball out of it.
Am 13.02.22 um 07:25 schrieb Junio C Hamano: > > So we do need some extra "manifest" to declare what's untracked etc., > if we allow --add-file etc. to munge the tree when creating a tarball > out of it. Right, or get that information from the order of files in the archive, by having tracked files come first, then the signature file with a certain name and then untracked files. René
René Scharfe <l.s.r@web.de> writes: > Am 13.02.22 um 07:25 schrieb Junio C Hamano: >> >> So we do need some extra "manifest" to declare what's untracked etc., >> if we allow --add-file etc. to munge the tree when creating a tarball >> out of it. > > Right, or get that information from the order of files in the archive, > by having tracked files come first, then the signature file with a > certain name and then untracked files. That sounds like a workable approach, modulo that the details of the "signature file with a certain name" part needs to be worked out. We should make sure that we clearly document that "--add-file=" and friends add their material after the contents that come from the tree-ish, and make sure that the program does so and will stay doing so. Otherwise users cannot easily create an archive that follows the above rule. Thanks.
diff --git a/Documentation/git-archive.txt b/Documentation/git-archive.txt index bc4e76a7834..1b52a0a65a1 100644 --- a/Documentation/git-archive.txt +++ b/Documentation/git-archive.txt @@ -61,6 +61,17 @@ OPTIONS by concatenating the value for `--prefix` (if any) and the basename of <file>. +--add-file-with-content=<path>:<content>:: + Add the specified contents to the archive. Can be repeated to add + multiple files. The path of the file in the archive is built + by concatenating the value for `--prefix` (if any) and the + basename of <file>. ++ +The `<path>` cannot contain any colon, the file mode is limited to +a regular file, and the option may be subject platform-dependent +command-line limits. For non-trivial cases, write an untracked file +and use `--add-file` instead. + --worktree-attributes:: Look for attributes in .gitattributes files in the working tree as well (see <<ATTRIBUTES>>). diff --git a/archive.c b/archive.c index a3bbb091256..172efd690c3 100644 --- a/archive.c +++ b/archive.c @@ -263,6 +263,7 @@ static int queue_or_write_archive_entry(const struct object_id *oid, struct extra_file_info { char *base; struct stat stat; + void *content; }; int write_archive_entries(struct archiver_args *args, @@ -337,7 +338,13 @@ int write_archive_entries(struct archiver_args *args, strbuf_addstr(&path_in_archive, basename(path)); strbuf_reset(&content); - if (strbuf_read_file(&content, path, info->stat.st_size) < 0) + if (info->content) + err = write_entry(args, &fake_oid, path_in_archive.buf, + path_in_archive.len, + info->stat.st_mode, + info->content, info->stat.st_size); + else if (strbuf_read_file(&content, path, + info->stat.st_size) < 0) err = error_errno(_("could not read '%s'"), path); else err = write_entry(args, &fake_oid, path_in_archive.buf, @@ -493,6 +500,7 @@ static void extra_file_info_clear(void *util, const char *str) { struct extra_file_info *info = util; free(info->base); + free(info->content); free(info); } @@ -514,14 +522,38 @@ static int add_file_cb(const struct option *opt, const char *arg, int unset) if (!arg) return -1; - path = prefix_filename(args->prefix, arg); - item = string_list_append_nodup(&args->extra_files, path); - item->util = info = xmalloc(sizeof(*info)); + info = xmalloc(sizeof(*info)); info->base = xstrdup_or_null(base); - if (stat(path, &info->stat)) - die(_("File not found: %s"), path); - if (!S_ISREG(info->stat.st_mode)) - die(_("Not a regular file: %s"), path); + + if (strcmp(opt->long_name, "add-file-with-content")) { + path = prefix_filename(args->prefix, arg); + if (stat(path, &info->stat)) + die(_("File not found: %s"), path); + if (!S_ISREG(info->stat.st_mode)) + die(_("Not a regular file: %s"), path); + info->content = NULL; /* read the file later */ + } else { + const char *colon = strchr(arg, ':'); + char *p; + + if (!colon) + die(_("missing colon: '%s'"), arg); + + p = xstrndup(arg, colon - arg); + if (!args->prefix) + path = p; + else { + path = prefix_filename(args->prefix, p); + free(p); + } + memset(&info->stat, 0, sizeof(info->stat)); + info->stat.st_mode = S_IFREG | 0644; + info->content = xstrdup(colon + 1); + info->stat.st_size = strlen(info->content); + } + item = string_list_append_nodup(&args->extra_files, path); + item->util = info; + return 0; } @@ -554,6 +586,9 @@ static int parse_archive_args(int argc, const char **argv, { OPTION_CALLBACK, 0, "add-file", args, N_("file"), N_("add untracked file to archive"), 0, add_file_cb, (intptr_t)&base }, + { OPTION_CALLBACK, 0, "add-file-with-content", args, + N_("file"), N_("add untracked file to archive"), 0, + add_file_cb, (intptr_t)&base }, OPT_STRING('o', "output", &output, N_("file"), N_("write the archive to this file")), OPT_BOOL(0, "worktree-attributes", &worktree_attributes, diff --git a/t/t5003-archive-zip.sh b/t/t5003-archive-zip.sh index 1e6d18b140e..8ff1257f1a0 100755 --- a/t/t5003-archive-zip.sh +++ b/t/t5003-archive-zip.sh @@ -206,6 +206,18 @@ test_expect_success 'git archive --format=zip --add-file' ' check_zip with_untracked check_added with_untracked untracked untracked +test_expect_success UNZIP 'git archive --format=zip --add-file-with-content' ' + git archive --format=zip >with_file_with_content.zip \ + --add-file-with-content=hello:world $EMPTY_TREE && + test_when_finished "rm -rf tmp-unpack" && + mkdir tmp-unpack && ( + cd tmp-unpack && + "$GIT_UNZIP" ../with_file_with_content.zip && + test_path_is_file hello && + test world = $(cat hello) + ) +' + test_expect_success 'git archive --format=zip --add-file twice' ' echo untracked >untracked && git archive --format=zip --prefix=one/ --add-file=untracked \