Message ID | 0d570137bb6aef675f4f5d74d140ace1dfba5eb7.1644187146.git.gitgitgadget@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | scalar: implement the subcommand "diagnose" | expand |
Am 06.02.22 um 23:39 schrieb Johannes Schindelin via GitGitGadget: > From: Johannes Schindelin <johannes.schindelin@gmx.de> > > Over the course of Scalar's development, it became obvious that there is > a need for a command that can gather all kinds of useful information > that can help identify the most typical problems with large > worktrees/repositories. > > The `diagnose` command is the culmination of this hard-won knowledge: it > gathers the installed hooks, the config, a couple statistics describing > the data shape, among other pieces of information, and then wraps > everything up in a tidy, neat `.zip` archive. > > Note: originally, Scalar was implemented in C# using the .NET API, where > we had the luxury of a comprehensive standard library that includes > basic functionality such as writing a `.zip` file. In the C version, we > lack such a commodity. Rather than introducing a dependency on, say, > libzip, we slightly abuse Git's `archive` command: Instead of writing > the `.zip` file directly, we stage the file contents in a Git index of a > temporary, bare repository, only to let `git archive` have at it, and > finally removing the temporary repository. > > Also note: Due to the frequently-spawned `git hash-object` processes, > this command is quite a bit slow on Windows. Should it turn out to be a > big problem, the lack of a batch mode of the `hash-object` command could > potentially be worked around via using `git fast-import` with a crafted > `stdin`. The two paragraphs above are not in sync with the patch. > > Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> > --- > contrib/scalar/scalar.c | 143 +++++++++++++++++++++++++++++++ > contrib/scalar/scalar.txt | 12 +++ > contrib/scalar/t/t9099-scalar.sh | 14 +++ > 3 files changed, 169 insertions(+) > > diff --git a/contrib/scalar/scalar.c b/contrib/scalar/scalar.c > index 00dcd4b50ef..30ce0799c7a 100644 > --- a/contrib/scalar/scalar.c > +++ b/contrib/scalar/scalar.c > @@ -11,6 +11,7 @@ > #include "dir.h" > #include "packfile.h" > #include "help.h" > +#include "archive.h" > > /* > * Remove the deepest subdirectory in the provided path string. Path must not > @@ -261,6 +262,44 @@ static int unregister_dir(void) > return res; > } > > +static int add_directory_to_archiver(struct strvec *archiver_args, > + const char *path, int recurse) > +{ > + int at_root = !*path; > + DIR *dir = opendir(at_root ? "." : path); > + struct dirent *e; > + struct strbuf buf = STRBUF_INIT; > + size_t len; > + int res = 0; > + > + if (!dir) > + return error(_("could not open directory '%s'"), path); > + > + if (!at_root) > + strbuf_addf(&buf, "%s/", path); > + len = buf.len; > + strvec_pushf(archiver_args, "--prefix=%s", buf.buf); > + > + while (!res && (e = readdir(dir))) { > + if (!strcmp(".", e->d_name) || !strcmp("..", e->d_name)) > + continue; > + > + strbuf_setlen(&buf, len); > + strbuf_addstr(&buf, e->d_name); > + > + if (e->d_type == DT_REG) > + strvec_pushf(archiver_args, "--add-file=%s", buf.buf); > + else if (e->d_type != DT_DIR) > + res = -1; > + else if (recurse) > + add_directory_to_archiver(archiver_args, buf.buf, recurse); > + } > + > + closedir(dir); > + strbuf_release(&buf); > + return res; > +} > + > /* printf-style interface, expects `<key>=<value>` argument */ > static int set_config(const char *fmt, ...) > { > @@ -501,6 +540,109 @@ cleanup: > return res; > } > > +static int cmd_diagnose(int argc, const char **argv) > +{ > + struct option options[] = { > + OPT_END(), > + }; > + const char * const usage[] = { > + N_("scalar diagnose [<enlistment>]"), > + NULL > + }; > + struct strbuf zip_path = STRBUF_INIT; > + struct strvec archiver_args = STRVEC_INIT; > + char **argv_copy = NULL; > + int stdout_fd = -1, archiver_fd = -1; > + time_t now = time(NULL); > + struct tm tm; > + struct strbuf path = STRBUF_INIT, buf = STRBUF_INIT; > + size_t off; > + int res = 0; > + > + argc = parse_options(argc, argv, NULL, options, > + usage, 0); > + > + setup_enlistment_directory(argc, argv, usage, options, &zip_path); > + > + strbuf_addstr(&zip_path, "/.scalarDiagnostics/scalar_"); > + strbuf_addftime(&zip_path, > + "%Y%m%d_%H%M%S", localtime_r(&now, &tm), 0, 0); > + strbuf_addstr(&zip_path, ".zip"); > + switch (safe_create_leading_directories(zip_path.buf)) { > + case SCLD_EXISTS: > + case SCLD_OK: > + break; > + default: > + error_errno(_("could not create directory for '%s'"), > + zip_path.buf); > + goto diagnose_cleanup; > + } > + stdout_fd = dup(1); > + if (stdout_fd < 0) { > + res = error_errno(_("could not duplicate stdout")); > + goto diagnose_cleanup; > + } > + > + archiver_fd = xopen(zip_path.buf, O_CREAT | O_WRONLY | O_TRUNC, 0666); > + if (archiver_fd < 0 || dup2(archiver_fd, 1) < 0) { > + res = error_errno(_("could not redirect output")); > + goto diagnose_cleanup; > + } > + > + init_zip_archiver(); > + strvec_pushl(&archiver_args, "scalar-diagnose", "--format=zip", NULL); > + > + strbuf_reset(&buf); > + strbuf_addstr(&buf, > + "--add-file-with-content=diagnostics.log:" > + "Collecting diagnostic info\n\n"); > + get_version_info(&buf, 1); > + > + strbuf_addf(&buf, "Enlistment root: %s\n", the_repository->worktree); > + off = strchr(buf.buf, ':') + 1 - buf.buf; > + write_or_die(stdout_fd, buf.buf + off, buf.len - off); > + strvec_push(&archiver_args, buf.buf); Fun trick to reuse the buffer for both the ZIP entry and stdout. :) I'd have omitted the option from buf and added it like this, for simplicity: strvec_pushf(&archiver_args, "--add-file-with-content=diagnostics.log:%s", buf.buf); Just a thought. > + > + if ((res = add_directory_to_archiver(&archiver_args, ".git", 0)) || > + (res = add_directory_to_archiver(&archiver_args, ".git/hooks", 0)) || > + (res = add_directory_to_archiver(&archiver_args, ".git/info", 0)) || > + (res = add_directory_to_archiver(&archiver_args, ".git/logs", 1)) || > + (res = add_directory_to_archiver(&archiver_args, ".git/objects/info", 0))) > + goto diagnose_cleanup; > + > + strvec_pushl(&archiver_args, "--prefix=", > + oid_to_hex(the_hash_algo->empty_tree), "--", NULL); > + > + /* `write_archive()` modifies the `argv` passed to it. Let it. */ > + argv_copy = xmemdupz(archiver_args.v, > + sizeof(char *) * archiver_args.nr); Leaking the whole thing would be fine as well for this command, but cleaning up is tidier, of course. > + res = write_archive(archiver_args.nr, (const char **)argv_copy, NULL, > + the_repository, NULL, 0); Ah -- no shell means no command line length limits. :) > + if (res) { > + error(_("failed to write archive")); > + goto diagnose_cleanup; > + } > + > + if (!res) > + printf("\n" > + "Diagnostics complete.\n" > + "All of the gathered info is captured in '%s'\n", > + zip_path.buf); Is this message appended to the ZIP file or does it go to stdout? In any case: mixing write(2) and stdio(3) is not a good idea. Using fwrite(3) instead of write_or_die above and doing the stdout dup(2) dance only tightly around the write_archive call would help, I think. > + > +diagnose_cleanup: > + if (archiver_fd >= 0) { > + close(1); > + dup2(stdout_fd, 1); > + } > + free(argv_copy); > + strvec_clear(&archiver_args); > + strbuf_release(&zip_path); > + strbuf_release(&path); > + strbuf_release(&buf); > + > + return res; > +} > + > static int cmd_list(int argc, const char **argv) > { > if (argc != 1) > @@ -802,6 +944,7 @@ static struct { > { "reconfigure", cmd_reconfigure }, > { "delete", cmd_delete }, > { "version", cmd_version }, > + { "diagnose", cmd_diagnose }, > { NULL, NULL}, > }; > > diff --git a/contrib/scalar/scalar.txt b/contrib/scalar/scalar.txt > index f416d637289..22583fe046e 100644 > --- a/contrib/scalar/scalar.txt > +++ b/contrib/scalar/scalar.txt > @@ -14,6 +14,7 @@ scalar register [<enlistment>] > scalar unregister [<enlistment>] > scalar run ( all | config | commit-graph | fetch | loose-objects | pack-files ) [<enlistment>] > scalar reconfigure [ --all | <enlistment> ] > +scalar diagnose [<enlistment>] > scalar delete <enlistment> > > DESCRIPTION > @@ -129,6 +130,17 @@ reconfigure the enlistment. > With the `--all` option, all enlistments currently registered with Scalar > will be reconfigured. Use this option after each Scalar upgrade. > > +Diagnose > +~~~~~~~~ > + > +diagnose [<enlistment>]:: > + When reporting issues with Scalar, it is often helpful to provide the > + information gathered by this command, including logs and certain > + statistics describing the data shape of the current enlistment. > ++ > +The output of this command is a `.zip` file that is written into > +a directory adjacent to the worktree in the `src` directory. > + > Delete > ~~~~~~ > > diff --git a/contrib/scalar/t/t9099-scalar.sh b/contrib/scalar/t/t9099-scalar.sh > index 9d83fdf25e8..bbd07a44426 100755 > --- a/contrib/scalar/t/t9099-scalar.sh > +++ b/contrib/scalar/t/t9099-scalar.sh > @@ -90,4 +90,18 @@ test_expect_success '`scalar [...] <dir>` errors out when dir is missing' ' > grep "cloned. does not exist" err > ' > > +SQ="'" > +test_expect_success UNZIP 'scalar diagnose' ' > + scalar clone "file://$(pwd)" cloned --single-branch && > + scalar diagnose cloned >out && > + sed -n "s/.*$SQ\\(.*\\.zip\\)$SQ.*/\\1/p" <out >zip_path && > + zip_path=$(cat zip_path) && > + test -n "$zip_path" && > + unzip -v "$zip_path" && > + folder=${zip_path%.zip} && > + test_path_is_missing "$folder" && > + unzip -p "$zip_path" diagnostics.log >out && > + test_file_not_empty out > +' > + > test_done
Hi René, On Mon, 7 Feb 2022, René Scharfe wrote: > > Note: originally, Scalar was implemented in C# using the .NET API, where > > we had the luxury of a comprehensive standard library that includes > > basic functionality such as writing a `.zip` file. In the C version, we > > lack such a commodity. Rather than introducing a dependency on, say, > > libzip, we slightly abuse Git's `archive` command: Instead of writing > > the `.zip` file directly, we stage the file contents in a Git index of a > > temporary, bare repository, only to let `git archive` have at it, and > > finally removing the temporary repository. > > > > Also note: Due to the frequently-spawned `git hash-object` processes, > > this command is quite a bit slow on Windows. Should it turn out to be a > > big problem, the lack of a batch mode of the `hash-object` command could > > potentially be worked around via using `git fast-import` with a crafted > > `stdin`. > > The two paragraphs above are not in sync with the patch. Whoopsie! > > + archiver_fd = xopen(zip_path.buf, O_CREAT | O_WRONLY | O_TRUNC, 0666); > > + if (archiver_fd < 0 || dup2(archiver_fd, 1) < 0) { > > + res = error_errno(_("could not redirect output")); > > + goto diagnose_cleanup; > > + } > > + > > + init_zip_archiver(); > > + strvec_pushl(&archiver_args, "scalar-diagnose", "--format=zip", NULL); > > + > > + strbuf_reset(&buf); > > + strbuf_addstr(&buf, > > + "--add-file-with-content=diagnostics.log:" > > + "Collecting diagnostic info\n\n"); > > + get_version_info(&buf, 1); > > + > > + strbuf_addf(&buf, "Enlistment root: %s\n", the_repository->worktree); > > + off = strchr(buf.buf, ':') + 1 - buf.buf; > > + write_or_die(stdout_fd, buf.buf + off, buf.len - off); > > + strvec_push(&archiver_args, buf.buf); > > Fun trick to reuse the buffer for both the ZIP entry and stdout. :) I'd > have omitted the option from buf and added it like this, for simplicity: > > strvec_pushf(&archiver_args, > "--add-file-with-content=diagnostics.log:%s", buf.buf); > > Just a thought. Oh, that's even better. I did not like that `off` pattern at all but forgot to think of `pushf()`. Thanks! > > + > > + if ((res = add_directory_to_archiver(&archiver_args, ".git", 0)) || > > + (res = add_directory_to_archiver(&archiver_args, ".git/hooks", 0)) || > > + (res = add_directory_to_archiver(&archiver_args, ".git/info", 0)) || > > + (res = add_directory_to_archiver(&archiver_args, ".git/logs", 1)) || > > + (res = add_directory_to_archiver(&archiver_args, ".git/objects/info", 0))) > > + goto diagnose_cleanup; > > + > > + strvec_pushl(&archiver_args, "--prefix=", > > + oid_to_hex(the_hash_algo->empty_tree), "--", NULL); > > + > > + /* `write_archive()` modifies the `argv` passed to it. Let it. */ > > + argv_copy = xmemdupz(archiver_args.v, > > + sizeof(char *) * archiver_args.nr); > > Leaking the whole thing would be fine as well for this command, but > cleaning up is tidier, of course. > > > + res = write_archive(archiver_args.nr, (const char **)argv_copy, NULL, > > + the_repository, NULL, 0); > > Ah -- no shell means no command line length limits. :) Yes!!! It also makes the command a ridiculous amount faster on Windows. > > + if (res) { > > + error(_("failed to write archive")); > > + goto diagnose_cleanup; > > + } > > + > > + if (!res) > > + printf("\n" > > + "Diagnostics complete.\n" > > + "All of the gathered info is captured in '%s'\n", > > + zip_path.buf); > > Is this message appended to the ZIP file or does it go to stdout? It goes to `stdout`, this is for the user who runs `scalar diagnose`. Hmm. Now that you pointed it out, I think I want it to go to `stderr` instead. > In any case: mixing write(2) and stdio(3) is not a good idea. Using > fwrite(3) instead of write_or_die above and doing the stdout dup(2) > dance only tightly around the write_archive call would help, I think. Sure, but let's print this message to `stderr` instead, that'll be much cleaner, right? Alternatively, I think I'd rather move the `printf()` below... > > > + > > +diagnose_cleanup: > > + if (archiver_fd >= 0) { > > + close(1); > > + dup2(stdout_fd, 1); > > + } ... this re-redirection. What do you think? `stdout` or `stderr`? Thank you for your review! Dscho
diff --git a/contrib/scalar/scalar.c b/contrib/scalar/scalar.c index 00dcd4b50ef..30ce0799c7a 100644 --- a/contrib/scalar/scalar.c +++ b/contrib/scalar/scalar.c @@ -11,6 +11,7 @@ #include "dir.h" #include "packfile.h" #include "help.h" +#include "archive.h" /* * Remove the deepest subdirectory in the provided path string. Path must not @@ -261,6 +262,44 @@ static int unregister_dir(void) return res; } +static int add_directory_to_archiver(struct strvec *archiver_args, + const char *path, int recurse) +{ + int at_root = !*path; + DIR *dir = opendir(at_root ? "." : path); + struct dirent *e; + struct strbuf buf = STRBUF_INIT; + size_t len; + int res = 0; + + if (!dir) + return error(_("could not open directory '%s'"), path); + + if (!at_root) + strbuf_addf(&buf, "%s/", path); + len = buf.len; + strvec_pushf(archiver_args, "--prefix=%s", buf.buf); + + while (!res && (e = readdir(dir))) { + if (!strcmp(".", e->d_name) || !strcmp("..", e->d_name)) + continue; + + strbuf_setlen(&buf, len); + strbuf_addstr(&buf, e->d_name); + + if (e->d_type == DT_REG) + strvec_pushf(archiver_args, "--add-file=%s", buf.buf); + else if (e->d_type != DT_DIR) + res = -1; + else if (recurse) + add_directory_to_archiver(archiver_args, buf.buf, recurse); + } + + closedir(dir); + strbuf_release(&buf); + return res; +} + /* printf-style interface, expects `<key>=<value>` argument */ static int set_config(const char *fmt, ...) { @@ -501,6 +540,109 @@ cleanup: return res; } +static int cmd_diagnose(int argc, const char **argv) +{ + struct option options[] = { + OPT_END(), + }; + const char * const usage[] = { + N_("scalar diagnose [<enlistment>]"), + NULL + }; + struct strbuf zip_path = STRBUF_INIT; + struct strvec archiver_args = STRVEC_INIT; + char **argv_copy = NULL; + int stdout_fd = -1, archiver_fd = -1; + time_t now = time(NULL); + struct tm tm; + struct strbuf path = STRBUF_INIT, buf = STRBUF_INIT; + size_t off; + int res = 0; + + argc = parse_options(argc, argv, NULL, options, + usage, 0); + + setup_enlistment_directory(argc, argv, usage, options, &zip_path); + + strbuf_addstr(&zip_path, "/.scalarDiagnostics/scalar_"); + strbuf_addftime(&zip_path, + "%Y%m%d_%H%M%S", localtime_r(&now, &tm), 0, 0); + strbuf_addstr(&zip_path, ".zip"); + switch (safe_create_leading_directories(zip_path.buf)) { + case SCLD_EXISTS: + case SCLD_OK: + break; + default: + error_errno(_("could not create directory for '%s'"), + zip_path.buf); + goto diagnose_cleanup; + } + stdout_fd = dup(1); + if (stdout_fd < 0) { + res = error_errno(_("could not duplicate stdout")); + goto diagnose_cleanup; + } + + archiver_fd = xopen(zip_path.buf, O_CREAT | O_WRONLY | O_TRUNC, 0666); + if (archiver_fd < 0 || dup2(archiver_fd, 1) < 0) { + res = error_errno(_("could not redirect output")); + goto diagnose_cleanup; + } + + init_zip_archiver(); + strvec_pushl(&archiver_args, "scalar-diagnose", "--format=zip", NULL); + + strbuf_reset(&buf); + strbuf_addstr(&buf, + "--add-file-with-content=diagnostics.log:" + "Collecting diagnostic info\n\n"); + get_version_info(&buf, 1); + + strbuf_addf(&buf, "Enlistment root: %s\n", the_repository->worktree); + off = strchr(buf.buf, ':') + 1 - buf.buf; + write_or_die(stdout_fd, buf.buf + off, buf.len - off); + strvec_push(&archiver_args, buf.buf); + + if ((res = add_directory_to_archiver(&archiver_args, ".git", 0)) || + (res = add_directory_to_archiver(&archiver_args, ".git/hooks", 0)) || + (res = add_directory_to_archiver(&archiver_args, ".git/info", 0)) || + (res = add_directory_to_archiver(&archiver_args, ".git/logs", 1)) || + (res = add_directory_to_archiver(&archiver_args, ".git/objects/info", 0))) + goto diagnose_cleanup; + + strvec_pushl(&archiver_args, "--prefix=", + oid_to_hex(the_hash_algo->empty_tree), "--", NULL); + + /* `write_archive()` modifies the `argv` passed to it. Let it. */ + argv_copy = xmemdupz(archiver_args.v, + sizeof(char *) * archiver_args.nr); + res = write_archive(archiver_args.nr, (const char **)argv_copy, NULL, + the_repository, NULL, 0); + if (res) { + error(_("failed to write archive")); + goto diagnose_cleanup; + } + + if (!res) + printf("\n" + "Diagnostics complete.\n" + "All of the gathered info is captured in '%s'\n", + zip_path.buf); + +diagnose_cleanup: + if (archiver_fd >= 0) { + close(1); + dup2(stdout_fd, 1); + } + free(argv_copy); + strvec_clear(&archiver_args); + strbuf_release(&zip_path); + strbuf_release(&path); + strbuf_release(&buf); + + return res; +} + static int cmd_list(int argc, const char **argv) { if (argc != 1) @@ -802,6 +944,7 @@ static struct { { "reconfigure", cmd_reconfigure }, { "delete", cmd_delete }, { "version", cmd_version }, + { "diagnose", cmd_diagnose }, { NULL, NULL}, }; diff --git a/contrib/scalar/scalar.txt b/contrib/scalar/scalar.txt index f416d637289..22583fe046e 100644 --- a/contrib/scalar/scalar.txt +++ b/contrib/scalar/scalar.txt @@ -14,6 +14,7 @@ scalar register [<enlistment>] scalar unregister [<enlistment>] scalar run ( all | config | commit-graph | fetch | loose-objects | pack-files ) [<enlistment>] scalar reconfigure [ --all | <enlistment> ] +scalar diagnose [<enlistment>] scalar delete <enlistment> DESCRIPTION @@ -129,6 +130,17 @@ reconfigure the enlistment. With the `--all` option, all enlistments currently registered with Scalar will be reconfigured. Use this option after each Scalar upgrade. +Diagnose +~~~~~~~~ + +diagnose [<enlistment>]:: + When reporting issues with Scalar, it is often helpful to provide the + information gathered by this command, including logs and certain + statistics describing the data shape of the current enlistment. ++ +The output of this command is a `.zip` file that is written into +a directory adjacent to the worktree in the `src` directory. + Delete ~~~~~~ diff --git a/contrib/scalar/t/t9099-scalar.sh b/contrib/scalar/t/t9099-scalar.sh index 9d83fdf25e8..bbd07a44426 100755 --- a/contrib/scalar/t/t9099-scalar.sh +++ b/contrib/scalar/t/t9099-scalar.sh @@ -90,4 +90,18 @@ test_expect_success '`scalar [...] <dir>` errors out when dir is missing' ' grep "cloned. does not exist" err ' +SQ="'" +test_expect_success UNZIP 'scalar diagnose' ' + scalar clone "file://$(pwd)" cloned --single-branch && + scalar diagnose cloned >out && + sed -n "s/.*$SQ\\(.*\\.zip\\)$SQ.*/\\1/p" <out >zip_path && + zip_path=$(cat zip_path) && + test -n "$zip_path" && + unzip -v "$zip_path" && + folder=${zip_path%.zip} && + test_path_is_missing "$folder" && + unzip -p "$zip_path" diagnostics.log >out && + test_file_not_empty out +' + test_done