Message ID | a1686ab52f1bec4bddeaab973c9b77e55e8b539b.1700680717.git.gitgitgadget@gmail.com (mailing list archive) |
---|---|
State | Accepted |
Commit | b7d49ac1ecd05669edada26b990eee677a7d3c25 |
Headers | show |
Series | Redact unsafe URLs in the Trace2 output | expand |
On Wed, Nov 22, 2023 at 11:19 AM Johannes Schindelin via GitGitGadget <gitgitgadget@gmail.com> wrote: > > From: Johannes Schindelin <johannes.schindelin@gmx.de> > > It is an unsafe practice to call something like > > git clone https://user:password@example.com/ > > This not only risks leaking the password "over the shoulder" or into the > readline history of the current Unix shell, it also gets logged via > Trace2 if enabled. Indeed. Clone urls _also_ seem to be slurped up by other tools, such as IDEs, and possibly sent off to various third-party cloud services when users have various AI-assist plugins installed in their IDEs, resulting in some infosec incidents and fire drills. (Not a theoretical scenario, and not fun.) > Let's at least avoid logging such secrets via Trace2, much like we avoid > logging secrets in `http.c`. Much like the code in `http.c` is guarded > via `GIT_TRACE_REDACT` (defaulting to `true`), we guard the new code via > `GIT_TRACE2_REDACT` (also defaulting to `true`). Training users is hard. I appreciate the changes here to make trace2 not be a leak vector, but is it time to perhaps consider bigger safety measures: At the clone/fetch level, automatically warn loudly whenever such a URL is provided, accompanied with a note that in the future it will be turned into a hard error? Either way, I agree with your "at least" comment here and the changes you are making. > The new tests added in this commit uncover leaks in `builtin/clone.c` > and `remote.c`. Therefore we need to turn off > `TEST_PASSES_SANITIZE_LEAK`. The reasons: > > - We observed that `the_repository->remote_status` is not released > properly. > > - We are using `url...insteadOf` and that runs into a code path where an > allocated URL is replaced with another URL, and the original URL is > never released. > > - `remote_states` contains plenty of `struct remote`s whose refspecs > seem to be usually allocated by never released. > > More investigation is needed here to identify the exact cause and > proper fixes for these leaks/bugs. Thanks for carefully documenting and explaining. > Co-authored-by: Jeff Hostetler <jeffhostetler@github.com> > Signed-off-by: Jeff Hostetler <jeffhostetler@github.com> > Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> > --- > t/t0210-trace2-normal.sh | 20 ++++++- > trace2.c | 120 ++++++++++++++++++++++++++++++++++++++- > 2 files changed, 136 insertions(+), 4 deletions(-) > > diff --git a/t/t0210-trace2-normal.sh b/t/t0210-trace2-normal.sh > index 80e76a4695e..c312657a12c 100755 > --- a/t/t0210-trace2-normal.sh > +++ b/t/t0210-trace2-normal.sh > @@ -2,7 +2,7 @@ > > test_description='test trace2 facility (normal target)' > > -TEST_PASSES_SANITIZE_LEAK=true > +TEST_PASSES_SANITIZE_LEAK=false > . ./test-lib.sh > > # Turn off any inherited trace2 settings for this test. > @@ -283,4 +283,22 @@ test_expect_success 'using global config with include' ' > test_cmp expect actual > ' > > +test_expect_success 'unsafe URLs are redacted by default' ' > + test_when_finished \ > + "rm -r trace.normal unredacted.normal clone clone2" && > + > + test_config_global \ > + "url.$(pwd).insteadOf" https://user:pwd@example.com/ && > + test_config_global trace2.configParams "core.*,remote.*.url" && > + > + GIT_TRACE2="$(pwd)/trace.normal" \ > + git clone https://user:pwd@example.com/ clone && > + ! grep user:pwd trace.normal && > + > + GIT_TRACE2_REDACT=0 GIT_TRACE2="$(pwd)/unredacted.normal" \ > + git clone https://user:pwd@example.com/ clone2 && > + grep "start .* clone https://user:pwd@example.com" unredacted.normal && > + grep "remote.origin.url=https://user:pwd@example.com" unredacted.normal > +' > + > test_done > diff --git a/trace2.c b/trace2.c > index 6dc74dff4c7..87d9a3a0361 100644 > --- a/trace2.c > +++ b/trace2.c > @@ -20,6 +20,7 @@ > #include "trace2/tr2_tmr.h" > > static int trace2_enabled; > +static int trace2_redact = 1; > > static int tr2_next_child_id; /* modify under lock */ > static int tr2_next_exec_id; /* modify under lock */ > @@ -227,6 +228,8 @@ void trace2_initialize_fl(const char *file, int line) > if (!tr2_tgt_want_builtins()) > return; > trace2_enabled = 1; > + if (!git_env_bool("GIT_TRACE2_REDACT", 1)) > + trace2_redact = 0; > > tr2_sid_get(); > > @@ -247,12 +250,93 @@ int trace2_is_enabled(void) > return trace2_enabled; > } > > +/* > + * Redacts an argument, i.e. ensures that no password in > + * https://user:password@host/-style URLs is logged. > + * > + * Returns the original if nothing needed to be redacted. > + * Returns a pointer that needs to be `free()`d otherwise. > + */ > +static const char *redact_arg(const char *arg) > +{ > + const char *p, *colon; > + size_t at; > + > + if (!trace2_redact || > + (!skip_prefix(arg, "https://", &p) && > + !skip_prefix(arg, "http://", &p))) > + return arg; > + > + at = strcspn(p, "@/"); > + if (p[at] != '@') > + return arg; > + > + colon = memchr(p, ':', at); > + if (!colon) > + return arg; > + > + return xstrfmt("%.*s:<REDACTED>%s", (int)(colon - arg), arg, p + at); > +} > + > +/* > + * Redacts arguments in an argument list. > + * > + * Returns the original if nothing needed to be redacted. > + * Otherwise, returns a new array that needs to be released > + * via `free_redacted_argv()`. > + */ > +static const char **redact_argv(const char **argv) > +{ > + int i, j; > + const char *redacted = NULL; > + const char **ret; > + > + if (!trace2_redact) > + return argv; > + > + for (i = 0; argv[i]; i++) > + if ((redacted = redact_arg(argv[i])) != argv[i]) > + break; > + > + if (!argv[i]) > + return argv; > + > + for (j = 0; argv[j]; j++) > + ; /* keep counting */ > + > + ALLOC_ARRAY(ret, j + 1); > + ret[j] = NULL; > + > + for (j = 0; j < i; j++) > + ret[j] = argv[j]; > + ret[i] = redacted; > + for (++i; argv[i]; i++) { > + redacted = redact_arg(argv[i]); > + ret[i] = redacted ? redacted : argv[i]; > + } > + > + return ret; > +} > + > +static void free_redacted_argv(const char **redacted, const char **argv) > +{ > + int i; > + > + if (redacted != argv) { > + for (i = 0; argv[i]; i++) > + if (redacted[i] != argv[i]) > + free((void *)redacted[i]); > + free((void *)redacted); > + } > +} > + > void trace2_cmd_start_fl(const char *file, int line, const char **argv) > { > struct tr2_tgt *tgt_j; > int j; > uint64_t us_now; > uint64_t us_elapsed_absolute; > + const char **redacted; > > if (!trace2_enabled) > return; > @@ -260,10 +344,14 @@ void trace2_cmd_start_fl(const char *file, int line, const char **argv) > us_now = getnanotime() / 1000; > us_elapsed_absolute = tr2tls_absolute_elapsed(us_now); > > + redacted = redact_argv(argv); > + > for_each_wanted_builtin (j, tgt_j) > if (tgt_j->pfn_start_fl) > tgt_j->pfn_start_fl(file, line, us_elapsed_absolute, > - argv); > + redacted); > + > + free_redacted_argv(redacted, argv); > } > > void trace2_cmd_exit_fl(const char *file, int line, int code) > @@ -409,6 +497,7 @@ void trace2_child_start_fl(const char *file, int line, > int j; > uint64_t us_now; > uint64_t us_elapsed_absolute; > + const char **orig_argv = cmd->args.v; > > if (!trace2_enabled) > return; > @@ -419,10 +508,24 @@ void trace2_child_start_fl(const char *file, int line, > cmd->trace2_child_id = tr2tls_locked_increment(&tr2_next_child_id); > cmd->trace2_child_us_start = us_now; > > + /* > + * The `pfn_child_start_fl` API takes a `struct child_process` > + * rather than a simple `argv` for the child because some > + * targets make use of the additional context bits/values. So > + * temporarily replace the original argv (inside the `strvec`) > + * with a possibly redacted version. > + */ > + cmd->args.v = redact_argv(orig_argv); > + > for_each_wanted_builtin (j, tgt_j) > if (tgt_j->pfn_child_start_fl) > tgt_j->pfn_child_start_fl(file, line, > us_elapsed_absolute, cmd); > + > + if (cmd->args.v != orig_argv) { > + free_redacted_argv(cmd->args.v, orig_argv); > + cmd->args.v = orig_argv; > + } > } > > void trace2_child_exit_fl(const char *file, int line, struct child_process *cmd, > @@ -493,6 +596,7 @@ int trace2_exec_fl(const char *file, int line, const char *exe, > int exec_id; > uint64_t us_now; > uint64_t us_elapsed_absolute; > + const char **redacted; > > if (!trace2_enabled) > return -1; > @@ -502,10 +606,14 @@ int trace2_exec_fl(const char *file, int line, const char *exe, > > exec_id = tr2tls_locked_increment(&tr2_next_exec_id); > > + redacted = redact_argv(argv); > + > for_each_wanted_builtin (j, tgt_j) > if (tgt_j->pfn_exec_fl) > tgt_j->pfn_exec_fl(file, line, us_elapsed_absolute, > - exec_id, exe, argv); > + exec_id, exe, redacted); > + > + free_redacted_argv(redacted, argv); > > return exec_id; > } > @@ -637,13 +745,19 @@ void trace2_def_param_fl(const char *file, int line, const char *param, > { > struct tr2_tgt *tgt_j; > int j; > + const char *redacted; > > if (!trace2_enabled) > return; > > + redacted = redact_arg(value); > + > for_each_wanted_builtin (j, tgt_j) > if (tgt_j->pfn_param_fl) > - tgt_j->pfn_param_fl(file, line, param, value, kvi); > + tgt_j->pfn_param_fl(file, line, param, redacted, kvi); > + > + if (redacted != value) > + free((void *)redacted); > } > > void trace2_def_repo_fl(const char *file, int line, struct repository *repo) > -- > gitgitgadget
On Thu, Nov 23, 2023 at 10:59:20AM -0800, Elijah Newren wrote: > > Let's at least avoid logging such secrets via Trace2, much like we avoid > > logging secrets in `http.c`. Much like the code in `http.c` is guarded > > via `GIT_TRACE_REDACT` (defaulting to `true`), we guard the new code via > > `GIT_TRACE2_REDACT` (also defaulting to `true`). > > Training users is hard. I appreciate the changes here to make trace2 > not be a leak vector, but is it time to perhaps consider bigger safety > measures: At the clone/fetch level, automatically warn loudly whenever > such a URL is provided, accompanied with a note that in the future it > will be turned into a hard error? Yes, the password in such a case ends up in the plaintext .git/config file, which is not great. There's some discussion and patches here: https://lore.kernel.org/git/nycvar.QRO.7.76.6.1905172121130.46@tvgsbejvaqbjf.bet/ I meant to follow up on them, but never did. -Peff
diff --git a/t/t0210-trace2-normal.sh b/t/t0210-trace2-normal.sh index 80e76a4695e..c312657a12c 100755 --- a/t/t0210-trace2-normal.sh +++ b/t/t0210-trace2-normal.sh @@ -2,7 +2,7 @@ test_description='test trace2 facility (normal target)' -TEST_PASSES_SANITIZE_LEAK=true +TEST_PASSES_SANITIZE_LEAK=false . ./test-lib.sh # Turn off any inherited trace2 settings for this test. @@ -283,4 +283,22 @@ test_expect_success 'using global config with include' ' test_cmp expect actual ' +test_expect_success 'unsafe URLs are redacted by default' ' + test_when_finished \ + "rm -r trace.normal unredacted.normal clone clone2" && + + test_config_global \ + "url.$(pwd).insteadOf" https://user:pwd@example.com/ && + test_config_global trace2.configParams "core.*,remote.*.url" && + + GIT_TRACE2="$(pwd)/trace.normal" \ + git clone https://user:pwd@example.com/ clone && + ! grep user:pwd trace.normal && + + GIT_TRACE2_REDACT=0 GIT_TRACE2="$(pwd)/unredacted.normal" \ + git clone https://user:pwd@example.com/ clone2 && + grep "start .* clone https://user:pwd@example.com" unredacted.normal && + grep "remote.origin.url=https://user:pwd@example.com" unredacted.normal +' + test_done diff --git a/trace2.c b/trace2.c index 6dc74dff4c7..87d9a3a0361 100644 --- a/trace2.c +++ b/trace2.c @@ -20,6 +20,7 @@ #include "trace2/tr2_tmr.h" static int trace2_enabled; +static int trace2_redact = 1; static int tr2_next_child_id; /* modify under lock */ static int tr2_next_exec_id; /* modify under lock */ @@ -227,6 +228,8 @@ void trace2_initialize_fl(const char *file, int line) if (!tr2_tgt_want_builtins()) return; trace2_enabled = 1; + if (!git_env_bool("GIT_TRACE2_REDACT", 1)) + trace2_redact = 0; tr2_sid_get(); @@ -247,12 +250,93 @@ int trace2_is_enabled(void) return trace2_enabled; } +/* + * Redacts an argument, i.e. ensures that no password in + * https://user:password@host/-style URLs is logged. + * + * Returns the original if nothing needed to be redacted. + * Returns a pointer that needs to be `free()`d otherwise. + */ +static const char *redact_arg(const char *arg) +{ + const char *p, *colon; + size_t at; + + if (!trace2_redact || + (!skip_prefix(arg, "https://", &p) && + !skip_prefix(arg, "http://", &p))) + return arg; + + at = strcspn(p, "@/"); + if (p[at] != '@') + return arg; + + colon = memchr(p, ':', at); + if (!colon) + return arg; + + return xstrfmt("%.*s:<REDACTED>%s", (int)(colon - arg), arg, p + at); +} + +/* + * Redacts arguments in an argument list. + * + * Returns the original if nothing needed to be redacted. + * Otherwise, returns a new array that needs to be released + * via `free_redacted_argv()`. + */ +static const char **redact_argv(const char **argv) +{ + int i, j; + const char *redacted = NULL; + const char **ret; + + if (!trace2_redact) + return argv; + + for (i = 0; argv[i]; i++) + if ((redacted = redact_arg(argv[i])) != argv[i]) + break; + + if (!argv[i]) + return argv; + + for (j = 0; argv[j]; j++) + ; /* keep counting */ + + ALLOC_ARRAY(ret, j + 1); + ret[j] = NULL; + + for (j = 0; j < i; j++) + ret[j] = argv[j]; + ret[i] = redacted; + for (++i; argv[i]; i++) { + redacted = redact_arg(argv[i]); + ret[i] = redacted ? redacted : argv[i]; + } + + return ret; +} + +static void free_redacted_argv(const char **redacted, const char **argv) +{ + int i; + + if (redacted != argv) { + for (i = 0; argv[i]; i++) + if (redacted[i] != argv[i]) + free((void *)redacted[i]); + free((void *)redacted); + } +} + void trace2_cmd_start_fl(const char *file, int line, const char **argv) { struct tr2_tgt *tgt_j; int j; uint64_t us_now; uint64_t us_elapsed_absolute; + const char **redacted; if (!trace2_enabled) return; @@ -260,10 +344,14 @@ void trace2_cmd_start_fl(const char *file, int line, const char **argv) us_now = getnanotime() / 1000; us_elapsed_absolute = tr2tls_absolute_elapsed(us_now); + redacted = redact_argv(argv); + for_each_wanted_builtin (j, tgt_j) if (tgt_j->pfn_start_fl) tgt_j->pfn_start_fl(file, line, us_elapsed_absolute, - argv); + redacted); + + free_redacted_argv(redacted, argv); } void trace2_cmd_exit_fl(const char *file, int line, int code) @@ -409,6 +497,7 @@ void trace2_child_start_fl(const char *file, int line, int j; uint64_t us_now; uint64_t us_elapsed_absolute; + const char **orig_argv = cmd->args.v; if (!trace2_enabled) return; @@ -419,10 +508,24 @@ void trace2_child_start_fl(const char *file, int line, cmd->trace2_child_id = tr2tls_locked_increment(&tr2_next_child_id); cmd->trace2_child_us_start = us_now; + /* + * The `pfn_child_start_fl` API takes a `struct child_process` + * rather than a simple `argv` for the child because some + * targets make use of the additional context bits/values. So + * temporarily replace the original argv (inside the `strvec`) + * with a possibly redacted version. + */ + cmd->args.v = redact_argv(orig_argv); + for_each_wanted_builtin (j, tgt_j) if (tgt_j->pfn_child_start_fl) tgt_j->pfn_child_start_fl(file, line, us_elapsed_absolute, cmd); + + if (cmd->args.v != orig_argv) { + free_redacted_argv(cmd->args.v, orig_argv); + cmd->args.v = orig_argv; + } } void trace2_child_exit_fl(const char *file, int line, struct child_process *cmd, @@ -493,6 +596,7 @@ int trace2_exec_fl(const char *file, int line, const char *exe, int exec_id; uint64_t us_now; uint64_t us_elapsed_absolute; + const char **redacted; if (!trace2_enabled) return -1; @@ -502,10 +606,14 @@ int trace2_exec_fl(const char *file, int line, const char *exe, exec_id = tr2tls_locked_increment(&tr2_next_exec_id); + redacted = redact_argv(argv); + for_each_wanted_builtin (j, tgt_j) if (tgt_j->pfn_exec_fl) tgt_j->pfn_exec_fl(file, line, us_elapsed_absolute, - exec_id, exe, argv); + exec_id, exe, redacted); + + free_redacted_argv(redacted, argv); return exec_id; } @@ -637,13 +745,19 @@ void trace2_def_param_fl(const char *file, int line, const char *param, { struct tr2_tgt *tgt_j; int j; + const char *redacted; if (!trace2_enabled) return; + redacted = redact_arg(value); + for_each_wanted_builtin (j, tgt_j) if (tgt_j->pfn_param_fl) - tgt_j->pfn_param_fl(file, line, param, value, kvi); + tgt_j->pfn_param_fl(file, line, param, redacted, kvi); + + if (redacted != value) + free((void *)redacted); } void trace2_def_repo_fl(const char *file, int line, struct repository *repo)