From patchwork Fri Apr 10 19:43:41 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jeff King X-Patchwork-Id: 11483579 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id A40DF15AB for ; Fri, 10 Apr 2020 19:43:44 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 8E45D20769 for ; Fri, 10 Apr 2020 19:43:44 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726646AbgDJTnn (ORCPT ); Fri, 10 Apr 2020 15:43:43 -0400 Received: from cloud.peff.net ([104.130.231.41]:40060 "HELO cloud.peff.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with SMTP id S1726177AbgDJTnm (ORCPT ); Fri, 10 Apr 2020 15:43:42 -0400 Received: (qmail 7614 invoked by uid 109); 10 Apr 2020 19:43:43 -0000 Received: from Unknown (HELO peff.net) (10.0.1.2) by cloud.peff.net (qpsmtpd/0.94) with SMTP; Fri, 10 Apr 2020 19:43:43 +0000 Authentication-Results: cloud.peff.net; auth=none Received: (qmail 2186 invoked by uid 111); 10 Apr 2020 19:54:15 -0000 Received: from coredump.intra.peff.net (HELO sigill.intra.peff.net) (10.0.0.2) by peff.net (qpsmtpd/0.94) with (TLS_AES_256_GCM_SHA384 encrypted) ESMTPS; Fri, 10 Apr 2020 15:54:15 -0400 Authentication-Results: peff.net; auth=none Date: Fri, 10 Apr 2020 15:43:41 -0400 From: Jeff King To: git@vger.kernel.org Subject: [PATCH 1/6] remote: drop auto-strlen behavior of make_branch() and make_rewrite() Message-ID: <20200410194341.GA1363756@coredump.intra.peff.net> References: <20200410194211.GA1363484@coredump.intra.peff.net> MIME-Version: 1.0 Content-Disposition: inline In-Reply-To: <20200410194211.GA1363484@coredump.intra.peff.net> Sender: git-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: git@vger.kernel.org The make_branch() and make_rewrite() functions can take a NUL-terminated string or a ptr/len pair. They use a sentinel value of "0" for the len to tell the difference between the two. However, when parsing config like: [branch ""] merge = whatever whose key flattens to: branch..merge we might actually have a zero-length branch name. This is obviously nonsense, but the current code would consider it as a NUL-terminated string and use the branch name ".merge". We could use a better sentinel value here (like "-1"), but that gets in the way of moving to size_t, which is a more appropriate type for a ptr/len combo. Let's instead just drop this feature and have the callers (of which there are only two total) use strlen() themselves. This simplifies the code, and lets us move to using size_t. Signed-off-by: Jeff King --- remote.c | 35 ++++++++++++----------------------- 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/remote.c b/remote.c index c43196ec06..35bc3f9f2d 100644 --- a/remote.c +++ b/remote.c @@ -174,54 +174,43 @@ static void add_merge(struct branch *branch, const char *name) branch->merge_name[branch->merge_nr++] = name; } -static struct branch *make_branch(const char *name, int len) +static struct branch *make_branch(const char *name, size_t len) { struct branch *ret; int i; for (i = 0; i < branches_nr; i++) { - if (len ? (!strncmp(name, branches[i]->name, len) && - !branches[i]->name[len]) : - !strcmp(name, branches[i]->name)) + if (!strncmp(name, branches[i]->name, len) && + !branches[i]->name[len]) return branches[i]; } ALLOC_GROW(branches, branches_nr + 1, branches_alloc); ret = xcalloc(1, sizeof(struct branch)); branches[branches_nr++] = ret; - if (len) - ret->name = xstrndup(name, len); - else - ret->name = xstrdup(name); + ret->name = xstrndup(name, len); ret->refname = xstrfmt("refs/heads/%s", ret->name); return ret; } -static struct rewrite *make_rewrite(struct rewrites *r, const char *base, int len) +static struct rewrite *make_rewrite(struct rewrites *r, + const char *base, size_t len) { struct rewrite *ret; int i; for (i = 0; i < r->rewrite_nr; i++) { - if (len - ? (len == r->rewrite[i]->baselen && - !strncmp(base, r->rewrite[i]->base, len)) - : !strcmp(base, r->rewrite[i]->base)) + if (len == r->rewrite[i]->baselen && + !strncmp(base, r->rewrite[i]->base, len)) return r->rewrite[i]; } ALLOC_GROW(r->rewrite, r->rewrite_nr + 1, r->rewrite_alloc); ret = xcalloc(1, sizeof(struct rewrite)); r->rewrite[r->rewrite_nr++] = ret; - if (len) { - ret->base = xstrndup(base, len); - ret->baselen = len; - } - else { - ret->base = xstrdup(base); - ret->baselen = strlen(base); - } + ret->base = xstrndup(base, len); + ret->baselen = len; return ret; } @@ -470,7 +459,7 @@ static void read_config(void) const char *head_ref = resolve_ref_unsafe("HEAD", 0, NULL, &flag); if (head_ref && (flag & REF_ISSYMREF) && skip_prefix(head_ref, "refs/heads/", &head_ref)) { - current_branch = make_branch(head_ref, 0); + current_branch = make_branch(head_ref, strlen(head_ref)); } } git_config(handle_config, NULL); @@ -1584,7 +1573,7 @@ struct branch *branch_get(const char *name) if (!name || !*name || !strcmp(name, "HEAD")) ret = current_branch; else - ret = make_branch(name, 0); + ret = make_branch(name, strlen(name)); set_merge(ret); return ret; } From patchwork Fri Apr 10 19:44:28 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jeff King X-Patchwork-Id: 11483581 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 9FAA314DD for ; Fri, 10 Apr 2020 19:44:30 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 8089320801 for ; Fri, 10 Apr 2020 19:44:30 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726648AbgDJTo3 (ORCPT ); Fri, 10 Apr 2020 15:44:29 -0400 Received: from cloud.peff.net ([104.130.231.41]:40066 "HELO cloud.peff.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with SMTP id S1726177AbgDJTo2 (ORCPT ); Fri, 10 Apr 2020 15:44:28 -0400 Received: (qmail 7633 invoked by uid 109); 10 Apr 2020 19:44:29 -0000 Received: from Unknown (HELO peff.net) (10.0.1.2) by cloud.peff.net (qpsmtpd/0.94) with SMTP; Fri, 10 Apr 2020 19:44:29 +0000 Authentication-Results: cloud.peff.net; auth=none Received: (qmail 2211 invoked by uid 111); 10 Apr 2020 19:55:02 -0000 Received: from coredump.intra.peff.net (HELO sigill.intra.peff.net) (10.0.0.2) by peff.net (qpsmtpd/0.94) with (TLS_AES_256_GCM_SHA384 encrypted) ESMTPS; Fri, 10 Apr 2020 15:55:02 -0400 Authentication-Results: peff.net; auth=none Date: Fri, 10 Apr 2020 15:44:28 -0400 From: Jeff King To: git@vger.kernel.org Subject: [PATCH 2/6] parse_config_key(): return subsection len as size_t Message-ID: <20200410194428.GB1363756@coredump.intra.peff.net> References: <20200410194211.GA1363484@coredump.intra.peff.net> MIME-Version: 1.0 Content-Disposition: inline In-Reply-To: <20200410194211.GA1363484@coredump.intra.peff.net> Sender: git-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: git@vger.kernel.org We return the length to a subset of a string using an "int *" out-parameter. This is fine most of the time, as we'd expect config keys to be relatively short, but it could behave oddly if we had a gigantic config key. A more appropriate type is size_t. Let's switch over, which lets our callers use size_t as appropriate (they are bound by our type because they must pass the out-parameter as a pointer). This is mostly just a cleanup to make it clear this code handles long strings correctly. In practice, our config parser already chokes on long key names (because of a similar int/size_t mixup!). When doing an int/size_t conversion, we have to be careful that nobody was trying to assign a negative value to the variable. I manually confirmed that for each case here. They tend to just feed the result to xmemdupz() or similar; in a few cases I adjusted the parameter types for helper functions to make sure the size_t is preserved. Signed-off-by: Jeff King --- archive-tar.c | 4 ++-- builtin/help.c | 2 +- builtin/reflog.c | 2 +- config.c | 4 ++-- config.h | 2 +- convert.c | 2 +- fsck.c | 2 +- ll-merge.c | 2 +- promisor-remote.c | 2 +- remote.c | 2 +- submodule-config.c | 3 ++- userdiff.c | 4 ++-- 12 files changed, 16 insertions(+), 15 deletions(-) diff --git a/archive-tar.c b/archive-tar.c index 5a77701a15..5ceec3684b 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -364,7 +364,7 @@ static struct archiver **tar_filters; static int nr_tar_filters; static int alloc_tar_filters; -static struct archiver *find_tar_filter(const char *name, int len) +static struct archiver *find_tar_filter(const char *name, size_t len) { int i; for (i = 0; i < nr_tar_filters; i++) { @@ -380,7 +380,7 @@ static int tar_filter_config(const char *var, const char *value, void *data) struct archiver *ar; const char *name; const char *type; - int namelen; + size_t namelen; if (parse_config_key(var, "tar", &name, &namelen, &type) < 0 || !name) return 0; diff --git a/builtin/help.c b/builtin/help.c index e5590d7787..c024110531 100644 --- a/builtin/help.c +++ b/builtin/help.c @@ -242,7 +242,7 @@ static int add_man_viewer_cmd(const char *name, static int add_man_viewer_info(const char *var, const char *value) { const char *name, *subkey; - int namelen; + size_t namelen; if (parse_config_key(var, "man", &name, &namelen, &subkey) < 0 || !name) return 0; diff --git a/builtin/reflog.c b/builtin/reflog.c index 81dfd563c0..52ecf6d43c 100644 --- a/builtin/reflog.c +++ b/builtin/reflog.c @@ -459,7 +459,7 @@ static struct reflog_expire_cfg *find_cfg_ent(const char *pattern, size_t len) static int reflog_expire_config(const char *var, const char *value, void *cb) { const char *pattern, *key; - int pattern_len; + size_t pattern_len; timestamp_t expire; int slot; struct reflog_expire_cfg *ent; diff --git a/config.c b/config.c index d17d2bd9dc..ff7998df46 100644 --- a/config.c +++ b/config.c @@ -309,7 +309,7 @@ int git_config_include(const char *var, const char *value, void *data) { struct config_include_data *inc = data; const char *cond, *key; - int cond_len; + size_t cond_len; int ret; /* @@ -3238,7 +3238,7 @@ int config_error_nonbool(const char *var) int parse_config_key(const char *var, const char *section, - const char **subsection, int *subsection_len, + const char **subsection, size_t *subsection_len, const char **key) { const char *dot; diff --git a/config.h b/config.h index 9b3773f778..d57df283b3 100644 --- a/config.h +++ b/config.h @@ -359,7 +359,7 @@ int git_config_include(const char *name, const char *value, void *data); */ int parse_config_key(const char *var, const char *section, - const char **subsection, int *subsection_len, + const char **subsection, size_t *subsection_len, const char **key); /** diff --git a/convert.c b/convert.c index 5aa87d45e3..572449825c 100644 --- a/convert.c +++ b/convert.c @@ -1018,7 +1018,7 @@ static int apply_filter(const char *path, const char *src, size_t len, static int read_convert_config(const char *var, const char *value, void *cb) { const char *key, *name; - int namelen; + size_t namelen; struct convert_driver *drv; /* diff --git a/fsck.c b/fsck.c index 640d813d84..41031e459b 100644 --- a/fsck.c +++ b/fsck.c @@ -920,7 +920,7 @@ static int fsck_gitmodules_fn(const char *var, const char *value, void *vdata) { struct fsck_gitmodules_data *data = vdata; const char *subsection, *key; - int subsection_len; + size_t subsection_len; char *name; if (parse_config_key(var, "submodule", &subsection, &subsection_len, &key) < 0 || diff --git a/ll-merge.c b/ll-merge.c index d65a8971db..1ec0b959e0 100644 --- a/ll-merge.c +++ b/ll-merge.c @@ -247,7 +247,7 @@ static int read_merge_config(const char *var, const char *value, void *cb) { struct ll_merge_driver *fn; const char *key, *name; - int namelen; + size_t namelen; if (!strcmp(var, "merge.default")) return git_config_string(&default_ll_merge, var, value); diff --git a/promisor-remote.c b/promisor-remote.c index 9f338c945f..ff8721fd56 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -101,7 +101,7 @@ static void promisor_remote_move_to_tail(struct promisor_remote *r, static int promisor_remote_config(const char *var, const char *value, void *data) { const char *name; - int namelen; + size_t namelen; const char *subkey; if (!strcmp(var, "core.partialclonefilter")) diff --git a/remote.c b/remote.c index 35bc3f9f2d..534c6426f1 100644 --- a/remote.c +++ b/remote.c @@ -305,7 +305,7 @@ static void read_branches_file(struct remote *remote) static int handle_config(const char *key, const char *value, void *cb) { const char *name; - int namelen; + size_t namelen; const char *subkey; struct remote *remote; struct branch *branch; diff --git a/submodule-config.c b/submodule-config.c index 4d1c92d582..e175dfbc38 100644 --- a/submodule-config.c +++ b/submodule-config.c @@ -225,7 +225,8 @@ static int name_and_item_from_var(const char *var, struct strbuf *name, struct strbuf *item) { const char *subsection, *key; - int subsection_len, parse; + size_t subsection_len; + int parse; parse = parse_config_key(var, "submodule", &subsection, &subsection_len, &key); if (parse < 0 || !subsection) diff --git a/userdiff.c b/userdiff.c index efbe05e5a5..30ab42df8e 100644 --- a/userdiff.c +++ b/userdiff.c @@ -222,7 +222,7 @@ static struct userdiff_driver driver_false = { { NULL, 0 } }; -static struct userdiff_driver *userdiff_find_by_namelen(const char *k, int len) +static struct userdiff_driver *userdiff_find_by_namelen(const char *k, size_t len) { int i; for (i = 0; i < ndrivers; i++) { @@ -266,7 +266,7 @@ int userdiff_config(const char *k, const char *v) { struct userdiff_driver *drv; const char *name, *type; - int namelen; + size_t namelen; if (parse_config_key(k, "diff", &name, &namelen, &type) || !name) return 0; From patchwork Fri Apr 10 19:44:45 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jeff King X-Patchwork-Id: 11483583 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id B8BCC1392 for ; Fri, 10 Apr 2020 19:44:48 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id A34D720801 for ; Fri, 10 Apr 2020 19:44:48 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726690AbgDJTor (ORCPT ); Fri, 10 Apr 2020 15:44:47 -0400 Received: from cloud.peff.net ([104.130.231.41]:40072 "HELO cloud.peff.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with SMTP id S1726142AbgDJTop (ORCPT ); Fri, 10 Apr 2020 15:44:45 -0400 Received: (qmail 7640 invoked by uid 109); 10 Apr 2020 19:44:46 -0000 Received: from Unknown (HELO peff.net) (10.0.1.2) by cloud.peff.net (qpsmtpd/0.94) with SMTP; Fri, 10 Apr 2020 19:44:46 +0000 Authentication-Results: cloud.peff.net; auth=none Received: (qmail 2228 invoked by uid 111); 10 Apr 2020 19:55:19 -0000 Received: from coredump.intra.peff.net (HELO sigill.intra.peff.net) (10.0.0.2) by peff.net (qpsmtpd/0.94) with (TLS_AES_256_GCM_SHA384 encrypted) ESMTPS; Fri, 10 Apr 2020 15:55:19 -0400 Authentication-Results: peff.net; auth=none Date: Fri, 10 Apr 2020 15:44:45 -0400 From: Jeff King To: git@vger.kernel.org Subject: [PATCH 3/6] config: drop useless length variable in write_pair() Message-ID: <20200410194445.GC1363756@coredump.intra.peff.net> References: <20200410194211.GA1363484@coredump.intra.peff.net> MIME-Version: 1.0 Content-Disposition: inline In-Reply-To: <20200410194211.GA1363484@coredump.intra.peff.net> Sender: git-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: git@vger.kernel.org We compute the length of a subset of a string, but then use that length only to feed a "%.*s" printf placeholder for the same string. We can just use "%s" to achieve the same thing. The variable became useless in cb891a5989 (Use a strbuf for building up section header and key/value pair strings., 2007-12-14), which swapped out a write() which _did_ use the length for a strbuf_addf() call. Signed-off-by: Jeff King --- config.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/config.c b/config.c index ff7998df46..7ea588a7e0 100644 --- a/config.c +++ b/config.c @@ -2545,7 +2545,6 @@ static ssize_t write_pair(int fd, const char *key, const char *value, { int i; ssize_t ret; - int length = strlen(key + store->baselen + 1); const char *quote = ""; struct strbuf sb = STRBUF_INIT; @@ -2564,8 +2563,7 @@ static ssize_t write_pair(int fd, const char *key, const char *value, if (i && value[i - 1] == ' ') quote = "\""; - strbuf_addf(&sb, "\t%.*s = %s", - length, key + store->baselen + 1, quote); + strbuf_addf(&sb, "\t%s = %s", key + store->baselen + 1, quote); for (i = 0; value[i]; i++) switch (value[i]) { From patchwork Fri Apr 10 19:46:07 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jeff King X-Patchwork-Id: 11483585 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id A4421174A for ; Fri, 10 Apr 2020 19:46:09 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 8EA7220801 for ; Fri, 10 Apr 2020 19:46:09 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726669AbgDJTqH (ORCPT ); Fri, 10 Apr 2020 15:46:07 -0400 Received: from cloud.peff.net ([104.130.231.41]:40078 "HELO cloud.peff.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with SMTP id S1726142AbgDJTqH (ORCPT ); Fri, 10 Apr 2020 15:46:07 -0400 Received: (qmail 7650 invoked by uid 109); 10 Apr 2020 19:46:08 -0000 Received: from Unknown (HELO peff.net) (10.0.1.2) by cloud.peff.net (qpsmtpd/0.94) with SMTP; Fri, 10 Apr 2020 19:46:08 +0000 Authentication-Results: cloud.peff.net; auth=none Received: (qmail 2245 invoked by uid 111); 10 Apr 2020 19:56:41 -0000 Received: from coredump.intra.peff.net (HELO sigill.intra.peff.net) (10.0.0.2) by peff.net (qpsmtpd/0.94) with (TLS_AES_256_GCM_SHA384 encrypted) ESMTPS; Fri, 10 Apr 2020 15:56:41 -0400 Authentication-Results: peff.net; auth=none Date: Fri, 10 Apr 2020 15:46:07 -0400 From: Jeff King To: git@vger.kernel.org Subject: [PATCH 4/6] git_config_parse_key(): return baselen as size_t Message-ID: <20200410194607.GD1363756@coredump.intra.peff.net> References: <20200410194211.GA1363484@coredump.intra.peff.net> MIME-Version: 1.0 Content-Disposition: inline In-Reply-To: <20200410194211.GA1363484@coredump.intra.peff.net> Sender: git-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: git@vger.kernel.org As with the recent change to parse_config_key(), the best type to return a string length is a size_t, as it won't cause integer truncation for a gigantic key. And as with that change, this is mostly a clarity / hygiene issue for now, as our config parser would choke on such a large key anyway. There are a few ripple effects within the config code, as callers switch to using size_t. I also adjusted a few related variables that iterate over strings. The most unexpected change is that a call to strbuf_addf() had to switch to strbuf_add(). We can't use a size_t with "%.*s", because printf precisions must have type "int" (we could cast, of course, but that would miss the point of using size_t in the first place). Signed-off-by: Jeff King --- config.c | 17 ++++++++++------- config.h | 2 +- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/config.c b/config.c index 7ea588a7e0..c48bb35dc0 100644 --- a/config.c +++ b/config.c @@ -358,12 +358,13 @@ static inline int iskeychar(int c) * * store_key - pointer to char* which will hold a copy of the key with * lowercase section and variable name - * baselen - pointer to int which will hold the length of the + * baselen - pointer to size_t which will hold the length of the * section + subsection part, can be NULL */ -static int git_config_parse_key_1(const char *key, char **store_key, int *baselen_, int quiet) +static int git_config_parse_key_1(const char *key, char **store_key, size_t *baselen_, int quiet) { - int i, dot, baselen; + size_t i, baselen; + int dot; const char *last_dot = strrchr(key, '.'); /* @@ -425,7 +426,7 @@ static int git_config_parse_key_1(const char *key, char **store_key, int *basele return -CONFIG_INVALID_KEY; } -int git_config_parse_key(const char *key, char **store_key, int *baselen) +int git_config_parse_key(const char *key, char **store_key, size_t *baselen) { return git_config_parse_key_1(key, store_key, baselen, 0); } @@ -2383,7 +2384,7 @@ void git_die_config(const char *key, const char *err, ...) */ struct config_store_data { - int baselen; + size_t baselen; char *key; int do_not_match; regex_t *value_regex; @@ -2509,7 +2510,7 @@ static struct strbuf store_create_section(const char *key, const struct config_store_data *store) { const char *dot; - int i; + size_t i; struct strbuf sb = STRBUF_INIT; dot = memchr(key, '.', store->baselen); @@ -2522,7 +2523,9 @@ static struct strbuf store_create_section(const char *key, } strbuf_addstr(&sb, "\"]\n"); } else { - strbuf_addf(&sb, "[%.*s]\n", store->baselen, key); + strbuf_addch(&sb, '['); + strbuf_add(&sb, key, store->baselen); + strbuf_addstr(&sb, "]\n"); } return sb; diff --git a/config.h b/config.h index d57df283b3..060874488f 100644 --- a/config.h +++ b/config.h @@ -254,7 +254,7 @@ int git_config_set_gently(const char *, const char *); */ void git_config_set(const char *, const char *); -int git_config_parse_key(const char *, char **, int *); +int git_config_parse_key(const char *, char **, size_t *); int git_config_key_is_valid(const char *key); int git_config_set_multivar_gently(const char *, const char *, const char *, int); void git_config_set_multivar(const char *, const char *, const char *, int); From patchwork Fri Apr 10 19:47:51 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jeff King X-Patchwork-Id: 11483587 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 07F2B174A for ; Fri, 10 Apr 2020 19:47:54 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id DD3ED2087E for ; Fri, 10 Apr 2020 19:47:53 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726650AbgDJTrw (ORCPT ); Fri, 10 Apr 2020 15:47:52 -0400 Received: from cloud.peff.net ([104.130.231.41]:40084 "HELO cloud.peff.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with SMTP id S1726263AbgDJTrw (ORCPT ); Fri, 10 Apr 2020 15:47:52 -0400 Received: (qmail 7661 invoked by uid 109); 10 Apr 2020 19:47:52 -0000 Received: from Unknown (HELO peff.net) (10.0.1.2) by cloud.peff.net (qpsmtpd/0.94) with SMTP; Fri, 10 Apr 2020 19:47:52 +0000 Authentication-Results: cloud.peff.net; auth=none Received: (qmail 2263 invoked by uid 111); 10 Apr 2020 19:58:25 -0000 Received: from coredump.intra.peff.net (HELO sigill.intra.peff.net) (10.0.0.2) by peff.net (qpsmtpd/0.94) with (TLS_AES_256_GCM_SHA384 encrypted) ESMTPS; Fri, 10 Apr 2020 15:58:25 -0400 Authentication-Results: peff.net; auth=none Date: Fri, 10 Apr 2020 15:47:51 -0400 From: Jeff King To: git@vger.kernel.org Subject: [PATCH 5/6] config: use size_t to store parsed variable baselen Message-ID: <20200410194751.GE1363756@coredump.intra.peff.net> References: <20200410194211.GA1363484@coredump.intra.peff.net> MIME-Version: 1.0 Content-Disposition: inline In-Reply-To: <20200410194211.GA1363484@coredump.intra.peff.net> Sender: git-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: git@vger.kernel.org Most of the config parsing infrastructure is limited in what it can parse only by the size of memory, because it parses character by character, building up strbufs for keys, values, etc. One exception is the "baselen" value we keep in git_parse_source(), which is an int. That stores the length of the section.subsection base, to which we can then append individual key names (by truncating back to the baselen with strbuf_setlen(), and then appending characters for the key name). But because it's an int, if we see an absurdly long section or subsection, we may overflow the integer, wrapping negative. That negative value is then implicitly cast to a size_t when we pass it to strbuf_setlen(), creating a very large value and triggering a BUG. For example: $ { printf '[foo "' perl -e 'print "a" x 2**31' echo '"]bar = value' } >huge $ git config --file=huge --list fatal: BUG: strbuf_setlen() beyond buffer While this is obviously a silly case that we don't care about supporting, it's worth fixing it by switching to a size_t for a few reasons: - we should try to avoid hitting BUG assertions at all - avoiding integer truncation or overflow sets a good example and makes it easier to audit the code for more important issues - the BUG outcome is what happens in _this_ instance, because we wrap negative. If we used a 2**32 subsection, we'd wrap to a small positive value and actually generate wrong output (the subsection of our key would be truncated). Signed-off-by: Jeff King --- config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config.c b/config.c index c48bb35dc0..1c25c94863 100644 --- a/config.c +++ b/config.c @@ -729,7 +729,7 @@ static int git_parse_source(config_fn_t fn, void *data, const struct config_options *opts) { int comment = 0; - int baselen = 0; + size_t baselen = 0; struct strbuf *var = &cf->var; int error_return = 0; char *error_msg = NULL; From patchwork Fri Apr 10 19:50:07 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jeff King X-Patchwork-Id: 11483589 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 879E8174A for ; Fri, 10 Apr 2020 19:50:10 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 673452083E for ; Fri, 10 Apr 2020 19:50:10 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726638AbgDJTuI (ORCPT ); Fri, 10 Apr 2020 15:50:08 -0400 Received: from cloud.peff.net ([104.130.231.41]:40090 "HELO cloud.peff.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with SMTP id S1726203AbgDJTuI (ORCPT ); Fri, 10 Apr 2020 15:50:08 -0400 Received: (qmail 7681 invoked by uid 109); 10 Apr 2020 19:50:09 -0000 Received: from Unknown (HELO peff.net) (10.0.1.2) by cloud.peff.net (qpsmtpd/0.94) with SMTP; Fri, 10 Apr 2020 19:50:09 +0000 Authentication-Results: cloud.peff.net; auth=none Received: (qmail 2281 invoked by uid 111); 10 Apr 2020 20:00:42 -0000 Received: from coredump.intra.peff.net (HELO sigill.intra.peff.net) (10.0.0.2) by peff.net (qpsmtpd/0.94) with (TLS_AES_256_GCM_SHA384 encrypted) ESMTPS; Fri, 10 Apr 2020 16:00:42 -0400 Authentication-Results: peff.net; auth=none Date: Fri, 10 Apr 2020 15:50:07 -0400 From: Jeff King To: git@vger.kernel.org Subject: [PATCH 6/6] config: reject parsing of files over INT_MAX Message-ID: <20200410195007.GF1363756@coredump.intra.peff.net> References: <20200410194211.GA1363484@coredump.intra.peff.net> MIME-Version: 1.0 Content-Disposition: inline In-Reply-To: <20200410194211.GA1363484@coredump.intra.peff.net> Sender: git-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: git@vger.kernel.org While the last few commits have made it possible for the config parser to handle config files up to the limits of size_t, the rest of the code isn't really ready for this. In particular, we often feed the keys as strings into printf "%s" format specifiers. And because the printf family of functions must return an int to specify the result, they complain. Here are two concrete examples (using glibc; we're in uncharted territory here so results may vary): Generate a gigantic .gitmodules file like this: git submodule add /some/other/repo foo { printf '[submodule "' perl -e 'print "a" x 2**31' echo '"]path = foo' } >.gitmodules git commit -m 'huge gitmodule' then try this: $ git show BUG: strbuf.c:397: your vsnprintf is broken (returned -1) The problem is that we end up calling: strbuf_addf(&sb, "submodule.%s.ignore", submodule_name); which relies on vsnprintf(), and that function has no way to report back a size larger than INT_MAX. Taking that same file, try this: git config --file=.gitmodules --list --name-only On my system it produces an output with exactly 4GB of spaces. I confirmed in a debugger that we reach the config callback with the key intact: it's 2147483663 bytes and full of a's. But when we print it with this call: printf("%s%c", key_, term); we just get the spaces. So given the fact that these are insane cases which we have no need to support, the weird behavior from feeding the results to printf even if the code is careful, and the possibility of uncareful code introducing its own integer truncation issues, let's just declare INT_MAX as a limit for parsing config files. We'll enforce the limit in get_next_char(), which generalizes over all sources (blobs, files, etc) and covers any element we're parsing (whether section, key, value, etc). For simplicity, the limit is over the length of the _whole_ file, so you couldn't have two 1GB values in the same file. This should be perfectly fine, as the expected size for config files is generally kilobytes at most. With this patch both cases above will yield: fatal: bad config line 1 in file .gitmodules That's not an amazing error message, but the parser isn't set up to provide specific messages (it just breaks out of the parsing loop and gives that generic error even if see a syntactic issue). And we really wouldn't expect to see this case outside of somebody maliciously probing the limits of the config system. Signed-off-by: Jeff King --- config.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/config.c b/config.c index 1c25c94863..8db9c77098 100644 --- a/config.c +++ b/config.c @@ -37,6 +37,7 @@ struct config_source { enum config_error_action default_error_action; int linenr; int eof; + size_t total_len; struct strbuf value; struct strbuf var; unsigned subsection_case_sensitive : 1; @@ -524,6 +525,19 @@ static int get_next_char(void) c = '\r'; } } + + if (c != EOF && ++cf->total_len > INT_MAX) { + /* + * This is an absurdly long config file; refuse to parse + * further in order to protect downstream code from integer + * overflows. Note that we can't return an error specifically, + * but we can mark EOF and put trash in the return value, + * which will trigger a parse error. + */ + cf->eof = 1; + return 0; + } + if (c == '\n') cf->linenr++; if (c == EOF) { @@ -1540,6 +1554,7 @@ static int do_config_from(struct config_source *top, config_fn_t fn, void *data, top->prev = cf; top->linenr = 1; top->eof = 0; + top->total_len = 0; strbuf_init(&top->value, 1024); strbuf_init(&top->var, 1024); cf = top;