[v2,03/10] hashmap: allow re-use after hashmap_free()

Message ID	a686d0758a2652c91363dcf68da7726093d00c23.1602549650.git.gitgitgadget@gmail.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <SRS0=7dzq=DU=vger.kernel.org=git-owner@kernel.org> Message-Id: <a686d0758a2652c91363dcf68da7726093d00c23.1602549650.git.gitgitgadget@gmail.com> In-Reply-To: <pull.835.v2.git.git.1602549650.gitgitgadget@gmail.com> References: <pull.835.git.git.1598035949.gitgitgadget@gmail.com> <pull.835.v2.git.git.1602549650.gitgitgadget@gmail.com> Date: Tue, 13 Oct 2020 00:40:43 +0000 Subject: [PATCH v2 03/10] hashmap: allow re-use after hashmap_free() Fcc: Sent Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MIME-Version: 1.0 To: git@vger.kernel.org Cc: Jeff King <peff@peff.net>, Elijah Newren <newren@gmail.com>, Elijah Newren <newren@gmail.com> Precedence: bulk From: Elijah Newren <newren@gmail.com>
Series	None \| expand [v2,03/10] hashmap: allow re-use after hashmap_free()

Message ID

a686d0758a2652c91363dcf68da7726093d00c23.1602549650.git.gitgitgadget@gmail.com (mailing list archive)

State

New, archived

Headers

Message-Id: 
 <a686d0758a2652c91363dcf68da7726093d00c23.1602549650.git.gitgitgadget@gmail.com>
In-Reply-To: <pull.835.v2.git.git.1602549650.gitgitgadget@gmail.com>
References: <pull.835.git.git.1598035949.gitgitgadget@gmail.com>
        <pull.835.v2.git.git.1602549650.gitgitgadget@gmail.com>
Date: Tue, 13 Oct 2020 00:40:43 +0000
Subject: [PATCH v2 03/10] hashmap: allow re-use after hashmap_free()
Fcc: Sent
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
MIME-Version: 1.0
To: git@vger.kernel.org
Cc: Jeff King <peff@peff.net>, Elijah Newren <newren@gmail.com>,
        Elijah Newren <newren@gmail.com>
Precedence: bulk
From: Elijah Newren <newren@gmail.com>

Series

None | expand

Commit Message

Elijah Newren Oct. 13, 2020, 12:40 a.m. UTC

From: Elijah Newren <newren@gmail.com>

Previously, once map->table had been freed, any calls to hashmap_put(),
hashmap_get(), or hashmap_remove() would cause a NULL pointer
dereference (since hashmap_free_() also zeros the memory; without that
zeroing, calling these functions would cause a use-after-free problem).

Modify these functions to check for a NULL table and automatically
allocate as needed.

I also thought about creating a HASHMAP_INIT macro to allow initializing
hashmaps on the stack without calling hashmap_init(), but virtually all
uses of hashmap specify a usecase-specific equals_function which defeats
the utility of such a macro.

Signed-off-by: Elijah Newren <newren@gmail.com>
---
 hashmap.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

Comments

Jeff King Oct. 30, 2020, 1:35 p.m. UTC | #1

On Tue, Oct 13, 2020 at 12:40:43AM +0000, Elijah Newren via GitGitGadget wrote:

> Previously, once map->table had been freed, any calls to hashmap_put(),
> hashmap_get(), or hashmap_remove() would cause a NULL pointer
> dereference (since hashmap_free_() also zeros the memory; without that
> zeroing, calling these functions would cause a use-after-free problem).
> 
> Modify these functions to check for a NULL table and automatically
> allocate as needed.

Unsurprisingly, I like this direction. The code looks correct to me,
though I think you could reduce duplication slightly by checking
map->table in find_entry_ptr(). That covers both hashmap_get() and
hashmap_remove(). But I'm happy either way.

> I also thought about creating a HASHMAP_INIT macro to allow initializing
> hashmaps on the stack without calling hashmap_init(), but virtually all
> uses of hashmap specify a usecase-specific equals_function which defeats
> the utility of such a macro.

This part I disagree with. If we did:

  #define HASHMAP_INIT(fn, data) = { .cmpfn = cmpfn, cmpfn_data = data }

then many callers could avoid handling the lazy-init themselves. E.g.:

 attr.c | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/attr.c b/attr.c
index a826b2ef1f..55a2783f1b 100644
--- a/attr.c
+++ b/attr.c
@@ -57,7 +57,9 @@ static inline void hashmap_unlock(struct attr_hashmap *map)
  * is a singleton object which is shared between threads.
  * Access to this dictionary must be surrounded with a mutex.
  */
-static struct attr_hashmap g_attr_hashmap;
+static struct attr_hashmap g_attr_hashmap = {
+	HASHMAP_INIT(attr_hash_entry_cmp, NULL)
+};
 
 /* The container for objects stored in "struct attr_hashmap" */
 struct attr_hash_entry {
@@ -80,12 +82,6 @@ static int attr_hash_entry_cmp(const void *unused_cmp_data,
 	return (a->keylen != b->keylen) || strncmp(a->key, b->key, a->keylen);
 }
 
-/* Initialize an 'attr_hashmap' object */
-static void attr_hashmap_init(struct attr_hashmap *map)
-{
-	hashmap_init(&map->map, attr_hash_entry_cmp, NULL, 0);
-}
-
 /*
  * Retrieve the 'value' stored in a hashmap given the provided 'key'.
  * If there is no matching entry, return NULL.
@@ -96,9 +92,6 @@ static void *attr_hashmap_get(struct attr_hashmap *map,
 	struct attr_hash_entry k;
 	struct attr_hash_entry *e;
 
-	if (!map->map.tablesize)
-		attr_hashmap_init(map);
-
 	hashmap_entry_init(&k.ent, memhash(key, keylen));
 	k.key = key;
 	k.keylen = keylen;
@@ -114,9 +107,6 @@ static void attr_hashmap_add(struct attr_hashmap *map,
 {
 	struct attr_hash_entry *e;
 
-	if (!map->map.tablesize)
-		attr_hashmap_init(map);
-
 	e = xmalloc(sizeof(struct attr_hash_entry));
 	hashmap_entry_init(&e->ent, memhash(key, keylen));
 	e->key = key;

Elijah Newren Oct. 30, 2020, 3:37 p.m. UTC | #2

On Fri, Oct 30, 2020 at 6:35 AM Jeff King <peff@peff.net> wrote:
>
> On Tue, Oct 13, 2020 at 12:40:43AM +0000, Elijah Newren via GitGitGadget wrote:
>
> > Previously, once map->table had been freed, any calls to hashmap_put(),
> > hashmap_get(), or hashmap_remove() would cause a NULL pointer
> > dereference (since hashmap_free_() also zeros the memory; without that
> > zeroing, calling these functions would cause a use-after-free problem).
> >
> > Modify these functions to check for a NULL table and automatically
> > allocate as needed.
>
> Unsurprisingly, I like this direction. The code looks correct to me,
> though I think you could reduce duplication slightly by checking
> map->table in find_entry_ptr(). That covers both hashmap_get() and
> hashmap_remove(). But I'm happy either way.
>
> > I also thought about creating a HASHMAP_INIT macro to allow initializing
> > hashmaps on the stack without calling hashmap_init(), but virtually all
> > uses of hashmap specify a usecase-specific equals_function which defeats
> > the utility of such a macro.
>
> This part I disagree with. If we did:
>
>   #define HASHMAP_INIT(fn, data) = { .cmpfn = cmpfn, cmpfn_data = data }
>
> then many callers could avoid handling the lazy-init themselves. E.g.:

Ah, gotcha.  That makes sense to me.  Given that 43 out of 47 callers
of hashmap_init use cmpfn_data = NULL, should I shorten it to just one
parameter for the macro, and let the four special cases keep calling
hashmap_init() to specify a non-NULL cmpfn_data?

>
>  attr.c | 16 +++-------------
>  1 file changed, 3 insertions(+), 13 deletions(-)
>
> diff --git a/attr.c b/attr.c
> index a826b2ef1f..55a2783f1b 100644
> --- a/attr.c
> +++ b/attr.c
> @@ -57,7 +57,9 @@ static inline void hashmap_unlock(struct attr_hashmap *map)
>   * is a singleton object which is shared between threads.
>   * Access to this dictionary must be surrounded with a mutex.
>   */
> -static struct attr_hashmap g_attr_hashmap;
> +static struct attr_hashmap g_attr_hashmap = {
> +       HASHMAP_INIT(attr_hash_entry_cmp, NULL)
> +};
>
>  /* The container for objects stored in "struct attr_hashmap" */
>  struct attr_hash_entry {
> @@ -80,12 +82,6 @@ static int attr_hash_entry_cmp(const void *unused_cmp_data,
>         return (a->keylen != b->keylen) || strncmp(a->key, b->key, a->keylen);
>  }
>
> -/* Initialize an 'attr_hashmap' object */
> -static void attr_hashmap_init(struct attr_hashmap *map)
> -{
> -       hashmap_init(&map->map, attr_hash_entry_cmp, NULL, 0);
> -}
> -
>  /*
>   * Retrieve the 'value' stored in a hashmap given the provided 'key'.
>   * If there is no matching entry, return NULL.
> @@ -96,9 +92,6 @@ static void *attr_hashmap_get(struct attr_hashmap *map,
>         struct attr_hash_entry k;
>         struct attr_hash_entry *e;
>
> -       if (!map->map.tablesize)
> -               attr_hashmap_init(map);
> -
>         hashmap_entry_init(&k.ent, memhash(key, keylen));
>         k.key = key;
>         k.keylen = keylen;
> @@ -114,9 +107,6 @@ static void attr_hashmap_add(struct attr_hashmap *map,
>  {
>         struct attr_hash_entry *e;
>
> -       if (!map->map.tablesize)
> -               attr_hashmap_init(map);
> -
>         e = xmalloc(sizeof(struct attr_hash_entry));
>         hashmap_entry_init(&e->ent, memhash(key, keylen));
>         e->key = key;

Jeff King Nov. 3, 2020, 4:08 p.m. UTC | #3

On Fri, Oct 30, 2020 at 08:37:42AM -0700, Elijah Newren wrote:

> > This part I disagree with. If we did:
> >
> >   #define HASHMAP_INIT(fn, data) = { .cmpfn = cmpfn, cmpfn_data = data }
> >
> > then many callers could avoid handling the lazy-init themselves. E.g.:
> 
> Ah, gotcha.  That makes sense to me.  Given that 43 out of 47 callers
> of hashmap_init use cmpfn_data = NULL, should I shorten it to just one
> parameter for the macro, and let the four special cases keep calling
> hashmap_init() to specify a non-NULL cmpfn_data?

I'd be fine with it either way. I actually wrote it without the data
parameter at first, then changed my mine and added it in. ;)

You could also do:

  #define HASHMAP_INIT_DATA(fn, data) { .cmpfn = cmpfn, cmpfn_data = data }
  #define HASHMAP_INIT(fn) HASHMAP_INIT_DATA(fn, NULL)

if you want to keep most callers simple.

-Peff

Elijah Newren Nov. 3, 2020, 4:16 p.m. UTC | #4

On Tue, Nov 3, 2020 at 8:08 AM Jeff King <peff@peff.net> wrote:
>
> On Fri, Oct 30, 2020 at 08:37:42AM -0700, Elijah Newren wrote:
>
> > > This part I disagree with. If we did:
> > >
> > >   #define HASHMAP_INIT(fn, data) = { .cmpfn = cmpfn, cmpfn_data = data }
> > >
> > > then many callers could avoid handling the lazy-init themselves. E.g.:
> >
> > Ah, gotcha.  That makes sense to me.  Given that 43 out of 47 callers
> > of hashmap_init use cmpfn_data = NULL, should I shorten it to just one
> > parameter for the macro, and let the four special cases keep calling
> > hashmap_init() to specify a non-NULL cmpfn_data?
>
> I'd be fine with it either way. I actually wrote it without the data
> parameter at first, then changed my mine and added it in. ;)
>
> You could also do:
>
>   #define HASHMAP_INIT_DATA(fn, data) { .cmpfn = cmpfn, cmpfn_data = data }
>   #define HASHMAP_INIT(fn) HASHMAP_INIT_DATA(fn, NULL)
>
> if you want to keep most callers simple.

I ended up going with your HASHMAP_INIT(fn, data) in v3 that I
submitted yesterday (except that you have a stray '=', are missing a
'.' in front of cmpfn_data, and you'll trigger BUG()s if you don't
also add .do_count_items = 1, but those are all minor fixups).  In the
future, if we determine we want/need the extra simplicity then we can
always convert to this newer suggestion.  I don't think it's that big
a deal either way.

diff --git a/hashmap.c b/hashmap.c
index e44d8a3e85..bb7c9979b8 100644
--- a/hashmap.c
+++ b/hashmap.c
@@ -114,6 +114,7 @@  int hashmap_bucket(const struct hashmap *map, unsigned int hash)
 
 static void rehash(struct hashmap *map, unsigned int newsize)
 {
+	/* map->table MUST NOT be NULL when this function is called */
 	unsigned int i, oldsize = map->tablesize;
 	struct hashmap_entry **oldtable = map->table;
 
@@ -134,6 +135,7 @@  static void rehash(struct hashmap *map, unsigned int newsize)
 static inline struct hashmap_entry **find_entry_ptr(const struct hashmap *map,
 		const struct hashmap_entry *key, const void *keydata)
 {
+	/* map->table MUST NOT be NULL when this function is called */
 	struct hashmap_entry **e = &map->table[bucket(map, key)];
 	while (*e && !entry_equals(map, *e, key, keydata))
 		e = &(*e)->next;
@@ -196,6 +198,8 @@  struct hashmap_entry *hashmap_get(const struct hashmap *map,
 				const struct hashmap_entry *key,
 				const void *keydata)
 {
+	if (!map->table)
+		return NULL;
 	return *find_entry_ptr(map, key, keydata);
 }
 
@@ -211,8 +215,12 @@  struct hashmap_entry *hashmap_get_next(const struct hashmap *map,
 
 void hashmap_add(struct hashmap *map, struct hashmap_entry *entry)
 {
-	unsigned int b = bucket(map, entry);
+	unsigned int b;
+
+	if (!map->table)
+		alloc_table(map, HASHMAP_INITIAL_SIZE);
 
+	b = bucket(map, entry);
 	/* add entry */
 	entry->next = map->table[b];
 	map->table[b] = entry;
@@ -230,7 +238,11 @@  struct hashmap_entry *hashmap_remove(struct hashmap *map,
 				     const void *keydata)
 {
 	struct hashmap_entry *old;
-	struct hashmap_entry **e = find_entry_ptr(map, key, keydata);
+	struct hashmap_entry **e;
+
+	if (!map->table)
+		return NULL;
+	e = find_entry_ptr(map, key, keydata);
 	if (!*e)
 		return NULL;

[v2,03/10] hashmap: allow re-use after hashmap_free()

Commit Message

Comments

Patch