Message ID | a686d0758a2652c91363dcf68da7726093d00c23.1602549650.git.gitgitgadget@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | None | expand |
On Tue, Oct 13, 2020 at 12:40:43AM +0000, Elijah Newren via GitGitGadget wrote: > Previously, once map->table had been freed, any calls to hashmap_put(), > hashmap_get(), or hashmap_remove() would cause a NULL pointer > dereference (since hashmap_free_() also zeros the memory; without that > zeroing, calling these functions would cause a use-after-free problem). > > Modify these functions to check for a NULL table and automatically > allocate as needed. Unsurprisingly, I like this direction. The code looks correct to me, though I think you could reduce duplication slightly by checking map->table in find_entry_ptr(). That covers both hashmap_get() and hashmap_remove(). But I'm happy either way. > I also thought about creating a HASHMAP_INIT macro to allow initializing > hashmaps on the stack without calling hashmap_init(), but virtually all > uses of hashmap specify a usecase-specific equals_function which defeats > the utility of such a macro. This part I disagree with. If we did: #define HASHMAP_INIT(fn, data) = { .cmpfn = cmpfn, cmpfn_data = data } then many callers could avoid handling the lazy-init themselves. E.g.: attr.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/attr.c b/attr.c index a826b2ef1f..55a2783f1b 100644 --- a/attr.c +++ b/attr.c @@ -57,7 +57,9 @@ static inline void hashmap_unlock(struct attr_hashmap *map) * is a singleton object which is shared between threads. * Access to this dictionary must be surrounded with a mutex. */ -static struct attr_hashmap g_attr_hashmap; +static struct attr_hashmap g_attr_hashmap = { + HASHMAP_INIT(attr_hash_entry_cmp, NULL) +}; /* The container for objects stored in "struct attr_hashmap" */ struct attr_hash_entry { @@ -80,12 +82,6 @@ static int attr_hash_entry_cmp(const void *unused_cmp_data, return (a->keylen != b->keylen) || strncmp(a->key, b->key, a->keylen); } -/* Initialize an 'attr_hashmap' object */ -static void attr_hashmap_init(struct attr_hashmap *map) -{ - hashmap_init(&map->map, attr_hash_entry_cmp, NULL, 0); -} - /* * Retrieve the 'value' stored in a hashmap given the provided 'key'. * If there is no matching entry, return NULL. @@ -96,9 +92,6 @@ static void *attr_hashmap_get(struct attr_hashmap *map, struct attr_hash_entry k; struct attr_hash_entry *e; - if (!map->map.tablesize) - attr_hashmap_init(map); - hashmap_entry_init(&k.ent, memhash(key, keylen)); k.key = key; k.keylen = keylen; @@ -114,9 +107,6 @@ static void attr_hashmap_add(struct attr_hashmap *map, { struct attr_hash_entry *e; - if (!map->map.tablesize) - attr_hashmap_init(map); - e = xmalloc(sizeof(struct attr_hash_entry)); hashmap_entry_init(&e->ent, memhash(key, keylen)); e->key = key;
On Fri, Oct 30, 2020 at 6:35 AM Jeff King <peff@peff.net> wrote: > > On Tue, Oct 13, 2020 at 12:40:43AM +0000, Elijah Newren via GitGitGadget wrote: > > > Previously, once map->table had been freed, any calls to hashmap_put(), > > hashmap_get(), or hashmap_remove() would cause a NULL pointer > > dereference (since hashmap_free_() also zeros the memory; without that > > zeroing, calling these functions would cause a use-after-free problem). > > > > Modify these functions to check for a NULL table and automatically > > allocate as needed. > > Unsurprisingly, I like this direction. The code looks correct to me, > though I think you could reduce duplication slightly by checking > map->table in find_entry_ptr(). That covers both hashmap_get() and > hashmap_remove(). But I'm happy either way. > > > I also thought about creating a HASHMAP_INIT macro to allow initializing > > hashmaps on the stack without calling hashmap_init(), but virtually all > > uses of hashmap specify a usecase-specific equals_function which defeats > > the utility of such a macro. > > This part I disagree with. If we did: > > #define HASHMAP_INIT(fn, data) = { .cmpfn = cmpfn, cmpfn_data = data } > > then many callers could avoid handling the lazy-init themselves. E.g.: Ah, gotcha. That makes sense to me. Given that 43 out of 47 callers of hashmap_init use cmpfn_data = NULL, should I shorten it to just one parameter for the macro, and let the four special cases keep calling hashmap_init() to specify a non-NULL cmpfn_data? > > attr.c | 16 +++------------- > 1 file changed, 3 insertions(+), 13 deletions(-) > > diff --git a/attr.c b/attr.c > index a826b2ef1f..55a2783f1b 100644 > --- a/attr.c > +++ b/attr.c > @@ -57,7 +57,9 @@ static inline void hashmap_unlock(struct attr_hashmap *map) > * is a singleton object which is shared between threads. > * Access to this dictionary must be surrounded with a mutex. > */ > -static struct attr_hashmap g_attr_hashmap; > +static struct attr_hashmap g_attr_hashmap = { > + HASHMAP_INIT(attr_hash_entry_cmp, NULL) > +}; > > /* The container for objects stored in "struct attr_hashmap" */ > struct attr_hash_entry { > @@ -80,12 +82,6 @@ static int attr_hash_entry_cmp(const void *unused_cmp_data, > return (a->keylen != b->keylen) || strncmp(a->key, b->key, a->keylen); > } > > -/* Initialize an 'attr_hashmap' object */ > -static void attr_hashmap_init(struct attr_hashmap *map) > -{ > - hashmap_init(&map->map, attr_hash_entry_cmp, NULL, 0); > -} > - > /* > * Retrieve the 'value' stored in a hashmap given the provided 'key'. > * If there is no matching entry, return NULL. > @@ -96,9 +92,6 @@ static void *attr_hashmap_get(struct attr_hashmap *map, > struct attr_hash_entry k; > struct attr_hash_entry *e; > > - if (!map->map.tablesize) > - attr_hashmap_init(map); > - > hashmap_entry_init(&k.ent, memhash(key, keylen)); > k.key = key; > k.keylen = keylen; > @@ -114,9 +107,6 @@ static void attr_hashmap_add(struct attr_hashmap *map, > { > struct attr_hash_entry *e; > > - if (!map->map.tablesize) > - attr_hashmap_init(map); > - > e = xmalloc(sizeof(struct attr_hash_entry)); > hashmap_entry_init(&e->ent, memhash(key, keylen)); > e->key = key;
On Fri, Oct 30, 2020 at 08:37:42AM -0700, Elijah Newren wrote: > > This part I disagree with. If we did: > > > > #define HASHMAP_INIT(fn, data) = { .cmpfn = cmpfn, cmpfn_data = data } > > > > then many callers could avoid handling the lazy-init themselves. E.g.: > > Ah, gotcha. That makes sense to me. Given that 43 out of 47 callers > of hashmap_init use cmpfn_data = NULL, should I shorten it to just one > parameter for the macro, and let the four special cases keep calling > hashmap_init() to specify a non-NULL cmpfn_data? I'd be fine with it either way. I actually wrote it without the data parameter at first, then changed my mine and added it in. ;) You could also do: #define HASHMAP_INIT_DATA(fn, data) { .cmpfn = cmpfn, cmpfn_data = data } #define HASHMAP_INIT(fn) HASHMAP_INIT_DATA(fn, NULL) if you want to keep most callers simple. -Peff
On Tue, Nov 3, 2020 at 8:08 AM Jeff King <peff@peff.net> wrote: > > On Fri, Oct 30, 2020 at 08:37:42AM -0700, Elijah Newren wrote: > > > > This part I disagree with. If we did: > > > > > > #define HASHMAP_INIT(fn, data) = { .cmpfn = cmpfn, cmpfn_data = data } > > > > > > then many callers could avoid handling the lazy-init themselves. E.g.: > > > > Ah, gotcha. That makes sense to me. Given that 43 out of 47 callers > > of hashmap_init use cmpfn_data = NULL, should I shorten it to just one > > parameter for the macro, and let the four special cases keep calling > > hashmap_init() to specify a non-NULL cmpfn_data? > > I'd be fine with it either way. I actually wrote it without the data > parameter at first, then changed my mine and added it in. ;) > > You could also do: > > #define HASHMAP_INIT_DATA(fn, data) { .cmpfn = cmpfn, cmpfn_data = data } > #define HASHMAP_INIT(fn) HASHMAP_INIT_DATA(fn, NULL) > > if you want to keep most callers simple. I ended up going with your HASHMAP_INIT(fn, data) in v3 that I submitted yesterday (except that you have a stray '=', are missing a '.' in front of cmpfn_data, and you'll trigger BUG()s if you don't also add .do_count_items = 1, but those are all minor fixups). In the future, if we determine we want/need the extra simplicity then we can always convert to this newer suggestion. I don't think it's that big a deal either way.
diff --git a/hashmap.c b/hashmap.c index e44d8a3e85..bb7c9979b8 100644 --- a/hashmap.c +++ b/hashmap.c @@ -114,6 +114,7 @@ int hashmap_bucket(const struct hashmap *map, unsigned int hash) static void rehash(struct hashmap *map, unsigned int newsize) { + /* map->table MUST NOT be NULL when this function is called */ unsigned int i, oldsize = map->tablesize; struct hashmap_entry **oldtable = map->table; @@ -134,6 +135,7 @@ static void rehash(struct hashmap *map, unsigned int newsize) static inline struct hashmap_entry **find_entry_ptr(const struct hashmap *map, const struct hashmap_entry *key, const void *keydata) { + /* map->table MUST NOT be NULL when this function is called */ struct hashmap_entry **e = &map->table[bucket(map, key)]; while (*e && !entry_equals(map, *e, key, keydata)) e = &(*e)->next; @@ -196,6 +198,8 @@ struct hashmap_entry *hashmap_get(const struct hashmap *map, const struct hashmap_entry *key, const void *keydata) { + if (!map->table) + return NULL; return *find_entry_ptr(map, key, keydata); } @@ -211,8 +215,12 @@ struct hashmap_entry *hashmap_get_next(const struct hashmap *map, void hashmap_add(struct hashmap *map, struct hashmap_entry *entry) { - unsigned int b = bucket(map, entry); + unsigned int b; + + if (!map->table) + alloc_table(map, HASHMAP_INITIAL_SIZE); + b = bucket(map, entry); /* add entry */ entry->next = map->table[b]; map->table[b] = entry; @@ -230,7 +238,11 @@ struct hashmap_entry *hashmap_remove(struct hashmap *map, const void *keydata) { struct hashmap_entry *old; - struct hashmap_entry **e = find_entry_ptr(map, key, keydata); + struct hashmap_entry **e; + + if (!map->table) + return NULL; + e = find_entry_ptr(map, key, keydata); if (!*e) return NULL;