Message ID:    20241022134343.3354111-2-gnaaman@drivenets.com
State:         Changes Requested
Delegated to:  Netdev Maintainers
Series:        neighbour: Improve neigh_flush_dev performance
On Tue, Oct 22, 2024 at 3:44 PM Gilad Naaman <gnaaman@drivenets.com> wrote:
>
> Add a doubly-linked node to neighbours, so that they
> can be deleted without iterating the entire bucket they're in.
>
> Signed-off-by: Gilad Naaman <gnaaman@drivenets.com>
> ---
>  include/net/neighbour.h |  2 ++
>  net/core/neighbour.c    | 40 ++++++++++++++++++++++++++++++++++++++--
>  2 files changed, 40 insertions(+), 2 deletions(-)
>
> diff --git a/include/net/neighbour.h b/include/net/neighbour.h
> index 3887ed9e5026..0402447854c7 100644
> --- a/include/net/neighbour.h
> +++ b/include/net/neighbour.h
> @@ -136,6 +136,7 @@ struct neigh_statistics {
>
>  struct neighbour {
>  	struct neighbour __rcu	*next;
> +	struct hlist_node	hash;
>  	struct neigh_table	*tbl;
>  	struct neigh_parms	*parms;
>  	unsigned long		confirmed;
> @@ -191,6 +192,7 @@ struct pneigh_entry {
>
>  struct neigh_hash_table {
>  	struct neighbour __rcu	**hash_buckets;
> +	struct hlist_head	*hash_heads;
>  	unsigned int		hash_shift;
>  	__u32			hash_rnd[NEIGH_NUM_HASH_RND];
>  	struct rcu_head		rcu;
> diff --git a/net/core/neighbour.c b/net/core/neighbour.c
> index 395ae1626eef..7df4cfc0ac9a 100644
> --- a/net/core/neighbour.c
> +++ b/net/core/neighbour.c
> @@ -217,6 +217,7 @@ static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
>  		neigh = rcu_dereference_protected(n->next,
>  						  lockdep_is_held(&tbl->lock));
>  		rcu_assign_pointer(*np, neigh);
> +		hlist_del_rcu(&n->hash);
>  		neigh_mark_dead(n);
>  		retval = true;
>  	}
> @@ -403,6 +404,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
>  			rcu_assign_pointer(*np,
>  				   rcu_dereference_protected(n->next,
>  						lockdep_is_held(&tbl->lock)));
> +			hlist_del_rcu(&n->hash);
>  			write_lock(&n->lock);
>  			neigh_del_timer(n);
>  			neigh_mark_dead(n);
> @@ -530,27 +532,47 @@ static void neigh_get_hash_rnd(u32 *x)
>
>  static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
>  {
> +	size_t hash_heads_size = (1 << shift) * sizeof(struct hlist_head);
>  	size_t size = (1 << shift) * sizeof(struct neighbour *);
> -	struct neigh_hash_table *ret;
>  	struct neighbour __rcu **buckets;
> +	struct hlist_head *hash_heads;
> +	struct neigh_hash_table *ret;
>  	int i;
>
> +	hash_heads = NULL;
> +
>  	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
>  	if (!ret)
>  		return NULL;
>  	if (size <= PAGE_SIZE) {
>  		buckets = kzalloc(size, GFP_ATOMIC);
> +
> +		if (buckets) {
> +			hash_heads = kzalloc(hash_heads_size, GFP_ATOMIC);
> +			if (!hash_heads)
> +				kfree(buckets);
> +		}

Oh well, I strongly suggest we first switch to kvzalloc() and
kvfree(), instead of copy/pasting old workarounds...
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 395ae1626eef2f22f5b81051671371ed67eb5943..a44511218a600ff55513a7255e90641cd7c2e983 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -538,14 +538,7 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
 	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
 	if (!ret)
 		return NULL;
-	if (size <= PAGE_SIZE) {
-		buckets = kzalloc(size, GFP_ATOMIC);
-	} else {
-		buckets = (struct neighbour __rcu **)
-			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
-		kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
-	}
+	buckets = kvzalloc(size, GFP_ATOMIC);
 	if (!buckets) {
 		kfree(ret);
 		return NULL;
@@ -562,15 +555,8 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
 	struct neigh_hash_table *nht = container_of(head,
 						    struct neigh_hash_table,
 						    rcu);
-	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
-	struct neighbour __rcu **buckets = nht->hash_buckets;
 
-	if (size <= PAGE_SIZE) {
-		kfree(buckets);
-	} else {
-		kmemleak_free(buckets);
-		free_pages((unsigned long)buckets, get_order(size));
-	}
+	kvfree(nht->hash_buckets);
 	kfree(nht);
 }
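For readers following the suggestion: kvzalloc() attempts a kmalloc() first and falls back to vmalloc() for larger requests (when the gfp flags permit blocking), while kvfree() releases memory obtained from either allocator. That is why both arms of the old size check collapse into single calls, as in this condensed sketch of the pattern from the diff above (not itself part of the diff):

	/* Allocation side: one call covers both the small and large case. */
	buckets = kvzalloc(size, GFP_ATOMIC);
	if (!buckets) {
		kfree(ret);
		return NULL;
	}

	/* Free side: kvfree() works no matter which allocator was used. */
	kvfree(nht->hash_buckets);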
> Oh well, I strongly suggest we first switch to kvzalloc() and
> kvfree(), instead of copy/pasting old workarounds...

Hey, thank you for going over this.
I see you've posted a patch that changes this; I'll rebase and absorb it.
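For context on the data structure the series switches to: each hlist_node carries, in addition to next, a pprev pointer aimed at the previous link's next field, so a node can be unlinked without walking its bucket to find its predecessor. A minimal sketch of the unlink, mirroring what the kernel's __hlist_del() does (the sketch_ name is illustrative):

	struct hlist_node {
		struct hlist_node *next, **pprev;
	};

	static void sketch_hlist_del(struct hlist_node *n)
	{
		struct hlist_node *next = n->next;
		struct hlist_node **pprev = n->pprev;

		*pprev = next;			/* predecessor's link now skips n */
		if (next)
			next->pprev = pprev;	/* fix successor's back-link */
	}

This is exactly what the open-coded singly-linked chain cannot do: neigh_del() and neigh_flush_dev() must be handed np, a pointer to the previous link found by iterating the bucket, before they can unlink anything.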
Add a doubly-linked node to neighbours, so that they
can be deleted without iterating the entire bucket they're in.

Signed-off-by: Gilad Naaman <gnaaman@drivenets.com>
---
 include/net/neighbour.h |  2 ++
 net/core/neighbour.c    | 40 ++++++++++++++++++++++++++++++++++++++--
 2 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 3887ed9e5026..0402447854c7 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -136,6 +136,7 @@ struct neigh_statistics {
 
 struct neighbour {
 	struct neighbour __rcu	*next;
+	struct hlist_node	hash;
 	struct neigh_table	*tbl;
 	struct neigh_parms	*parms;
 	unsigned long		confirmed;
@@ -191,6 +192,7 @@ struct pneigh_entry {
 
 struct neigh_hash_table {
 	struct neighbour __rcu	**hash_buckets;
+	struct hlist_head	*hash_heads;
 	unsigned int		hash_shift;
 	__u32			hash_rnd[NEIGH_NUM_HASH_RND];
 	struct rcu_head		rcu;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 395ae1626eef..7df4cfc0ac9a 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -217,6 +217,7 @@ static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
 		neigh = rcu_dereference_protected(n->next,
 						  lockdep_is_held(&tbl->lock));
 		rcu_assign_pointer(*np, neigh);
+		hlist_del_rcu(&n->hash);
 		neigh_mark_dead(n);
 		retval = true;
 	}
@@ -403,6 +404,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
 			rcu_assign_pointer(*np,
 				   rcu_dereference_protected(n->next,
 						lockdep_is_held(&tbl->lock)));
+			hlist_del_rcu(&n->hash);
 			write_lock(&n->lock);
 			neigh_del_timer(n);
 			neigh_mark_dead(n);
@@ -530,27 +532,47 @@ static void neigh_get_hash_rnd(u32 *x)
 
 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
 {
+	size_t hash_heads_size = (1 << shift) * sizeof(struct hlist_head);
 	size_t size = (1 << shift) * sizeof(struct neighbour *);
-	struct neigh_hash_table *ret;
 	struct neighbour __rcu **buckets;
+	struct hlist_head *hash_heads;
+	struct neigh_hash_table *ret;
 	int i;
 
+	hash_heads = NULL;
+
 	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
 	if (!ret)
 		return NULL;
 	if (size <= PAGE_SIZE) {
 		buckets = kzalloc(size, GFP_ATOMIC);
+
+		if (buckets) {
+			hash_heads = kzalloc(hash_heads_size, GFP_ATOMIC);
+			if (!hash_heads)
+				kfree(buckets);
+		}
 	} else {
 		buckets = (struct neighbour __rcu **)
 			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
 		kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
+
+		if (buckets) {
+			hash_heads = (struct hlist_head *)
+				__get_free_pages(GFP_ATOMIC | __GFP_ZERO,
						 get_order(hash_heads_size));
+			kmemleak_alloc(hash_heads, hash_heads_size, 1, GFP_ATOMIC);
+			if (!hash_heads)
+				free_pages((unsigned long)buckets, get_order(size));
+		}
 	}
-	if (!buckets) {
+	if (!buckets || !hash_heads) {
 		kfree(ret);
 		return NULL;
 	}
 	ret->hash_buckets = buckets;
+	ret->hash_heads = hash_heads;
 	ret->hash_shift = shift;
 	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
 		neigh_get_hash_rnd(&ret->hash_rnd[i]);
@@ -562,8 +584,10 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
 	struct neigh_hash_table *nht = container_of(head,
 						    struct neigh_hash_table,
 						    rcu);
+	size_t hash_heads_size = (1 << nht->hash_shift) * sizeof(struct hlist_head);
 	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
 	struct neighbour __rcu **buckets = nht->hash_buckets;
+	struct hlist_head *hash_heads = nht->hash_heads;
 
 	if (size <= PAGE_SIZE) {
 		kfree(buckets);
@@ -571,6 +595,13 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
 		kmemleak_free(buckets);
 		free_pages((unsigned long)buckets, get_order(size));
 	}
+
+	if (hash_heads_size < PAGE_SIZE) {
+		kfree(hash_heads);
+	} else {
+		kmemleak_free(hash_heads);
+		free_pages((unsigned long)hash_heads, get_order(hash_heads_size));
+	}
 	kfree(nht);
 }
 
@@ -607,6 +638,8 @@ static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
 					new_nht->hash_buckets[hash],
 					lockdep_is_held(&tbl->lock)));
 			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
+			hlist_del_rcu(&n->hash);
+			hlist_add_head_rcu(&n->hash, &new_nht->hash_heads[hash]);
 		}
 	}
 
@@ -717,6 +750,7 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey,
 		       rcu_dereference_protected(nht->hash_buckets[hash_val],
 						 lockdep_is_held(&tbl->lock)));
 	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
+	hlist_add_head_rcu(&n->hash, &nht->hash_heads[hash_val]);
 	write_unlock_bh(&tbl->lock);
 	neigh_dbg(2, "neigh %p is created\n", n);
 	rc = n;
@@ -1002,6 +1036,7 @@ static void neigh_periodic_work(struct work_struct *work)
 			rcu_assign_pointer(*np,
 				rcu_dereference_protected(n->next,
 					lockdep_is_held(&tbl->lock)));
+			hlist_del_rcu(&n->hash);
 			neigh_mark_dead(n);
 			write_unlock(&n->lock);
 			neigh_cleanup_and_release(n);
@@ -3131,6 +3166,7 @@ void __neigh_for_each_release(struct neigh_table *tbl,
 			rcu_assign_pointer(*np,
 				rcu_dereference_protected(n->next,
 					lockdep_is_held(&tbl->lock)));
+			hlist_del_rcu(&n->hash);
 			neigh_mark_dead(n);
 		} else
 			np = &n->next;
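As a closing usage note: this patch only keeps the two bucket representations in sync, but once walks are also moved onto the hlist (the direction of this series), a flush can traverse a bucket with the stock hlist_for_each_entry_safe() helper and unlink matching entries in place, with none of the np previous-pointer bookkeeping. A hypothetical sketch, assuming tbl->lock is held and the legacy ->next chain has been retired; the function is an assumption, not code from the series:

	/* Hypothetical per-bucket flush; illustrative only. */
	static void sketch_flush_bucket(struct hlist_head *head,
					struct net_device *dev)
	{
		struct neighbour *n;
		struct hlist_node *tmp;

		hlist_for_each_entry_safe(n, tmp, head, hash) {
			if (n->dev != dev)
				continue;
			hlist_del_rcu(&n->hash);	/* O(1); no bucket re-scan */
			neigh_mark_dead(n);
		}
	}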