Message ID | 20241006064747.201773-2-gnaaman@drivenets.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | Improve neigh_flush_dev performance | expand |
Context | Check | Description |
---|---|---|
netdev/tree_selection | success | Clearly marked for net-next, async |
netdev/apply | fail | Patch does not apply to net-next-0 |
From: Gilad Naaman <gnaaman@drivenets.com> Date: Sun, 6 Oct 2024 06:47:42 +0000 > Use doubly-linked instead of singly-linked list when linking neighbours, > so that it is possible to remove neighbours without traversing the > entire table. > > Signed-off-by: Gilad Naaman <gnaaman@drivenets.com> > --- > include/net/neighbour.h | 8 +-- > net/core/neighbour.c | 124 ++++++++++++++-------------------------- > 2 files changed, 46 insertions(+), 86 deletions(-) > > diff --git a/include/net/neighbour.h b/include/net/neighbour.h > index a44f262a7384..5dde118323e3 100644 > --- a/include/net/neighbour.h > +++ b/include/net/neighbour.h > @@ -135,7 +135,7 @@ struct neigh_statistics { > #define NEIGH_CACHE_STAT_INC(tbl, field) this_cpu_inc((tbl)->stats->field) > > struct neighbour { > - struct neighbour __rcu *next; > + struct hlist_node list; > struct neigh_table *tbl; > struct neigh_parms *parms; > unsigned long confirmed; > @@ -190,7 +190,7 @@ struct pneigh_entry { > #define NEIGH_NUM_HASH_RND 4 > > struct neigh_hash_table { > - struct neighbour __rcu **hash_buckets; > + struct hlist_head *hash_buckets; > unsigned int hash_shift; > __u32 hash_rnd[NEIGH_NUM_HASH_RND]; > struct rcu_head rcu; > @@ -304,9 +304,9 @@ static inline struct neighbour *___neigh_lookup_noref( > u32 hash_val; > > hash_val = hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift); > - for (n = rcu_dereference(nht->hash_buckets[hash_val]); > + for (n = (struct neighbour *)rcu_dereference(hlist_first_rcu(&nht->hash_buckets[hash_val])); This for loop and hlist_first_rcu(&nht->hash_buckets[hash_val]) can also be written with a macro and an inline function. > n != NULL; > - n = rcu_dereference(n->next)) { > + n = (struct neighbour *)rcu_dereference(hlist_next_rcu(&n->list))) { This part is also reused multiple times so should be an inline function. I have similar patches for struct in_ifaddr.ifa_next (not upstreamed yet), and this will be a good example for you. https://github.com/q2ven/linux/commit/a51fdf7ccc14bf6edba58bacf7faaeebe811d41b > if (n->dev == dev && key_eq(n, pkey)) > return n; > } > diff --git a/net/core/neighbour.c b/net/core/neighbour.c > index 77b819cd995b..86b174baae27 100644 > --- a/net/core/neighbour.c > +++ b/net/core/neighbour.c > @@ -37,6 +37,7 @@ > #include <linux/string.h> > #include <linux/log2.h> > #include <linux/inetdevice.h> > +#include <linux/rculist.h> > #include <net/addrconf.h> > > #include <trace/events/neigh.h> > @@ -205,18 +206,13 @@ static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify, > } > } > > -static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np, > - struct neigh_table *tbl) > +static bool neigh_del(struct neighbour *n, struct neigh_table *tbl) > { > bool retval = false; > > write_lock(&n->lock); > if (refcount_read(&n->refcnt) == 1) { > - struct neighbour *neigh; > - > - neigh = rcu_dereference_protected(n->next, > - lockdep_is_held(&tbl->lock)); > - rcu_assign_pointer(*np, neigh); > + hlist_del_rcu(&n->list); > neigh_mark_dead(n); > retval = true; > } > @@ -228,25 +224,7 @@ static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np, > > bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl) > { > - struct neigh_hash_table *nht; > - void *pkey = ndel->primary_key; > - u32 hash_val; > - struct neighbour *n; > - struct neighbour __rcu **np; > - > - nht = rcu_dereference_protected(tbl->nht, > - lockdep_is_held(&tbl->lock)); > - hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd); > - hash_val = hash_val >> (32 - nht->hash_shift); > - > - np = &nht->hash_buckets[hash_val]; > - while ((n = rcu_dereference_protected(*np, > - lockdep_is_held(&tbl->lock)))) { > - if (n == ndel) > - return neigh_del(n, np, tbl); > - np = &n->next; > - } > - return false; > + return neigh_del(ndel, tbl); > } > > static int neigh_forced_gc(struct neigh_table *tbl) > @@ -388,21 +366,20 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev, > > for (i = 0; i < (1 << nht->hash_shift); i++) { > struct neighbour *n; > - struct neighbour __rcu **np = &nht->hash_buckets[i]; > + struct neighbour __rcu **np = > + (struct neighbour __rcu **)&nht->hash_buckets[i].first; This will be no longer needed for doubly linked list, > > while ((n = rcu_dereference_protected(*np, > lockdep_is_held(&tbl->lock))) != NULL) { and this while can be converted to the for-loop macro. > if (dev && n->dev != dev) { > - np = &n->next; > + np = (struct neighbour __rcu **)&n->list.next; > continue; > } > if (skip_perm && n->nud_state & NUD_PERMANENT) { > - np = &n->next; > + np = (struct neighbour __rcu **)&n->list.next; > continue; > } > - rcu_assign_pointer(*np, > - rcu_dereference_protected(n->next, > - lockdep_is_held(&tbl->lock))); > + hlist_del_rcu(&n->list); > write_lock(&n->lock); > neigh_del_timer(n); > neigh_mark_dead(n); > @@ -530,9 +507,9 @@ static void neigh_get_hash_rnd(u32 *x) > > static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift) > { > - size_t size = (1 << shift) * sizeof(struct neighbour *); > + size_t size = (1 << shift) * sizeof(struct hlist_head); > struct neigh_hash_table *ret; > - struct neighbour __rcu **buckets; > + struct hlist_head *buckets; > int i; > > ret = kmalloc(sizeof(*ret), GFP_ATOMIC); > @@ -541,7 +518,7 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift) > if (size <= PAGE_SIZE) { > buckets = kzalloc(size, GFP_ATOMIC); > } else { > - buckets = (struct neighbour __rcu **) > + buckets = (struct hlist_head *) > __get_free_pages(GFP_ATOMIC | __GFP_ZERO, > get_order(size)); > kmemleak_alloc(buckets, size, 1, GFP_ATOMIC); > @@ -562,8 +539,8 @@ static void neigh_hash_free_rcu(struct rcu_head *head) > struct neigh_hash_table *nht = container_of(head, > struct neigh_hash_table, > rcu); > - size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *); > - struct neighbour __rcu **buckets = nht->hash_buckets; > + size_t size = (1 << nht->hash_shift) * sizeof(struct hlist_head); > + struct hlist_head *buckets = nht->hash_buckets; > > if (size <= PAGE_SIZE) { > kfree(buckets); > @@ -591,22 +568,18 @@ static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl, > for (i = 0; i < (1 << old_nht->hash_shift); i++) { > struct neighbour *n, *next; > > - for (n = rcu_dereference_protected(old_nht->hash_buckets[i], > - lockdep_is_held(&tbl->lock)); > + for (n = (struct neighbour *) > + rcu_dereference_protected(hlist_first_rcu(&old_nht->hash_buckets[i]), > + lockdep_is_held(&tbl->lock)); This can be macro. > n != NULL; > n = next) { > hash = tbl->hash(n->primary_key, n->dev, > new_nht->hash_rnd); > > hash >>= (32 - new_nht->hash_shift); > - next = rcu_dereference_protected(n->next, > - lockdep_is_held(&tbl->lock)); > - > - rcu_assign_pointer(n->next, > - rcu_dereference_protected( > - new_nht->hash_buckets[hash], > - lockdep_is_held(&tbl->lock))); > - rcu_assign_pointer(new_nht->hash_buckets[hash], n); > + next = (struct neighbour *)rcu_dereference(hlist_next_rcu(&n->list)); This can be static inline function and reused for for-loop macro. > + hlist_del_rcu(&n->list); > + hlist_add_head_rcu(&n->list, &new_nht->hash_buckets[hash]); > } > } > > @@ -693,11 +666,10 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey, > goto out_tbl_unlock; > } > > - for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val], > - lockdep_is_held(&tbl->lock)); > - n1 != NULL; > - n1 = rcu_dereference_protected(n1->next, > - lockdep_is_held(&tbl->lock))) { > + hlist_for_each_entry_rcu(n1, > + &nht->hash_buckets[hash_val], > + list, > + lockdep_is_held(&tbl->lock)) { Let's define hlist_for_each_entry_rcu() as neigh-specific macro. > if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) { > if (want_ref) > neigh_hold(n1); > @@ -713,10 +685,7 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey, > list_add_tail(&n->managed_list, &n->tbl->managed_list); > if (want_ref) > neigh_hold(n); > - rcu_assign_pointer(n->next, > - rcu_dereference_protected(nht->hash_buckets[hash_val], > - lockdep_is_held(&tbl->lock))); > - rcu_assign_pointer(nht->hash_buckets[hash_val], n); > + hlist_add_head_rcu(&n->list, &nht->hash_buckets[hash_val]); > write_unlock_bh(&tbl->lock); > neigh_dbg(2, "neigh %p is created\n", n); > rc = n; > @@ -976,7 +945,7 @@ static void neigh_periodic_work(struct work_struct *work) > goto out; > > for (i = 0 ; i < (1 << nht->hash_shift); i++) { > - np = &nht->hash_buckets[i]; > + np = (struct neighbour __rcu **)&nht->hash_buckets[i].first; No np here too, > > while ((n = rcu_dereference_protected(*np, > lockdep_is_held(&tbl->lock))) != NULL) { and for-loop macro here. > @@ -999,9 +968,7 @@ static void neigh_periodic_work(struct work_struct *work) > (state == NUD_FAILED || > !time_in_range_open(jiffies, n->used, > n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) { > - rcu_assign_pointer(*np, > - rcu_dereference_protected(n->next, > - lockdep_is_held(&tbl->lock))); > + hlist_del_rcu(&n->list); > neigh_mark_dead(n); > write_unlock(&n->lock); > neigh_cleanup_and_release(n); > @@ -1010,7 +977,7 @@ static void neigh_periodic_work(struct work_struct *work) > write_unlock(&n->lock); > > next_elt: > - np = &n->next; > + np = (struct neighbour __rcu **)&n->list.next; > } > /* > * It's fine to release lock here, even if hash table > @@ -2728,9 +2695,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, > for (h = s_h; h < (1 << nht->hash_shift); h++) { > if (h > s_h) > s_idx = 0; > - for (n = rcu_dereference(nht->hash_buckets[h]), idx = 0; > - n != NULL; > - n = rcu_dereference(n->next)) { > + hlist_for_each_entry_rcu(n, &nht->hash_buckets[h], list) { > if (idx < s_idx || !net_eq(dev_net(n->dev), net)) > goto next; > if (neigh_ifindex_filtered(n->dev, filter->dev_idx) || > @@ -3097,9 +3062,7 @@ void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void > for (chain = 0; chain < (1 << nht->hash_shift); chain++) { > struct neighbour *n; > > - for (n = rcu_dereference(nht->hash_buckets[chain]); > - n != NULL; > - n = rcu_dereference(n->next)) > + hlist_for_each_entry_rcu(n, &nht->hash_buckets[chain], list) > cb(n, cookie); > } > read_unlock_bh(&tbl->lock); > @@ -3120,7 +3083,7 @@ void __neigh_for_each_release(struct neigh_table *tbl, > struct neighbour *n; > struct neighbour __rcu **np; > > - np = &nht->hash_buckets[chain]; > + np = (struct neighbour __rcu **)&nht->hash_buckets[chain].first; > while ((n = rcu_dereference_protected(*np, > lockdep_is_held(&tbl->lock))) != NULL) { Same here. > int release; > @@ -3128,12 +3091,10 @@ void __neigh_for_each_release(struct neigh_table *tbl, > write_lock(&n->lock); > release = cb(n); > if (release) { > - rcu_assign_pointer(*np, > - rcu_dereference_protected(n->next, > - lockdep_is_held(&tbl->lock))); > + hlist_del_rcu(&n->list); > neigh_mark_dead(n); > } else > - np = &n->next; > + np = (struct neighbour __rcu **)&n->list.next; > write_unlock(&n->lock); > if (release) > neigh_cleanup_and_release(n); > @@ -3200,25 +3161,21 @@ static struct neighbour *neigh_get_first(struct seq_file *seq) > > state->flags &= ~NEIGH_SEQ_IS_PNEIGH; > for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) { > - n = rcu_dereference(nht->hash_buckets[bucket]); > - > - while (n) { > + hlist_for_each_entry_rcu(n, &nht->hash_buckets[bucket], list) { > if (!net_eq(dev_net(n->dev), net)) > - goto next; > + continue; > if (state->neigh_sub_iter) { > loff_t fakep = 0; > void *v; > > v = state->neigh_sub_iter(state, n, &fakep); > if (!v) > - goto next; > + continue; > } > if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) > break; > if (READ_ONCE(n->nud_state) & ~NUD_NOARP) > break; > -next: > - n = rcu_dereference(n->next); > } > > if (n) > @@ -3242,7 +3199,8 @@ static struct neighbour *neigh_get_next(struct seq_file *seq, > if (v) > return n; > } > - n = rcu_dereference(n->next); > + > + n = (struct neighbour *)rcu_dereference(hlist_next_rcu(&n->list)); inline helper should be used, > > while (1) { > while (n) { > @@ -3260,7 +3218,8 @@ static struct neighbour *neigh_get_next(struct seq_file *seq, > if (READ_ONCE(n->nud_state) & ~NUD_NOARP) > break; > next: > - n = rcu_dereference(n->next); > + > + n = (struct neighbour *)rcu_dereference(hlist_next_rcu(&n->list)); same here, > } > > if (n) > @@ -3269,7 +3228,8 @@ static struct neighbour *neigh_get_next(struct seq_file *seq, > if (++state->bucket >= (1 << nht->hash_shift)) > break; > > - n = rcu_dereference(nht->hash_buckets[state->bucket]); > + n = (struct neighbour *) > + rcu_dereference(hlist_first_rcu(&nht->hash_buckets[state->bucket])); and here. > } > > if (n && pos) > -- > 2.46.0
From: Gilad Naaman <gnaaman@drivenets.com> Date: Sun, 6 Oct 2024 06:47:42 +0000 > @@ -2728,9 +2695,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, > for (h = s_h; h < (1 << nht->hash_shift); h++) { > if (h > s_h) > s_idx = 0; > - for (n = rcu_dereference(nht->hash_buckets[h]), idx = 0; > - n != NULL; > - n = rcu_dereference(n->next)) { > + hlist_for_each_entry_rcu(n, &nht->hash_buckets[h], list) { idx = 0 was accidentally removed. > if (idx < s_idx || !net_eq(dev_net(n->dev), net)) > goto next; > if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
Thank you for reviewing this > > Use doubly-linked instead of singly-linked list when linking neighbours, > > so that it is possible to remove neighbours without traversing the > > entire table. > > > > Signed-off-by: Gilad Naaman <gnaaman@drivenets.com> > > --- > > include/net/neighbour.h | 8 +-- > > net/core/neighbour.c | 124 ++++++++++++++-------------------------- > > 2 files changed, 46 insertions(+), 86 deletions(-) > > > > diff --git a/include/net/neighbour.h b/include/net/neighbour.h > > index a44f262a7384..5dde118323e3 100644 > > --- a/include/net/neighbour.h > > +++ b/include/net/neighbour.h > > @@ -135,7 +135,7 @@ struct neigh_statistics { > > #define NEIGH_CACHE_STAT_INC(tbl, field) this_cpu_inc((tbl)->stats->field) > > > > struct neighbour { > > - struct neighbour __rcu *next; > > + struct hlist_node list; > > struct neigh_table *tbl; > > struct neigh_parms *parms; > > unsigned long confirmed; > > @@ -190,7 +190,7 @@ struct pneigh_entry { > > #define NEIGH_NUM_HASH_RND 4 > > > > struct neigh_hash_table { > > - struct neighbour __rcu **hash_buckets; > > + struct hlist_head *hash_buckets; > > unsigned int hash_shift; > > __u32 hash_rnd[NEIGH_NUM_HASH_RND]; > > struct rcu_head rcu; > > @@ -304,9 +304,9 @@ static inline struct neighbour *___neigh_lookup_noref( > > u32 hash_val; > > > > hash_val = hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift); > > - for (n = rcu_dereference(nht->hash_buckets[hash_val]); > > + for (n = (struct neighbour *)rcu_dereference(hlist_first_rcu(&nht->hash_buckets[hash_val])); > > This for loop and hlist_first_rcu(&nht->hash_buckets[hash_val]) > can also be written with a macro and an inline function. Good point, I'll convert all of these to use `neigh_{first,next}_rcu{,protected}`. > > > n != NULL; > > - n = rcu_dereference(n->next)) { > > + n = (struct neighbour *)rcu_dereference(hlist_next_rcu(&n->list))) { > > This part is also reused multiple times so should be an inline function. > > I have similar patches for struct in_ifaddr.ifa_next (not upstreamed yet), > and this will be a good example for you. > https://github.com/q2ven/linux/commit/a51fdf7ccc14bf6edba58bacf7faaeebe811d41b > > > > if (n->dev == dev && key_eq(n, pkey)) > > return n; > > } > > diff --git a/net/core/neighbour.c b/net/core/neighbour.c > > index 77b819cd995b..86b174baae27 100644 > > --- a/net/core/neighbour.c > > +++ b/net/core/neighbour.c > > @@ -37,6 +37,7 @@ > > #include <linux/string.h> > > #include <linux/log2.h> > > #include <linux/inetdevice.h> > > +#include <linux/rculist.h> > > #include <net/addrconf.h> > > > > #include <trace/events/neigh.h> > > @@ -205,18 +206,13 @@ static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify, > > } > > } > > > > -static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np, > > - struct neigh_table *tbl) > > +static bool neigh_del(struct neighbour *n, struct neigh_table *tbl) > > { > > bool retval = false; > > > > write_lock(&n->lock); > > if (refcount_read(&n->refcnt) == 1) { > > - struct neighbour *neigh; > > - > > - neigh = rcu_dereference_protected(n->next, > > - lockdep_is_held(&tbl->lock)); > > - rcu_assign_pointer(*np, neigh); > > + hlist_del_rcu(&n->list); > > neigh_mark_dead(n); > > retval = true; > > } > > @@ -228,25 +224,7 @@ static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np, > > > > bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl) > > { > > - struct neigh_hash_table *nht; > > - void *pkey = ndel->primary_key; > > - u32 hash_val; > > - struct neighbour *n; > > - struct neighbour __rcu **np; > > - > > - nht = rcu_dereference_protected(tbl->nht, > > - lockdep_is_held(&tbl->lock)); > > - hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd); > > - hash_val = hash_val >> (32 - nht->hash_shift); > > - > > - np = &nht->hash_buckets[hash_val]; > > - while ((n = rcu_dereference_protected(*np, > > - lockdep_is_held(&tbl->lock)))) { > > - if (n == ndel) > > - return neigh_del(n, np, tbl); > > - np = &n->next; > > - } > > - return false; > > + return neigh_del(ndel, tbl); > > } > > > > static int neigh_forced_gc(struct neigh_table *tbl) > > @@ -388,21 +366,20 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev, > > > > for (i = 0; i < (1 << nht->hash_shift); i++) { > > struct neighbour *n; > > - struct neighbour __rcu **np = &nht->hash_buckets[i]; > > + struct neighbour __rcu **np = > > + (struct neighbour __rcu **)&nht->hash_buckets[i].first; > > This will be no longer needed for doubly linked list, This is not as-necessary with a doubly-linked list, but unfortunately I cannot eliminate it completely, as the `n` might be released in the loop body. I can convert this function to use a `struct neighour *next` instead, if it is more palatable. > > > > > while ((n = rcu_dereference_protected(*np, > > lockdep_is_held(&tbl->lock))) != NULL) { > > and this while can be converted to the for-loop macro. As far as I understand, this cannot be converted into the for-loop macro, as the cursor can be released during the loop-body, resulting in use-after-free when trying to increment it. > > > if (dev && n->dev != dev) { > > - np = &n->next; > > + np = (struct neighbour __rcu **)&n->list.next; > > continue; > > } > > if (skip_perm && n->nud_state & NUD_PERMANENT) { > > - np = &n->next; > > + np = (struct neighbour __rcu **)&n->list.next; > > continue; > > } > > - rcu_assign_pointer(*np, > > - rcu_dereference_protected(n->next, > > - lockdep_is_held(&tbl->lock))); > > + hlist_del_rcu(&n->list); > > write_lock(&n->lock); > > neigh_del_timer(n); > > neigh_mark_dead(n); == SNIP == > > > + hlist_del_rcu(&n->list); > > + hlist_add_head_rcu(&n->list, &new_nht->hash_buckets[hash]); > > } > > } > > > > @@ -693,11 +666,10 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey, > > goto out_tbl_unlock; > > } > > > > - for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val], > > - lockdep_is_held(&tbl->lock)); > > - n1 != NULL; > > - n1 = rcu_dereference_protected(n1->next, > > - lockdep_is_held(&tbl->lock))) { > > + hlist_for_each_entry_rcu(n1, > > + &nht->hash_buckets[hash_val], > > + list, > > + lockdep_is_held(&tbl->lock)) { > > Let's define hlist_for_each_entry_rcu() as neigh-specific macro. Can you elaborate on this? Do you want the `list` parameter to be eliminated? > > > if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) { > > if (want_ref) > > neigh_hold(n1); > > @@ -713,10 +685,7 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey, > > list_add_tail(&n->managed_list, &n->tbl->managed_list); > > if (want_ref) > > neigh_hold(n); > > - rcu_assign_pointer(n->next, > > - rcu_dereference_protected(nht->hash_buckets[hash_val], > > - lockdep_is_held(&tbl->lock))); > > - rcu_assign_pointer(nht->hash_buckets[hash_val], n); > > + hlist_add_head_rcu(&n->list, &nht->hash_buckets[hash_val]); > > write_unlock_bh(&tbl->lock); > > neigh_dbg(2, "neigh %p is created\n", n); > > rc = n; > > @@ -976,7 +945,7 @@ static void neigh_periodic_work(struct work_struct *work) > > goto out; > > > > for (i = 0 ; i < (1 << nht->hash_shift); i++) { > > - np = &nht->hash_buckets[i]; > > + np = (struct neighbour __rcu **)&nht->hash_buckets[i].first; > > No np here too, Same as the other loop in `neigh_flush_dev`, we must keep `np` in order to avoid UAF, because `n` might be freed in the loop body.
From: Gilad Naaman <gnaaman@drivenets.com> Date: Tue, 8 Oct 2024 07:38:55 +0000 > > > @@ -388,21 +366,20 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev, > > > > > > for (i = 0; i < (1 << nht->hash_shift); i++) { > > > struct neighbour *n; > > > - struct neighbour __rcu **np = &nht->hash_buckets[i]; > > > + struct neighbour __rcu **np = > > > + (struct neighbour __rcu **)&nht->hash_buckets[i].first; > > > > This will be no longer needed for doubly linked list, > > This is not as-necessary with a doubly-linked list, but unfortunately > I cannot eliminate it completely, as the `n` might be released in the loop > body. > > I can convert this function to use a `struct neighour *next` instead, > if it is more palatable. Yes, using hlist_for_each_entry_safe() is more preferable. Mixing for() and while() is harder to read. [...] > > > @@ -693,11 +666,10 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey, > > > goto out_tbl_unlock; > > > } > > > > > > - for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val], > > > - lockdep_is_held(&tbl->lock)); > > > - n1 != NULL; > > > - n1 = rcu_dereference_protected(n1->next, > > > - lockdep_is_held(&tbl->lock))) { > > > + hlist_for_each_entry_rcu(n1, > > > + &nht->hash_buckets[hash_val], > > > + list, > > > + lockdep_is_held(&tbl->lock)) { > > > > Let's define hlist_for_each_entry_rcu() as neigh-specific macro. > > Can you elaborate on this? > Do you want the `list` parameter to be eliminated? I mean like #define neigh_for_each(...) \ hlist_for_each_entry(...) #define neigh_for_each_rcu(...) \ hlist_for_each_entry_rcu(...) are better if there's repeated arguments.
Hello, kernel test robot noticed "BUG:unable_to_handle_page_fault_for_address" on: commit: febd2a80c498e3be8afdfe826ca49966bd9aca9a ("[PATCH net-next v2 1/2] Convert neighbour-table to use hlist") url: https://github.com/intel-lab-lkp/linux/commits/Gilad-Naaman/Convert-neighbour-table-to-use-hlist/20241006-145017 base: https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git 8f602276d3902642fdc3429b548d73c745446601 patch link: https://lore.kernel.org/all/20241006064747.201773-2-gnaaman@drivenets.com/ patch subject: [PATCH net-next v2 1/2] Convert neighbour-table to use hlist in testcase: trinity version: trinity-i386-abe9de86-1_20230429 with following parameters: runtime: 300s group: group-02 nr_groups: 5 compiler: gcc-12 test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G (please refer to attached dmesg/kmsg for entire log/backtrace) +--------------------------------------------------------------+------------+------------+ | | 8f602276d3 | febd2a80c4 | +--------------------------------------------------------------+------------+------------+ | BUG:unable_to_handle_page_fault_for_address | 0 | 100 | | Oops | 0 | 100 | | RIP:___neigh_lookup_noref | 0 | 100 | +--------------------------------------------------------------+------------+------------+ If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <oliver.sang@intel.com> | Closes: https://lore.kernel.org/oe-lkp/202410091622.3d66a78c-lkp@intel.com [ 42.953184][ T23] BUG: unable to handle page fault for address: ffffffffdeaf2215 [ 42.954660][ T23] #PF: supervisor read access in kernel mode [ 42.955397][ T23] #PF: error_code(0x0000) - not-present page [ 42.956085][ T23] PGD 3441067 P4D 3441067 PUD 3443067 PMD 0 [ 42.956877][ T23] Oops: Oops: 0000 [#1] PREEMPT SMP [ 42.957461][ T23] CPU: 1 UID: 0 PID: 23 Comm: kworker/1:0 Tainted: G T 6.12.0-rc1-00350-gfebd2a80c498 #1 b68071eb3c38aea083990c8ab76066a73681badf [ 42.959169][ T23] Tainted: [T]=RANDSTRUCT [ 42.959657][ T23] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 [ 42.960816][ T23] Workqueue: mld mld_ifc_work [ 42.968090][ T23] RIP: 0010:___neigh_lookup_noref (include/net/neighbour.h:310) [ 42.969009][ T23] Code: 00 00 48 c7 c2 de e1 0a 83 48 c7 c7 fc 39 1f 83 e8 b0 ea 08 ff 48 85 db 0f 84 85 00 00 00 48 8d bb 28 03 00 00 e8 31 18 1a ff <4c> 3b ab 28 03 00 00 75 0f 4c 89 e6 48 89 df e8 59 f8 ff ff 84 c0 All code ======== 0: 00 00 add %al,(%rax) 2: 48 c7 c2 de e1 0a 83 mov $0xffffffff830ae1de,%rdx 9: 48 c7 c7 fc 39 1f 83 mov $0xffffffff831f39fc,%rdi 10: e8 b0 ea 08 ff call 0xffffffffff08eac5 15: 48 85 db test %rbx,%rbx 18: 0f 84 85 00 00 00 je 0xa3 1e: 48 8d bb 28 03 00 00 lea 0x328(%rbx),%rdi 25: e8 31 18 1a ff call 0xffffffffff1a185b 2a:* 4c 3b ab 28 03 00 00 cmp 0x328(%rbx),%r13 <-- trapping instruction 31: 75 0f jne 0x42 33: 4c 89 e6 mov %r12,%rsi 36: 48 89 df mov %rbx,%rdi 39: e8 59 f8 ff ff call 0xfffffffffffff897 3e: 84 c0 test %al,%al Code starting with the faulting instruction =========================================== 0: 4c 3b ab 28 03 00 00 cmp 0x328(%rbx),%r13 7: 75 0f jne 0x18 9: 4c 89 e6 mov %r12,%rsi c: 48 89 df mov %rbx,%rdi f: e8 59 f8 ff ff call 0xfffffffffffff86d 14: 84 c0 test %al,%al [ 42.971354][ T23] RSP: 0018:ffffc900000cbb88 EFLAGS: 00210246 [ 42.972125][ T23] RAX: 0000000000000000 RBX: ffffffffdeaf1eed RCX: 0000000000000000 [ 42.973071][ T23] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 [ 42.974124][ T23] RBP: ffff88811af34b40 R08: 0000000000000000 R09: 0000000000000000 [ 42.975040][ T23] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88811c6fa028 [ 42.975974][ T23] R13: ffff88811bc7a000 R14: 000000000000001d R15: 0000607bd0004020 [ 42.976897][ T23] FS: 0000000000000000(0000) GS:ffff88842fc00000(0000) knlGS:0000000000000000 [ 42.977854][ T23] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 42.978590][ T23] CR2: ffffffffdeaf2215 CR3: 000000000343f000 CR4: 00000000000406b0 [ 42.979442][ T23] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 42.980369][ T23] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 42.981266][ T23] Call Trace: [ 42.981636][ T23] <TASK> [ 42.981988][ T23] ? __die_body (arch/x86/kernel/dumpstack.c:421) [ 42.982475][ T23] ? page_fault_oops (arch/x86/mm/fault.c:715) [ 42.983091][ T23] ? exc_page_fault (arch/x86/mm/fault.c:1479 arch/x86/mm/fault.c:1539) [ 42.983677][ T23] ? asm_exc_page_fault (arch/x86/include/asm/idtentry.h:623) [ 42.984326][ T23] ? ___neigh_lookup_noref (include/net/neighbour.h:310) [ 42.985102][ T23] ? ___neigh_lookup_noref (include/net/neighbour.h:310) [ 42.985789][ T23] ip6_finish_output2 (include/net/ndisc.h:368 net/ipv6/ip6_output.c:128) [ 42.986377][ T23] ip6_output (net/ipv6/ip6_output.c:251) [ 42.986913][ T23] dst_output+0x59/0x64 [ 42.987506][ T23] mld_sendpack (net/ipv6/mcast.c:1823) [ 42.988064][ T23] mld_send_cr (net/ipv6/mcast.c:2121) [ 42.988539][ T23] mld_ifc_work (net/ipv6/mcast.c:2653) [ 42.989034][ T23] ? process_one_work (kernel/workqueue.c:3205) [ 42.989680][ T23] process_one_work (kernel/workqueue.c:3234) [ 42.990204][ T23] ? process_one_work (kernel/workqueue.c:3205) [ 42.990790][ T23] process_scheduled_works (kernel/workqueue.c:3310) [ 42.991361][ T23] worker_thread (include/linux/list.h:373 kernel/workqueue.c:946 kernel/workqueue.c:3392) [ 42.991863][ T23] ? drain_dead_softirq_workfn (kernel/workqueue.c:3337) [ 42.992581][ T23] kthread (kernel/kthread.c:391) [ 42.993075][ T23] ? kthread (kernel/kthread.c:374) [ 42.993578][ T23] ? list_del_init (include/linux/lockdep.h:249) [ 42.994107][ T23] ret_from_fork (arch/x86/kernel/process.c:153) [ 42.994606][ T23] ? list_del_init (include/linux/lockdep.h:249) [ 42.995189][ T23] ret_from_fork_asm (arch/x86/entry/entry_64.S:254) [ 42.995794][ T23] </TASK> [ 42.996192][ T23] Modules linked in: [ 42.996717][ T23] CR2: ffffffffdeaf2215 [ 42.997192][ T23] ---[ end trace 0000000000000000 ]--- [ 42.997790][ T23] RIP: 0010:___neigh_lookup_noref (include/net/neighbour.h:310) [ 42.998628][ T23] Code: 00 00 48 c7 c2 de e1 0a 83 48 c7 c7 fc 39 1f 83 e8 b0 ea 08 ff 48 85 db 0f 84 85 00 00 00 48 8d bb 28 03 00 00 e8 31 18 1a ff <4c> 3b ab 28 03 00 00 75 0f 4c 89 e6 48 89 df e8 59 f8 ff ff 84 c0 All code ======== 0: 00 00 add %al,(%rax) 2: 48 c7 c2 de e1 0a 83 mov $0xffffffff830ae1de,%rdx 9: 48 c7 c7 fc 39 1f 83 mov $0xffffffff831f39fc,%rdi 10: e8 b0 ea 08 ff call 0xffffffffff08eac5 15: 48 85 db test %rbx,%rbx 18: 0f 84 85 00 00 00 je 0xa3 1e: 48 8d bb 28 03 00 00 lea 0x328(%rbx),%rdi 25: e8 31 18 1a ff call 0xffffffffff1a185b 2a:* 4c 3b ab 28 03 00 00 cmp 0x328(%rbx),%r13 <-- trapping instruction 31: 75 0f jne 0x42 33: 4c 89 e6 mov %r12,%rsi 36: 48 89 df mov %rbx,%rdi 39: e8 59 f8 ff ff call 0xfffffffffffff897 3e: 84 c0 test %al,%al Code starting with the faulting instruction =========================================== 0: 4c 3b ab 28 03 00 00 cmp 0x328(%rbx),%r13 7: 75 0f jne 0x18 9: 4c 89 e6 mov %r12,%rsi c: 48 89 df mov %rbx,%rdi f: e8 59 f8 ff ff call 0xfffffffffffff86d 14: 84 c0 test %al,%al The kernel config and materials to reproduce are available at: https://download.01.org/0day-ci/archive/20241009/202410091622.3d66a78c-lkp@intel.com
diff --git a/include/net/neighbour.h b/include/net/neighbour.h index a44f262a7384..5dde118323e3 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -135,7 +135,7 @@ struct neigh_statistics { #define NEIGH_CACHE_STAT_INC(tbl, field) this_cpu_inc((tbl)->stats->field) struct neighbour { - struct neighbour __rcu *next; + struct hlist_node list; struct neigh_table *tbl; struct neigh_parms *parms; unsigned long confirmed; @@ -190,7 +190,7 @@ struct pneigh_entry { #define NEIGH_NUM_HASH_RND 4 struct neigh_hash_table { - struct neighbour __rcu **hash_buckets; + struct hlist_head *hash_buckets; unsigned int hash_shift; __u32 hash_rnd[NEIGH_NUM_HASH_RND]; struct rcu_head rcu; @@ -304,9 +304,9 @@ static inline struct neighbour *___neigh_lookup_noref( u32 hash_val; hash_val = hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift); - for (n = rcu_dereference(nht->hash_buckets[hash_val]); + for (n = (struct neighbour *)rcu_dereference(hlist_first_rcu(&nht->hash_buckets[hash_val])); n != NULL; - n = rcu_dereference(n->next)) { + n = (struct neighbour *)rcu_dereference(hlist_next_rcu(&n->list))) { if (n->dev == dev && key_eq(n, pkey)) return n; } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 77b819cd995b..86b174baae27 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -37,6 +37,7 @@ #include <linux/string.h> #include <linux/log2.h> #include <linux/inetdevice.h> +#include <linux/rculist.h> #include <net/addrconf.h> #include <trace/events/neigh.h> @@ -205,18 +206,13 @@ static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify, } } -static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np, - struct neigh_table *tbl) +static bool neigh_del(struct neighbour *n, struct neigh_table *tbl) { bool retval = false; write_lock(&n->lock); if (refcount_read(&n->refcnt) == 1) { - struct neighbour *neigh; - - neigh = rcu_dereference_protected(n->next, - lockdep_is_held(&tbl->lock)); - rcu_assign_pointer(*np, neigh); + hlist_del_rcu(&n->list); neigh_mark_dead(n); retval = true; } @@ -228,25 +224,7 @@ static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np, bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl) { - struct neigh_hash_table *nht; - void *pkey = ndel->primary_key; - u32 hash_val; - struct neighbour *n; - struct neighbour __rcu **np; - - nht = rcu_dereference_protected(tbl->nht, - lockdep_is_held(&tbl->lock)); - hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd); - hash_val = hash_val >> (32 - nht->hash_shift); - - np = &nht->hash_buckets[hash_val]; - while ((n = rcu_dereference_protected(*np, - lockdep_is_held(&tbl->lock)))) { - if (n == ndel) - return neigh_del(n, np, tbl); - np = &n->next; - } - return false; + return neigh_del(ndel, tbl); } static int neigh_forced_gc(struct neigh_table *tbl) @@ -388,21 +366,20 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev, for (i = 0; i < (1 << nht->hash_shift); i++) { struct neighbour *n; - struct neighbour __rcu **np = &nht->hash_buckets[i]; + struct neighbour __rcu **np = + (struct neighbour __rcu **)&nht->hash_buckets[i].first; while ((n = rcu_dereference_protected(*np, lockdep_is_held(&tbl->lock))) != NULL) { if (dev && n->dev != dev) { - np = &n->next; + np = (struct neighbour __rcu **)&n->list.next; continue; } if (skip_perm && n->nud_state & NUD_PERMANENT) { - np = &n->next; + np = (struct neighbour __rcu **)&n->list.next; continue; } - rcu_assign_pointer(*np, - rcu_dereference_protected(n->next, - lockdep_is_held(&tbl->lock))); + hlist_del_rcu(&n->list); write_lock(&n->lock); neigh_del_timer(n); neigh_mark_dead(n); @@ -530,9 +507,9 @@ static void neigh_get_hash_rnd(u32 *x) static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift) { - size_t size = (1 << shift) * sizeof(struct neighbour *); + size_t size = (1 << shift) * sizeof(struct hlist_head); struct neigh_hash_table *ret; - struct neighbour __rcu **buckets; + struct hlist_head *buckets; int i; ret = kmalloc(sizeof(*ret), GFP_ATOMIC); @@ -541,7 +518,7 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift) if (size <= PAGE_SIZE) { buckets = kzalloc(size, GFP_ATOMIC); } else { - buckets = (struct neighbour __rcu **) + buckets = (struct hlist_head *) __get_free_pages(GFP_ATOMIC | __GFP_ZERO, get_order(size)); kmemleak_alloc(buckets, size, 1, GFP_ATOMIC); @@ -562,8 +539,8 @@ static void neigh_hash_free_rcu(struct rcu_head *head) struct neigh_hash_table *nht = container_of(head, struct neigh_hash_table, rcu); - size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *); - struct neighbour __rcu **buckets = nht->hash_buckets; + size_t size = (1 << nht->hash_shift) * sizeof(struct hlist_head); + struct hlist_head *buckets = nht->hash_buckets; if (size <= PAGE_SIZE) { kfree(buckets); @@ -591,22 +568,18 @@ static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl, for (i = 0; i < (1 << old_nht->hash_shift); i++) { struct neighbour *n, *next; - for (n = rcu_dereference_protected(old_nht->hash_buckets[i], - lockdep_is_held(&tbl->lock)); + for (n = (struct neighbour *) + rcu_dereference_protected(hlist_first_rcu(&old_nht->hash_buckets[i]), + lockdep_is_held(&tbl->lock)); n != NULL; n = next) { hash = tbl->hash(n->primary_key, n->dev, new_nht->hash_rnd); hash >>= (32 - new_nht->hash_shift); - next = rcu_dereference_protected(n->next, - lockdep_is_held(&tbl->lock)); - - rcu_assign_pointer(n->next, - rcu_dereference_protected( - new_nht->hash_buckets[hash], - lockdep_is_held(&tbl->lock))); - rcu_assign_pointer(new_nht->hash_buckets[hash], n); + next = (struct neighbour *)rcu_dereference(hlist_next_rcu(&n->list)); + hlist_del_rcu(&n->list); + hlist_add_head_rcu(&n->list, &new_nht->hash_buckets[hash]); } } @@ -693,11 +666,10 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey, goto out_tbl_unlock; } - for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val], - lockdep_is_held(&tbl->lock)); - n1 != NULL; - n1 = rcu_dereference_protected(n1->next, - lockdep_is_held(&tbl->lock))) { + hlist_for_each_entry_rcu(n1, + &nht->hash_buckets[hash_val], + list, + lockdep_is_held(&tbl->lock)) { if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) { if (want_ref) neigh_hold(n1); @@ -713,10 +685,7 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey, list_add_tail(&n->managed_list, &n->tbl->managed_list); if (want_ref) neigh_hold(n); - rcu_assign_pointer(n->next, - rcu_dereference_protected(nht->hash_buckets[hash_val], - lockdep_is_held(&tbl->lock))); - rcu_assign_pointer(nht->hash_buckets[hash_val], n); + hlist_add_head_rcu(&n->list, &nht->hash_buckets[hash_val]); write_unlock_bh(&tbl->lock); neigh_dbg(2, "neigh %p is created\n", n); rc = n; @@ -976,7 +945,7 @@ static void neigh_periodic_work(struct work_struct *work) goto out; for (i = 0 ; i < (1 << nht->hash_shift); i++) { - np = &nht->hash_buckets[i]; + np = (struct neighbour __rcu **)&nht->hash_buckets[i].first; while ((n = rcu_dereference_protected(*np, lockdep_is_held(&tbl->lock))) != NULL) { @@ -999,9 +968,7 @@ static void neigh_periodic_work(struct work_struct *work) (state == NUD_FAILED || !time_in_range_open(jiffies, n->used, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) { - rcu_assign_pointer(*np, - rcu_dereference_protected(n->next, - lockdep_is_held(&tbl->lock))); + hlist_del_rcu(&n->list); neigh_mark_dead(n); write_unlock(&n->lock); neigh_cleanup_and_release(n); @@ -1010,7 +977,7 @@ static void neigh_periodic_work(struct work_struct *work) write_unlock(&n->lock); next_elt: - np = &n->next; + np = (struct neighbour __rcu **)&n->list.next; } /* * It's fine to release lock here, even if hash table @@ -2728,9 +2695,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, for (h = s_h; h < (1 << nht->hash_shift); h++) { if (h > s_h) s_idx = 0; - for (n = rcu_dereference(nht->hash_buckets[h]), idx = 0; - n != NULL; - n = rcu_dereference(n->next)) { + hlist_for_each_entry_rcu(n, &nht->hash_buckets[h], list) { if (idx < s_idx || !net_eq(dev_net(n->dev), net)) goto next; if (neigh_ifindex_filtered(n->dev, filter->dev_idx) || @@ -3097,9 +3062,7 @@ void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void for (chain = 0; chain < (1 << nht->hash_shift); chain++) { struct neighbour *n; - for (n = rcu_dereference(nht->hash_buckets[chain]); - n != NULL; - n = rcu_dereference(n->next)) + hlist_for_each_entry_rcu(n, &nht->hash_buckets[chain], list) cb(n, cookie); } read_unlock_bh(&tbl->lock); @@ -3120,7 +3083,7 @@ void __neigh_for_each_release(struct neigh_table *tbl, struct neighbour *n; struct neighbour __rcu **np; - np = &nht->hash_buckets[chain]; + np = (struct neighbour __rcu **)&nht->hash_buckets[chain].first; while ((n = rcu_dereference_protected(*np, lockdep_is_held(&tbl->lock))) != NULL) { int release; @@ -3128,12 +3091,10 @@ void __neigh_for_each_release(struct neigh_table *tbl, write_lock(&n->lock); release = cb(n); if (release) { - rcu_assign_pointer(*np, - rcu_dereference_protected(n->next, - lockdep_is_held(&tbl->lock))); + hlist_del_rcu(&n->list); neigh_mark_dead(n); } else - np = &n->next; + np = (struct neighbour __rcu **)&n->list.next; write_unlock(&n->lock); if (release) neigh_cleanup_and_release(n); @@ -3200,25 +3161,21 @@ static struct neighbour *neigh_get_first(struct seq_file *seq) state->flags &= ~NEIGH_SEQ_IS_PNEIGH; for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) { - n = rcu_dereference(nht->hash_buckets[bucket]); - - while (n) { + hlist_for_each_entry_rcu(n, &nht->hash_buckets[bucket], list) { if (!net_eq(dev_net(n->dev), net)) - goto next; + continue; if (state->neigh_sub_iter) { loff_t fakep = 0; void *v; v = state->neigh_sub_iter(state, n, &fakep); if (!v) - goto next; + continue; } if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) break; if (READ_ONCE(n->nud_state) & ~NUD_NOARP) break; -next: - n = rcu_dereference(n->next); } if (n) @@ -3242,7 +3199,8 @@ static struct neighbour *neigh_get_next(struct seq_file *seq, if (v) return n; } - n = rcu_dereference(n->next); + + n = (struct neighbour *)rcu_dereference(hlist_next_rcu(&n->list)); while (1) { while (n) { @@ -3260,7 +3218,8 @@ static struct neighbour *neigh_get_next(struct seq_file *seq, if (READ_ONCE(n->nud_state) & ~NUD_NOARP) break; next: - n = rcu_dereference(n->next); + + n = (struct neighbour *)rcu_dereference(hlist_next_rcu(&n->list)); } if (n) @@ -3269,7 +3228,8 @@ static struct neighbour *neigh_get_next(struct seq_file *seq, if (++state->bucket >= (1 << nht->hash_shift)) break; - n = rcu_dereference(nht->hash_buckets[state->bucket]); + n = (struct neighbour *) + rcu_dereference(hlist_first_rcu(&nht->hash_buckets[state->bucket])); } if (n && pos)
Use doubly-linked instead of singly-linked list when linking neighbours, so that it is possible to remove neighbours without traversing the entire table. Signed-off-by: Gilad Naaman <gnaaman@drivenets.com> --- include/net/neighbour.h | 8 +-- net/core/neighbour.c | 124 ++++++++++++++-------------------------- 2 files changed, 46 insertions(+), 86 deletions(-)