Message ID | 20220623043449.1217288-9-edumazet@google.com (mailing list archive) |
---|---|
State | Accepted |
Commit | 4eadb88244d17056179a47f613e30c49191072c5 |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | ipmr: get rid of rwlocks | expand |
Hi Eric, On Thu, Jun 23, 2022 at 04:34:38AM +0000, Eric Dumazet wrote: > ip_mr_forward() uses standard RCU protection already. > > Signed-off-by: Eric Dumazet <edumazet@google.com> > --- > net/ipv4/ipmr.c | 9 ++------- > 1 file changed, 2 insertions(+), 7 deletions(-) > > diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c > index 6ea54bc3d9b6555aaa9974d81ba4acd47481724f..b0f2e6d79d62273c8c2682f28cb45fe5ec3df6f3 100644 > --- a/net/ipv4/ipmr.c > +++ b/net/ipv4/ipmr.c > @@ -1817,7 +1817,7 @@ static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt, > } > #endif > > -/* Processing handlers for ipmr_forward */ > +/* Processing handlers for ipmr_forward, under rcu_read_lock() */ > > static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, > int in_vifi, struct sk_buff *skb, int vifi) > @@ -1839,9 +1839,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, > WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len); > vif_dev->stats.tx_bytes += skb->len; > vif_dev->stats.tx_packets++; > - rcu_read_lock(); > ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT); > - rcu_read_unlock(); > goto out_free; > } > > @@ -1936,6 +1934,7 @@ static int ipmr_find_vif(const struct mr_table *mrt, struct net_device *dev) > } > > /* "local" means that we should preserve one skb (for local delivery) */ > +/* Called uner rcu_read_lock() */ > static void ip_mr_forward(struct net *net, struct mr_table *mrt, > struct net_device *dev, struct sk_buff *skb, > struct mfc_cache *c, int local) > @@ -1992,12 +1991,10 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt, > c->_c.mfc_un.res.last_assert + > MFC_ASSERT_THRESH)) { > c->_c.mfc_un.res.last_assert = jiffies; > - rcu_read_lock(); > ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF); > if (mrt->mroute_do_wrvifwhole) > ipmr_cache_report(mrt, skb, true_vifi, > IGMPMSG_WRVIFWHOLE); > - rcu_read_unlock(); > } > goto dont_forward; > } > @@ -2169,9 +2166,7 @@ int ip_mr_input(struct sk_buff *skb) > return -ENODEV; > } > > - read_lock(&mrt_lock); > ip_mr_forward(net, mrt, dev, skb, cache, local); > - read_unlock(&mrt_lock); > > if (local) > return ip_local_deliver(skb); > -- > 2.37.0.rc0.104.g0611611a94-goog > Sorry for reporting this late, but there seems to be 1 call path from which RCU is not watching in ip_mr_forward(). It's via ipmr_mfc_add() -> ipmr_cache_resolve() -> ip_mr_forward(). The warning looks like this: [ 632.062382] ============================= [ 632.068568] WARNING: suspicious RCU usage [ 632.073702] 5.19.0-rc7-07010-ga9b9500ffaac-dirty #3374 Not tainted [ 632.081098] ----------------------------- [ 632.086216] net/ipv4/ipmr.c:1080 suspicious rcu_dereference_check() usage! [ 632.094152] [ 632.094152] other info that might help us debug this: [ 632.103349] [ 632.103349] rcu_scheduler_active = 2, debug_locks = 1 [ 632.111011] 1 lock held by smcrouted/359: [ 632.116079] #0: ffffd27b44d23770 (rtnl_mutex){+.+.}-{4:4}, at: rtnl_lock+0x1c/0x30 [ 632.124703] [ 632.124703] stack backtrace: [ 632.129681] CPU: 0 PID: 359 Comm: smcrouted Not tainted 5.19.0-rc7-07010-ga9b9500ffaac-dirty #3374 [ 632.143426] Call trace: [ 632.160542] lockdep_rcu_suspicious+0xf8/0x10c [ 632.165014] ipmr_cache_report+0x2f0/0x530 [ 632.169137] ip_mr_forward+0x364/0x38c [ 632.172909] ipmr_mfc_add+0x894/0xc90 [ 632.176592] ip_mroute_setsockopt+0x6ac/0x950 [ 632.180973] ip_setsockopt+0x16a0/0x16ac [ 632.184921] raw_setsockopt+0x110/0x184 [ 632.188780] sock_common_setsockopt+0x1c/0x2c [ 632.193163] __sys_setsockopt+0x94/0x170 [ 632.197111] __arm64_sys_setsockopt+0x2c/0x40 [ 632.201492] invoke_syscall+0x48/0x114 I don't exactly understand the data structures that are used inside ip_mr_forward(), so I'm unable to say what needs RCU protection and what is fine with the rtnl_mutex that we are holding, just annotated poorly. Could you please take a look?
On Fri, Jul 22, 2022 at 9:34 PM Vladimir Oltean <olteanv@gmail.com> wrote: > > Hi Eric, > > On Thu, Jun 23, 2022 at 04:34:38AM +0000, Eric Dumazet wrote: > > ip_mr_forward() uses standard RCU protection already. > > > > Signed-off-by: Eric Dumazet <edumazet@google.com> > > --- > > net/ipv4/ipmr.c | 9 ++------- > > 1 file changed, 2 insertions(+), 7 deletions(-) > > > > diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c > > index 6ea54bc3d9b6555aaa9974d81ba4acd47481724f..b0f2e6d79d62273c8c2682f28cb45fe5ec3df6f3 100644 > > --- a/net/ipv4/ipmr.c > > +++ b/net/ipv4/ipmr.c > > @@ -1817,7 +1817,7 @@ static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt, > > } > > #endif > > > > -/* Processing handlers for ipmr_forward */ > > +/* Processing handlers for ipmr_forward, under rcu_read_lock() */ > > > > static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, > > int in_vifi, struct sk_buff *skb, int vifi) > > @@ -1839,9 +1839,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, > > WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len); > > vif_dev->stats.tx_bytes += skb->len; > > vif_dev->stats.tx_packets++; > > - rcu_read_lock(); > > ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT); > > - rcu_read_unlock(); > > goto out_free; > > } > > > > @@ -1936,6 +1934,7 @@ static int ipmr_find_vif(const struct mr_table *mrt, struct net_device *dev) > > } > > > > /* "local" means that we should preserve one skb (for local delivery) */ > > +/* Called uner rcu_read_lock() */ > > static void ip_mr_forward(struct net *net, struct mr_table *mrt, > > struct net_device *dev, struct sk_buff *skb, > > struct mfc_cache *c, int local) > > @@ -1992,12 +1991,10 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt, > > c->_c.mfc_un.res.last_assert + > > MFC_ASSERT_THRESH)) { > > c->_c.mfc_un.res.last_assert = jiffies; > > - rcu_read_lock(); > > ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF); > > if (mrt->mroute_do_wrvifwhole) > > ipmr_cache_report(mrt, skb, true_vifi, > > IGMPMSG_WRVIFWHOLE); > > - rcu_read_unlock(); > > } > > goto dont_forward; > > } > > @@ -2169,9 +2166,7 @@ int ip_mr_input(struct sk_buff *skb) > > return -ENODEV; > > } > > > > - read_lock(&mrt_lock); > > ip_mr_forward(net, mrt, dev, skb, cache, local); > > - read_unlock(&mrt_lock); > > > > if (local) > > return ip_local_deliver(skb); > > -- > > 2.37.0.rc0.104.g0611611a94-goog > > > > Sorry for reporting this late, but there seems to be 1 call path from > which RCU is not watching in ip_mr_forward(). It's via ipmr_mfc_add() -> > ipmr_cache_resolve() -> ip_mr_forward(). > > The warning looks like this: > > [ 632.062382] ============================= > [ 632.068568] WARNING: suspicious RCU usage > [ 632.073702] 5.19.0-rc7-07010-ga9b9500ffaac-dirty #3374 Not tainted > [ 632.081098] ----------------------------- > [ 632.086216] net/ipv4/ipmr.c:1080 suspicious rcu_dereference_check() usage! > [ 632.094152] > [ 632.094152] other info that might help us debug this: > [ 632.103349] > [ 632.103349] rcu_scheduler_active = 2, debug_locks = 1 > [ 632.111011] 1 lock held by smcrouted/359: > [ 632.116079] #0: ffffd27b44d23770 (rtnl_mutex){+.+.}-{4:4}, at: rtnl_lock+0x1c/0x30 > [ 632.124703] > [ 632.124703] stack backtrace: > [ 632.129681] CPU: 0 PID: 359 Comm: smcrouted Not tainted 5.19.0-rc7-07010-ga9b9500ffaac-dirty #3374 > [ 632.143426] Call trace: > [ 632.160542] lockdep_rcu_suspicious+0xf8/0x10c > [ 632.165014] ipmr_cache_report+0x2f0/0x530 > [ 632.169137] ip_mr_forward+0x364/0x38c > [ 632.172909] ipmr_mfc_add+0x894/0xc90 > [ 632.176592] ip_mroute_setsockopt+0x6ac/0x950 > [ 632.180973] ip_setsockopt+0x16a0/0x16ac > [ 632.184921] raw_setsockopt+0x110/0x184 > [ 632.188780] sock_common_setsockopt+0x1c/0x2c > [ 632.193163] __sys_setsockopt+0x94/0x170 > [ 632.197111] __arm64_sys_setsockopt+0x2c/0x40 > [ 632.201492] invoke_syscall+0x48/0x114 > > I don't exactly understand the data structures that are used inside ip_mr_forward(), > so I'm unable to say what needs RCU protection and what is fine with the rtnl_mutex > that we are holding, just annotated poorly. Could you please take a look? Thanks for the report. I guess there are multiple ways to solve this issue, one being: diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 73651d17e51f31c8755da6ac3c1c2763a99b1117..1c288a7b60132365c072874d1f811b70679a2bcb 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1004,7 +1004,9 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else { + rcu_read_lock(); ip_mr_forward(net, mrt, skb->dev, skb, c, 0); + rcu_read_unlock(); } } } @@ -1933,7 +1935,7 @@ static int ipmr_find_vif(const struct mr_table *mrt, struct net_device *dev) } /* "local" means that we should preserve one skb (for local delivery) */ -/* Called uner rcu_read_lock() */ +/* Called under rcu_read_lock() */ static void ip_mr_forward(struct net *net, struct mr_table *mrt, struct net_device *dev, struct sk_buff *skb, struct mfc_cache *c, int local)
On Fri, Jul 22, 2022 at 10:37:24PM +0200, Eric Dumazet wrote: > Thanks for the report. > > I guess there are multiple ways to solve this issue, one being: > > diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c > index 73651d17e51f31c8755da6ac3c1c2763a99b1117..1c288a7b60132365c072874d1f811b70679a2bcb > 100644 > --- a/net/ipv4/ipmr.c > +++ b/net/ipv4/ipmr.c > @@ -1004,7 +1004,9 @@ static void ipmr_cache_resolve(struct net *net, > struct mr_table *mrt, > > rtnl_unicast(skb, net, NETLINK_CB(skb).portid); > } else { > + rcu_read_lock(); > ip_mr_forward(net, mrt, skb->dev, skb, c, 0); > + rcu_read_unlock(); > } > } > } > @@ -1933,7 +1935,7 @@ static int ipmr_find_vif(const struct mr_table > *mrt, struct net_device *dev) > } > > /* "local" means that we should preserve one skb (for local delivery) */ > -/* Called uner rcu_read_lock() */ > +/* Called under rcu_read_lock() */ > static void ip_mr_forward(struct net *net, struct mr_table *mrt, > struct net_device *dev, struct sk_buff *skb, > struct mfc_cache *c, int local) It sure makes lockdep stop complaining... I just noticed that we appear to have the same problem with the equivalent call path for ipv6: ip6mr_mfc_add -> ip6mr_cache_resolve -> ip6_mr_forward, although I don't have smcroute or the kernel configured for any IPv6 multicast routes right now, so I can't say for sure.
On Sat, Jul 23, 2022 at 12:10:05AM +0300, Vladimir Oltean wrote: > I just noticed that we appear to have the same problem with the > equivalent call path for ipv6: ip6mr_mfc_add -> ip6mr_cache_resolve -> > ip6_mr_forward, although I don't have smcroute or the kernel configured > for any IPv6 multicast routes right now, so I can't say for sure. Not to mention ip6_mr_forward() has a random rcu_read_unlock() thrown in there, with no paired lock(), left from who knows what refactoring... I don't think I'll be able to report all the locking problems with the IP multicast routing code, maybe someone with more familiarity should take a look there :-/
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 6ea54bc3d9b6555aaa9974d81ba4acd47481724f..b0f2e6d79d62273c8c2682f28cb45fe5ec3df6f3 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1817,7 +1817,7 @@ static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt, } #endif -/* Processing handlers for ipmr_forward */ +/* Processing handlers for ipmr_forward, under rcu_read_lock() */ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, int in_vifi, struct sk_buff *skb, int vifi) @@ -1839,9 +1839,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len); vif_dev->stats.tx_bytes += skb->len; vif_dev->stats.tx_packets++; - rcu_read_lock(); ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT); - rcu_read_unlock(); goto out_free; } @@ -1936,6 +1934,7 @@ static int ipmr_find_vif(const struct mr_table *mrt, struct net_device *dev) } /* "local" means that we should preserve one skb (for local delivery) */ +/* Called uner rcu_read_lock() */ static void ip_mr_forward(struct net *net, struct mr_table *mrt, struct net_device *dev, struct sk_buff *skb, struct mfc_cache *c, int local) @@ -1992,12 +1991,10 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt, c->_c.mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { c->_c.mfc_un.res.last_assert = jiffies; - rcu_read_lock(); ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF); if (mrt->mroute_do_wrvifwhole) ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRVIFWHOLE); - rcu_read_unlock(); } goto dont_forward; } @@ -2169,9 +2166,7 @@ int ip_mr_input(struct sk_buff *skb) return -ENODEV; } - read_lock(&mrt_lock); ip_mr_forward(net, mrt, dev, skb, cache, local); - read_unlock(&mrt_lock); if (local) return ip_local_deliver(skb);
ip_mr_forward() uses standard RCU protection already. Signed-off-by: Eric Dumazet <edumazet@google.com> --- net/ipv4/ipmr.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-)