diff mbox series

[v2,net-next,08/19] ipmr: do not acquire mrt_lock while calling ip_mr_forward()

Message ID 20220623043449.1217288-9-edumazet@google.com (mailing list archive)
State Accepted
Commit 4eadb88244d17056179a47f613e30c49191072c5
Delegated to: Netdev Maintainers
Headers show
Series ipmr: get rid of rwlocks | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count fail Series longer than 15 patches (and no cover letter)
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 4 this patch: 4
netdev/cc_maintainers warning 2 maintainers not CCed: yoshfuji@linux-ipv6.org dsahern@kernel.org
netdev/build_clang success Errors and warnings before: 6 this patch: 6
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 4 this patch: 4
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 45 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Eric Dumazet June 23, 2022, 4:34 a.m. UTC
ip_mr_forward() uses standard RCU protection already.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/ipv4/ipmr.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

Comments

Vladimir Oltean July 22, 2022, 7:34 p.m. UTC | #1
Hi Eric,

On Thu, Jun 23, 2022 at 04:34:38AM +0000, Eric Dumazet wrote:
> ip_mr_forward() uses standard RCU protection already.
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---
>  net/ipv4/ipmr.c | 9 ++-------
>  1 file changed, 2 insertions(+), 7 deletions(-)
> 
> diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
> index 6ea54bc3d9b6555aaa9974d81ba4acd47481724f..b0f2e6d79d62273c8c2682f28cb45fe5ec3df6f3 100644
> --- a/net/ipv4/ipmr.c
> +++ b/net/ipv4/ipmr.c
> @@ -1817,7 +1817,7 @@ static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
>  }
>  #endif
>  
> -/* Processing handlers for ipmr_forward */
> +/* Processing handlers for ipmr_forward, under rcu_read_lock() */
>  
>  static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
>  			    int in_vifi, struct sk_buff *skb, int vifi)
> @@ -1839,9 +1839,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
>  		WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
>  		vif_dev->stats.tx_bytes += skb->len;
>  		vif_dev->stats.tx_packets++;
> -		rcu_read_lock();
>  		ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
> -		rcu_read_unlock();
>  		goto out_free;
>  	}
>  
> @@ -1936,6 +1934,7 @@ static int ipmr_find_vif(const struct mr_table *mrt, struct net_device *dev)
>  }
>  
>  /* "local" means that we should preserve one skb (for local delivery) */
> +/* Called uner rcu_read_lock() */
>  static void ip_mr_forward(struct net *net, struct mr_table *mrt,
>  			  struct net_device *dev, struct sk_buff *skb,
>  			  struct mfc_cache *c, int local)
> @@ -1992,12 +1991,10 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
>  			       c->_c.mfc_un.res.last_assert +
>  			       MFC_ASSERT_THRESH)) {
>  			c->_c.mfc_un.res.last_assert = jiffies;
> -			rcu_read_lock();
>  			ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
>  			if (mrt->mroute_do_wrvifwhole)
>  				ipmr_cache_report(mrt, skb, true_vifi,
>  						  IGMPMSG_WRVIFWHOLE);
> -			rcu_read_unlock();
>  		}
>  		goto dont_forward;
>  	}
> @@ -2169,9 +2166,7 @@ int ip_mr_input(struct sk_buff *skb)
>  		return -ENODEV;
>  	}
>  
> -	read_lock(&mrt_lock);
>  	ip_mr_forward(net, mrt, dev, skb, cache, local);
> -	read_unlock(&mrt_lock);
>  
>  	if (local)
>  		return ip_local_deliver(skb);
> -- 
> 2.37.0.rc0.104.g0611611a94-goog
> 

Sorry for reporting this late, but there seems to be 1 call path from
which RCU is not watching in ip_mr_forward(). It's via ipmr_mfc_add() ->
ipmr_cache_resolve() -> ip_mr_forward().

The warning looks like this:

[  632.062382] =============================
[  632.068568] WARNING: suspicious RCU usage
[  632.073702] 5.19.0-rc7-07010-ga9b9500ffaac-dirty #3374 Not tainted
[  632.081098] -----------------------------
[  632.086216] net/ipv4/ipmr.c:1080 suspicious rcu_dereference_check() usage!
[  632.094152]
[  632.094152] other info that might help us debug this:
[  632.103349]
[  632.103349] rcu_scheduler_active = 2, debug_locks = 1
[  632.111011] 1 lock held by smcrouted/359:
[  632.116079]  #0: ffffd27b44d23770 (rtnl_mutex){+.+.}-{4:4}, at: rtnl_lock+0x1c/0x30
[  632.124703]
[  632.124703] stack backtrace:
[  632.129681] CPU: 0 PID: 359 Comm: smcrouted Not tainted 5.19.0-rc7-07010-ga9b9500ffaac-dirty #3374
[  632.143426] Call trace:
[  632.160542]  lockdep_rcu_suspicious+0xf8/0x10c
[  632.165014]  ipmr_cache_report+0x2f0/0x530
[  632.169137]  ip_mr_forward+0x364/0x38c
[  632.172909]  ipmr_mfc_add+0x894/0xc90
[  632.176592]  ip_mroute_setsockopt+0x6ac/0x950
[  632.180973]  ip_setsockopt+0x16a0/0x16ac
[  632.184921]  raw_setsockopt+0x110/0x184
[  632.188780]  sock_common_setsockopt+0x1c/0x2c
[  632.193163]  __sys_setsockopt+0x94/0x170
[  632.197111]  __arm64_sys_setsockopt+0x2c/0x40
[  632.201492]  invoke_syscall+0x48/0x114

I don't exactly understand the data structures that are used inside ip_mr_forward(),
so I'm unable to say what needs RCU protection and what is fine with the rtnl_mutex
that we are holding, just annotated poorly. Could you please take a look?
Eric Dumazet July 22, 2022, 8:37 p.m. UTC | #2
On Fri, Jul 22, 2022 at 9:34 PM Vladimir Oltean <olteanv@gmail.com> wrote:
>
> Hi Eric,
>
> On Thu, Jun 23, 2022 at 04:34:38AM +0000, Eric Dumazet wrote:
> > ip_mr_forward() uses standard RCU protection already.
> >
> > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > ---
> >  net/ipv4/ipmr.c | 9 ++-------
> >  1 file changed, 2 insertions(+), 7 deletions(-)
> >
> > diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
> > index 6ea54bc3d9b6555aaa9974d81ba4acd47481724f..b0f2e6d79d62273c8c2682f28cb45fe5ec3df6f3 100644
> > --- a/net/ipv4/ipmr.c
> > +++ b/net/ipv4/ipmr.c
> > @@ -1817,7 +1817,7 @@ static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
> >  }
> >  #endif
> >
> > -/* Processing handlers for ipmr_forward */
> > +/* Processing handlers for ipmr_forward, under rcu_read_lock() */
> >
> >  static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
> >                           int in_vifi, struct sk_buff *skb, int vifi)
> > @@ -1839,9 +1839,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
> >               WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
> >               vif_dev->stats.tx_bytes += skb->len;
> >               vif_dev->stats.tx_packets++;
> > -             rcu_read_lock();
> >               ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
> > -             rcu_read_unlock();
> >               goto out_free;
> >       }
> >
> > @@ -1936,6 +1934,7 @@ static int ipmr_find_vif(const struct mr_table *mrt, struct net_device *dev)
> >  }
> >
> >  /* "local" means that we should preserve one skb (for local delivery) */
> > +/* Called uner rcu_read_lock() */
> >  static void ip_mr_forward(struct net *net, struct mr_table *mrt,
> >                         struct net_device *dev, struct sk_buff *skb,
> >                         struct mfc_cache *c, int local)
> > @@ -1992,12 +1991,10 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
> >                              c->_c.mfc_un.res.last_assert +
> >                              MFC_ASSERT_THRESH)) {
> >                       c->_c.mfc_un.res.last_assert = jiffies;
> > -                     rcu_read_lock();
> >                       ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
> >                       if (mrt->mroute_do_wrvifwhole)
> >                               ipmr_cache_report(mrt, skb, true_vifi,
> >                                                 IGMPMSG_WRVIFWHOLE);
> > -                     rcu_read_unlock();
> >               }
> >               goto dont_forward;
> >       }
> > @@ -2169,9 +2166,7 @@ int ip_mr_input(struct sk_buff *skb)
> >               return -ENODEV;
> >       }
> >
> > -     read_lock(&mrt_lock);
> >       ip_mr_forward(net, mrt, dev, skb, cache, local);
> > -     read_unlock(&mrt_lock);
> >
> >       if (local)
> >               return ip_local_deliver(skb);
> > --
> > 2.37.0.rc0.104.g0611611a94-goog
> >
>
> Sorry for reporting this late, but there seems to be 1 call path from
> which RCU is not watching in ip_mr_forward(). It's via ipmr_mfc_add() ->
> ipmr_cache_resolve() -> ip_mr_forward().
>
> The warning looks like this:
>
> [  632.062382] =============================
> [  632.068568] WARNING: suspicious RCU usage
> [  632.073702] 5.19.0-rc7-07010-ga9b9500ffaac-dirty #3374 Not tainted
> [  632.081098] -----------------------------
> [  632.086216] net/ipv4/ipmr.c:1080 suspicious rcu_dereference_check() usage!
> [  632.094152]
> [  632.094152] other info that might help us debug this:
> [  632.103349]
> [  632.103349] rcu_scheduler_active = 2, debug_locks = 1
> [  632.111011] 1 lock held by smcrouted/359:
> [  632.116079]  #0: ffffd27b44d23770 (rtnl_mutex){+.+.}-{4:4}, at: rtnl_lock+0x1c/0x30
> [  632.124703]
> [  632.124703] stack backtrace:
> [  632.129681] CPU: 0 PID: 359 Comm: smcrouted Not tainted 5.19.0-rc7-07010-ga9b9500ffaac-dirty #3374
> [  632.143426] Call trace:
> [  632.160542]  lockdep_rcu_suspicious+0xf8/0x10c
> [  632.165014]  ipmr_cache_report+0x2f0/0x530
> [  632.169137]  ip_mr_forward+0x364/0x38c
> [  632.172909]  ipmr_mfc_add+0x894/0xc90
> [  632.176592]  ip_mroute_setsockopt+0x6ac/0x950
> [  632.180973]  ip_setsockopt+0x16a0/0x16ac
> [  632.184921]  raw_setsockopt+0x110/0x184
> [  632.188780]  sock_common_setsockopt+0x1c/0x2c
> [  632.193163]  __sys_setsockopt+0x94/0x170
> [  632.197111]  __arm64_sys_setsockopt+0x2c/0x40
> [  632.201492]  invoke_syscall+0x48/0x114
>
> I don't exactly understand the data structures that are used inside ip_mr_forward(),
> so I'm unable to say what needs RCU protection and what is fine with the rtnl_mutex
> that we are holding, just annotated poorly. Could you please take a look?

Thanks for the report.

I guess there are multiple ways to solve this issue, one being:

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 73651d17e51f31c8755da6ac3c1c2763a99b1117..1c288a7b60132365c072874d1f811b70679a2bcb
100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1004,7 +1004,9 @@ static void ipmr_cache_resolve(struct net *net,
struct mr_table *mrt,

                        rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
                } else {
+                       rcu_read_lock();
                        ip_mr_forward(net, mrt, skb->dev, skb, c, 0);
+                       rcu_read_unlock();
                }
        }
 }
@@ -1933,7 +1935,7 @@ static int ipmr_find_vif(const struct mr_table
*mrt, struct net_device *dev)
 }

 /* "local" means that we should preserve one skb (for local delivery) */
-/* Called uner rcu_read_lock() */
+/* Called under rcu_read_lock() */
 static void ip_mr_forward(struct net *net, struct mr_table *mrt,
                          struct net_device *dev, struct sk_buff *skb,
                          struct mfc_cache *c, int local)
Vladimir Oltean July 22, 2022, 9:10 p.m. UTC | #3
On Fri, Jul 22, 2022 at 10:37:24PM +0200, Eric Dumazet wrote:
> Thanks for the report.
> 
> I guess there are multiple ways to solve this issue, one being:
> 
> diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
> index 73651d17e51f31c8755da6ac3c1c2763a99b1117..1c288a7b60132365c072874d1f811b70679a2bcb
> 100644
> --- a/net/ipv4/ipmr.c
> +++ b/net/ipv4/ipmr.c
> @@ -1004,7 +1004,9 @@ static void ipmr_cache_resolve(struct net *net,
> struct mr_table *mrt,
> 
>                         rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
>                 } else {
> +                       rcu_read_lock();
>                         ip_mr_forward(net, mrt, skb->dev, skb, c, 0);
> +                       rcu_read_unlock();
>                 }
>         }
>  }
> @@ -1933,7 +1935,7 @@ static int ipmr_find_vif(const struct mr_table
> *mrt, struct net_device *dev)
>  }
> 
>  /* "local" means that we should preserve one skb (for local delivery) */
> -/* Called uner rcu_read_lock() */
> +/* Called under rcu_read_lock() */
>  static void ip_mr_forward(struct net *net, struct mr_table *mrt,
>                           struct net_device *dev, struct sk_buff *skb,
>                           struct mfc_cache *c, int local)

It sure makes lockdep stop complaining...

I just noticed that we appear to have the same problem with the
equivalent call path for ipv6: ip6mr_mfc_add -> ip6mr_cache_resolve ->
ip6_mr_forward, although I don't have smcroute or the kernel configured
for any IPv6 multicast routes right now, so I can't say for sure.
Vladimir Oltean July 22, 2022, 9:15 p.m. UTC | #4
On Sat, Jul 23, 2022 at 12:10:05AM +0300, Vladimir Oltean wrote:
> I just noticed that we appear to have the same problem with the
> equivalent call path for ipv6: ip6mr_mfc_add -> ip6mr_cache_resolve ->
> ip6_mr_forward, although I don't have smcroute or the kernel configured
> for any IPv6 multicast routes right now, so I can't say for sure.

Not to mention ip6_mr_forward() has a random rcu_read_unlock() thrown in
there, with no paired lock(), left from who knows what refactoring...
I don't think I'll be able to report all the locking problems with the
IP multicast routing code, maybe someone with more familiarity should
take a look there :-/
diff mbox series

Patch

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 6ea54bc3d9b6555aaa9974d81ba4acd47481724f..b0f2e6d79d62273c8c2682f28cb45fe5ec3df6f3 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1817,7 +1817,7 @@  static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
 }
 #endif
 
-/* Processing handlers for ipmr_forward */
+/* Processing handlers for ipmr_forward, under rcu_read_lock() */
 
 static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
 			    int in_vifi, struct sk_buff *skb, int vifi)
@@ -1839,9 +1839,7 @@  static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
 		WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
 		vif_dev->stats.tx_bytes += skb->len;
 		vif_dev->stats.tx_packets++;
-		rcu_read_lock();
 		ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
-		rcu_read_unlock();
 		goto out_free;
 	}
 
@@ -1936,6 +1934,7 @@  static int ipmr_find_vif(const struct mr_table *mrt, struct net_device *dev)
 }
 
 /* "local" means that we should preserve one skb (for local delivery) */
+/* Called uner rcu_read_lock() */
 static void ip_mr_forward(struct net *net, struct mr_table *mrt,
 			  struct net_device *dev, struct sk_buff *skb,
 			  struct mfc_cache *c, int local)
@@ -1992,12 +1991,10 @@  static void ip_mr_forward(struct net *net, struct mr_table *mrt,
 			       c->_c.mfc_un.res.last_assert +
 			       MFC_ASSERT_THRESH)) {
 			c->_c.mfc_un.res.last_assert = jiffies;
-			rcu_read_lock();
 			ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
 			if (mrt->mroute_do_wrvifwhole)
 				ipmr_cache_report(mrt, skb, true_vifi,
 						  IGMPMSG_WRVIFWHOLE);
-			rcu_read_unlock();
 		}
 		goto dont_forward;
 	}
@@ -2169,9 +2166,7 @@  int ip_mr_input(struct sk_buff *skb)
 		return -ENODEV;
 	}
 
-	read_lock(&mrt_lock);
 	ip_mr_forward(net, mrt, dev, skb, cache, local);
-	read_unlock(&mrt_lock);
 
 	if (local)
 		return ip_local_deliver(skb);