diff mbox series

[net-next] ipv6: remove RTNL protection from ip6addrlbl_dump()

Message ID 20240403123913.4173904-1-edumazet@google.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series [net-next] ipv6: remove RTNL protection from ip6addrlbl_dump() | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 943 this patch: 943
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 5 of 5 maintainers
netdev/build_clang success Errors and warnings before: 954 this patch: 954
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 954 this patch: 954
netdev/checkpatch warning WARNING: line length of 88 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest warning net-next-2024-04-03--21-00 (tests: 951)

Commit Message

Eric Dumazet April 3, 2024, 12:39 p.m. UTC
No longer hold RTNL while calling ip6addrlbl_dump()
("ip addrlabel show")

ip6addrlbl_dump() was already mostly relying on RCU anyway.

Add READ_ONCE()/WRITE_ONCE() annotations around
net->ipv6.ip6addrlbl_table.seq

Also change return value for a completed dump,
so that NLMSG_DONE can be appended to current skb,
saving one recvmsg() system call.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/ipv6/addrlabel.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

Comments

David Ahern April 3, 2024, 4:13 p.m. UTC | #1
On 4/3/24 6:39 AM, Eric Dumazet wrote:
> diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
> index 17ac45aa7194ce9c148ed95e14dd575d17feeb98..961e853543b64cd2060ef693ae3ad32a44780aa1 100644
> --- a/net/ipv6/addrlabel.c
> +++ b/net/ipv6/addrlabel.c
> @@ -234,7 +234,8 @@ static int __ip6addrlbl_add(struct net *net, struct ip6addrlbl_entry *newp,
>  		hlist_add_head_rcu(&newp->list, &net->ipv6.ip6addrlbl_table.head);
>  out:
>  	if (!ret)
> -		net->ipv6.ip6addrlbl_table.seq++;
> +		WRITE_ONCE(net->ipv6.ip6addrlbl_table.seq,
> +			   net->ipv6.ip6addrlbl_table.seq + 1);
>  	return ret;
>  }
>  
> @@ -445,7 +446,7 @@ static void ip6addrlbl_putmsg(struct nlmsghdr *nlh,
>  };
>  
>  static int ip6addrlbl_fill(struct sk_buff *skb,
> -			   struct ip6addrlbl_entry *p,
> +			   const struct ip6addrlbl_entry *p,
>  			   u32 lseq,
>  			   u32 portid, u32 seq, int event,
>  			   unsigned int flags)
> @@ -498,7 +499,7 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
>  	struct net *net = sock_net(skb->sk);
>  	struct ip6addrlbl_entry *p;
>  	int idx = 0, s_idx = cb->args[0];
> -	int err;
> +	int err = 0;
>  
>  	if (cb->strict_check) {
>  		err = ip6addrlbl_valid_dump_req(nlh, cb->extack);
> @@ -510,7 +511,7 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
>  	hlist_for_each_entry_rcu(p, &net->ipv6.ip6addrlbl_table.head, list) {
>  		if (idx >= s_idx) {
>  			err = ip6addrlbl_fill(skb, p,
> -					      net->ipv6.ip6addrlbl_table.seq,
> +					      READ_ONCE(net->ipv6.ip6addrlbl_table.seq),

seems like this should be read once on entry, and the same value used
for all iterations.

>  					      NETLINK_CB(cb->skb).portid,
>  					      nlh->nlmsg_seq,
>  					      RTM_NEWADDRLABEL,
> @@ -522,7 +523,7 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
>  	}
>  	rcu_read_unlock();
>  	cb->args[0] = idx;
> -	return skb->len;
> +	return err;
>  }
>  
>  static inline int ip6addrlbl_msgsize(void)
> @@ -614,7 +615,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
>  
>  	rcu_read_lock();
>  	p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index);
> -	lseq = net->ipv6.ip6addrlbl_table.seq;
> +	lseq = READ_ONCE(net->ipv6.ip6addrlbl_table.seq);
>  	if (p)
>  		err = ip6addrlbl_fill(skb, p, lseq,
>  				      NETLINK_CB(in_skb).portid,
> @@ -647,6 +648,7 @@ int __init ipv6_addr_label_rtnl_register(void)
>  		return ret;
>  	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETADDRLABEL,
>  				   ip6addrlbl_get,
> -				   ip6addrlbl_dump, RTNL_FLAG_DOIT_UNLOCKED);
> +				   ip6addrlbl_dump, RTNL_FLAG_DOIT_UNLOCKED |
> +						    RTNL_FLAG_DUMP_UNLOCKED);
>  	return ret;
>  }
Eric Dumazet April 3, 2024, 4:25 p.m. UTC | #2
On Wed, Apr 3, 2024 at 6:13 PM David Ahern <dsahern@kernel.org> wrote:
>
> On 4/3/24 6:39 AM, Eric Dumazet wrote:
> > diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
> > index 17ac45aa7194ce9c148ed95e14dd575d17feeb98..961e853543b64cd2060ef693ae3ad32a44780aa1 100644
> > --- a/net/ipv6/addrlabel.c
> > +++ b/net/ipv6/addrlabel.c
> > @@ -234,7 +234,8 @@ static int __ip6addrlbl_add(struct net *net, struct ip6addrlbl_entry *newp,
> >               hlist_add_head_rcu(&newp->list, &net->ipv6.ip6addrlbl_table.head);
> >  out:
> >       if (!ret)
> > -             net->ipv6.ip6addrlbl_table.seq++;
> > +             WRITE_ONCE(net->ipv6.ip6addrlbl_table.seq,
> > +                        net->ipv6.ip6addrlbl_table.seq + 1);
> >       return ret;
> >  }
> >
> > @@ -445,7 +446,7 @@ static void ip6addrlbl_putmsg(struct nlmsghdr *nlh,
> >  };
> >
> >  static int ip6addrlbl_fill(struct sk_buff *skb,
> > -                        struct ip6addrlbl_entry *p,
> > +                        const struct ip6addrlbl_entry *p,
> >                          u32 lseq,
> >                          u32 portid, u32 seq, int event,
> >                          unsigned int flags)
> > @@ -498,7 +499,7 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
> >       struct net *net = sock_net(skb->sk);
> >       struct ip6addrlbl_entry *p;
> >       int idx = 0, s_idx = cb->args[0];
> > -     int err;
> > +     int err = 0;
> >
> >       if (cb->strict_check) {
> >               err = ip6addrlbl_valid_dump_req(nlh, cb->extack);
> > @@ -510,7 +511,7 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
> >       hlist_for_each_entry_rcu(p, &net->ipv6.ip6addrlbl_table.head, list) {
> >               if (idx >= s_idx) {
> >                       err = ip6addrlbl_fill(skb, p,
> > -                                           net->ipv6.ip6addrlbl_table.seq,
> > +                                           READ_ONCE(net->ipv6.ip6addrlbl_table.seq),
>
> seems like this should be read once on entry, and the same value used
> for all iterations.

I thought of that, but this will miss any update done concurrently (if
all entries fit in a single skb)

It is unclear to me what user space can do with ifal_seq

There is no clear signal like :

Initial seq _before_ the dump, and seq seen _after_ the dump.
David Ahern April 3, 2024, 5:49 p.m. UTC | #3
On 4/3/24 10:25 AM, Eric Dumazet wrote:
> On Wed, Apr 3, 2024 at 6:13 PM David Ahern <dsahern@kernel.org> wrote:
>>
>> On 4/3/24 6:39 AM, Eric Dumazet wrote:
>>> diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
>>> index 17ac45aa7194ce9c148ed95e14dd575d17feeb98..961e853543b64cd2060ef693ae3ad32a44780aa1 100644
>>> --- a/net/ipv6/addrlabel.c
>>> +++ b/net/ipv6/addrlabel.c
>>> @@ -234,7 +234,8 @@ static int __ip6addrlbl_add(struct net *net, struct ip6addrlbl_entry *newp,
>>>               hlist_add_head_rcu(&newp->list, &net->ipv6.ip6addrlbl_table.head);
>>>  out:
>>>       if (!ret)
>>> -             net->ipv6.ip6addrlbl_table.seq++;
>>> +             WRITE_ONCE(net->ipv6.ip6addrlbl_table.seq,
>>> +                        net->ipv6.ip6addrlbl_table.seq + 1);
>>>       return ret;
>>>  }
>>>
>>> @@ -445,7 +446,7 @@ static void ip6addrlbl_putmsg(struct nlmsghdr *nlh,
>>>  };
>>>
>>>  static int ip6addrlbl_fill(struct sk_buff *skb,
>>> -                        struct ip6addrlbl_entry *p,
>>> +                        const struct ip6addrlbl_entry *p,
>>>                          u32 lseq,
>>>                          u32 portid, u32 seq, int event,
>>>                          unsigned int flags)
>>> @@ -498,7 +499,7 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
>>>       struct net *net = sock_net(skb->sk);
>>>       struct ip6addrlbl_entry *p;
>>>       int idx = 0, s_idx = cb->args[0];
>>> -     int err;
>>> +     int err = 0;
>>>
>>>       if (cb->strict_check) {
>>>               err = ip6addrlbl_valid_dump_req(nlh, cb->extack);
>>> @@ -510,7 +511,7 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
>>>       hlist_for_each_entry_rcu(p, &net->ipv6.ip6addrlbl_table.head, list) {
>>>               if (idx >= s_idx) {
>>>                       err = ip6addrlbl_fill(skb, p,
>>> -                                           net->ipv6.ip6addrlbl_table.seq,
>>> +                                           READ_ONCE(net->ipv6.ip6addrlbl_table.seq),
>>
>> seems like this should be read once on entry, and the same value used
>> for all iterations.
> 
> I thought of that, but this will miss any update done concurrently (if
> all entries fit in a single skb)
> 
> It is unclear to me what user space can do with ifal_seq
> 
> There is no clear signal like :
> 
> Initial seq _before_ the dump, and seq seen _after_ the dump.

Since this a known change in behavior (however slight), it would be best
to add something in the commit message or a code comment so if someone
hits a problem and does a git bisect the problem and solution are clear.
Eric Dumazet April 3, 2024, 5:55 p.m. UTC | #4
On Wed, Apr 3, 2024 at 7:49 PM David Ahern <dsahern@kernel.org> wrote:

> Since this a known change in behavior (however slight), it would be best
> to add something in the commit message or a code comment so if someone
> hits a problem and does a git bisect the problem and solution are clear.

Ok.

The cb->seq  / nl_dump_check_consistent() protocol could be implemented later,
if necessary.
diff mbox series

Patch

diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 17ac45aa7194ce9c148ed95e14dd575d17feeb98..961e853543b64cd2060ef693ae3ad32a44780aa1 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -234,7 +234,8 @@  static int __ip6addrlbl_add(struct net *net, struct ip6addrlbl_entry *newp,
 		hlist_add_head_rcu(&newp->list, &net->ipv6.ip6addrlbl_table.head);
 out:
 	if (!ret)
-		net->ipv6.ip6addrlbl_table.seq++;
+		WRITE_ONCE(net->ipv6.ip6addrlbl_table.seq,
+			   net->ipv6.ip6addrlbl_table.seq + 1);
 	return ret;
 }
 
@@ -445,7 +446,7 @@  static void ip6addrlbl_putmsg(struct nlmsghdr *nlh,
 };
 
 static int ip6addrlbl_fill(struct sk_buff *skb,
-			   struct ip6addrlbl_entry *p,
+			   const struct ip6addrlbl_entry *p,
 			   u32 lseq,
 			   u32 portid, u32 seq, int event,
 			   unsigned int flags)
@@ -498,7 +499,7 @@  static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	struct net *net = sock_net(skb->sk);
 	struct ip6addrlbl_entry *p;
 	int idx = 0, s_idx = cb->args[0];
-	int err;
+	int err = 0;
 
 	if (cb->strict_check) {
 		err = ip6addrlbl_valid_dump_req(nlh, cb->extack);
@@ -510,7 +511,7 @@  static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	hlist_for_each_entry_rcu(p, &net->ipv6.ip6addrlbl_table.head, list) {
 		if (idx >= s_idx) {
 			err = ip6addrlbl_fill(skb, p,
-					      net->ipv6.ip6addrlbl_table.seq,
+					      READ_ONCE(net->ipv6.ip6addrlbl_table.seq),
 					      NETLINK_CB(cb->skb).portid,
 					      nlh->nlmsg_seq,
 					      RTM_NEWADDRLABEL,
@@ -522,7 +523,7 @@  static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	}
 	rcu_read_unlock();
 	cb->args[0] = idx;
-	return skb->len;
+	return err;
 }
 
 static inline int ip6addrlbl_msgsize(void)
@@ -614,7 +615,7 @@  static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 
 	rcu_read_lock();
 	p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index);
-	lseq = net->ipv6.ip6addrlbl_table.seq;
+	lseq = READ_ONCE(net->ipv6.ip6addrlbl_table.seq);
 	if (p)
 		err = ip6addrlbl_fill(skb, p, lseq,
 				      NETLINK_CB(in_skb).portid,
@@ -647,6 +648,7 @@  int __init ipv6_addr_label_rtnl_register(void)
 		return ret;
 	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETADDRLABEL,
 				   ip6addrlbl_get,
-				   ip6addrlbl_dump, RTNL_FLAG_DOIT_UNLOCKED);
+				   ip6addrlbl_dump, RTNL_FLAG_DOIT_UNLOCKED |
+						    RTNL_FLAG_DUMP_UNLOCKED);
 	return ret;
 }