Message ID | 20240425170002.68160-7-kuniyu@amazon.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | arp: Random clean up and RCU conversion for ioctl(SIOCGARP). | expand |
On Thu, Apr 25, 2024 at 7:02 PM Kuniyuki Iwashima <kuniyu@amazon.com> wrote: > > ioctl(SIOCGARP) holds rtnl_lock() for __dev_get_by_name() and > later calls neigh_lookup(), which calls rcu_read_lock(). > > Let's replace __dev_get_by_name() with dev_get_by_name_rcu() to > avoid locking rtnl_lock(). > > Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com> > --- > net/ipv4/arp.c | 26 +++++++++++++++++--------- > 1 file changed, 17 insertions(+), 9 deletions(-) > > diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c > index 5034920be85a..9430b64558cd 100644 > --- a/net/ipv4/arp.c > +++ b/net/ipv4/arp.c > @@ -1003,11 +1003,15 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev, > * User level interface (ioctl) > */ > > -static struct net_device *arp_req_dev_by_name(struct net *net, struct arpreq *r) > +static struct net_device *arp_req_dev_by_name(struct net *net, struct arpreq *r, > + bool getarp) > { > struct net_device *dev; > > - dev = __dev_get_by_name(net, r->arp_dev); > + if (getarp) > + dev = dev_get_by_name_rcu(net, r->arp_dev); > + else > + dev = __dev_get_by_name(net, r->arp_dev); > if (!dev) > return ERR_PTR(-ENODEV); > > @@ -1028,7 +1032,7 @@ static struct net_device *arp_req_dev(struct net *net, struct arpreq *r) > __be32 ip; > > if (r->arp_dev[0]) > - return arp_req_dev_by_name(net, r); > + return arp_req_dev_by_name(net, r, false); > > if (r->arp_flags & ATF_PUBL) > return NULL; > @@ -1166,7 +1170,7 @@ static int arp_req_get(struct net *net, struct arpreq *r) > if (!r->arp_dev[0]) > return -ENODEV; > > - dev = arp_req_dev_by_name(net, r); > + dev = arp_req_dev_by_name(net, r, true); > if (IS_ERR(dev)) > return PTR_ERR(dev); > > @@ -1287,23 +1291,27 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg) > else if (*netmask && *netmask != htonl(0xFFFFFFFFUL)) > return -EINVAL; > > - rtnl_lock(); > - > switch (cmd) { > case SIOCDARP: > + rtnl_lock(); > err = arp_req_delete(net, &r); > + rtnl_unlock(); > break; > case SIOCSARP: > + rtnl_lock(); > err = arp_req_set(net, &r); > + rtnl_unlock(); > break; > case SIOCGARP: > + rcu_read_lock(); > err = arp_req_get(net, &r); > + rcu_read_unlock(); Note that arp_req_get() uses : strscpy(r->arp_dev, dev->name, sizeof(r->arp_dev)); This currently depends on RTNL or devnet_rename_sem Perhaps we should add a helper and use a seqlock to safely copy dev->name into a temporary variable. netdev_get_name() can not be called from rcu_read_lock() at this moment unfortunately.
From: Eric Dumazet <edumazet@google.com> Date: Thu, 25 Apr 2024 19:12:56 +0200 > On Thu, Apr 25, 2024 at 7:02 PM Kuniyuki Iwashima <kuniyu@amazon.com> wrote: > > > > ioctl(SIOCGARP) holds rtnl_lock() for __dev_get_by_name() and > > later calls neigh_lookup(), which calls rcu_read_lock(). > > > > Let's replace __dev_get_by_name() with dev_get_by_name_rcu() to > > avoid locking rtnl_lock(). > > > > Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com> > > --- > > net/ipv4/arp.c | 26 +++++++++++++++++--------- > > 1 file changed, 17 insertions(+), 9 deletions(-) > > > > diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c > > index 5034920be85a..9430b64558cd 100644 > > --- a/net/ipv4/arp.c > > +++ b/net/ipv4/arp.c > > @@ -1003,11 +1003,15 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev, > > * User level interface (ioctl) > > */ > > > > -static struct net_device *arp_req_dev_by_name(struct net *net, struct arpreq *r) > > +static struct net_device *arp_req_dev_by_name(struct net *net, struct arpreq *r, > > + bool getarp) > > { > > struct net_device *dev; > > > > - dev = __dev_get_by_name(net, r->arp_dev); > > + if (getarp) > > + dev = dev_get_by_name_rcu(net, r->arp_dev); > > + else > > + dev = __dev_get_by_name(net, r->arp_dev); > > if (!dev) > > return ERR_PTR(-ENODEV); > > > > @@ -1028,7 +1032,7 @@ static struct net_device *arp_req_dev(struct net *net, struct arpreq *r) > > __be32 ip; > > > > if (r->arp_dev[0]) > > - return arp_req_dev_by_name(net, r); > > + return arp_req_dev_by_name(net, r, false); > > > > if (r->arp_flags & ATF_PUBL) > > return NULL; > > @@ -1166,7 +1170,7 @@ static int arp_req_get(struct net *net, struct arpreq *r) > > if (!r->arp_dev[0]) > > return -ENODEV; > > > > - dev = arp_req_dev_by_name(net, r); > > + dev = arp_req_dev_by_name(net, r, true); > > if (IS_ERR(dev)) > > return PTR_ERR(dev); > > > > @@ -1287,23 +1291,27 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg) > > else if (*netmask && *netmask != htonl(0xFFFFFFFFUL)) > > return -EINVAL; > > > > - rtnl_lock(); > > - > > switch (cmd) { > > case SIOCDARP: > > + rtnl_lock(); > > err = arp_req_delete(net, &r); > > + rtnl_unlock(); > > break; > > case SIOCSARP: > > + rtnl_lock(); > > err = arp_req_set(net, &r); > > + rtnl_unlock(); > > break; > > case SIOCGARP: > > + rcu_read_lock(); > > err = arp_req_get(net, &r); > > + rcu_read_unlock(); > > > Note that arp_req_get() uses : > > strscpy(r->arp_dev, dev->name, sizeof(r->arp_dev)); > > This currently depends on RTNL or devnet_rename_sem Ah, I missed this point, thanks for catching! > > Perhaps we should add a helper and use a seqlock to safely copy > dev->name into a temporary variable. So it's preferable to add seqlock around memcpy() in dev_change_name() and use a helper in arp_req_get() rather than adding devnet_rename_sem locking around memcpy() in arp_req_get() ?
On Thu, Apr 25, 2024 at 7:52 PM Kuniyuki Iwashima <kuniyu@amazon.com> wrote: > > From: Eric Dumazet <edumazet@google.com> > Date: Thu, 25 Apr 2024 19:12:56 +0200 > > On Thu, Apr 25, 2024 at 7:02 PM Kuniyuki Iwashima <kuniyu@amazon.com> wrote: > > > > > > ioctl(SIOCGARP) holds rtnl_lock() for __dev_get_by_name() and > > > later calls neigh_lookup(), which calls rcu_read_lock(). > > > > > > Let's replace __dev_get_by_name() with dev_get_by_name_rcu() to > > > avoid locking rtnl_lock(). > > > > > > Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com> > > > --- > > > net/ipv4/arp.c | 26 +++++++++++++++++--------- > > > 1 file changed, 17 insertions(+), 9 deletions(-) > > > > > > diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c > > > index 5034920be85a..9430b64558cd 100644 > > > --- a/net/ipv4/arp.c > > > +++ b/net/ipv4/arp.c > > > @@ -1003,11 +1003,15 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev, > > > * User level interface (ioctl) > > > */ > > > > > > -static struct net_device *arp_req_dev_by_name(struct net *net, struct arpreq *r) > > > +static struct net_device *arp_req_dev_by_name(struct net *net, struct arpreq *r, > > > + bool getarp) > > > { > > > struct net_device *dev; > > > > > > - dev = __dev_get_by_name(net, r->arp_dev); > > > + if (getarp) > > > + dev = dev_get_by_name_rcu(net, r->arp_dev); > > > + else > > > + dev = __dev_get_by_name(net, r->arp_dev); > > > if (!dev) > > > return ERR_PTR(-ENODEV); > > > > > > @@ -1028,7 +1032,7 @@ static struct net_device *arp_req_dev(struct net *net, struct arpreq *r) > > > __be32 ip; > > > > > > if (r->arp_dev[0]) > > > - return arp_req_dev_by_name(net, r); > > > + return arp_req_dev_by_name(net, r, false); > > > > > > if (r->arp_flags & ATF_PUBL) > > > return NULL; > > > @@ -1166,7 +1170,7 @@ static int arp_req_get(struct net *net, struct arpreq *r) > > > if (!r->arp_dev[0]) > > > return -ENODEV; > > > > > > - dev = arp_req_dev_by_name(net, r); > > > + dev = arp_req_dev_by_name(net, r, true); > > > if (IS_ERR(dev)) > > > return PTR_ERR(dev); > > > > > > @@ -1287,23 +1291,27 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg) > > > else if (*netmask && *netmask != htonl(0xFFFFFFFFUL)) > > > return -EINVAL; > > > > > > - rtnl_lock(); > > > - > > > switch (cmd) { > > > case SIOCDARP: > > > + rtnl_lock(); > > > err = arp_req_delete(net, &r); > > > + rtnl_unlock(); > > > break; > > > case SIOCSARP: > > > + rtnl_lock(); > > > err = arp_req_set(net, &r); > > > + rtnl_unlock(); > > > break; > > > case SIOCGARP: > > > + rcu_read_lock(); > > > err = arp_req_get(net, &r); > > > + rcu_read_unlock(); > > > > > > Note that arp_req_get() uses : > > > > strscpy(r->arp_dev, dev->name, sizeof(r->arp_dev)); > > > > This currently depends on RTNL or devnet_rename_sem > > Ah, I missed this point, thanks for catching! > > > > > > Perhaps we should add a helper and use a seqlock to safely copy > > dev->name into a temporary variable. > > So it's preferable to add seqlock around memcpy() in dev_change_name() > and use a helper in arp_req_get() rather than adding devnet_rename_sem > locking around memcpy() in arp_req_get() ? Under rcu_read_lock(), we can not sleep. devnet_rename_sem is a semaphore... down_read() might sleep. So if you plan using current netdev_get_name(), you must call it outside of rcu_read_lock() section.
From: Eric Dumazet <edumazet@google.com> Date: Thu, 25 Apr 2024 22:35:38 +0200 > On Thu, Apr 25, 2024 at 7:52 PM Kuniyuki Iwashima <kuniyu@amazon.com> wrote: > > > > From: Eric Dumazet <edumazet@google.com> > > Date: Thu, 25 Apr 2024 19:12:56 +0200 > > > On Thu, Apr 25, 2024 at 7:02 PM Kuniyuki Iwashima <kuniyu@amazon.com> wrote: > > > > > > > > ioctl(SIOCGARP) holds rtnl_lock() for __dev_get_by_name() and > > > > later calls neigh_lookup(), which calls rcu_read_lock(). > > > > > > > > Let's replace __dev_get_by_name() with dev_get_by_name_rcu() to > > > > avoid locking rtnl_lock(). > > > > > > > > Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com> > > > > --- > > > > net/ipv4/arp.c | 26 +++++++++++++++++--------- > > > > 1 file changed, 17 insertions(+), 9 deletions(-) > > > > > > > > diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c > > > > index 5034920be85a..9430b64558cd 100644 > > > > --- a/net/ipv4/arp.c > > > > +++ b/net/ipv4/arp.c > > > > @@ -1003,11 +1003,15 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev, > > > > * User level interface (ioctl) > > > > */ > > > > > > > > -static struct net_device *arp_req_dev_by_name(struct net *net, struct arpreq *r) > > > > +static struct net_device *arp_req_dev_by_name(struct net *net, struct arpreq *r, > > > > + bool getarp) > > > > { > > > > struct net_device *dev; > > > > > > > > - dev = __dev_get_by_name(net, r->arp_dev); > > > > + if (getarp) > > > > + dev = dev_get_by_name_rcu(net, r->arp_dev); > > > > + else > > > > + dev = __dev_get_by_name(net, r->arp_dev); > > > > if (!dev) > > > > return ERR_PTR(-ENODEV); > > > > > > > > @@ -1028,7 +1032,7 @@ static struct net_device *arp_req_dev(struct net *net, struct arpreq *r) > > > > __be32 ip; > > > > > > > > if (r->arp_dev[0]) > > > > - return arp_req_dev_by_name(net, r); > > > > + return arp_req_dev_by_name(net, r, false); > > > > > > > > if (r->arp_flags & ATF_PUBL) > > > > return NULL; > > > > @@ -1166,7 +1170,7 @@ static int arp_req_get(struct net *net, struct arpreq *r) > > > > if (!r->arp_dev[0]) > > > > return -ENODEV; > > > > > > > > - dev = arp_req_dev_by_name(net, r); > > > > + dev = arp_req_dev_by_name(net, r, true); > > > > if (IS_ERR(dev)) > > > > return PTR_ERR(dev); > > > > > > > > @@ -1287,23 +1291,27 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg) > > > > else if (*netmask && *netmask != htonl(0xFFFFFFFFUL)) > > > > return -EINVAL; > > > > > > > > - rtnl_lock(); > > > > - > > > > switch (cmd) { > > > > case SIOCDARP: > > > > + rtnl_lock(); > > > > err = arp_req_delete(net, &r); > > > > + rtnl_unlock(); > > > > break; > > > > case SIOCSARP: > > > > + rtnl_lock(); > > > > err = arp_req_set(net, &r); > > > > + rtnl_unlock(); > > > > break; > > > > case SIOCGARP: > > > > + rcu_read_lock(); > > > > err = arp_req_get(net, &r); > > > > + rcu_read_unlock(); > > > > > > > > > Note that arp_req_get() uses : > > > > > > strscpy(r->arp_dev, dev->name, sizeof(r->arp_dev)); > > > > > > This currently depends on RTNL or devnet_rename_sem > > > > Ah, I missed this point, thanks for catching! > > > > > > > > > > Perhaps we should add a helper and use a seqlock to safely copy > > > dev->name into a temporary variable. > > > > So it's preferable to add seqlock around memcpy() in dev_change_name() > > and use a helper in arp_req_get() rather than adding devnet_rename_sem > > locking around memcpy() in arp_req_get() ? > > Under rcu_read_lock(), we can not sleep. > > devnet_rename_sem is a semaphore... down_read() might sleep. yes... -ENOCOFFEE :) > > So if you plan using current netdev_get_name(), you must call it > outside of rcu_read_lock() section. > Will add seqlock helper in v3. Thanks!
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 5034920be85a..9430b64558cd 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1003,11 +1003,15 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev, * User level interface (ioctl) */ -static struct net_device *arp_req_dev_by_name(struct net *net, struct arpreq *r) +static struct net_device *arp_req_dev_by_name(struct net *net, struct arpreq *r, + bool getarp) { struct net_device *dev; - dev = __dev_get_by_name(net, r->arp_dev); + if (getarp) + dev = dev_get_by_name_rcu(net, r->arp_dev); + else + dev = __dev_get_by_name(net, r->arp_dev); if (!dev) return ERR_PTR(-ENODEV); @@ -1028,7 +1032,7 @@ static struct net_device *arp_req_dev(struct net *net, struct arpreq *r) __be32 ip; if (r->arp_dev[0]) - return arp_req_dev_by_name(net, r); + return arp_req_dev_by_name(net, r, false); if (r->arp_flags & ATF_PUBL) return NULL; @@ -1166,7 +1170,7 @@ static int arp_req_get(struct net *net, struct arpreq *r) if (!r->arp_dev[0]) return -ENODEV; - dev = arp_req_dev_by_name(net, r); + dev = arp_req_dev_by_name(net, r, true); if (IS_ERR(dev)) return PTR_ERR(dev); @@ -1287,23 +1291,27 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg) else if (*netmask && *netmask != htonl(0xFFFFFFFFUL)) return -EINVAL; - rtnl_lock(); - switch (cmd) { case SIOCDARP: + rtnl_lock(); err = arp_req_delete(net, &r); + rtnl_unlock(); break; case SIOCSARP: + rtnl_lock(); err = arp_req_set(net, &r); + rtnl_unlock(); break; case SIOCGARP: + rcu_read_lock(); err = arp_req_get(net, &r); + rcu_read_unlock(); + + if (!err && copy_to_user(arg, &r, sizeof(r))) + err = -EFAULT; break; } - rtnl_unlock(); - if (cmd == SIOCGARP && !err && copy_to_user(arg, &r, sizeof(r))) - err = -EFAULT; return err; }
ioctl(SIOCGARP) holds rtnl_lock() for __dev_get_by_name() and later calls neigh_lookup(), which calls rcu_read_lock(). Let's replace __dev_get_by_name() with dev_get_by_name_rcu() to avoid locking rtnl_lock(). Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com> --- net/ipv4/arp.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-)