Message ID | 20210208193410.3859094-4-weiwan@google.com (mailing list archive) |
---|---|
State | Accepted |
Commit | 5fdd2f0e5c64846bf3066689b73fc3b8dddd1c74 |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | implement kthread based napi poll | expand |
On Mon, Feb 8, 2021 at 11:38 AM Wei Wang <weiwan@google.com> wrote: > > This patch adds a new sysfs attribute to the network device class. > Said attribute provides a per-device control to enable/disable the > threaded mode for all the napi instances of the given network device, > without the need for a device up/down. > User sets it to 1 or 0 to enable or disable threaded mode. > Note: when switching between threaded and the current softirq based mode > for a napi instance, it will not immediately take effect if the napi is > currently being polled. The mode switch will happen for the next time > napi_schedule() is called. > > Co-developed-by: Paolo Abeni <pabeni@redhat.com> > Signed-off-by: Paolo Abeni <pabeni@redhat.com> > Co-developed-by: Hannes Frederic Sowa <hannes@stressinduktion.org> > Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org> > Co-developed-by: Felix Fietkau <nbd@nbd.name> > Signed-off-by: Felix Fietkau <nbd@nbd.name> > Signed-off-by: Wei Wang <weiwan@google.com> > --- > Documentation/ABI/testing/sysfs-class-net | 15 +++++++ > include/linux/netdevice.h | 2 + > net/core/dev.c | 48 ++++++++++++++++++++++- > net/core/net-sysfs.c | 40 +++++++++++++++++++ > 4 files changed, 103 insertions(+), 2 deletions(-) > > diff --git a/Documentation/ABI/testing/sysfs-class-net b/Documentation/ABI/testing/sysfs-class-net > index 1f2002df5ba2..1419103d11f9 100644 > --- a/Documentation/ABI/testing/sysfs-class-net > +++ b/Documentation/ABI/testing/sysfs-class-net > @@ -337,3 +337,18 @@ Contact: netdev@vger.kernel.org > Description: > 32-bit unsigned integer counting the number of times the link has > been down > + > +What: /sys/class/net/<iface>/threaded > +Date: Jan 2021 > +KernelVersion: 5.12 > +Contact: netdev@vger.kernel.org > +Description: > + Boolean value to control the threaded mode per device. User could > + set this value to enable/disable threaded mode for all napi > + belonging to this device, without the need to do device up/down. > + > + Possible values: > + == ================================== > + 0 threaded mode disabled for this dev > + 1 threaded mode enabled for this dev > + == ================================== > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h > index 99fb4ec9573e..1340327f7abf 100644 > --- a/include/linux/netdevice.h > +++ b/include/linux/netdevice.h > @@ -497,6 +497,8 @@ static inline bool napi_complete(struct napi_struct *n) > return napi_complete_done(n, 0); > } > > +int dev_set_threaded(struct net_device *dev, bool threaded); > + > /** > * napi_disable - prevent NAPI from scheduling > * @n: NAPI context > diff --git a/net/core/dev.c b/net/core/dev.c > index 1e35f4f44f3b..7647278e46f0 100644 > --- a/net/core/dev.c > +++ b/net/core/dev.c > @@ -4291,8 +4291,9 @@ static inline void ____napi_schedule(struct softnet_data *sd, > > if (test_bit(NAPI_STATE_THREADED, &napi->state)) { > /* Paired with smp_mb__before_atomic() in > - * napi_enable(). Use READ_ONCE() to guarantee > - * a complete read on napi->thread. Only call > + * napi_enable()/dev_set_threaded(). > + * Use READ_ONCE() to guarantee a complete > + * read on napi->thread. Only call > * wake_up_process() when it's not NULL. > */ > thread = READ_ONCE(napi->thread); > @@ -6738,6 +6739,49 @@ static void init_gro_hash(struct napi_struct *napi) > napi->gro_bitmask = 0; > } > > +int dev_set_threaded(struct net_device *dev, bool threaded) > +{ > + struct napi_struct *napi; > + int err = 0; > + > + if (dev->threaded == threaded) > + return 0; > + > + if (threaded) { > + list_for_each_entry(napi, &dev->napi_list, dev_list) { > + if (!napi->thread) { > + err = napi_kthread_create(napi); > + if (err) { > + threaded = false; > + break; > + } > + } > + } > + } > + So one idea on how to reduce the number of indents and braces would be to look at pulling the threaded check into the loop. So something like: list_for_each_entry(napi, &dev->napi_list, dev_list) { if (!threaded || napi->thread) continue; err = napi_kthread_create(napi); if (err) threaded = false; } Anyway it is just a suggestion for improvement since it is functionally the same as the code above but greatly reduces the indentation. I am okay with the code as is as well, it just seemed like a lot of braces on the end there. > + dev->threaded = threaded; > + > + /* Make sure kthread is created before THREADED bit > + * is set. > + */ > + smp_mb__before_atomic(); > + > + /* Setting/unsetting threaded mode on a napi might not immediately > + * take effect, if the current napi instance is actively being > + * polled. In this case, the switch between threaded mode and > + * softirq mode will happen in the next round of napi_schedule(). > + * This should not cause hiccups/stalls to the live traffic. > + */ > + list_for_each_entry(napi, &dev->napi_list, dev_list) { > + if (threaded) > + set_bit(NAPI_STATE_THREADED, &napi->state); > + else > + clear_bit(NAPI_STATE_THREADED, &napi->state); > + } > + > + return err; > +} > + > void netif_napi_add(struct net_device *dev, struct napi_struct *napi, > int (*poll)(struct napi_struct *, int), int weight) > { > diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c > index daf502c13d6d..e72d474c2623 100644 > --- a/net/core/net-sysfs.c > +++ b/net/core/net-sysfs.c > @@ -538,6 +538,45 @@ static ssize_t phys_switch_id_show(struct device *dev, > } > static DEVICE_ATTR_RO(phys_switch_id); > > +static ssize_t threaded_show(struct device *dev, > + struct device_attribute *attr, char *buf) > +{ > + struct net_device *netdev = to_net_dev(dev); > + ssize_t ret = -EINVAL; > + > + if (!rtnl_trylock()) > + return restart_syscall(); > + > + if (dev_isalive(netdev)) > + ret = sprintf(buf, fmt_dec, netdev->threaded); > + > + rtnl_unlock(); > + return ret; > +} > + > +static int modify_napi_threaded(struct net_device *dev, unsigned long val) > +{ > + int ret; > + > + if (list_empty(&dev->napi_list)) > + return -EOPNOTSUPP; > + > + if (val != 0 && val != 1) > + return -EOPNOTSUPP; > + > + ret = dev_set_threaded(dev, val); > + > + return ret; > +} > + > +static ssize_t threaded_store(struct device *dev, > + struct device_attribute *attr, > + const char *buf, size_t len) > +{ > + return netdev_store(dev, attr, buf, len, modify_napi_threaded); > +} > +static DEVICE_ATTR_RW(threaded); > + > static struct attribute *net_class_attrs[] __ro_after_init = { > &dev_attr_netdev_group.attr, > &dev_attr_type.attr, > @@ -570,6 +609,7 @@ static struct attribute *net_class_attrs[] __ro_after_init = { > &dev_attr_proto_down.attr, > &dev_attr_carrier_up_count.attr, > &dev_attr_carrier_down_count.attr, > + &dev_attr_threaded.attr, > NULL, > }; > ATTRIBUTE_GROUPS(net_class); Other than the style nit I mentioned above the code looks good to me. Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
diff --git a/Documentation/ABI/testing/sysfs-class-net b/Documentation/ABI/testing/sysfs-class-net index 1f2002df5ba2..1419103d11f9 100644 --- a/Documentation/ABI/testing/sysfs-class-net +++ b/Documentation/ABI/testing/sysfs-class-net @@ -337,3 +337,18 @@ Contact: netdev@vger.kernel.org Description: 32-bit unsigned integer counting the number of times the link has been down + +What: /sys/class/net/<iface>/threaded +Date: Jan 2021 +KernelVersion: 5.12 +Contact: netdev@vger.kernel.org +Description: + Boolean value to control the threaded mode per device. User could + set this value to enable/disable threaded mode for all napi + belonging to this device, without the need to do device up/down. + + Possible values: + == ================================== + 0 threaded mode disabled for this dev + 1 threaded mode enabled for this dev + == ================================== diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 99fb4ec9573e..1340327f7abf 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -497,6 +497,8 @@ static inline bool napi_complete(struct napi_struct *n) return napi_complete_done(n, 0); } +int dev_set_threaded(struct net_device *dev, bool threaded); + /** * napi_disable - prevent NAPI from scheduling * @n: NAPI context diff --git a/net/core/dev.c b/net/core/dev.c index 1e35f4f44f3b..7647278e46f0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4291,8 +4291,9 @@ static inline void ____napi_schedule(struct softnet_data *sd, if (test_bit(NAPI_STATE_THREADED, &napi->state)) { /* Paired with smp_mb__before_atomic() in - * napi_enable(). Use READ_ONCE() to guarantee - * a complete read on napi->thread. Only call + * napi_enable()/dev_set_threaded(). + * Use READ_ONCE() to guarantee a complete + * read on napi->thread. Only call * wake_up_process() when it's not NULL. */ thread = READ_ONCE(napi->thread); @@ -6738,6 +6739,49 @@ static void init_gro_hash(struct napi_struct *napi) napi->gro_bitmask = 0; } +int dev_set_threaded(struct net_device *dev, bool threaded) +{ + struct napi_struct *napi; + int err = 0; + + if (dev->threaded == threaded) + return 0; + + if (threaded) { + list_for_each_entry(napi, &dev->napi_list, dev_list) { + if (!napi->thread) { + err = napi_kthread_create(napi); + if (err) { + threaded = false; + break; + } + } + } + } + + dev->threaded = threaded; + + /* Make sure kthread is created before THREADED bit + * is set. + */ + smp_mb__before_atomic(); + + /* Setting/unsetting threaded mode on a napi might not immediately + * take effect, if the current napi instance is actively being + * polled. In this case, the switch between threaded mode and + * softirq mode will happen in the next round of napi_schedule(). + * This should not cause hiccups/stalls to the live traffic. + */ + list_for_each_entry(napi, &dev->napi_list, dev_list) { + if (threaded) + set_bit(NAPI_STATE_THREADED, &napi->state); + else + clear_bit(NAPI_STATE_THREADED, &napi->state); + } + + return err; +} + void netif_napi_add(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight) { diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index daf502c13d6d..e72d474c2623 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -538,6 +538,45 @@ static ssize_t phys_switch_id_show(struct device *dev, } static DEVICE_ATTR_RO(phys_switch_id); +static ssize_t threaded_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct net_device *netdev = to_net_dev(dev); + ssize_t ret = -EINVAL; + + if (!rtnl_trylock()) + return restart_syscall(); + + if (dev_isalive(netdev)) + ret = sprintf(buf, fmt_dec, netdev->threaded); + + rtnl_unlock(); + return ret; +} + +static int modify_napi_threaded(struct net_device *dev, unsigned long val) +{ + int ret; + + if (list_empty(&dev->napi_list)) + return -EOPNOTSUPP; + + if (val != 0 && val != 1) + return -EOPNOTSUPP; + + ret = dev_set_threaded(dev, val); + + return ret; +} + +static ssize_t threaded_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return netdev_store(dev, attr, buf, len, modify_napi_threaded); +} +static DEVICE_ATTR_RW(threaded); + static struct attribute *net_class_attrs[] __ro_after_init = { &dev_attr_netdev_group.attr, &dev_attr_type.attr, @@ -570,6 +609,7 @@ static struct attribute *net_class_attrs[] __ro_after_init = { &dev_attr_proto_down.attr, &dev_attr_carrier_up_count.attr, &dev_attr_carrier_down_count.attr, + &dev_attr_threaded.attr, NULL, }; ATTRIBUTE_GROUPS(net_class);