Message ID | b5d7b8e243178d63643c8efc1f1c48b3b2468dc7.1695838185.git.ecree.xilinx@gmail.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | ethtool: track custom RSS contexts in the core | expand |
On 9/27/2023 11:13 AM, edward.cree@amd.com wrote: > From: Edward Cree <ecree.xilinx@gmail.com> > > While this is not needed to serialise the ethtool entry points (which > are all under RTNL), drivers may have cause to asynchronously access > dev->ethtool->rss_ctx; taking dev->ethtool->rss_lock allows them to > do this safely without needing to take the RTNL. > > Signed-off-by: Edward Cree <ecree.xilinx@gmail.com> Reviewed-by: Jacob Keller <jacob.e.keller@intel.com> > --- > include/linux/ethtool.h | 3 +++ > net/core/dev.c | 5 +++++ > net/ethtool/ioctl.c | 7 +++++++ > 3 files changed, 15 insertions(+) > > diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h > index c8963bde9289..d15a21bd6f12 100644 > --- a/include/linux/ethtool.h > +++ b/include/linux/ethtool.h > @@ -1026,11 +1026,14 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev, > /** > * struct ethtool_netdev_state - per-netdevice state for ethtool features > * @rss_ctx: XArray of custom RSS contexts > + * @rss_lock: Protects entries in @rss_ctx. May be taken from > + * within RTNL. > * @rss_ctx_max_id: maximum (exclusive) supported RSS context ID > * @wol_enabled: Wake-on-LAN is enabled > */ > struct ethtool_netdev_state { > struct xarray rss_ctx; > + struct mutex rss_lock; > u32 rss_ctx_max_id; > u32 wol_enabled:1; > }; > diff --git a/net/core/dev.c b/net/core/dev.c > index 69579d9cd7ba..c57456ed4be8 100644 > --- a/net/core/dev.c > +++ b/net/core/dev.c > @@ -10074,6 +10074,7 @@ int register_netdevice(struct net_device *dev) > > /* rss ctx ID 0 is reserved for the default context, start from 1 */ > xa_init_flags(&dev->ethtool->rss_ctx, XA_FLAGS_ALLOC1); > + mutex_init(&dev->ethtool->rss_lock); > > spin_lock_init(&dev->addr_list_lock); > netdev_set_addr_lockdep_class(dev); > @@ -10882,6 +10883,7 @@ static void netdev_rss_contexts_free(struct net_device *dev) > struct ethtool_rxfh_context *ctx; > unsigned long context; > > + mutex_lock(&dev->ethtool->rss_lock); > if (dev->ethtool_ops->create_rxfh_context || > dev->ethtool_ops->set_rxfh_context) > xa_for_each(&dev->ethtool->rss_ctx, context, ctx) { > @@ -10903,6 +10905,7 @@ static void netdev_rss_contexts_free(struct net_device *dev) > kfree(ctx); > } > xa_destroy(&dev->ethtool->rss_ctx); > + mutex_unlock(&dev->ethtool->rss_lock); > } > > /** > @@ -11016,6 +11019,8 @@ void unregister_netdevice_many_notify(struct list_head *head, > if (dev->netdev_ops->ndo_uninit) > dev->netdev_ops->ndo_uninit(dev); > > + mutex_destroy(&dev->ethtool->rss_lock); > + > if (skb) > rtmsg_ifinfo_send(skb, dev, GFP_KERNEL, portid, nlh); > > diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c > index 3920ddee3ee2..d21bbc92e6fc 100644 > --- a/net/ethtool/ioctl.c > +++ b/net/ethtool/ioctl.c > @@ -1258,6 +1258,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, > u8 *rss_config; > u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]); > bool create = false, delete = false; > + bool locked = false; /* dev->ethtool->rss_lock taken */ > > if (!ops->get_rxnfc || !ops->set_rxfh) > return -EOPNOTSUPP; > @@ -1335,6 +1336,10 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, > } > } > > + if (rxfh.rss_context) { > + mutex_lock(&dev->ethtool->rss_lock); > + locked = true; > + } > if (create) { > if (delete) { > ret = -EINVAL; > @@ -1455,6 +1460,8 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, > } > > out: > + if (locked) > + mutex_unlock(&dev->ethtool->rss_lock); > kfree(rss_config); > return ret; > } >
On Wed, Sep 27, 2023 at 07:13:37PM +0100, edward.cree@amd.com wrote: > From: Edward Cree <ecree.xilinx@gmail.com> > > While this is not needed to serialise the ethtool entry points (which > are all under RTNL), drivers may have cause to asynchronously access > dev->ethtool->rss_ctx; taking dev->ethtool->rss_lock allows them to > do this safely without needing to take the RTNL. > > Signed-off-by: Edward Cree <ecree.xilinx@gmail.com> Reviewed-by: Martin Habets <habetsm.xilinx@gmail.com> > --- > include/linux/ethtool.h | 3 +++ > net/core/dev.c | 5 +++++ > net/ethtool/ioctl.c | 7 +++++++ > 3 files changed, 15 insertions(+) > > diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h > index c8963bde9289..d15a21bd6f12 100644 > --- a/include/linux/ethtool.h > +++ b/include/linux/ethtool.h > @@ -1026,11 +1026,14 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev, > /** > * struct ethtool_netdev_state - per-netdevice state for ethtool features > * @rss_ctx: XArray of custom RSS contexts > + * @rss_lock: Protects entries in @rss_ctx. May be taken from > + * within RTNL. > * @rss_ctx_max_id: maximum (exclusive) supported RSS context ID > * @wol_enabled: Wake-on-LAN is enabled > */ > struct ethtool_netdev_state { > struct xarray rss_ctx; > + struct mutex rss_lock; > u32 rss_ctx_max_id; > u32 wol_enabled:1; > }; > diff --git a/net/core/dev.c b/net/core/dev.c > index 69579d9cd7ba..c57456ed4be8 100644 > --- a/net/core/dev.c > +++ b/net/core/dev.c > @@ -10074,6 +10074,7 @@ int register_netdevice(struct net_device *dev) > > /* rss ctx ID 0 is reserved for the default context, start from 1 */ > xa_init_flags(&dev->ethtool->rss_ctx, XA_FLAGS_ALLOC1); > + mutex_init(&dev->ethtool->rss_lock); > > spin_lock_init(&dev->addr_list_lock); > netdev_set_addr_lockdep_class(dev); > @@ -10882,6 +10883,7 @@ static void netdev_rss_contexts_free(struct net_device *dev) > struct ethtool_rxfh_context *ctx; > unsigned long context; > > + mutex_lock(&dev->ethtool->rss_lock); > if (dev->ethtool_ops->create_rxfh_context || > dev->ethtool_ops->set_rxfh_context) > xa_for_each(&dev->ethtool->rss_ctx, context, ctx) { > @@ -10903,6 +10905,7 @@ static void netdev_rss_contexts_free(struct net_device *dev) > kfree(ctx); > } > xa_destroy(&dev->ethtool->rss_ctx); > + mutex_unlock(&dev->ethtool->rss_lock); > } > > /** > @@ -11016,6 +11019,8 @@ void unregister_netdevice_many_notify(struct list_head *head, > if (dev->netdev_ops->ndo_uninit) > dev->netdev_ops->ndo_uninit(dev); > > + mutex_destroy(&dev->ethtool->rss_lock); > + > if (skb) > rtmsg_ifinfo_send(skb, dev, GFP_KERNEL, portid, nlh); > > diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c > index 3920ddee3ee2..d21bbc92e6fc 100644 > --- a/net/ethtool/ioctl.c > +++ b/net/ethtool/ioctl.c > @@ -1258,6 +1258,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, > u8 *rss_config; > u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]); > bool create = false, delete = false; > + bool locked = false; /* dev->ethtool->rss_lock taken */ > > if (!ops->get_rxnfc || !ops->set_rxfh) > return -EOPNOTSUPP; > @@ -1335,6 +1336,10 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, > } > } > > + if (rxfh.rss_context) { > + mutex_lock(&dev->ethtool->rss_lock); > + locked = true; > + } > if (create) { > if (delete) { > ret = -EINVAL; > @@ -1455,6 +1460,8 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, > } > > out: > + if (locked) > + mutex_unlock(&dev->ethtool->rss_lock); > kfree(rss_config); > return ret; > }
On Wed, 27 Sep 2023 19:13:37 +0100 edward.cree@amd.com wrote: > While this is not needed to serialise the ethtool entry points (which > are all under RTNL), drivers may have cause to asynchronously access > dev->ethtool->rss_ctx; taking dev->ethtool->rss_lock allows them to > do this safely without needing to take the RTNL. Can we use a replay mechanism, like we do in TC offloads and VxLAN/UDP ports? The driver which lost config can ask for the rss contexts to be "replayed" and the core will issue a series of ->create calls for all existing entries? Regarding the lock itself - can we hide it under ethtool_rss_lock(dev) / ethtool_rss_unlock(dev) helpers?
On 05/10/2023 00:16, Jakub Kicinski wrote: > On Wed, 27 Sep 2023 19:13:37 +0100 edward.cree@amd.com wrote: >> While this is not needed to serialise the ethtool entry points (which >> are all under RTNL), drivers may have cause to asynchronously access >> dev->ethtool->rss_ctx; taking dev->ethtool->rss_lock allows them to >> do this safely without needing to take the RTNL. > > Can we use a replay mechanism, like we do in TC offloads and VxLAN/UDP > ports? The driver which lost config can ask for the rss contexts to be > "replayed" and the core will issue a series of ->create calls for all > existing entries? I like that idea, yes. Will try to implement it for v5. There is a question as to how the core should react if the ->create call then fails; see my reply to Martin on #7. > Regarding the lock itself - can we hide it under ethtool_rss_lock(dev) > / ethtool_rss_unlock(dev) helpers? Sure. If I can't get replay to work then I'll do that. -ed
On Thu, 5 Oct 2023 21:56:47 +0100 Edward Cree wrote: > > Can we use a replay mechanism, like we do in TC offloads and VxLAN/UDP > > ports? The driver which lost config can ask for the rss contexts to be > > "replayed" and the core will issue a series of ->create calls for all > > existing entries? > > I like that idea, yes. Will try to implement it for v5. > There is a question as to how the core should react if the ->create call > then fails; see my reply to Martin on #7. Hm. The application asked for a config which is no longer applied. The machine needs to raise some form of an alarm or be taken out of commission. My first thought would be to print an error message in the core, and expect the driver to fail some devlink health reporter. I don't think a "broken" flag local to RSS would be monitored, there'd be too many of such local flags throughout the APIs. devlink health may be monitored. > > Regarding the lock itself - can we hide it under ethtool_rss_lock(dev) > > / ethtool_rss_unlock(dev) helpers? > > Sure. If I can't get replay to work then I'll do that.
On 05/10/2023 00:16, Jakub Kicinski wrote: > On Wed, 27 Sep 2023 19:13:37 +0100 edward.cree@amd.com wrote: >> While this is not needed to serialise the ethtool entry points (which >> are all under RTNL), drivers may have cause to asynchronously access >> dev->ethtool->rss_ctx; taking dev->ethtool->rss_lock allows them to >> do this safely without needing to take the RTNL. > > Can we use a replay mechanism, like we do in TC offloads and VxLAN/UDP > ports? The driver which lost config can ask for the rss contexts to be > "replayed" and the core will issue a series of ->create calls for all > existing entries? So I tried to prototype this, and unfortunately I ran into a problem. While we can replay the contexts alright, that still leaves the ntuple filters which we also want to restore, and which might depend on the contexts, so that can't be done until after context restore is done. So to do this we'd need to *also* have the core replay the filters, which would mean adding a filter array to the core similar to this context array. Now that's a thing that might be useful to have, enabling netlink dumps and so on, but it would considerably extend the scope of this work, in which case who knows if it'll ever be ready to merge :S Moreover, at least in the case of sfc (as usual, no idea about other NICs), the filter table on the device contains more than just ntuple filters; stuff like the device's unicast address list, PTP filters and representor filters, some of which are required for correct operation, live in the same table. When coming up after a reset, currently we: 1) restore RSS contexts 2) restore all filters (both driver-internal and ethtool ntuple) from the software shadow filter table into the hardware 3) bring up the NIC datapath. Instead we would need to: 1) restore all the 'internal' filters (which do not, and after these changes could not ever, use custom RSS contexts), and discard all ntuple filters from the software shadow filter table in the driver 2) request RSS+ntuple replay 3) bring up the NIC datapath 4) the replay workitem runs, and reinserts RSS contexts and ntuple filters. This would also mean that the default RSS context, which is used by the unicast/multicast address and Ethernet broadcast filters, could not ever migrate to be tracked in the XArray (otherwise a desirable simplification). tl;dr: none of this is impossible, but it'd be a lot of work just to get rid of one mutex, and would paint us into a bit of a corner. So I propose to stick with the locking scheme for now; I'll post v5 with the other review comments addressed; and if at some point in the future core tracking of ntuple filters gets added, we can revisit then whether moving to a replay scheme is viable. Okay? -ed
On Thu, 7 Dec 2023 14:15:30 +0000 Edward Cree wrote: > tl;dr: none of this is impossible, but it'd be a lot of work just to > get rid of one mutex, and would paint us into a bit of a corner. > So I propose to stick with the locking scheme for now; I'll post v5 > with the other review comments addressed; and if at some point in > the future core tracking of ntuple filters gets added, we can > revisit then whether moving to a replay scheme is viable. Okay? Sounds good, thanks for investigating.
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index c8963bde9289..d15a21bd6f12 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1026,11 +1026,14 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev, /** * struct ethtool_netdev_state - per-netdevice state for ethtool features * @rss_ctx: XArray of custom RSS contexts + * @rss_lock: Protects entries in @rss_ctx. May be taken from + * within RTNL. * @rss_ctx_max_id: maximum (exclusive) supported RSS context ID * @wol_enabled: Wake-on-LAN is enabled */ struct ethtool_netdev_state { struct xarray rss_ctx; + struct mutex rss_lock; u32 rss_ctx_max_id; u32 wol_enabled:1; }; diff --git a/net/core/dev.c b/net/core/dev.c index 69579d9cd7ba..c57456ed4be8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10074,6 +10074,7 @@ int register_netdevice(struct net_device *dev) /* rss ctx ID 0 is reserved for the default context, start from 1 */ xa_init_flags(&dev->ethtool->rss_ctx, XA_FLAGS_ALLOC1); + mutex_init(&dev->ethtool->rss_lock); spin_lock_init(&dev->addr_list_lock); netdev_set_addr_lockdep_class(dev); @@ -10882,6 +10883,7 @@ static void netdev_rss_contexts_free(struct net_device *dev) struct ethtool_rxfh_context *ctx; unsigned long context; + mutex_lock(&dev->ethtool->rss_lock); if (dev->ethtool_ops->create_rxfh_context || dev->ethtool_ops->set_rxfh_context) xa_for_each(&dev->ethtool->rss_ctx, context, ctx) { @@ -10903,6 +10905,7 @@ static void netdev_rss_contexts_free(struct net_device *dev) kfree(ctx); } xa_destroy(&dev->ethtool->rss_ctx); + mutex_unlock(&dev->ethtool->rss_lock); } /** @@ -11016,6 +11019,8 @@ void unregister_netdevice_many_notify(struct list_head *head, if (dev->netdev_ops->ndo_uninit) dev->netdev_ops->ndo_uninit(dev); + mutex_destroy(&dev->ethtool->rss_lock); + if (skb) rtmsg_ifinfo_send(skb, dev, GFP_KERNEL, portid, nlh); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 3920ddee3ee2..d21bbc92e6fc 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1258,6 +1258,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, u8 *rss_config; u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]); bool create = false, delete = false; + bool locked = false; /* dev->ethtool->rss_lock taken */ if (!ops->get_rxnfc || !ops->set_rxfh) return -EOPNOTSUPP; @@ -1335,6 +1336,10 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, } } + if (rxfh.rss_context) { + mutex_lock(&dev->ethtool->rss_lock); + locked = true; + } if (create) { if (delete) { ret = -EINVAL; @@ -1455,6 +1460,8 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, } out: + if (locked) + mutex_unlock(&dev->ethtool->rss_lock); kfree(rss_config); return ret; }