Message ID | e5b4739cd6b2d3324b5c639fa9006f94fc03c255.1718862050.git.ecree.xilinx@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | ethtool: track custom RSS contexts in the core | expand |
On 6/20/24 07:47, edward.cree@amd.com wrote: > From: Edward Cree <ecree.xilinx@gmail.com> > > Since drivers are still choosing the context IDs, we have to force the > XArray to use the ID they've chosen rather than picking one ourselves, > and handle the case where they give us an ID that's already in use. Q: This is a new API, perhaps you could force adopters to convert to not choosing ID and switching to allocated one? > > Signed-off-by: Edward Cree <ecree.xilinx@gmail.com> > --- > include/linux/ethtool.h | 14 ++++++++ > net/ethtool/ioctl.c | 74 ++++++++++++++++++++++++++++++++++++++++- > 2 files changed, 87 insertions(+), 1 deletion(-) > > diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h > index 13c9c819de58..283ba4aff623 100644 > --- a/include/linux/ethtool.h > +++ b/include/linux/ethtool.h > @@ -199,6 +199,17 @@ static inline u8 *ethtool_rxfh_context_key(struct ethtool_rxfh_context *ctx) > return (u8 *)(ethtool_rxfh_context_indir(ctx) + ctx->indir_size); > } > > +static inline size_t ethtool_rxfh_context_size(u32 indir_size, u32 key_size, > + u16 priv_size) > +{ > + size_t indir_bytes = array_size(indir_size, sizeof(u32)); > + size_t flex_len; > + > + flex_len = size_add(size_add(indir_bytes, key_size), > + ALIGN(priv_size, sizeof(u32))); > + return struct_size((struct ethtool_rxfh_context *)0, data, flex_len); struct_size_t > +} > + > /* declare a link mode bitmap */ > #define __ETHTOOL_DECLARE_LINK_MODE_MASK(name) \ > DECLARE_BITMAP(name, __ETHTOOL_LINK_MODE_MASK_NBITS) > @@ -709,6 +720,8 @@ struct ethtool_rxfh_param { > * contexts. > * @cap_rss_sym_xor_supported: indicates if the driver supports symmetric-xor > * RSS. > + * @rxfh_priv_size: size of the driver private data area the core should > + * allocate for an RSS context (in &struct ethtool_rxfh_context). > * @supported_coalesce_params: supported types of interrupt coalescing. > * @supported_ring_params: supported ring params. > * @get_drvinfo: Report driver/device information. Modern drivers no > @@ -892,6 +905,7 @@ struct ethtool_ops { > u32 cap_link_lanes_supported:1; > u32 cap_rss_ctx_supported:1; > u32 cap_rss_sym_xor_supported:1; > + u16 rxfh_priv_size; > u32 supported_coalesce_params; > u32 supported_ring_params; > void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); > diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c > index 998571f05deb..f879deb6ac4e 100644 > --- a/net/ethtool/ioctl.c > +++ b/net/ethtool/ioctl.c > @@ -1278,10 +1278,12 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, > const struct ethtool_ops *ops = dev->ethtool_ops; > u32 dev_indir_size = 0, dev_key_size = 0, i; > struct ethtool_rxfh_param rxfh_dev = {}; > + struct ethtool_rxfh_context *ctx = NULL; > struct netlink_ext_ack *extack = NULL; > struct ethtool_rxnfc rx_rings; > struct ethtool_rxfh rxfh; > u32 indir_bytes = 0; > + bool create = false; > u8 *rss_config; > int ret; > > @@ -1309,6 +1311,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, > if ((rxfh.input_xfrm & RXH_XFRM_SYM_XOR) && > !ops->cap_rss_sym_xor_supported) > return -EOPNOTSUPP; > + create = rxfh.rss_context == ETH_RXFH_CONTEXT_ALLOC; > > /* If either indir, hash key or function is valid, proceed further. > * Must request at least one change: indir size, hash key, function > @@ -1374,13 +1377,42 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, > } > } > > + if (create) { > + if (rxfh_dev.rss_delete) { > + ret = -EINVAL; > + goto out; > + } > + ctx = kzalloc(ethtool_rxfh_context_size(dev_indir_size, > + dev_key_size, > + ops->rxfh_priv_size), > + GFP_KERNEL_ACCOUNT); > + if (!ctx) { > + ret = -ENOMEM; > + goto out; > + } > + ctx->indir_size = dev_indir_size; > + ctx->key_size = dev_key_size; > + ctx->hfunc = rxfh.hfunc; > + ctx->input_xfrm = rxfh.input_xfrm; > + ctx->priv_size = ops->rxfh_priv_size; > + } else if (rxfh.rss_context) { > + ctx = xa_load(&dev->ethtool->rss_ctx, rxfh.rss_context); > + if (!ctx) { > + ret = -ENOENT; > + goto out; > + } > + } > rxfh_dev.hfunc = rxfh.hfunc; > rxfh_dev.rss_context = rxfh.rss_context; > rxfh_dev.input_xfrm = rxfh.input_xfrm; > > ret = ops->set_rxfh(dev, &rxfh_dev, extack); > - if (ret) > + if (ret) { > + if (create) > + /* failed to create, free our new tracking entry */ > + kfree(ctx); > goto out; > + } > > if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, rss_context), > &rxfh_dev.rss_context, sizeof(rxfh_dev.rss_context))) > @@ -1393,6 +1425,46 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, > else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) > dev->priv_flags |= IFF_RXFH_CONFIGURED; > } > + /* Update rss_ctx tracking */ > + if (create) { > + /* Ideally this should happen before calling the driver, > + * so that we can fail more cleanly; but we don't have the > + * context ID until the driver picks it, so we have to > + * wait until after. > + */ > + if (WARN_ON(xa_load(&dev->ethtool->rss_ctx, rxfh.rss_context))) { > + /* context ID reused, our tracking is screwed */ why no error code set? > + kfree(ctx); > + goto out; > + } > + /* Allocate the exact ID the driver gave us */ > + if (xa_is_err(xa_store(&dev->ethtool->rss_ctx, rxfh.rss_context, > + ctx, GFP_KERNEL))) { this is racy - assuming it is possible that context was set by other means (otherwisce you would not xa_load() a few lines above) - a concurrent writer could have done this just after you xa_load() call. so, instead of xa_load() + xa_store() just use xa_insert() anyway I feel the pain of trying to support both driver-selected IDs and your own > + kfree(ctx); > + goto out; > + } > + ctx->indir_configured = rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE; > + ctx->key_configured = !!rxfh.key_size; > + } > + if (rxfh_dev.rss_delete) { > + WARN_ON(xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context) != ctx); > + kfree(ctx); > + } else if (ctx) { > + if (rxfh_dev.indir) { > + for (i = 0; i < dev_indir_size; i++) > + ethtool_rxfh_context_indir(ctx)[i] = rxfh_dev.indir[i]; > + ctx->indir_configured = 1; > + } > + if (rxfh_dev.key) { > + memcpy(ethtool_rxfh_context_key(ctx), rxfh_dev.key, > + dev_key_size); > + ctx->key_configured = 1; > + } > + if (rxfh_dev.hfunc != ETH_RSS_HASH_NO_CHANGE) > + ctx->hfunc = rxfh_dev.hfunc; > + if (rxfh_dev.input_xfrm != RXH_XFRM_NO_CHANGE) > + ctx->input_xfrm = rxfh_dev.input_xfrm; > + } > > out: > kfree(rss_config); >
On 20/06/2024 07:32, Przemek Kitszel wrote: > On 6/20/24 07:47, edward.cree@amd.com wrote: >> From: Edward Cree <ecree.xilinx@gmail.com> >> >> Since drivers are still choosing the context IDs, we have to force the >> XArray to use the ID they've chosen rather than picking one ourselves, >> and handle the case where they give us an ID that's already in use. > > Q: This is a new API, perhaps you could force adopters to convert to > not choosing ID and switching to allocated one? Yes, that's exactly what the next patch (#4) does, when it introduces the new API.
On 6/20/24 08:37, Edward Cree wrote: > On 20/06/2024 07:32, Przemek Kitszel wrote: >> On 6/20/24 07:47, edward.cree@amd.com wrote: >>> From: Edward Cree <ecree.xilinx@gmail.com> >>> >>> Since drivers are still choosing the context IDs, we have to force the >>> XArray to use the ID they've chosen rather than picking one ourselves, >>> and handle the case where they give us an ID that's already in use. >> >> Q: This is a new API, perhaps you could force adopters to convert to >> not choosing ID and switching to allocated one? > > Yes, that's exactly what the next patch (#4) does, when it introduces > the new API. Thanks, I noticed it only after reading this patch. I know that there are is v7 already, but I don't know if you just missed my other comments to this v6 patch, or they are not relevant after you answering the first question? (Code is not removed in subsequent patch, so I guess you just missed).
On 25/06/2024 08:17, Przemek Kitszel wrote: > I know that there are is v7 already, but I don't know if you just missed > my other comments to this v6 patch, or they are not relevant after you > answering the first question? (Code is not removed in subsequent patch, > so I guess you just missed). Didn't spot them, sorry. Will look through and address.
On 20/06/2024 07:32, Przemek Kitszel wrote: > On 6/20/24 07:47, edward.cree@amd.com wrote: >> + return struct_size((struct ethtool_rxfh_context *)0, data, flex_len); > > struct_size_t Yup, will do, thanks for the suggestion. Don't think that existed yet when I wrote v1 :-D >> + /* Update rss_ctx tracking */ >> + if (create) { >> + /* Ideally this should happen before calling the driver, >> + * so that we can fail more cleanly; but we don't have the >> + * context ID until the driver picks it, so we have to >> + * wait until after. >> + */ >> + if (WARN_ON(xa_load(&dev->ethtool->rss_ctx, rxfh.rss_context))) { >> + /* context ID reused, our tracking is screwed */ > > why no error code set? Because at this point the driver *has* created the context, it's in the hardware. If we wanted to return failure we'd have to call the driver again to delete it, and that would still leave an ugly case where that call fails. > >> + kfree(ctx); >> + goto out; >> + } >> + /* Allocate the exact ID the driver gave us */ >> + if (xa_is_err(xa_store(&dev->ethtool->rss_ctx, rxfh.rss_context, >> + ctx, GFP_KERNEL))) { > > this is racy - assuming it is possible that context was set by other > means (otherwisce you would not xa_load() a few lines above) - > a concurrent writer could have done this just after you xa_load() call. I don't expect a concurrent writer - this is all under RTNL. The xa_load() is there in case we create two contexts consecutively and the driver gives us the same ID both times. > so, instead of xa_load() + xa_store() just use xa_insert() The reason for splitting it up is for the WARN_ON on the xa_load(). I guess with xa_insert() it would have to be WARN_ON(xa_insert() == -EBUSY)? > anyway I feel the pain of trying to support both driver-selected IDs > and your own
On 6/25/24 15:39, Edward Cree wrote: > On 20/06/2024 07:32, Przemek Kitszel wrote: >> On 6/20/24 07:47, edward.cree@amd.com wrote: >>> + return struct_size((struct ethtool_rxfh_context *)0, data, flex_len); >> >> struct_size_t > > Yup, will do, thanks for the suggestion. > Don't think that existed yet when I wrote v1 :-D yeah, but it's just a nitpick ATM > >>> + /* Update rss_ctx tracking */ >>> + if (create) { >>> + /* Ideally this should happen before calling the driver, >>> + * so that we can fail more cleanly; but we don't have the >>> + * context ID until the driver picks it, so we have to >>> + * wait until after. >>> + */ >>> + if (WARN_ON(xa_load(&dev->ethtool->rss_ctx, rxfh.rss_context))) { >>> + /* context ID reused, our tracking is screwed */ >> >> why no error code set? > > Because at this point the driver *has* created the context, it's > in the hardware. If we wanted to return failure we'd have to > call the driver again to delete it, and that would still leave > an ugly case where that call fails. driver is creating both HW context and ID at the same time, after you call it from ethtool, eh :( then my only concern is why do we want to keep old context instead of update? (my only and last concern for this series by now) say dumb driver always says "ctx=1" because it does not now better, but wants to update the context > >> >>> + kfree(ctx); >>> + goto out; >>> + } >>> + /* Allocate the exact ID the driver gave us */ >>> + if (xa_is_err(xa_store(&dev->ethtool->rss_ctx, rxfh.rss_context, >>> + ctx, GFP_KERNEL))) { >> >> this is racy - assuming it is possible that context was set by other >> means (otherwisce you would not xa_load() a few lines above) - >> a concurrent writer could have done this just after you xa_load() call. > > I don't expect a concurrent writer - this is all under RTNL. > The xa_load() is there in case we create two contexts > consecutively and the driver gives us the same ID both times. thanks, makes sense with the other part of explanation :) > >> so, instead of xa_load() + xa_store() just use xa_insert() > > The reason for splitting it up is for the WARN_ON on the > xa_load(). I guess with xa_insert() it would have to be > WARN_ON(xa_insert() == -EBUSY)? you need to handle both EBUSY and ENOMEM, both of which you are handling by separate calls right now, but it is just stylistics at this point > >> anyway I feel the pain of trying to support both driver-selected IDs >> and your own
On 26/06/2024 10:05, Przemek Kitszel wrote: > On 6/25/24 15:39, Edward Cree wrote: >> On 20/06/2024 07:32, Przemek Kitszel wrote: >>> why no error code set? >> >> Because at this point the driver *has* created the context, it's >> in the hardware. If we wanted to return failure we'd have to >> call the driver again to delete it, and that would still leave >> an ugly case where that call fails. > > driver is creating both HW context and ID at the same time, after > you call it from ethtool, eh :( > > then my only concern is why do we want to keep old context instead of > update? (my only and last concern for this series by now) > say dumb driver always says "ctx=1" because it does not now better, > but wants to update the context Tbh I'm not sure there's a clear case either way, if driver is screwing up we don't know why or how. The old context could still be present too for all we know. So my preference is to say "we don't know what happened, let's just not touch the xarray at all". In any case the WARN_ON should hopefully quickly catch any drivers that are hitting this, and going forward new drivers using this API shouldn't get added. If you still feel strongly this should be changed, please elaborate further on the reasoning.
On 6/27/24 16:24, Edward Cree wrote: > On 26/06/2024 10:05, Przemek Kitszel wrote: >> On 6/25/24 15:39, Edward Cree wrote: >>> On 20/06/2024 07:32, Przemek Kitszel wrote: >>>> why no error code set? >>> >>> Because at this point the driver *has* created the context, it's >>> in the hardware. If we wanted to return failure we'd have to >>> call the driver again to delete it, and that would still leave >>> an ugly case where that call fails. >> >> driver is creating both HW context and ID at the same time, after >> you call it from ethtool, eh :( >> >> then my only concern is why do we want to keep old context instead of >> update? (my only and last concern for this series by now) >> say dumb driver always says "ctx=1" because it does not now better, >> but wants to update the context > > Tbh I'm not sure there's a clear case either way, if driver is > screwing up we don't know why or how. The old context could > still be present too for all we know. So my preference is to > say "we don't know what happened, let's just not touch the > xarray at all". > In any case the WARN_ON should hopefully quickly catch any > drivers that are hitting this, and going forward new drivers > using this API shouldn't get added. > > If you still feel strongly this should be changed, please > elaborate further on the reasoning. Thanks, it makes sense as currently in the code, works for me! I'll review v8
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 13c9c819de58..283ba4aff623 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -199,6 +199,17 @@ static inline u8 *ethtool_rxfh_context_key(struct ethtool_rxfh_context *ctx) return (u8 *)(ethtool_rxfh_context_indir(ctx) + ctx->indir_size); } +static inline size_t ethtool_rxfh_context_size(u32 indir_size, u32 key_size, + u16 priv_size) +{ + size_t indir_bytes = array_size(indir_size, sizeof(u32)); + size_t flex_len; + + flex_len = size_add(size_add(indir_bytes, key_size), + ALIGN(priv_size, sizeof(u32))); + return struct_size((struct ethtool_rxfh_context *)0, data, flex_len); +} + /* declare a link mode bitmap */ #define __ETHTOOL_DECLARE_LINK_MODE_MASK(name) \ DECLARE_BITMAP(name, __ETHTOOL_LINK_MODE_MASK_NBITS) @@ -709,6 +720,8 @@ struct ethtool_rxfh_param { * contexts. * @cap_rss_sym_xor_supported: indicates if the driver supports symmetric-xor * RSS. + * @rxfh_priv_size: size of the driver private data area the core should + * allocate for an RSS context (in &struct ethtool_rxfh_context). * @supported_coalesce_params: supported types of interrupt coalescing. * @supported_ring_params: supported ring params. * @get_drvinfo: Report driver/device information. Modern drivers no @@ -892,6 +905,7 @@ struct ethtool_ops { u32 cap_link_lanes_supported:1; u32 cap_rss_ctx_supported:1; u32 cap_rss_sym_xor_supported:1; + u16 rxfh_priv_size; u32 supported_coalesce_params; u32 supported_ring_params; void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 998571f05deb..f879deb6ac4e 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1278,10 +1278,12 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, const struct ethtool_ops *ops = dev->ethtool_ops; u32 dev_indir_size = 0, dev_key_size = 0, i; struct ethtool_rxfh_param rxfh_dev = {}; + struct ethtool_rxfh_context *ctx = NULL; struct netlink_ext_ack *extack = NULL; struct ethtool_rxnfc rx_rings; struct ethtool_rxfh rxfh; u32 indir_bytes = 0; + bool create = false; u8 *rss_config; int ret; @@ -1309,6 +1311,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, if ((rxfh.input_xfrm & RXH_XFRM_SYM_XOR) && !ops->cap_rss_sym_xor_supported) return -EOPNOTSUPP; + create = rxfh.rss_context == ETH_RXFH_CONTEXT_ALLOC; /* If either indir, hash key or function is valid, proceed further. * Must request at least one change: indir size, hash key, function @@ -1374,13 +1377,42 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, } } + if (create) { + if (rxfh_dev.rss_delete) { + ret = -EINVAL; + goto out; + } + ctx = kzalloc(ethtool_rxfh_context_size(dev_indir_size, + dev_key_size, + ops->rxfh_priv_size), + GFP_KERNEL_ACCOUNT); + if (!ctx) { + ret = -ENOMEM; + goto out; + } + ctx->indir_size = dev_indir_size; + ctx->key_size = dev_key_size; + ctx->hfunc = rxfh.hfunc; + ctx->input_xfrm = rxfh.input_xfrm; + ctx->priv_size = ops->rxfh_priv_size; + } else if (rxfh.rss_context) { + ctx = xa_load(&dev->ethtool->rss_ctx, rxfh.rss_context); + if (!ctx) { + ret = -ENOENT; + goto out; + } + } rxfh_dev.hfunc = rxfh.hfunc; rxfh_dev.rss_context = rxfh.rss_context; rxfh_dev.input_xfrm = rxfh.input_xfrm; ret = ops->set_rxfh(dev, &rxfh_dev, extack); - if (ret) + if (ret) { + if (create) + /* failed to create, free our new tracking entry */ + kfree(ctx); goto out; + } if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, rss_context), &rxfh_dev.rss_context, sizeof(rxfh_dev.rss_context))) @@ -1393,6 +1425,46 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) dev->priv_flags |= IFF_RXFH_CONFIGURED; } + /* Update rss_ctx tracking */ + if (create) { + /* Ideally this should happen before calling the driver, + * so that we can fail more cleanly; but we don't have the + * context ID until the driver picks it, so we have to + * wait until after. + */ + if (WARN_ON(xa_load(&dev->ethtool->rss_ctx, rxfh.rss_context))) { + /* context ID reused, our tracking is screwed */ + kfree(ctx); + goto out; + } + /* Allocate the exact ID the driver gave us */ + if (xa_is_err(xa_store(&dev->ethtool->rss_ctx, rxfh.rss_context, + ctx, GFP_KERNEL))) { + kfree(ctx); + goto out; + } + ctx->indir_configured = rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE; + ctx->key_configured = !!rxfh.key_size; + } + if (rxfh_dev.rss_delete) { + WARN_ON(xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context) != ctx); + kfree(ctx); + } else if (ctx) { + if (rxfh_dev.indir) { + for (i = 0; i < dev_indir_size; i++) + ethtool_rxfh_context_indir(ctx)[i] = rxfh_dev.indir[i]; + ctx->indir_configured = 1; + } + if (rxfh_dev.key) { + memcpy(ethtool_rxfh_context_key(ctx), rxfh_dev.key, + dev_key_size); + ctx->key_configured = 1; + } + if (rxfh_dev.hfunc != ETH_RSS_HASH_NO_CHANGE) + ctx->hfunc = rxfh_dev.hfunc; + if (rxfh_dev.input_xfrm != RXH_XFRM_NO_CHANGE) + ctx->input_xfrm = rxfh_dev.input_xfrm; + } out: kfree(rss_config);