Message ID | 20240430010732.666512-2-dw@davidwei.uk (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | bnxt: implement queue api | expand |
On Mon, Apr 29, 2024 at 6:07 PM David Wei <dw@davidwei.uk> wrote: > > From: Mina Almasry <almasrymina@google.com> > > This API enables the net stack to reset the queues used for devmem TCP. > > Signed-off-by: Mina Almasry <almasrymina@google.com> > Signed-off-by: David Wei <dw@davidwei.uk> > --- > include/linux/netdevice.h | 3 +++ > include/net/netdev_queues.h | 27 +++++++++++++++++++++++++++ > 2 files changed, 30 insertions(+) > > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h > index f849e7d110ed..6a58ec73c5e8 100644 > --- a/include/linux/netdevice.h > +++ b/include/linux/netdevice.h > @@ -1957,6 +1957,7 @@ enum netdev_reg_state { > * @sysfs_rx_queue_group: Space for optional per-rx queue attributes > * @rtnl_link_ops: Rtnl_link_ops > * @stat_ops: Optional ops for queue-aware statistics > + * @queue_mgmt_ops: Optional ops for queue management > * > * @gso_max_size: Maximum size of generic segmentation offload > * @tso_max_size: Device (as in HW) limit on the max TSO request size > @@ -2340,6 +2341,8 @@ struct net_device { > > const struct netdev_stat_ops *stat_ops; > > + const struct netdev_queue_mgmt_ops *queue_mgmt_ops; > + > /* for setting kernel sock attribute on TCP connection setup */ > #define GSO_MAX_SEGS 65535u > #define GSO_LEGACY_MAX_SIZE 65536u > diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h > index 1ec408585373..337df0860ae6 100644 > --- a/include/net/netdev_queues.h > +++ b/include/net/netdev_queues.h > @@ -60,6 +60,33 @@ struct netdev_stat_ops { > struct netdev_queue_stats_tx *tx); > }; > > +/** > + * struct netdev_queue_mgmt_ops - netdev ops for queue management > + * > + * @ndo_queue_mem_alloc: Allocate memory for an RX queue. The memory returned > + * in the form of a void* can be passed to > + * ndo_queue_mem_free() for freeing or to ndo_queue_start > + * to create an RX queue with this memory. > + * > + * @ndo_queue_mem_free: Free memory from an RX queue. > + * > + * @ndo_queue_start: Start an RX queue at the specified index. > + * > + * @ndo_queue_stop: Stop the RX queue at the specified index. > + */ > +struct netdev_queue_mgmt_ops { > + void * (*ndo_queue_mem_alloc)(struct net_device *dev, > + int idx); > + void (*ndo_queue_mem_free)(struct net_device *dev, > + void *queue_mem); > + int (*ndo_queue_start)(struct net_device *dev, > + int idx, > + void *queue_mem); > + int (*ndo_queue_stop)(struct net_device *dev, > + int idx, > + void **out_queue_mem); > +}; Sorry, I think we raced a bit, we updated our interface definition based on your/Jakub's feedback to expose the size of the struct for core to allocate, so it looks like this for us now: +struct netdev_queue_mgmt_ops { + size_t ndo_queue_mem_size; + int (*ndo_queue_mem_alloc)(struct net_device *dev, + void *per_queue_mem, + int idx); + void (*ndo_queue_mem_free)(struct net_device *dev, + void *per_queue_mem); + int (*ndo_queue_start)(struct net_device *dev, + void *per_queue_mem, + int idx); + int (*ndo_queue_stop)(struct net_device *dev, + void *per_queue_mem, + int idx); +}; + The idea is that ndo_queue_mem_size is the size of the memory per_queue_mem points to. The rest of the functions are slightly modified to allow core to allocate the memory and pass in the pointer for the driver to fill in/us. I think Shailend is close to posting the patches, let us know if you see any issues.
On 2024-04-30 11:00 am, Mina Almasry wrote: > On Mon, Apr 29, 2024 at 6:07 PM David Wei <dw@davidwei.uk> wrote: >> >> From: Mina Almasry <almasrymina@google.com> >> >> This API enables the net stack to reset the queues used for devmem TCP. >> >> Signed-off-by: Mina Almasry <almasrymina@google.com> >> Signed-off-by: David Wei <dw@davidwei.uk> >> --- >> include/linux/netdevice.h | 3 +++ >> include/net/netdev_queues.h | 27 +++++++++++++++++++++++++++ >> 2 files changed, 30 insertions(+) >> >> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h >> index f849e7d110ed..6a58ec73c5e8 100644 >> --- a/include/linux/netdevice.h >> +++ b/include/linux/netdevice.h >> @@ -1957,6 +1957,7 @@ enum netdev_reg_state { >> * @sysfs_rx_queue_group: Space for optional per-rx queue attributes >> * @rtnl_link_ops: Rtnl_link_ops >> * @stat_ops: Optional ops for queue-aware statistics >> + * @queue_mgmt_ops: Optional ops for queue management >> * >> * @gso_max_size: Maximum size of generic segmentation offload >> * @tso_max_size: Device (as in HW) limit on the max TSO request size >> @@ -2340,6 +2341,8 @@ struct net_device { >> >> const struct netdev_stat_ops *stat_ops; >> >> + const struct netdev_queue_mgmt_ops *queue_mgmt_ops; >> + >> /* for setting kernel sock attribute on TCP connection setup */ >> #define GSO_MAX_SEGS 65535u >> #define GSO_LEGACY_MAX_SIZE 65536u >> diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h >> index 1ec408585373..337df0860ae6 100644 >> --- a/include/net/netdev_queues.h >> +++ b/include/net/netdev_queues.h >> @@ -60,6 +60,33 @@ struct netdev_stat_ops { >> struct netdev_queue_stats_tx *tx); >> }; >> >> +/** >> + * struct netdev_queue_mgmt_ops - netdev ops for queue management >> + * >> + * @ndo_queue_mem_alloc: Allocate memory for an RX queue. The memory returned >> + * in the form of a void* can be passed to >> + * ndo_queue_mem_free() for freeing or to ndo_queue_start >> + * to create an RX queue with this memory. >> + * >> + * @ndo_queue_mem_free: Free memory from an RX queue. >> + * >> + * @ndo_queue_start: Start an RX queue at the specified index. >> + * >> + * @ndo_queue_stop: Stop the RX queue at the specified index. >> + */ >> +struct netdev_queue_mgmt_ops { >> + void * (*ndo_queue_mem_alloc)(struct net_device *dev, >> + int idx); >> + void (*ndo_queue_mem_free)(struct net_device *dev, >> + void *queue_mem); >> + int (*ndo_queue_start)(struct net_device *dev, >> + int idx, >> + void *queue_mem); >> + int (*ndo_queue_stop)(struct net_device *dev, >> + int idx, >> + void **out_queue_mem); >> +}; > > Sorry, I think we raced a bit, we updated our interface definition > based on your/Jakub's feedback to expose the size of the struct for > core to allocate, so it looks like this for us now: > > +struct netdev_queue_mgmt_ops { > + size_t ndo_queue_mem_size; > + int (*ndo_queue_mem_alloc)(struct net_device *dev, > + void *per_queue_mem, > + int idx); > + void (*ndo_queue_mem_free)(struct net_device *dev, > + void *per_queue_mem); > + int (*ndo_queue_start)(struct net_device *dev, > + void *per_queue_mem, > + int idx); > + int (*ndo_queue_stop)(struct net_device *dev, > + void *per_queue_mem, > + int idx); > +}; > + > > The idea is that ndo_queue_mem_size is the size of the memory > per_queue_mem points to. Thanks, I'll update this. No race, I'm just working on the bnxt side at the same time because I need feedback from Broadcom. Hope you don't mind whichever one merges first. Full credit is given to you on the queue API + netdev queue reset function. > > The rest of the functions are slightly modified to allow core to > allocate the memory and pass in the pointer for the driver to fill > in/us. I think Shailend is close to posting the patches, let us know > if you see any issues. > Great, I'll take a look when it is posted.
On 2024-04-30 11:00 am, Mina Almasry wrote: > On Mon, Apr 29, 2024 at 6:07 PM David Wei <dw@davidwei.uk> wrote: >> >> From: Mina Almasry <almasrymina@google.com> >> >> This API enables the net stack to reset the queues used for devmem TCP. >> >> Signed-off-by: Mina Almasry <almasrymina@google.com> >> Signed-off-by: David Wei <dw@davidwei.uk> >> --- >> include/linux/netdevice.h | 3 +++ >> include/net/netdev_queues.h | 27 +++++++++++++++++++++++++++ >> 2 files changed, 30 insertions(+) >> >> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h >> index f849e7d110ed..6a58ec73c5e8 100644 >> --- a/include/linux/netdevice.h >> +++ b/include/linux/netdevice.h >> @@ -1957,6 +1957,7 @@ enum netdev_reg_state { >> * @sysfs_rx_queue_group: Space for optional per-rx queue attributes >> * @rtnl_link_ops: Rtnl_link_ops >> * @stat_ops: Optional ops for queue-aware statistics >> + * @queue_mgmt_ops: Optional ops for queue management >> * >> * @gso_max_size: Maximum size of generic segmentation offload >> * @tso_max_size: Device (as in HW) limit on the max TSO request size >> @@ -2340,6 +2341,8 @@ struct net_device { >> >> const struct netdev_stat_ops *stat_ops; >> >> + const struct netdev_queue_mgmt_ops *queue_mgmt_ops; >> + >> /* for setting kernel sock attribute on TCP connection setup */ >> #define GSO_MAX_SEGS 65535u >> #define GSO_LEGACY_MAX_SIZE 65536u >> diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h >> index 1ec408585373..337df0860ae6 100644 >> --- a/include/net/netdev_queues.h >> +++ b/include/net/netdev_queues.h >> @@ -60,6 +60,33 @@ struct netdev_stat_ops { >> struct netdev_queue_stats_tx *tx); >> }; >> >> +/** >> + * struct netdev_queue_mgmt_ops - netdev ops for queue management >> + * >> + * @ndo_queue_mem_alloc: Allocate memory for an RX queue. The memory returned >> + * in the form of a void* can be passed to >> + * ndo_queue_mem_free() for freeing or to ndo_queue_start >> + * to create an RX queue with this memory. >> + * >> + * @ndo_queue_mem_free: Free memory from an RX queue. >> + * >> + * @ndo_queue_start: Start an RX queue at the specified index. >> + * >> + * @ndo_queue_stop: Stop the RX queue at the specified index. >> + */ >> +struct netdev_queue_mgmt_ops { >> + void * (*ndo_queue_mem_alloc)(struct net_device *dev, >> + int idx); >> + void (*ndo_queue_mem_free)(struct net_device *dev, >> + void *queue_mem); >> + int (*ndo_queue_start)(struct net_device *dev, >> + int idx, >> + void *queue_mem); >> + int (*ndo_queue_stop)(struct net_device *dev, >> + int idx, >> + void **out_queue_mem); >> +}; > > Sorry, I think we raced a bit, we updated our interface definition > based on your/Jakub's feedback to expose the size of the struct for > core to allocate, so it looks like this for us now: > > +struct netdev_queue_mgmt_ops { > + size_t ndo_queue_mem_size; > + int (*ndo_queue_mem_alloc)(struct net_device *dev, > + void *per_queue_mem, > + int idx); > + void (*ndo_queue_mem_free)(struct net_device *dev, > + void *per_queue_mem); > + int (*ndo_queue_start)(struct net_device *dev, > + void *per_queue_mem, > + int idx); > + int (*ndo_queue_stop)(struct net_device *dev, > + void *per_queue_mem, > + int idx); > +}; > + > > The idea is that ndo_queue_mem_size is the size of the memory > per_queue_mem points to. > > The rest of the functions are slightly modified to allow core to > allocate the memory and pass in the pointer for the driver to fill > in/us. I think Shailend is close to posting the patches, let us know > if you see any issues. > Hmm. Thinking about this a bit more, are you having netdev core manage all of the queues, i.e. alloc/free during open()/stop()? Otherwise how can netdev core pass in the old queue mem into ndo_queue_stop(), and where is the queue mem stored? Or is it the new queue mem being passed into ndo_queue_stop()? My takeaway from the discussion on Shailend's last RFC is that for the first iteration we'll keep what we had before and have the driver alloc/free the qmem. Implementing this for bnxt felt pretty good as well.
On Wed, May 1, 2024 at 6:00 PM David Wei <dw@davidwei.uk> wrote: > > On 2024-04-30 11:00 am, Mina Almasry wrote: > > > > Sorry, I think we raced a bit, we updated our interface definition > > based on your/Jakub's feedback to expose the size of the struct for > > core to allocate, so it looks like this for us now: > > > > +struct netdev_queue_mgmt_ops { > > + size_t ndo_queue_mem_size; > > + int (*ndo_queue_mem_alloc)(struct net_device *dev, > > + void *per_queue_mem, > > + int idx); > > + void (*ndo_queue_mem_free)(struct net_device *dev, > > + void *per_queue_mem); > > + int (*ndo_queue_start)(struct net_device *dev, > > + void *per_queue_mem, > > + int idx); > > + int (*ndo_queue_stop)(struct net_device *dev, > > + void *per_queue_mem, > > + int idx); > > +}; > > + > > > > The idea is that ndo_queue_mem_size is the size of the memory > > per_queue_mem points to. > > Thanks, I'll update this. > > No race, I'm just working on the bnxt side at the same time because I > need feedback from Broadcom. Hope you don't mind whichever one merges > first. Full credit is given to you on the queue API + netdev queue reset > function. > Thanks! No concerns from me on what gets merged first. By 'raced' I just meant that we were incorporating your feedback while you were working on the older version :D
On Wed, May 1, 2024 at 7:44 PM David Wei <dw@davidwei.uk> wrote: > > On 2024-04-30 11:00 am, Mina Almasry wrote: > > > > Sorry, I think we raced a bit, we updated our interface definition > > based on your/Jakub's feedback to expose the size of the struct for > > core to allocate, so it looks like this for us now: > > > > +struct netdev_queue_mgmt_ops { > > + size_t ndo_queue_mem_size; > > + int (*ndo_queue_mem_alloc)(struct net_device *dev, > > + void *per_queue_mem, > > + int idx); > > + void (*ndo_queue_mem_free)(struct net_device *dev, > > + void *per_queue_mem); > > + int (*ndo_queue_start)(struct net_device *dev, > > + void *per_queue_mem, > > + int idx); > > + int (*ndo_queue_stop)(struct net_device *dev, > > + void *per_queue_mem, > > + int idx); > > +}; > > + > > > > The idea is that ndo_queue_mem_size is the size of the memory > > per_queue_mem points to. > > > > The rest of the functions are slightly modified to allow core to > > allocate the memory and pass in the pointer for the driver to fill > > in/us. I think Shailend is close to posting the patches, let us know > > if you see any issues. > > > > Hmm. Thinking about this a bit more, are you having netdev core manage > all of the queues, i.e. alloc/free during open()/stop()? No, we do not modify open()/stop(). I think in the future that is the plan. However when it comes to the future direction of queue management I think that's more Jakub's purview so I'm leaving it up to him. For devmem TCP I'm just implementing what we need, and I'm trusting that it is aligned with the general direction Jakub wants to take things in eventually as he hasn't (yet) complained in the reviews :D > Otherwise how > can netdev core pass in the old queue mem into ndo_queue_stop(), and > where is the queue mem stored? > What we have in mind, is: 1. driver declares how much memory it needs in ndo_queue_mem_size 2. Core kzalloc's that much memory. 3. Core passes a pointer to that memory to ndo_queue_stop. The driver fills in the memory and stops the queue. Then, core will have a pointer to the 'old queue mem'. Core can then free that memory if allocing/starting a new queue succeeded, or do a ndo_queue_start(old_mem) if it wishes to start a new queue. We do something similar for ndo_queue_mem_alloc: 1. driver declares how much memory it needs in ndo_queue_mem_size 2. Core kzallocs's that much memory. 3. Core passes that memory to ndo_queue_mem_alloc. The driver allocs the resources for a new queue, attaches the resources to the passed pointer, and returns. We can also discuss over a public netdev bbb call if face to face time makes it easier to align quickly. > Or is it the new queue mem being passed into ndo_queue_stop()? > > My takeaway from the discussion on Shailend's last RFC is that for the > first iteration we'll keep what we had before and have the driver > alloc/free the qmem. Implementing this for bnxt felt pretty good as > well. We can certainly switch back to what we had before, and remove the ndo_queue_mem_size we added if you've changed your mind, not a big deal.
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f849e7d110ed..6a58ec73c5e8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1957,6 +1957,7 @@ enum netdev_reg_state { * @sysfs_rx_queue_group: Space for optional per-rx queue attributes * @rtnl_link_ops: Rtnl_link_ops * @stat_ops: Optional ops for queue-aware statistics + * @queue_mgmt_ops: Optional ops for queue management * * @gso_max_size: Maximum size of generic segmentation offload * @tso_max_size: Device (as in HW) limit on the max TSO request size @@ -2340,6 +2341,8 @@ struct net_device { const struct netdev_stat_ops *stat_ops; + const struct netdev_queue_mgmt_ops *queue_mgmt_ops; + /* for setting kernel sock attribute on TCP connection setup */ #define GSO_MAX_SEGS 65535u #define GSO_LEGACY_MAX_SIZE 65536u diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index 1ec408585373..337df0860ae6 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -60,6 +60,33 @@ struct netdev_stat_ops { struct netdev_queue_stats_tx *tx); }; +/** + * struct netdev_queue_mgmt_ops - netdev ops for queue management + * + * @ndo_queue_mem_alloc: Allocate memory for an RX queue. The memory returned + * in the form of a void* can be passed to + * ndo_queue_mem_free() for freeing or to ndo_queue_start + * to create an RX queue with this memory. + * + * @ndo_queue_mem_free: Free memory from an RX queue. + * + * @ndo_queue_start: Start an RX queue at the specified index. + * + * @ndo_queue_stop: Stop the RX queue at the specified index. + */ +struct netdev_queue_mgmt_ops { + void * (*ndo_queue_mem_alloc)(struct net_device *dev, + int idx); + void (*ndo_queue_mem_free)(struct net_device *dev, + void *queue_mem); + int (*ndo_queue_start)(struct net_device *dev, + int idx, + void *queue_mem); + int (*ndo_queue_stop)(struct net_device *dev, + int idx, + void **out_queue_mem); +}; + /** * DOC: Lockless queue stopping / waking helpers. *