Message ID | 0-v1-a5ead4a0c19d+c3a-cma_list_head_jgg@nvidia.com (mailing list archive) |
---|---|
State | Accepted |
Delegated to: | Jason Gunthorpe |
Headers | show |
Series | RDMA/cma: Split apart the multiple uses of the same list heads | expand |
On 9/16/2021 12:25 AM, Jason Gunthorpe wrote: > External email: Use caution opening links or attachments > > > Two list heads in the rdma_id_private are being used for multiple > purposes, to save a few bytes of memory. Give the different purposes > different names and union the memory that is clearly exclusive. > > list splits into device_item and listen_any_item. device_item is threaded > onto the cma_device's list and listen_any goes onto the > listen_any_list. IDs doing any listen cannot have devices. > > listen_list splits into listen_item and listen_list. listen_list is on the > parent listen any rdma_id_private and listen_item is on child listen that > is bound to a specific cma_dev. > > Which name should be used in which case depends on the state and other > factors of the rdma_id_private. Remap all the confusing references to make > sense with the new names, so at least there is some hope of matching the > necessary preconditions with each access. > > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> > --- > drivers/infiniband/core/cma.c | 34 ++++++++++++++++-------------- > drivers/infiniband/core/cma_priv.h | 11 ++++++++-- > 2 files changed, 27 insertions(+), 18 deletions(-) > > diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c > index 5aa58897965df4..f671771a474288 100644 > --- a/drivers/infiniband/core/cma.c > +++ b/drivers/infiniband/core/cma.c > @@ -453,7 +453,7 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv, > id_priv->id.device = cma_dev->device; > id_priv->id.route.addr.dev_addr.transport = > rdma_node_get_transport(cma_dev->device->node_type); > - list_add_tail(&id_priv->list, &cma_dev->id_list); > + list_add_tail(&id_priv->device_item, &cma_dev->id_list); > > trace_cm_id_attach(id_priv, cma_dev->device); > } > @@ -470,7 +470,7 @@ static void cma_attach_to_dev(struct rdma_id_private *id_priv, > static void cma_release_dev(struct rdma_id_private *id_priv) > { > mutex_lock(&lock); > - list_del(&id_priv->list); > + list_del_init(&id_priv->device_item); > cma_dev_put(id_priv->cma_dev); > id_priv->cma_dev = NULL; > id_priv->id.device = NULL; > @@ -854,6 +854,7 @@ __rdma_create_id(struct net *net, rdma_cm_event_handler event_handler, > init_completion(&id_priv->comp); > refcount_set(&id_priv->refcount, 1); > mutex_init(&id_priv->handler_mutex); > + INIT_LIST_HEAD(&id_priv->device_item); > INIT_LIST_HEAD(&id_priv->listen_list); > INIT_LIST_HEAD(&id_priv->mc_list); > get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); > @@ -1647,7 +1648,7 @@ static struct rdma_id_private *cma_find_listener( > return id_priv; > list_for_each_entry(id_priv_dev, > &id_priv->listen_list, > - listen_list) { > + listen_item) { > if (id_priv_dev->id.device == cm_id->device && > cma_match_net_dev(&id_priv_dev->id, > net_dev, req)) > @@ -1756,14 +1757,15 @@ static void _cma_cancel_listens(struct rdma_id_private *id_priv) > * Remove from listen_any_list to prevent added devices from spawning > * additional listen requests. > */ > - list_del(&id_priv->list); > + list_del_init(&id_priv->listen_any_item); > > while (!list_empty(&id_priv->listen_list)) { > - dev_id_priv = list_entry(id_priv->listen_list.next, > - struct rdma_id_private, listen_list); > + dev_id_priv = > + list_first_entry(&id_priv->listen_list, > + struct rdma_id_private, listen_item); > /* sync with device removal to avoid duplicate destruction */ > - list_del_init(&dev_id_priv->list); > - list_del(&dev_id_priv->listen_list); > + list_del_init(&dev_id_priv->device_item); > + list_del_init(&dev_id_priv->listen_item); > mutex_unlock(&lock); > > rdma_destroy_id(&dev_id_priv->id); > @@ -2556,7 +2558,7 @@ static int cma_listen_on_dev(struct rdma_id_private *id_priv, > ret = rdma_listen(&dev_id_priv->id, id_priv->backlog); > if (ret) > goto err_listen; > - list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); > + list_add_tail(&dev_id_priv->listen_item, &id_priv->listen_list); > return 0; > err_listen: > /* Caller must destroy this after releasing lock */ > @@ -2572,13 +2574,13 @@ static int cma_listen_on_all(struct rdma_id_private *id_priv) > int ret; > > mutex_lock(&lock); > - list_add_tail(&id_priv->list, &listen_any_list); > + list_add_tail(&id_priv->listen_any_item, &listen_any_list); > list_for_each_entry(cma_dev, &dev_list, list) { > ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy); > if (ret) { > /* Prevent racing with cma_process_remove() */ > if (to_destroy) > - list_del_init(&to_destroy->list); > + list_del_init(&to_destroy->device_item); > goto err_listen; > } > } > @@ -4868,7 +4870,7 @@ static int cma_netdev_callback(struct notifier_block *self, unsigned long event, > > mutex_lock(&lock); > list_for_each_entry(cma_dev, &dev_list, list) > - list_for_each_entry(id_priv, &cma_dev->id_list, list) { > + list_for_each_entry(id_priv, &cma_dev->id_list, device_item) { > ret = cma_netdev_change(ndev, id_priv); > if (ret) > goto out; > @@ -4928,10 +4930,10 @@ static void cma_process_remove(struct cma_device *cma_dev) > mutex_lock(&lock); > while (!list_empty(&cma_dev->id_list)) { > struct rdma_id_private *id_priv = list_first_entry( > - &cma_dev->id_list, struct rdma_id_private, list); > + &cma_dev->id_list, struct rdma_id_private, device_item); > > - list_del(&id_priv->listen_list); > - list_del_init(&id_priv->list); > + list_del_init(&id_priv->listen_item); Should it still be list_del(&id_priv->listen_list); as it isn't dev_id_priv?
On Thu, Sep 16, 2021 at 01:11:14PM +0800, Mark Zhang wrote: > > @@ -4928,10 +4930,10 @@ static void cma_process_remove(struct cma_device *cma_dev) > > mutex_lock(&lock); > > while (!list_empty(&cma_dev->id_list)) { > > struct rdma_id_private *id_priv = list_first_entry( > > - &cma_dev->id_list, struct rdma_id_private, list); > > + &cma_dev->id_list, struct rdma_id_private, device_item); > > > > - list_del(&id_priv->listen_list); > > - list_del_init(&id_priv->list); > > + list_del_init(&id_priv->listen_item); > > Should it still be > list_del(&id_priv->listen_list); > as it isn't dev_id_priv? Yes, probably should stay here, but it isn't entirely sane The next code block must trigger the list_del or the wait_for_completion() below will block forever. The whole RDMA_CM_EVENT_DEVICE_REMOVAL bit is kind of insane and needs some cleaning. For instance I think it is a bug if any ULP doesn't return 1 from the event. Jason
On Thu, Sep 16, 2021 at 01:11:14PM +0800, Mark Zhang wrote: > > @@ -4928,10 +4930,10 @@ static void cma_process_remove(struct cma_device *cma_dev) > > mutex_lock(&lock); > > while (!list_empty(&cma_dev->id_list)) { > > struct rdma_id_private *id_priv = list_first_entry( > > - &cma_dev->id_list, struct rdma_id_private, list); > > + &cma_dev->id_list, struct rdma_id_private, device_item); > > > > - list_del(&id_priv->listen_list); > > - list_del_init(&id_priv->list); > > + list_del_init(&id_priv->listen_item); > > Should it still be > list_del(&id_priv->listen_list); > as it isn't dev_id_priv? Actually I misunderstood your question Yes, it should be listen_item - it makes no sense to delete a list - that would just randomly remove the first item. At this point we are iterating over a device list and the rules have device id_privs using the list_item side of the union. Only IDs on the listen_any_list use the listen_list side. Jason
On Wed, Sep 15, 2021 at 01:25:19PM -0300, Jason Gunthorpe wrote: > Two list heads in the rdma_id_private are being used for multiple > purposes, to save a few bytes of memory. Give the different purposes > different names and union the memory that is clearly exclusive. > > list splits into device_item and listen_any_item. device_item is threaded > onto the cma_device's list and listen_any goes onto the > listen_any_list. IDs doing any listen cannot have devices. > > listen_list splits into listen_item and listen_list. listen_list is on the > parent listen any rdma_id_private and listen_item is on child listen that > is bound to a specific cma_dev. > > Which name should be used in which case depends on the state and other > factors of the rdma_id_private. Remap all the confusing references to make > sense with the new names, so at least there is some hope of matching the > necessary preconditions with each access. > > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> > --- > drivers/infiniband/core/cma.c | 34 ++++++++++++++++-------------- > drivers/infiniband/core/cma_priv.h | 11 ++++++++-- > 2 files changed, 27 insertions(+), 18 deletions(-) Applied to for-next Jason
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 5aa58897965df4..f671771a474288 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -453,7 +453,7 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv, id_priv->id.device = cma_dev->device; id_priv->id.route.addr.dev_addr.transport = rdma_node_get_transport(cma_dev->device->node_type); - list_add_tail(&id_priv->list, &cma_dev->id_list); + list_add_tail(&id_priv->device_item, &cma_dev->id_list); trace_cm_id_attach(id_priv, cma_dev->device); } @@ -470,7 +470,7 @@ static void cma_attach_to_dev(struct rdma_id_private *id_priv, static void cma_release_dev(struct rdma_id_private *id_priv) { mutex_lock(&lock); - list_del(&id_priv->list); + list_del_init(&id_priv->device_item); cma_dev_put(id_priv->cma_dev); id_priv->cma_dev = NULL; id_priv->id.device = NULL; @@ -854,6 +854,7 @@ __rdma_create_id(struct net *net, rdma_cm_event_handler event_handler, init_completion(&id_priv->comp); refcount_set(&id_priv->refcount, 1); mutex_init(&id_priv->handler_mutex); + INIT_LIST_HEAD(&id_priv->device_item); INIT_LIST_HEAD(&id_priv->listen_list); INIT_LIST_HEAD(&id_priv->mc_list); get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); @@ -1647,7 +1648,7 @@ static struct rdma_id_private *cma_find_listener( return id_priv; list_for_each_entry(id_priv_dev, &id_priv->listen_list, - listen_list) { + listen_item) { if (id_priv_dev->id.device == cm_id->device && cma_match_net_dev(&id_priv_dev->id, net_dev, req)) @@ -1756,14 +1757,15 @@ static void _cma_cancel_listens(struct rdma_id_private *id_priv) * Remove from listen_any_list to prevent added devices from spawning * additional listen requests. */ - list_del(&id_priv->list); + list_del_init(&id_priv->listen_any_item); while (!list_empty(&id_priv->listen_list)) { - dev_id_priv = list_entry(id_priv->listen_list.next, - struct rdma_id_private, listen_list); + dev_id_priv = + list_first_entry(&id_priv->listen_list, + struct rdma_id_private, listen_item); /* sync with device removal to avoid duplicate destruction */ - list_del_init(&dev_id_priv->list); - list_del(&dev_id_priv->listen_list); + list_del_init(&dev_id_priv->device_item); + list_del_init(&dev_id_priv->listen_item); mutex_unlock(&lock); rdma_destroy_id(&dev_id_priv->id); @@ -2556,7 +2558,7 @@ static int cma_listen_on_dev(struct rdma_id_private *id_priv, ret = rdma_listen(&dev_id_priv->id, id_priv->backlog); if (ret) goto err_listen; - list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); + list_add_tail(&dev_id_priv->listen_item, &id_priv->listen_list); return 0; err_listen: /* Caller must destroy this after releasing lock */ @@ -2572,13 +2574,13 @@ static int cma_listen_on_all(struct rdma_id_private *id_priv) int ret; mutex_lock(&lock); - list_add_tail(&id_priv->list, &listen_any_list); + list_add_tail(&id_priv->listen_any_item, &listen_any_list); list_for_each_entry(cma_dev, &dev_list, list) { ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy); if (ret) { /* Prevent racing with cma_process_remove() */ if (to_destroy) - list_del_init(&to_destroy->list); + list_del_init(&to_destroy->device_item); goto err_listen; } } @@ -4868,7 +4870,7 @@ static int cma_netdev_callback(struct notifier_block *self, unsigned long event, mutex_lock(&lock); list_for_each_entry(cma_dev, &dev_list, list) - list_for_each_entry(id_priv, &cma_dev->id_list, list) { + list_for_each_entry(id_priv, &cma_dev->id_list, device_item) { ret = cma_netdev_change(ndev, id_priv); if (ret) goto out; @@ -4928,10 +4930,10 @@ static void cma_process_remove(struct cma_device *cma_dev) mutex_lock(&lock); while (!list_empty(&cma_dev->id_list)) { struct rdma_id_private *id_priv = list_first_entry( - &cma_dev->id_list, struct rdma_id_private, list); + &cma_dev->id_list, struct rdma_id_private, device_item); - list_del(&id_priv->listen_list); - list_del_init(&id_priv->list); + list_del_init(&id_priv->listen_item); + list_del_init(&id_priv->device_item); cma_id_get(id_priv); mutex_unlock(&lock); @@ -5008,7 +5010,7 @@ static int cma_add_one(struct ib_device *device) mutex_lock(&lock); list_add_tail(&cma_dev->list, &dev_list); - list_for_each_entry(id_priv, &listen_any_list, list) { + list_for_each_entry(id_priv, &listen_any_list, listen_any_item) { ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy); if (ret) goto free_listen; diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h index 5c463da9984536..666d8abdfe4e84 100644 --- a/drivers/infiniband/core/cma_priv.h +++ b/drivers/infiniband/core/cma_priv.h @@ -55,8 +55,15 @@ struct rdma_id_private { struct rdma_bind_list *bind_list; struct hlist_node node; - struct list_head list; /* listen_any_list or cma_device.list */ - struct list_head listen_list; /* per device listens */ + union { + struct list_head device_item; /* On cma_device->id_list */ + struct list_head listen_any_item; /* On listen_any_list */ + }; + union { + /* On rdma_id_private->listen_list */ + struct list_head listen_item; + struct list_head listen_list; + }; struct cma_device *cma_dev; struct list_head mc_list;
Two list heads in the rdma_id_private are being used for multiple purposes, to save a few bytes of memory. Give the different purposes different names and union the memory that is clearly exclusive. list splits into device_item and listen_any_item. device_item is threaded onto the cma_device's list and listen_any goes onto the listen_any_list. IDs doing any listen cannot have devices. listen_list splits into listen_item and listen_list. listen_list is on the parent listen any rdma_id_private and listen_item is on child listen that is bound to a specific cma_dev. Which name should be used in which case depends on the state and other factors of the rdma_id_private. Remap all the confusing references to make sense with the new names, so at least there is some hope of matching the necessary preconditions with each access. Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> --- drivers/infiniband/core/cma.c | 34 ++++++++++++++++-------------- drivers/infiniband/core/cma_priv.h | 11 ++++++++-- 2 files changed, 27 insertions(+), 18 deletions(-) base-commit: ca465e1f1f9b38fe916a36f7d80c5d25f2337c81