Message ID | 44253f7508b21eb2caefea3980c2bc072869116c.1718553901.git.leon@kernel.org (mailing list archive) |
---|---|
State | Accepted |
Headers | show |
Series | Multi-plane support for mlx5 | expand |
在 2024/6/17 0:08, Leon Romanovsky 写道: > From: Mark Zhang <markzhang@nvidia.com> > > This patch adds 2 APIs, as well as driver operations to support adding > and deleting an IB sub device, which provides part of functionalities > of it's parent. > > A sub device has a type; for a sub device with type "SMI", it provides > the smi capability through umad for its parent, meaning uverb is not > supported. > > A sub device cannot live without a parent. So when a parent is > released, all it's sub devices are released as well. > > Signed-off-by: Mark Zhang <markzhang@nvidia.com> > Signed-off-by: Leon Romanovsky <leonro@nvidia.com> > --- > drivers/infiniband/core/device.c | 68 +++++++++++++++++++++++++++ > drivers/infiniband/core/uverbs_main.c | 3 +- > include/rdma/ib_verbs.h | 43 +++++++++++++++++ > include/uapi/rdma/rdma_netlink.h | 5 ++ > 4 files changed, 118 insertions(+), 1 deletion(-) > > diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c > index 55aa7aa32d4a..8547cab50b23 100644 > --- a/drivers/infiniband/core/device.c > +++ b/drivers/infiniband/core/device.c > @@ -641,6 +641,11 @@ struct ib_device *_ib_alloc_device(size_t size) > BIT_ULL(IB_USER_VERBS_CMD_REG_MR) | > BIT_ULL(IB_USER_VERBS_CMD_REREG_MR) | > BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ); > + > + mutex_init(&device->subdev_lock); > + INIT_LIST_HEAD(&device->subdev_list_head); > + INIT_LIST_HEAD(&device->subdev_list); > + > return device; > } > EXPORT_SYMBOL(_ib_alloc_device); > @@ -1461,6 +1466,18 @@ EXPORT_SYMBOL(ib_register_device); > /* Callers must hold a get on the device. */ > static void __ib_unregister_device(struct ib_device *ib_dev) > { > + struct ib_device *sub, *tmp; > + > + mutex_lock(&ib_dev->subdev_lock); > + list_for_each_entry_safe_reverse(sub, tmp, > + &ib_dev->subdev_list_head, > + subdev_list) { > + list_del(&sub->subdev_list); > + ib_dev->ops.del_sub_dev(sub); > + ib_device_put(ib_dev); > + } > + mutex_unlock(&ib_dev->subdev_lock); > + > /* > * We have a registration lock so that all the calls to unregister are > * fully fenced, once any unregister returns the device is truely > @@ -2597,6 +2614,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) > ops->uverbs_no_driver_id_binding; > > SET_DEVICE_OP(dev_ops, add_gid); > + SET_DEVICE_OP(dev_ops, add_sub_dev); > SET_DEVICE_OP(dev_ops, advise_mr); > SET_DEVICE_OP(dev_ops, alloc_dm); > SET_DEVICE_OP(dev_ops, alloc_hw_device_stats); > @@ -2631,6 +2649,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) > SET_DEVICE_OP(dev_ops, dealloc_ucontext); > SET_DEVICE_OP(dev_ops, dealloc_xrcd); > SET_DEVICE_OP(dev_ops, del_gid); > + SET_DEVICE_OP(dev_ops, del_sub_dev); > SET_DEVICE_OP(dev_ops, dereg_mr); > SET_DEVICE_OP(dev_ops, destroy_ah); > SET_DEVICE_OP(dev_ops, destroy_counters); > @@ -2727,6 +2746,55 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) > } > EXPORT_SYMBOL(ib_set_device_ops); > > +int ib_add_sub_device(struct ib_device *parent, > + enum rdma_nl_dev_type type, > + const char *name) > +{ > + struct ib_device *sub; > + int ret = 0; > + > + if (!parent->ops.add_sub_dev || !parent->ops.del_sub_dev) > + return -EOPNOTSUPP; > + > + if (!ib_device_try_get(parent)) > + return -EINVAL; > + > + sub = parent->ops.add_sub_dev(parent, type, name); > + if (IS_ERR(sub)) { > + ib_device_put(parent); > + return PTR_ERR(sub); > + } > + > + sub->type = type; > + sub->parent = parent; > + > + mutex_lock(&parent->subdev_lock); > + list_add_tail(&parent->subdev_list_head, &sub->subdev_list); > + mutex_unlock(&parent->subdev_lock); > + > + return ret; > +} > +EXPORT_SYMBOL(ib_add_sub_device); > + > +int ib_del_sub_device_and_put(struct ib_device *sub) > +{ > + struct ib_device *parent = sub->parent; > + > + if (!parent) > + return -EOPNOTSUPP; > + > + mutex_lock(&parent->subdev_lock); mutex_destroy of subdev_lock is missing. When mutex_lock is called, it had better call mutex_destroy when the mutex lock is not used any more. Other mutex locks in this file, for example subdev_lock and subdev_lock, call mutex_destroy in the function ib_device_release. Perhaps subdev_lock can also call mutex_destroy in ib_device_release? Zhu Yanjun > + list_del(&sub->subdev_list); > + mutex_unlock(&parent->subdev_lock); > + > + ib_device_put(sub); > + parent->ops.del_sub_dev(sub); > + ib_device_put(parent); > + > + return 0; > +} > +EXPORT_SYMBOL(ib_del_sub_device_and_put); > + > #ifdef CONFIG_INFINIBAND_VIRT_DMA > int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents) > { > diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c > index 495d5a5d0373..bc099287de9a 100644 > --- a/drivers/infiniband/core/uverbs_main.c > +++ b/drivers/infiniband/core/uverbs_main.c > @@ -1114,7 +1114,8 @@ static int ib_uverbs_add_one(struct ib_device *device) > struct ib_uverbs_device *uverbs_dev; > int ret; > > - if (!device->ops.alloc_ucontext) > + if (!device->ops.alloc_ucontext || > + device->type == RDMA_DEVICE_TYPE_SMI) > return -EOPNOTSUPP; > > uverbs_dev = kzalloc(sizeof(*uverbs_dev), GFP_KERNEL); > diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h > index 477bf9dd5e71..bebc2d22f466 100644 > --- a/include/rdma/ib_verbs.h > +++ b/include/rdma/ib_verbs.h > @@ -2661,6 +2661,18 @@ struct ib_device_ops { > */ > int (*get_numa_node)(struct ib_device *dev); > > + /** > + * add_sub_dev - Add a sub IB device > + */ > + struct ib_device *(*add_sub_dev)(struct ib_device *parent, > + enum rdma_nl_dev_type type, > + const char *name); > + > + /** > + * del_sub_dev - Delete a sub IB device > + */ > + void (*del_sub_dev)(struct ib_device *sub_dev); > + > DECLARE_RDMA_OBJ_SIZE(ib_ah); > DECLARE_RDMA_OBJ_SIZE(ib_counters); > DECLARE_RDMA_OBJ_SIZE(ib_cq); > @@ -2771,6 +2783,15 @@ struct ib_device { > char iw_ifname[IFNAMSIZ]; > u32 iw_driver_flags; > u32 lag_flags; > + > + /* A parent device has a list of sub-devices */ > + struct mutex subdev_lock; > + struct list_head subdev_list_head; > + > + /* A sub device has a type and a parent */ > + enum rdma_nl_dev_type type; > + struct ib_device *parent; > + struct list_head subdev_list; > }; > > static inline void *rdma_zalloc_obj(struct ib_device *dev, size_t size, > @@ -4820,4 +4841,26 @@ static inline u16 rdma_get_udp_sport(u32 fl, u32 lqpn, u32 rqpn) > > const struct ib_port_immutable* > ib_port_immutable_read(struct ib_device *dev, unsigned int port); > + > +/** ib_add_sub_device - Add a sub IB device on an existing one > + * > + * @parent: The IB device that needs to add a sub device > + * @type: The type of the new sub device > + * @name: The name of the new sub device > + * > + * > + * Return 0 on success, an error code otherwise > + */ > +int ib_add_sub_device(struct ib_device *parent, > + enum rdma_nl_dev_type type, > + const char *name); > + > + > +/** ib_del_sub_device_and_put - Delect an IB sub device while holding a 'get' > + * > + * @sub: The sub device that is going to be deleted > + * > + * Return 0 on success, an error code otherwise > + */ > +int ib_del_sub_device_and_put(struct ib_device *sub); > #endif /* IB_VERBS_H */ > diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h > index a214fc259f28..d15ee16be722 100644 > --- a/include/uapi/rdma/rdma_netlink.h > +++ b/include/uapi/rdma/rdma_netlink.h > @@ -602,4 +602,9 @@ enum rdma_nl_counter_mask { > RDMA_COUNTER_MASK_QP_TYPE = 1, > RDMA_COUNTER_MASK_PID = 1 << 1, > }; > + > +/* Supported rdma device types. */ > +enum rdma_nl_dev_type { > + RDMA_DEVICE_TYPE_SMI = 1, > +}; > #endif /* _UAPI_RDMA_NETLINK_H */
On Sat, Jun 29, 2024 at 08:14:56AM +0800, Zhu Yanjun wrote: > 在 2024/6/17 0:08, Leon Romanovsky 写道: > > From: Mark Zhang <markzhang@nvidia.com> > > > > This patch adds 2 APIs, as well as driver operations to support adding > > and deleting an IB sub device, which provides part of functionalities > > of it's parent. > > > > A sub device has a type; for a sub device with type "SMI", it provides > > the smi capability through umad for its parent, meaning uverb is not > > supported. > > > > A sub device cannot live without a parent. So when a parent is > > released, all it's sub devices are released as well. > > > > Signed-off-by: Mark Zhang <markzhang@nvidia.com> > > Signed-off-by: Leon Romanovsky <leonro@nvidia.com> > > --- > > drivers/infiniband/core/device.c | 68 +++++++++++++++++++++++++++ > > drivers/infiniband/core/uverbs_main.c | 3 +- > > include/rdma/ib_verbs.h | 43 +++++++++++++++++ > > include/uapi/rdma/rdma_netlink.h | 5 ++ > > 4 files changed, 118 insertions(+), 1 deletion(-) <...> > > +int ib_del_sub_device_and_put(struct ib_device *sub) > > +{ > > + struct ib_device *parent = sub->parent; > > + > > + if (!parent) > > + return -EOPNOTSUPP; > > + > > + mutex_lock(&parent->subdev_lock); > > mutex_destroy of subdev_lock is missing. When mutex_lock is called, it had > better call mutex_destroy when the mutex lock is not used any more. > Other mutex locks in this file, for example subdev_lock and subdev_lock, > call mutex_destroy in the function ib_device_release. > > Perhaps subdev_lock can also call mutex_destroy in ib_device_release? Thanks, I will add this fixup to the series. diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 7aaf2b4c1844..7b418c717f29 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -503,6 +503,7 @@ static void ib_device_release(struct device *device) rcu_head); } + mutex_destroy(&dev->subdev_lock); mutex_destroy(&dev->unregistration_lock); mutex_destroy(&dev->compat_devs_mutex); Thanks > > Zhu Yanjun > > > + list_del(&sub->subdev_list); > > + mutex_unlock(&parent->subdev_lock); > > + > > + ib_device_put(sub); > > + parent->ops.del_sub_dev(sub); > > + ib_device_put(parent); > > + > > + return 0; > > +} > > +EXPORT_SYMBOL(ib_del_sub_device_and_put); > > + > > #ifdef CONFIG_INFINIBAND_VIRT_DMA > > int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents) > > { > > diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c > > index 495d5a5d0373..bc099287de9a 100644 > > --- a/drivers/infiniband/core/uverbs_main.c > > +++ b/drivers/infiniband/core/uverbs_main.c > > @@ -1114,7 +1114,8 @@ static int ib_uverbs_add_one(struct ib_device *device) > > struct ib_uverbs_device *uverbs_dev; > > int ret; > > - if (!device->ops.alloc_ucontext) > > + if (!device->ops.alloc_ucontext || > > + device->type == RDMA_DEVICE_TYPE_SMI) > > return -EOPNOTSUPP; > > uverbs_dev = kzalloc(sizeof(*uverbs_dev), GFP_KERNEL); > > diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h > > index 477bf9dd5e71..bebc2d22f466 100644 > > --- a/include/rdma/ib_verbs.h > > +++ b/include/rdma/ib_verbs.h > > @@ -2661,6 +2661,18 @@ struct ib_device_ops { > > */ > > int (*get_numa_node)(struct ib_device *dev); > > + /** > > + * add_sub_dev - Add a sub IB device > > + */ > > + struct ib_device *(*add_sub_dev)(struct ib_device *parent, > > + enum rdma_nl_dev_type type, > > + const char *name); > > + > > + /** > > + * del_sub_dev - Delete a sub IB device > > + */ > > + void (*del_sub_dev)(struct ib_device *sub_dev); > > + > > DECLARE_RDMA_OBJ_SIZE(ib_ah); > > DECLARE_RDMA_OBJ_SIZE(ib_counters); > > DECLARE_RDMA_OBJ_SIZE(ib_cq); > > @@ -2771,6 +2783,15 @@ struct ib_device { > > char iw_ifname[IFNAMSIZ]; > > u32 iw_driver_flags; > > u32 lag_flags; > > + > > + /* A parent device has a list of sub-devices */ > > + struct mutex subdev_lock; > > + struct list_head subdev_list_head; > > + > > + /* A sub device has a type and a parent */ > > + enum rdma_nl_dev_type type; > > + struct ib_device *parent; > > + struct list_head subdev_list; > > }; > > static inline void *rdma_zalloc_obj(struct ib_device *dev, size_t size, > > @@ -4820,4 +4841,26 @@ static inline u16 rdma_get_udp_sport(u32 fl, u32 lqpn, u32 rqpn) > > const struct ib_port_immutable* > > ib_port_immutable_read(struct ib_device *dev, unsigned int port); > > + > > +/** ib_add_sub_device - Add a sub IB device on an existing one > > + * > > + * @parent: The IB device that needs to add a sub device > > + * @type: The type of the new sub device > > + * @name: The name of the new sub device > > + * > > + * > > + * Return 0 on success, an error code otherwise > > + */ > > +int ib_add_sub_device(struct ib_device *parent, > > + enum rdma_nl_dev_type type, > > + const char *name); > > + > > + > > +/** ib_del_sub_device_and_put - Delect an IB sub device while holding a 'get' > > + * > > + * @sub: The sub device that is going to be deleted > > + * > > + * Return 0 on success, an error code otherwise > > + */ > > +int ib_del_sub_device_and_put(struct ib_device *sub); > > #endif /* IB_VERBS_H */ > > diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h > > index a214fc259f28..d15ee16be722 100644 > > --- a/include/uapi/rdma/rdma_netlink.h > > +++ b/include/uapi/rdma/rdma_netlink.h > > @@ -602,4 +602,9 @@ enum rdma_nl_counter_mask { > > RDMA_COUNTER_MASK_QP_TYPE = 1, > > RDMA_COUNTER_MASK_PID = 1 << 1, > > }; > > + > > +/* Supported rdma device types. */ > > +enum rdma_nl_dev_type { > > + RDMA_DEVICE_TYPE_SMI = 1, > > +}; > > #endif /* _UAPI_RDMA_NETLINK_H */ >
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 55aa7aa32d4a..8547cab50b23 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -641,6 +641,11 @@ struct ib_device *_ib_alloc_device(size_t size) BIT_ULL(IB_USER_VERBS_CMD_REG_MR) | BIT_ULL(IB_USER_VERBS_CMD_REREG_MR) | BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ); + + mutex_init(&device->subdev_lock); + INIT_LIST_HEAD(&device->subdev_list_head); + INIT_LIST_HEAD(&device->subdev_list); + return device; } EXPORT_SYMBOL(_ib_alloc_device); @@ -1461,6 +1466,18 @@ EXPORT_SYMBOL(ib_register_device); /* Callers must hold a get on the device. */ static void __ib_unregister_device(struct ib_device *ib_dev) { + struct ib_device *sub, *tmp; + + mutex_lock(&ib_dev->subdev_lock); + list_for_each_entry_safe_reverse(sub, tmp, + &ib_dev->subdev_list_head, + subdev_list) { + list_del(&sub->subdev_list); + ib_dev->ops.del_sub_dev(sub); + ib_device_put(ib_dev); + } + mutex_unlock(&ib_dev->subdev_lock); + /* * We have a registration lock so that all the calls to unregister are * fully fenced, once any unregister returns the device is truely @@ -2597,6 +2614,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) ops->uverbs_no_driver_id_binding; SET_DEVICE_OP(dev_ops, add_gid); + SET_DEVICE_OP(dev_ops, add_sub_dev); SET_DEVICE_OP(dev_ops, advise_mr); SET_DEVICE_OP(dev_ops, alloc_dm); SET_DEVICE_OP(dev_ops, alloc_hw_device_stats); @@ -2631,6 +2649,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, dealloc_ucontext); SET_DEVICE_OP(dev_ops, dealloc_xrcd); SET_DEVICE_OP(dev_ops, del_gid); + SET_DEVICE_OP(dev_ops, del_sub_dev); SET_DEVICE_OP(dev_ops, dereg_mr); SET_DEVICE_OP(dev_ops, destroy_ah); SET_DEVICE_OP(dev_ops, destroy_counters); @@ -2727,6 +2746,55 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) } EXPORT_SYMBOL(ib_set_device_ops); +int ib_add_sub_device(struct ib_device *parent, + enum rdma_nl_dev_type type, + const char *name) +{ + struct ib_device *sub; + int ret = 0; + + if (!parent->ops.add_sub_dev || !parent->ops.del_sub_dev) + return -EOPNOTSUPP; + + if (!ib_device_try_get(parent)) + return -EINVAL; + + sub = parent->ops.add_sub_dev(parent, type, name); + if (IS_ERR(sub)) { + ib_device_put(parent); + return PTR_ERR(sub); + } + + sub->type = type; + sub->parent = parent; + + mutex_lock(&parent->subdev_lock); + list_add_tail(&parent->subdev_list_head, &sub->subdev_list); + mutex_unlock(&parent->subdev_lock); + + return ret; +} +EXPORT_SYMBOL(ib_add_sub_device); + +int ib_del_sub_device_and_put(struct ib_device *sub) +{ + struct ib_device *parent = sub->parent; + + if (!parent) + return -EOPNOTSUPP; + + mutex_lock(&parent->subdev_lock); + list_del(&sub->subdev_list); + mutex_unlock(&parent->subdev_lock); + + ib_device_put(sub); + parent->ops.del_sub_dev(sub); + ib_device_put(parent); + + return 0; +} +EXPORT_SYMBOL(ib_del_sub_device_and_put); + #ifdef CONFIG_INFINIBAND_VIRT_DMA int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents) { diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 495d5a5d0373..bc099287de9a 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -1114,7 +1114,8 @@ static int ib_uverbs_add_one(struct ib_device *device) struct ib_uverbs_device *uverbs_dev; int ret; - if (!device->ops.alloc_ucontext) + if (!device->ops.alloc_ucontext || + device->type == RDMA_DEVICE_TYPE_SMI) return -EOPNOTSUPP; uverbs_dev = kzalloc(sizeof(*uverbs_dev), GFP_KERNEL); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 477bf9dd5e71..bebc2d22f466 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2661,6 +2661,18 @@ struct ib_device_ops { */ int (*get_numa_node)(struct ib_device *dev); + /** + * add_sub_dev - Add a sub IB device + */ + struct ib_device *(*add_sub_dev)(struct ib_device *parent, + enum rdma_nl_dev_type type, + const char *name); + + /** + * del_sub_dev - Delete a sub IB device + */ + void (*del_sub_dev)(struct ib_device *sub_dev); + DECLARE_RDMA_OBJ_SIZE(ib_ah); DECLARE_RDMA_OBJ_SIZE(ib_counters); DECLARE_RDMA_OBJ_SIZE(ib_cq); @@ -2771,6 +2783,15 @@ struct ib_device { char iw_ifname[IFNAMSIZ]; u32 iw_driver_flags; u32 lag_flags; + + /* A parent device has a list of sub-devices */ + struct mutex subdev_lock; + struct list_head subdev_list_head; + + /* A sub device has a type and a parent */ + enum rdma_nl_dev_type type; + struct ib_device *parent; + struct list_head subdev_list; }; static inline void *rdma_zalloc_obj(struct ib_device *dev, size_t size, @@ -4820,4 +4841,26 @@ static inline u16 rdma_get_udp_sport(u32 fl, u32 lqpn, u32 rqpn) const struct ib_port_immutable* ib_port_immutable_read(struct ib_device *dev, unsigned int port); + +/** ib_add_sub_device - Add a sub IB device on an existing one + * + * @parent: The IB device that needs to add a sub device + * @type: The type of the new sub device + * @name: The name of the new sub device + * + * + * Return 0 on success, an error code otherwise + */ +int ib_add_sub_device(struct ib_device *parent, + enum rdma_nl_dev_type type, + const char *name); + + +/** ib_del_sub_device_and_put - Delect an IB sub device while holding a 'get' + * + * @sub: The sub device that is going to be deleted + * + * Return 0 on success, an error code otherwise + */ +int ib_del_sub_device_and_put(struct ib_device *sub); #endif /* IB_VERBS_H */ diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index a214fc259f28..d15ee16be722 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -602,4 +602,9 @@ enum rdma_nl_counter_mask { RDMA_COUNTER_MASK_QP_TYPE = 1, RDMA_COUNTER_MASK_PID = 1 << 1, }; + +/* Supported rdma device types. */ +enum rdma_nl_dev_type { + RDMA_DEVICE_TYPE_SMI = 1, +}; #endif /* _UAPI_RDMA_NETLINK_H */