diff mbox series

[v2,for-next,3/3] RDMA/hns: Disassociate mmap pages for all uctx when HW is being reset

Message ID 20240812125640.1003948-4-huangjunxian6@hisilicon.com (mailing list archive)
State Superseded
Headers show
Series RDMA: Provide an API for drivers to disassociate mmap pages | expand

Commit Message

Junxian Huang Aug. 12, 2024, 12:56 p.m. UTC
From: Chengchang Tang <tangchengchang@huawei.com>

When HW is being reset, userspace should not ring doorbell otherwise
it may lead to abnormal consequence such as RAS.

Disassociate mmap pages for all uctx to prevent userspace from ringing
doorbell to HW.

Fixes: 9a4435375cd1 ("IB/hns: Add driver files for hns RoCE driver")
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

Comments

Jason Gunthorpe Aug. 23, 2024, 3:18 p.m. UTC | #1
On Mon, Aug 12, 2024 at 08:56:40PM +0800, Junxian Huang wrote:
> From: Chengchang Tang <tangchengchang@huawei.com>
> 
> When HW is being reset, userspace should not ring doorbell otherwise
> it may lead to abnormal consequence such as RAS.
> 
> Disassociate mmap pages for all uctx to prevent userspace from ringing
> doorbell to HW.
> 
> Fixes: 9a4435375cd1 ("IB/hns: Add driver files for hns RoCE driver")
> Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
> Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
> ---
>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 17 +++++++++++++++++
>  1 file changed, 17 insertions(+)
> 
> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> index 621b057fb9da..e30610a426b3 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> @@ -7012,6 +7012,20 @@ static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
>  
>  	handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT;
>  }
> +
> +static void hns_roce_v2_reset_notify_user(struct hns_roce_dev *hr_dev)
> +{
> +	struct hns_roce_ucontext *uctx, *tmp;
> +
> +	mutex_lock(&hr_dev->uctx_list_mutex);
> +	list_for_each_entry_safe(uctx, tmp, &hr_dev->uctx_list, list) {
> +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
> +		rdma_user_mmap_disassociate(&uctx->ibucontext);
> +#endif
> +	}
> +	mutex_unlock(&hr_dev->uctx_list_mutex);
> +}

I'm not keen on the drivers having to maintain a list of ucontexts
when the caller already does it.

Try something like this?

diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 00dab5bfb78cc8..6b4d5a660a2f9d 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -76,6 +76,7 @@ static dev_t dynamic_uverbs_dev;
 static DEFINE_IDA(uverbs_ida);
 static int ib_uverbs_add_one(struct ib_device *device);
 static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
+static struct ib_client uverbs_client;
 
 static char *uverbs_devnode(const struct device *dev, umode_t *mode)
 {
@@ -881,23 +882,25 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
 }
 
 /**
- * rdma_user_mmap_disassociate() - disassociate the mmap from the ucontext.
+ * rdma_user_mmap_disassociate() - Revoke mmaps for a device
+ * @device: device to revoke
  *
- * @ucontext: associated user context.
- *
- * This function should be called by drivers that need to disable mmap for
- * some ucontexts.
+ * This function should be called by drivers that need to disable mmaps for the
+ * device, for instance because it is going to be reset.
  */
-void rdma_user_mmap_disassociate(struct ib_ucontext *ucontext)
+void rdma_user_mmap_disassociate(struct ib_device *device)
 {
-	struct ib_uverbs_file *ufile = ucontext->ufile;
+	struct ib_uverbs_device *uverbs_dev =
+		ib_get_client_data(device, &uverbs_client);
+	struct ib_uverbs_file *ufile;
 
-	/* Racing with uverbs_destroy_ufile_hw */
-	if (!down_read_trylock(&ufile->hw_destroy_rwsem))
-		return;
-
-	uverbs_user_mmap_disassociate(ufile);
-	up_read(&ufile->hw_destroy_rwsem);
+	mutex_lock(&uverbs_dev->lists_mutex);
+	list_for_each_entry(ufile, &uverbs_dev->uverbs_file_list, list) {
+		down_read(&ufile->hw_destroy_rwsem);
+		uverbs_user_mmap_disassociate(ufile);
+		up_read(&ufile->hw_destroy_rwsem);
+	}
+	mutex_unlock(&uverbs_dev->lists_mutex);
 }
 EXPORT_SYMBOL(rdma_user_mmap_disassociate);
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index e30610a426b387..ecf4f1c9f51d32 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -7015,15 +7015,7 @@ static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
 
 static void hns_roce_v2_reset_notify_user(struct hns_roce_dev *hr_dev)
 {
-	struct hns_roce_ucontext *uctx, *tmp;
-
-	mutex_lock(&hr_dev->uctx_list_mutex);
-	list_for_each_entry_safe(uctx, tmp, &hr_dev->uctx_list, list) {
-#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
-		rdma_user_mmap_disassociate(&uctx->ibucontext);
-#endif
-	}
-	mutex_unlock(&hr_dev->uctx_list_mutex);
+	rdma_user_mmap_disassociate(&hr_dev->ib_dev);
 }
 
 static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index d6e34ca5c7276c..de9db15243c66f 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2947,7 +2947,13 @@ int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
 				      struct rdma_user_mmap_entry *entry,
 				      size_t length, u32 min_pgoff,
 				      u32 max_pgoff);
-void rdma_user_mmap_disassociate(struct ib_ucontext *ucontext);
+#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
+void rdma_user_mmap_disassociate(struct ib_device *device);
+#else
+static inline void rdma_user_mmap_disassociate(struct ib_device *device)
+{
+}
+#endif
 
 static inline int
 rdma_user_mmap_entry_insert_exact(struct ib_ucontext *ucontext,
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 621b057fb9da..e30610a426b3 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -7012,6 +7012,20 @@  static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
 
 	handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT;
 }
+
+static void hns_roce_v2_reset_notify_user(struct hns_roce_dev *hr_dev)
+{
+	struct hns_roce_ucontext *uctx, *tmp;
+
+	mutex_lock(&hr_dev->uctx_list_mutex);
+	list_for_each_entry_safe(uctx, tmp, &hr_dev->uctx_list, list) {
+#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
+		rdma_user_mmap_disassociate(&uctx->ibucontext);
+#endif
+	}
+	mutex_unlock(&hr_dev->uctx_list_mutex);
+}
+
 static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
 {
 	struct hns_roce_dev *hr_dev;
@@ -7030,6 +7044,9 @@  static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
 
 	hr_dev->active = false;
 	hr_dev->dis_db = true;
+
+	hns_roce_v2_reset_notify_user(hr_dev);
+
 	hr_dev->state = HNS_ROCE_DEVICE_STATE_RST_DOWN;
 
 	return 0;