diff mbox series

[v5,for-next,2/2] RDMA/hns: Disassociate mmap pages for all uctx when HW is being reset

Message ID 20240913122955.1283597-3-huangjunxian6@hisilicon.com (mailing list archive)
State Superseded
Headers show
Series RDMA: Provide an API for drivers to disassociate mmap pages | expand

Commit Message

Junxian Huang Sept. 13, 2024, 12:29 p.m. UTC
From: Chengchang Tang <tangchengchang@huawei.com>

When HW is being reset, userspace should not ring doorbell otherwise
it may lead to abnormal consequence such as RAS.

Disassociate mmap pages for all uctx to prevent userspace from ringing
doorbell to HW. Since all resources will be destroyed during HW reset,
no new mmap is allowed after HW reset is completed.

Fixes: 9a4435375cd1 ("IB/hns: Add driver files for hns RoCE driver")
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 9 +++++++++
 drivers/infiniband/hw/hns/hns_roce_main.c  | 5 +++++
 2 files changed, 14 insertions(+)

Comments

Leon Romanovsky Sept. 16, 2024, 9:13 a.m. UTC | #1
On Fri, Sep 13, 2024 at 08:29:55PM +0800, Junxian Huang wrote:
> From: Chengchang Tang <tangchengchang@huawei.com>
> 
> When HW is being reset, userspace should not ring doorbell otherwise
> it may lead to abnormal consequence such as RAS.
> 
> Disassociate mmap pages for all uctx to prevent userspace from ringing
> doorbell to HW. Since all resources will be destroyed during HW reset,
> no new mmap is allowed after HW reset is completed.
> 
> Fixes: 9a4435375cd1 ("IB/hns: Add driver files for hns RoCE driver")
> Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
> Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
> ---
>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 9 +++++++++
>  drivers/infiniband/hw/hns/hns_roce_main.c  | 5 +++++
>  2 files changed, 14 insertions(+)
> 
> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> index 24e906b9d3ae..4e374b2da101 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> @@ -7017,6 +7017,12 @@ static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
>  
>  	handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT;
>  }
> +
> +static void hns_roce_v2_reset_notify_user(struct hns_roce_dev *hr_dev)
> +{
> +	rdma_user_mmap_disassociate(&hr_dev->ib_dev);
> +}

There is no need in one line function, please inline it.

> +
>  static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
>  {
>  	struct hns_roce_dev *hr_dev;
> @@ -7035,6 +7041,9 @@ static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
>  
>  	hr_dev->active = false;
>  	hr_dev->dis_db = true;
> +
> +	hns_roce_v2_reset_notify_user(hr_dev);
> +
>  	hr_dev->state = HNS_ROCE_DEVICE_STATE_RST_DOWN;
>  
>  	return 0;
> diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
> index 4cb0af733587..49315f39361d 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_main.c
> +++ b/drivers/infiniband/hw/hns/hns_roce_main.c
> @@ -466,6 +466,11 @@ static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
>  	pgprot_t prot;
>  	int ret;
>  
> +	if (hr_dev->dis_db) {

How do you clear dis_db after calling to hns_roce_hw_v2_reset_notify_down()? Does it have any locking protection?

> +		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MMAP_ERR_CNT]);
> +		return -EPERM;
> +	}
> +
>  	rdma_entry = rdma_user_mmap_entry_get_pgoff(uctx, vma->vm_pgoff);
>  	if (!rdma_entry) {
>  		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MMAP_ERR_CNT]);
> -- 
> 2.33.0
>
Junxian Huang Sept. 20, 2024, 9:18 a.m. UTC | #2
On 2024/9/16 17:13, Leon Romanovsky wrote:
> On Fri, Sep 13, 2024 at 08:29:55PM +0800, Junxian Huang wrote:
>> From: Chengchang Tang <tangchengchang@huawei.com>
>>
>> When HW is being reset, userspace should not ring doorbell otherwise
>> it may lead to abnormal consequence such as RAS.
>>
>> Disassociate mmap pages for all uctx to prevent userspace from ringing
>> doorbell to HW. Since all resources will be destroyed during HW reset,
>> no new mmap is allowed after HW reset is completed.
>>
>> Fixes: 9a4435375cd1 ("IB/hns: Add driver files for hns RoCE driver")
>> Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
>> Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
>> ---
>>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 9 +++++++++
>>  drivers/infiniband/hw/hns/hns_roce_main.c  | 5 +++++
>>  2 files changed, 14 insertions(+)
>>
>> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>> index 24e906b9d3ae..4e374b2da101 100644
>> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>> @@ -7017,6 +7017,12 @@ static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
>>  
>>  	handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT;
>>  }
>> +
>> +static void hns_roce_v2_reset_notify_user(struct hns_roce_dev *hr_dev)
>> +{
>> +	rdma_user_mmap_disassociate(&hr_dev->ib_dev);
>> +}
> 
> There is no need in one line function, please inline it.
> 

Sure.

>> +
>>  static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
>>  {
>>  	struct hns_roce_dev *hr_dev;
>> @@ -7035,6 +7041,9 @@ static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
>>  
>>  	hr_dev->active = false;
>>  	hr_dev->dis_db = true;
>> +
>> +	hns_roce_v2_reset_notify_user(hr_dev);
>> +
>>  	hr_dev->state = HNS_ROCE_DEVICE_STATE_RST_DOWN;
>>  
>>  	return 0;
>> diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
>> index 4cb0af733587..49315f39361d 100644
>> --- a/drivers/infiniband/hw/hns/hns_roce_main.c
>> +++ b/drivers/infiniband/hw/hns/hns_roce_main.c
>> @@ -466,6 +466,11 @@ static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
>>  	pgprot_t prot;
>>  	int ret;
>>  
>> +	if (hr_dev->dis_db) {
> 
> How do you clear dis_db after calling to hns_roce_hw_v2_reset_notify_down()? Does it have any locking protection?
> 

Sorry for the late response, I just came back from vacation.

After calling hns_roce_hw_v2_reset_notify_down(), we will call ib_unregister_device()
and destory all HW resources eventually, so there is no need to clear dis_db.

Junxian

>> +		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MMAP_ERR_CNT]);
>> +		return -EPERM;
>> +	}
>> +
>>  	rdma_entry = rdma_user_mmap_entry_get_pgoff(uctx, vma->vm_pgoff);
>>  	if (!rdma_entry) {
>>  		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MMAP_ERR_CNT]);
>> -- 
>> 2.33.0
>>
>
Jason Gunthorpe Sept. 20, 2024, 12:47 p.m. UTC | #3
On Fri, Sep 20, 2024 at 05:18:14PM +0800, Junxian Huang wrote:

> >> diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
> >> index 4cb0af733587..49315f39361d 100644
> >> --- a/drivers/infiniband/hw/hns/hns_roce_main.c
> >> +++ b/drivers/infiniband/hw/hns/hns_roce_main.c
> >> @@ -466,6 +466,11 @@ static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
> >>  	pgprot_t prot;
> >>  	int ret;
> >>  
> >> +	if (hr_dev->dis_db) {
> > 
> > How do you clear dis_db after calling to hns_roce_hw_v2_reset_notify_down()? Does it have any locking protection?
> > 
> 
> Sorry for the late response, I just came back from vacation.
> 
> After calling hns_roce_hw_v2_reset_notify_down(), we will call ib_unregister_device()
> and destory all HW resources eventually, so there is no need to clear dis_db.

Why can't you do the unregister device sooner then and avoid all this
special stuff?

I assumed you'd bring the same device back after completing the reset??

Jason
Junxian Huang Sept. 23, 2024, 6:17 a.m. UTC | #4
On 2024/9/20 20:47, Jason Gunthorpe wrote:
> On Fri, Sep 20, 2024 at 05:18:14PM +0800, Junxian Huang wrote:
> 
>>>> diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
>>>> index 4cb0af733587..49315f39361d 100644
>>>> --- a/drivers/infiniband/hw/hns/hns_roce_main.c
>>>> +++ b/drivers/infiniband/hw/hns/hns_roce_main.c
>>>> @@ -466,6 +466,11 @@ static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
>>>>  	pgprot_t prot;
>>>>  	int ret;
>>>>  
>>>> +	if (hr_dev->dis_db) {
>>>
>>> How do you clear dis_db after calling to hns_roce_hw_v2_reset_notify_down()? Does it have any locking protection?
>>>
>>
>> Sorry for the late response, I just came back from vacation.
>>
>> After calling hns_roce_hw_v2_reset_notify_down(), we will call ib_unregister_device()
>> and destory all HW resources eventually, so there is no need to clear dis_db.
> 
> Why can't you do the unregister device sooner then and avoid all this
> special stuff?
> 

It's a limitation of HW. Resources such as QP/CQ/MR will be destoryed
during unregistering device. This is not allowed by HW until
hns_roce_hw_v2_reset_notify_uninit(), or it may lead to some HW errors.

> I assumed you'd bring the same device back after completing the reset??
> 

Yes

> Jason
Leon Romanovsky Sept. 23, 2024, 9:02 a.m. UTC | #5
On Mon, Sep 23, 2024 at 02:17:40PM +0800, Junxian Huang wrote:
> 
> 
> On 2024/9/20 20:47, Jason Gunthorpe wrote:
> > On Fri, Sep 20, 2024 at 05:18:14PM +0800, Junxian Huang wrote:
> > 
> >>>> diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
> >>>> index 4cb0af733587..49315f39361d 100644
> >>>> --- a/drivers/infiniband/hw/hns/hns_roce_main.c
> >>>> +++ b/drivers/infiniband/hw/hns/hns_roce_main.c
> >>>> @@ -466,6 +466,11 @@ static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
> >>>>  	pgprot_t prot;
> >>>>  	int ret;
> >>>>  
> >>>> +	if (hr_dev->dis_db) {
> >>>
> >>> How do you clear dis_db after calling to hns_roce_hw_v2_reset_notify_down()? Does it have any locking protection?
> >>>
> >>
> >> Sorry for the late response, I just came back from vacation.
> >>
> >> After calling hns_roce_hw_v2_reset_notify_down(), we will call ib_unregister_device()
> >> and destory all HW resources eventually, so there is no need to clear dis_db.
> > 
> > Why can't you do the unregister device sooner then and avoid all this
> > special stuff?
> > 
> 
> It's a limitation of HW. Resources such as QP/CQ/MR will be destoryed
> during unregistering device. This is not allowed by HW until
> hns_roce_hw_v2_reset_notify_uninit(), or it may lead to some HW errors.

It is interested claim given the fact that you are changing original
code from 2016.

Thanks

> 
> > I assumed you'd bring the same device back after completing the reset??
> > 
> 
> Yes
> 
> > Jason
>
Junxian Huang Sept. 23, 2024, 2:15 p.m. UTC | #6
On 2024/9/23 17:02, Leon Romanovsky wrote:
> On Mon, Sep 23, 2024 at 02:17:40PM +0800, Junxian Huang wrote:
>>
>>
>> On 2024/9/20 20:47, Jason Gunthorpe wrote:
>>> On Fri, Sep 20, 2024 at 05:18:14PM +0800, Junxian Huang wrote:
>>>
>>>>>> diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
>>>>>> index 4cb0af733587..49315f39361d 100644
>>>>>> --- a/drivers/infiniband/hw/hns/hns_roce_main.c
>>>>>> +++ b/drivers/infiniband/hw/hns/hns_roce_main.c
>>>>>> @@ -466,6 +466,11 @@ static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
>>>>>>  	pgprot_t prot;
>>>>>>  	int ret;
>>>>>>  
>>>>>> +	if (hr_dev->dis_db) {
>>>>>
>>>>> How do you clear dis_db after calling to hns_roce_hw_v2_reset_notify_down()? Does it have any locking protection?
>>>>>
>>>>
>>>> Sorry for the late response, I just came back from vacation.
>>>>
>>>> After calling hns_roce_hw_v2_reset_notify_down(), we will call ib_unregister_device()
>>>> and destory all HW resources eventually, so there is no need to clear dis_db.
>>>
>>> Why can't you do the unregister device sooner then and avoid all this
>>> special stuff?
>>>
>>
>> It's a limitation of HW. Resources such as QP/CQ/MR will be destoryed
>> during unregistering device. This is not allowed by HW until
>> hns_roce_hw_v2_reset_notify_uninit(), or it may lead to some HW errors.
> 
> It is interested claim given the fact that you are changing original
> code from 2016.
> 

Well, this isn't a new issue. We once sent a patch to try to address it
in 2019 [1], but that solution wasn't the right way since it relied on
userspace. We haven't come up with any new solutions since then, until
this series recently.

[1] https://lore.kernel.org/linux-rdma/20190812055220.GA8440@mtr-leonro.mtl.com/

Junxian

> Thanks
> 
>>
>>> I assumed you'd bring the same device back after completing the reset??
>>>
>>
>> Yes
>>
>>> Jason
>>
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 24e906b9d3ae..4e374b2da101 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -7017,6 +7017,12 @@  static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
 
 	handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT;
 }
+
+static void hns_roce_v2_reset_notify_user(struct hns_roce_dev *hr_dev)
+{
+	rdma_user_mmap_disassociate(&hr_dev->ib_dev);
+}
+
 static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
 {
 	struct hns_roce_dev *hr_dev;
@@ -7035,6 +7041,9 @@  static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
 
 	hr_dev->active = false;
 	hr_dev->dis_db = true;
+
+	hns_roce_v2_reset_notify_user(hr_dev);
+
 	hr_dev->state = HNS_ROCE_DEVICE_STATE_RST_DOWN;
 
 	return 0;
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 4cb0af733587..49315f39361d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -466,6 +466,11 @@  static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
 	pgprot_t prot;
 	int ret;
 
+	if (hr_dev->dis_db) {
+		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MMAP_ERR_CNT]);
+		return -EPERM;
+	}
+
 	rdma_entry = rdma_user_mmap_entry_get_pgoff(uctx, vma->vm_pgoff);
 	if (!rdma_entry) {
 		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MMAP_ERR_CNT]);