diff mbox series

[for-rc] IB/rdmavt: Fix frwr memory registration

Message ID 20190415183415.24136.56776.stgit@scvm10.sc.intel.com (mailing list archive)
State Mainlined
Commit 7c39f7f671d2acc0a1f39ebbbee4303ad499bbfa
Delegated to: Jason Gunthorpe
Headers show
Series [for-rc] IB/rdmavt: Fix frwr memory registration | expand

Commit Message

Dennis Dalessandro April 15, 2019, 6:34 p.m. UTC
From: Josh Collier <josh.d.collier@intel.com>

Current implementation was not properly handling frwr memory
registrations. This was uncovered by:
  commit 27f26cec761d
  xprtrdma: Plant XID in on-the-wire RDMA offset (FRWR)
in which xprtrdma, which is used for NFS over RDMA, started
failing as it was the first ULP to modify the ib_mr iova
resulting in the NFS server getting REMOTE ACCESS ERROR
when attempting to perform RDMA Writes to the client.

The fix is to properly capture the true iova, offset, and length
in the call to ib_map_mr_sg, and then update the iova when
processing the IB_WR_REG_MEM on the send queue.

Fixes: a41081aa5936 ("IB/rdmavt: Add support for ib_map_mr_sg")
Cc: stable@vger.kernel.org
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Josh Collier <josh.d.collier@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
---
 drivers/infiniband/sw/rdmavt/mr.c |   17 ++++++++++-------
 1 files changed, 10 insertions(+), 7 deletions(-)

Comments

Tom Talpey April 15, 2019, 7:15 p.m. UTC | #1
On 4/15/2019 2:34 PM, Dennis Dalessandro wrote:
> From: Josh Collier <josh.d.collier@intel.com>
> 
> Current implementation was not properly handling frwr memory
> registrations. This was uncovered by:
>    commit 27f26cec761das
>    xprtrdma: Plant XID in on-the-wire RDMA offset (FRWR)
> in which xprtrdma, which is used for NFS over RDMA, started
> failing as it was the first ULP to modify the ib_mr iova
> resulting in the NFS server getting REMOTE ACCESS ERROR
> when attempting to perform RDMA Writes to the client.
> 
> The fix is to properly capture the true iova, offset, and length
> in the call to ib_map_mr_sg, and then update the iova when
> processing the IB_WR_REG_MEM on the send queue.
> 
> Fixes: a41081aa5936 ("IB/rdmavt: Add support for ib_map_mr_sg")
> Cc: stable@vger.kernel.org
> Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
> Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
> Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
> Signed-off-by: Josh Collier <josh.d.collier@intel.com>
> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
> ---
>   drivers/infiniband/sw/rdmavt/mr.c |   17 ++++++++++-------
>   1 files changed, 10 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
> index 7287950..0bb6e39 100644
> --- a/drivers/infiniband/sw/rdmavt/mr.c
> +++ b/drivers/infiniband/sw/rdmavt/mr.c
> @@ -608,11 +608,6 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr)
>   	if (unlikely(mapped_segs == mr->mr.max_segs))
>   		return -ENOMEM;
>   
> -	if (mr->mr.length == 0) {
> -		mr->mr.user_base = addr;
> -		mr->mr.iova = addr;
> -	}
> -
>   	m = mapped_segs / RVT_SEGSZ;
>   	n = mapped_segs % RVT_SEGSZ;
>   	mr->mr.map[m]->segs[n].vaddr = (void *)addr;
> @@ -630,17 +625,24 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr)
>    * @sg_nents: number of entries in sg
>    * @sg_offset: offset in bytes into sg
>    *
> + * Overwrite rvt_mr length with mr length calculated by ib_sg_to_pages.
> + *
>    * Return: number of sg elements mapped to the memory region
>    */
>   int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
>   		  int sg_nents, unsigned int *sg_offset)
>   {
>   	struct rvt_mr *mr = to_imr(ibmr);
> +	int ret;
>   
>   	mr->mr.length = 0;

The above mr.length assignment is no longer needed, since it is
unconditionally overwritten several lines below.

>   	mr->mr.page_shift = PAGE_SHIFT;
> -	return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
> -			      rvt_set_page);
> +	ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rvt_set_page);
> +	mr->mr.user_base = ibmr->iova;
> +	mr->mr.iova = ibmr->iova;
> +	mr->mr.offset = ibmr->iova - (u64)mr->mr.map[0]->segs[0].vaddr;
> +	mr->mr.length = (size_t)ibmr->length;
> +	return ret;
>   }
>   
>   /**
> @@ -671,6 +673,7 @@ int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key,
>   	ibmr->rkey = key;
>   	mr->mr.lkey = key;
>   	mr->mr.access_flags = access;
> +	mr->mr.iova = ibmr->iova;

Isn't this a redundant assignment? If it isn't, then why is the
mr->mr.offset not being recalculated?

>   	atomic_set(&mr->mr.lkey_invalid, 0);
>   
>   	return 0;
> 
> 
>
Collier, Josh D April 15, 2019, 8:47 p.m. UTC | #2
> -----Original Message-----
> From: Tom Talpey [mailto:tom@talpey.com]
> Sent: Monday, April 15, 2019 3:15 PM
> To: Dalessandro, Dennis <dennis.dalessandro@intel.com>; jgg@ziepe.ca;
> dledford@redhat.com
> Cc: linux-rdma@vger.kernel.org; Ruhl, Michael J <michael.j.ruhl@intel.com>;
> Marciniszyn, Mike <mike.marciniszyn@intel.com>; Collier, Josh D
> <josh.d.collier@intel.com>; stable@vger.kernel.org
> Subject: Re: [PATCH for-rc] IB/rdmavt: Fix frwr memory registration
> 
> On 4/15/2019 2:34 PM, Dennis Dalessandro wrote:
> > From: Josh Collier <josh.d.collier@intel.com>
> >
> > Current implementation was not properly handling frwr memory
> > registrations. This was uncovered by:
> >    commit 27f26cec761das
> >    xprtrdma: Plant XID in on-the-wire RDMA offset (FRWR) in which
> > xprtrdma, which is used for NFS over RDMA, started failing as it was
> > the first ULP to modify the ib_mr iova resulting in the NFS server
> > getting REMOTE ACCESS ERROR when attempting to perform RDMA Writes
> to
> > the client.
> >
> > The fix is to properly capture the true iova, offset, and length in
> > the call to ib_map_mr_sg, and then update the iova when processing the
> > IB_WR_REG_MEM on the send queue.
> >
> > Fixes: a41081aa5936 ("IB/rdmavt: Add support for ib_map_mr_sg")
> > Cc: stable@vger.kernel.org
> > Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
> > Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
> > Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
> > Signed-off-by: Josh Collier <josh.d.collier@intel.com>
> > Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
> > ---
> >   drivers/infiniband/sw/rdmavt/mr.c |   17 ++++++++++-------
> >   1 files changed, 10 insertions(+), 7 deletions(-)
> >
> > diff --git a/drivers/infiniband/sw/rdmavt/mr.c
> > b/drivers/infiniband/sw/rdmavt/mr.c
> > index 7287950..0bb6e39 100644
> > --- a/drivers/infiniband/sw/rdmavt/mr.c
> > +++ b/drivers/infiniband/sw/rdmavt/mr.c
> > @@ -608,11 +608,6 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr)
> >   	if (unlikely(mapped_segs == mr->mr.max_segs))
> >   		return -ENOMEM;
> >
> > -	if (mr->mr.length == 0) {
> > -		mr->mr.user_base = addr;
> > -		mr->mr.iova = addr;
> > -	}
> > -
> >   	m = mapped_segs / RVT_SEGSZ;
> >   	n = mapped_segs % RVT_SEGSZ;
> >   	mr->mr.map[m]->segs[n].vaddr = (void *)addr;
> > @@ -630,17 +625,24 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr)
> >    * @sg_nents: number of entries in sg
> >    * @sg_offset: offset in bytes into sg
> >    *
> > + * Overwrite rvt_mr length with mr length calculated by ib_sg_to_pages.
> > + *
> >    * Return: number of sg elements mapped to the memory region
> >    */
> >   int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
> >   		  int sg_nents, unsigned int *sg_offset)
> >   {
> >   	struct rvt_mr *mr = to_imr(ibmr);
> > +	int ret;
> >
> >   	mr->mr.length = 0;
> 
> The above mr.length assignment is no longer needed, since it is
> unconditionally overwritten several lines below.

The mr.length is still temporarily used by our rvt_set_page function effectively
as a page index and needs to continue to be initialized for that purpose.

> 
> >   	mr->mr.page_shift = PAGE_SHIFT;
> > -	return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
> > -			      rvt_set_page);
> > +	ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rvt_set_page);
> > +	mr->mr.user_base = ibmr->iova;
> > +	mr->mr.iova = ibmr->iova;
> > +	mr->mr.offset = ibmr->iova - (u64)mr->mr.map[0]->segs[0].vaddr;
> > +	mr->mr.length = (size_t)ibmr->length;
> > +	return ret;
> >   }
> >
> >   /**
> > @@ -671,6 +673,7 @@ int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key,
> >   	ibmr->rkey = key;
> >   	mr->mr.lkey = key;
> >   	mr->mr.access_flags = access;
> > +	mr->mr.iova = ibmr->iova;
> 
> Isn't this a redundant assignment? If it isn't, then why is the
> mr->mr.offset not being recalculated?

The ib_mr iova can be changed by the ULP to any value, but any value should
index to the previously mapped MR's starting page and offset, hence those do
not change, and we only update the iova.

> 
> >   	atomic_set(&mr->mr.lkey_invalid, 0);
> >
> >   	return 0;
> >
> >
> >
Tom Talpey April 15, 2019, 8:55 p.m. UTC | #3
On 4/15/2019 4:47 PM, Collier, Josh D wrote:
> 
>> -----Original Message-----
>> From: Tom Talpey [mailto:tom@talpey.com]
>> Sent: Monday, April 15, 2019 3:15 PM
>> To: Dalessandro, Dennis <dennis.dalessandro@intel.com>; jgg@ziepe.ca;
>> dledford@redhat.com
>> Cc: linux-rdma@vger.kernel.org; Ruhl, Michael J <michael.j.ruhl@intel.com>;
>> Marciniszyn, Mike <mike.marciniszyn@intel.com>; Collier, Josh D
>> <josh.d.collier@intel.com>; stable@vger.kernel.org
>> Subject: Re: [PATCH for-rc] IB/rdmavt: Fix frwr memory registration
>>
>> On 4/15/2019 2:34 PM, Dennis Dalessandro wrote:
>>> From: Josh Collier <josh.d.collier@intel.com>
>>>
>>> Current implementation was not properly handling frwr memory
>>> registrations. This was uncovered by:
>>>     commit 27f26cec761das
>>>     xprtrdma: Plant XID in on-the-wire RDMA offset (FRWR) in which
>>> xprtrdma, which is used for NFS over RDMA, started failing as it was
>>> the first ULP to modify the ib_mr iova resulting in the NFS server
>>> getting REMOTE ACCESS ERROR when attempting to perform RDMA Writes
>> to
>>> the client.
>>>
>>> The fix is to properly capture the true iova, offset, and length in
>>> the call to ib_map_mr_sg, and then update the iova when processing the
>>> IB_WR_REG_MEM on the send queue.
>>>
>>> Fixes: a41081aa5936 ("IB/rdmavt: Add support for ib_map_mr_sg")
>>> Cc: stable@vger.kernel.org
>>> Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
>>> Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
>>> Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
>>> Signed-off-by: Josh Collier <josh.d.collier@intel.com>
>>> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
>>> ---
>>>    drivers/infiniband/sw/rdmavt/mr.c |   17 ++++++++++-------
>>>    1 files changed, 10 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/drivers/infiniband/sw/rdmavt/mr.c
>>> b/drivers/infiniband/sw/rdmavt/mr.c
>>> index 7287950..0bb6e39 100644
>>> --- a/drivers/infiniband/sw/rdmavt/mr.c
>>> +++ b/drivers/infiniband/sw/rdmavt/mr.c
>>> @@ -608,11 +608,6 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr)
>>>    	if (unlikely(mapped_segs == mr->mr.max_segs))
>>>    		return -ENOMEM;
>>>
>>> -	if (mr->mr.length == 0) {
>>> -		mr->mr.user_base = addr;
>>> -		mr->mr.iova = addr;
>>> -	}
>>> -
>>>    	m = mapped_segs / RVT_SEGSZ;
>>>    	n = mapped_segs % RVT_SEGSZ;
>>>    	mr->mr.map[m]->segs[n].vaddr = (void *)addr;
>>> @@ -630,17 +625,24 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr)
>>>     * @sg_nents: number of entries in sg
>>>     * @sg_offset: offset in bytes into sg
>>>     *
>>> + * Overwrite rvt_mr length with mr length calculated by ib_sg_to_pages.
>>> + *
>>>     * Return: number of sg elements mapped to the memory region
>>>     */
>>>    int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
>>>    		  int sg_nents, unsigned int *sg_offset)
>>>    {
>>>    	struct rvt_mr *mr = to_imr(ibmr);
>>> +	int ret;
>>>
>>>    	mr->mr.length = 0;
>>
>> The above mr.length assignment is no longer needed, since it is
>> unconditionally overwritten several lines below.
> 
> The mr.length is still temporarily used by our rvt_set_page function effectively
> as a page index and needs to continue to be initialized for that purpose.

Hmm, ok I guess. It seems quite odd to need to initialize it
to zero just to have it set by the callee to the proper value.

>>>    	mr->mr.page_shift = PAGE_SHIFT;
>>> -	return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
>>> -			      rvt_set_page);
>>> +	ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rvt_set_page);
>>> +	mr->mr.user_base = ibmr->iova;
>>> +	mr->mr.iova = ibmr->iova;
>>> +	mr->mr.offset = ibmr->iova - (u64)mr->mr.map[0]->segs[0].vaddr;
>>> +	mr->mr.length = (size_t)ibmr->length;
>>> +	return ret;
>>>    }
>>>
>>>    /**
>>> @@ -671,6 +673,7 @@ int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key,
>>>    	ibmr->rkey = key;
>>>    	mr->mr.lkey = key;
>>>    	mr->mr.access_flags = access;
>>> +	mr->mr.iova = ibmr->iova;
>>
>> Isn't this a redundant assignment? If it isn't, then why is the
>> mr->mr.offset not being recalculated?
> 
> The ib_mr iova can be changed by the ULP to any value, but any value should
> index to the previously mapped MR's starting page and offset, hence those do
> not change, and we only update the iova.

Ah, got it. The ULP changes iova between the map and the fastreg,
so the mr captures the fastreg value. Agreed.

Tom.

> 
>>
>>>    	atomic_set(&mr->mr.lkey_invalid, 0);
>>>
>>>    	return 0;
>>>
>>>
>>>
Jason Gunthorpe April 16, 2019, 10:15 a.m. UTC | #4
On Mon, Apr 15, 2019 at 11:34:22AM -0700, Dennis Dalessandro wrote:
> From: Josh Collier <josh.d.collier@intel.com>
> 
> Current implementation was not properly handling frwr memory
> registrations. This was uncovered by:
>   commit 27f26cec761das
>   xprtrdma: Plant XID in on-the-wire RDMA offset (FRWR)
> in which xprtrdma, which is used for NFS over RDMA, started
> failing as it was the first ULP to modify the ib_mr iova
> resulting in the NFS server getting REMOTE ACCESS ERROR
> when attempting to perform RDMA Writes to the client.
> 
> The fix is to properly capture the true iova, offset, and length
> in the call to ib_map_mr_sg, and then update the iova when
> processing the IB_WR_REG_MEM on the send queue.
> 
> Fixes: a41081aa5936 ("IB/rdmavt: Add support for ib_map_mr_sg")
> Cc: stable@vger.kernel.org
> Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
> Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
> Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
> Signed-off-by: Josh Collier <josh.d.collier@intel.com>
> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
> ---
>  drivers/infiniband/sw/rdmavt/mr.c |   17 ++++++++++-------
>  1 files changed, 10 insertions(+), 7 deletions(-)

Applied to for-rc

Thanks,
Jason
diff mbox series

Patch

diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index 7287950..0bb6e39 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -608,11 +608,6 @@  static int rvt_set_page(struct ib_mr *ibmr, u64 addr)
 	if (unlikely(mapped_segs == mr->mr.max_segs))
 		return -ENOMEM;
 
-	if (mr->mr.length == 0) {
-		mr->mr.user_base = addr;
-		mr->mr.iova = addr;
-	}
-
 	m = mapped_segs / RVT_SEGSZ;
 	n = mapped_segs % RVT_SEGSZ;
 	mr->mr.map[m]->segs[n].vaddr = (void *)addr;
@@ -630,17 +625,24 @@  static int rvt_set_page(struct ib_mr *ibmr, u64 addr)
  * @sg_nents: number of entries in sg
  * @sg_offset: offset in bytes into sg
  *
+ * Overwrite rvt_mr length with mr length calculated by ib_sg_to_pages.
+ *
  * Return: number of sg elements mapped to the memory region
  */
 int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
 		  int sg_nents, unsigned int *sg_offset)
 {
 	struct rvt_mr *mr = to_imr(ibmr);
+	int ret;
 
 	mr->mr.length = 0;
 	mr->mr.page_shift = PAGE_SHIFT;
-	return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
-			      rvt_set_page);
+	ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rvt_set_page);
+	mr->mr.user_base = ibmr->iova;
+	mr->mr.iova = ibmr->iova;
+	mr->mr.offset = ibmr->iova - (u64)mr->mr.map[0]->segs[0].vaddr;
+	mr->mr.length = (size_t)ibmr->length;
+	return ret;
 }
 
 /**
@@ -671,6 +673,7 @@  int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key,
 	ibmr->rkey = key;
 	mr->mr.lkey = key;
 	mr->mr.access_flags = access;
+	mr->mr.iova = ibmr->iova;
 	atomic_set(&mr->mr.lkey_invalid, 0);
 
 	return 0;