
[for-next] RDMA/cxgb4: use dma_mmap_coherent() for mapping non-contiguous memory

Message ID: 20240705131753.15550-1-anumula@chelsio.com
State: Superseded
Series: [for-next] RDMA/cxgb4: use dma_mmap_coherent() for mapping non-contiguous memory

Commit Message

Anumula Murali Mohan Reddy July 5, 2024, 1:17 p.m. UTC
dma_alloc_coherent() allocates contiguous memory irrespective of
iommu mode, but after commit f5ff79fddf0e ("dma-mapping: remove
CONFIG_DMA_REMAP") if iommu is enabled in translate mode,
dma_alloc_coherent() may allocate non-contiguous memory.
An attempt to map this memory results in a panic.
This patch fixes the issue by using dma_mmap_coherent() to map each page
to user space.

Fixes: f5ff79fddf0e ("dma-mapping: remove CONFIG_DMA_REMAP")
Signed-off-by: Anumula Murali Mohan Reddy <anumula@chelsio.com>
Signed-off-by: Potnuri Bharat Teja <bharat@chelsio.com>
---
 drivers/infiniband/hw/cxgb4/cq.c       |  4 +++
 drivers/infiniband/hw/cxgb4/iw_cxgb4.h |  2 ++
 drivers/infiniband/hw/cxgb4/provider.c | 48 +++++++++++++++++++++-----
 drivers/infiniband/hw/cxgb4/qp.c       | 14 ++++++++
 4 files changed, 59 insertions(+), 9 deletions(-)
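
A minimal sketch of the mapping pattern the patch introduces (the
function name is illustrative, not part of the patch): cxgb4 encodes a
lookup key in vm_pgoff, so the offset is cleared before calling
dma_mmap_coherent(), which interprets vm_pgoff as an offset into the
coherent allocation, and restored afterwards.

#include <linux/dma-mapping.h>
#include <linux/mm.h>

static int example_mmap_dma_queue(struct device *dev,
				  struct vm_area_struct *vma,
				  void *vaddr, dma_addr_t dma_addr,
				  size_t size)
{
	unsigned long saved_pgoff = vma->vm_pgoff;
	int ret;

	vma->vm_pgoff = 0;	/* vm_pgoff carried a key, not an offset */
	ret = dma_mmap_coherent(dev, vma, vaddr, dma_addr, size);
	vma->vm_pgoff = saved_pgoff;
	return ret;
}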

Comments

Zhu Yanjun July 6, 2024, 12:16 a.m. UTC | #1
On 2024/7/5 21:17, Anumula Murali Mohan Reddy wrote:
> dma_alloc_coherent() allocates contiguous memory irrespective of
> iommu mode, but after commit f5ff79fddf0e ("dma-mapping: remove
> CONFIG_DMA_REMAP") if iommu is enabled in translate mode,

CC linux-mm@kvack.org

Zhu Yanjun

Leon Romanovsky July 7, 2024, 9:11 a.m. UTC | #2
On Fri, Jul 05, 2024 at 06:47:53PM +0530, Anumula Murali Mohan Reddy wrote:
> dma_alloc_coherent() allocates contiguous memory irrespective of
> iommu mode, but after commit f5ff79fddf0e ("dma-mapping: remove
> CONFIG_DMA_REMAP") if iommu is enabled in translate mode,
> dma_alloc_coherent() may allocate non-contiguous memory.
> An attempt to map this memory results in a panic.
> This patch fixes the issue by using dma_mmap_coherent() to map each page
> to user space.

It is the perfect time to move to rdma_user_mmap_io() instead of
open-coding it in the driver.
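
A rough sketch of that suggestion, assuming the BAR doorbell pages are
the target (the helper maps MMIO pages; function name is illustrative):

/* context: drivers/infiniband/hw/cxgb4, <rdma/ib_verbs.h> */
static int example_mmap_bar2(struct ib_ucontext *context,
			     struct vm_area_struct *vma, u64 bar2_pa)
{
	/*
	 * rdma_user_mmap_io() tracks the VMA and zaps the mapping on
	 * device disassociation, unlike a raw io_remap_pfn_range().
	 */
	return rdma_user_mmap_io(context, vma, bar2_pa >> PAGE_SHIFT,
				 PAGE_SIZE,
				 pgprot_noncached(vma->vm_page_prot),
				 NULL);
}

As Christoph notes below, this helper does not cover the
dma_alloc_coherent() queue memory, only the BAR mappings.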

> 
> Fixes: f5ff79fddf0e ("dma-mapping: remove CONFIG_DMA_REMAP")

+ authors of the commit mentioned in Fixes.

Thanks

Christoph Hellwig July 7, 2024, 11:31 a.m. UTC | #3
On Sun, Jul 07, 2024 at 12:11:05PM +0300, Leon Romanovsky wrote:
> On Fri, Jul 05, 2024 at 06:47:53PM +0530, Anumula Murali Mohan Reddy wrote:
> > dma_alloc_coherent() allocates contiguous memory irrespective of
> > iommu mode, but after commit f5ff79fddf0e ("dma-mapping: remove
> > CONFIG_DMA_REMAP") if iommu is enabled in translate mode,
> > dma_alloc_coherent() may allocate non-contiguous memory.
> > An attempt to map this memory results in a panic.
> > This patch fixes the issue by using dma_mmap_coherent() to map each page
> > to user space.
> 
> > It is the perfect time to move to rdma_user_mmap_io() instead of
> > open-coding it in the driver.

rdma_user_mmap_io does not work on dma coherent allocations.

> > Fixes: f5ff79fddf0e ("dma-mapping: remove CONFIG_DMA_REMAP")
> 
> + authors of the commit mentioned in Fixes.

If that commit triggered a bug for you it was buggy before, you
just didn't hit it.  The Fixes tag needs to point to the commit that
assumed the return value from dma_alloc* can be converted into
a page/pfn/physical address.

> > +++ b/drivers/infiniband/hw/cxgb4/cq.c
> > @@ -1127,12 +1127,16 @@ int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
> >  
> >  		mm->key = uresp.key;
> >  		mm->addr = virt_to_phys(chp->cq.queue);

... aka this one.  And it still is buggy and needs to go away.

> > +		if (vaddr && is_vmalloc_addr(vaddr)) {

And this check is broken.  The virtual address returned from
dma_alloc_coherent() can also be something other than a vmalloc address.

>
>
> > +			vm_pgoff = vma->vm_pgoff;
> > +			vma->vm_pgoff = 0;
> > +			ret = dma_mmap_coherent(&rdev->lldi.pdev->dev, vma,
> > +						vaddr, dma_addr, size);
> > +			vma->vm_pgoff = vm_pgoff;

... and you thus must use this path unconditionally.

Same for the other hunks.
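
A hedged sketch of the unconditional coherent path Christoph is asking
for, assuming the entry is marked as DMA memory when it is inserted
rather than guessed from the kernel virtual address (function name is
illustrative):

/* context: drivers/infiniband/hw/cxgb4, iw_cxgb4.h definitions */
static int example_mmap_queue_mem(struct c4iw_rdev *rdev,
				  struct vm_area_struct *vma,
				  void *vaddr, dma_addr_t dma_addr,
				  size_t size)
{
	/*
	 * No is_vmalloc_addr() guessing: whether dma_alloc_coherent()
	 * returned a vmalloc address, a lowmem address, or memory from
	 * an atomic pool is a dma-mapping implementation detail, so
	 * coherent allocations always take the dma_mmap_coherent() path.
	 */
	vma->vm_pgoff = 0;
	return dma_mmap_coherent(&rdev->lldi.pdev->dev, vma, vaddr,
				 dma_addr, size);
}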
Leon Romanovsky July 7, 2024, 11:39 a.m. UTC | #4
On Sun, Jul 07, 2024 at 01:31:03PM +0200, Christoph Hellwig wrote:
> On Sun, Jul 07, 2024 at 12:11:05PM +0300, Leon Romanovsky wrote:
> > On Fri, Jul 05, 2024 at 06:47:53PM +0530, Anumula Murali Mohan Reddy wrote:
> > > dma_alloc_coherent() allocates contiguous memory irrespective of
> > > iommu mode, but after commit f5ff79fddf0e ("dma-mapping: remove
> > > CONFIG_DMA_REMAP") if iommu is enabled in translate mode,
> > > dma_alloc_coherent() may allocate non-contiguous memory.
> > > An attempt to map this memory results in a panic.
> > > This patch fixes the issue by using dma_mmap_coherent() to map each page
> > > to user space.
> > 
> > It is the perfect time to move to rdma_user_mmap_io() instead of
> > open-coding it in the driver.
> 
> rdma_user_mmap_io does not work on dma coherent allocations.

They used dma_mmap_coherent() to implement a workaround; the original
cxgb4 didn't use it and probably doesn't need to.

Thanks
Christoph Hellwig July 8, 2024, 10:05 a.m. UTC | #5
On Sun, Jul 07, 2024 at 02:39:57PM +0300, Leon Romanovsky wrote:
> > > It is the perfect time to move to rdma_user_mmap_io() instead of
> > > open-coding it in the driver.
> > 
> > rdma_user_mmap_io does not work on dma coherent allocations.
> 
> They used dma_mmap_coherent() to implement a workaround; the original
> cxgb4 didn't use it and probably doesn't need to.

dma_mmap_coherent() must be paired with dma_alloc_coherent().
It seems like cxgb4 uses c4iw_mm_entry as a sort of generic
container for objects that can be mmapped, consumed on a first come,
first served basis in c4iw_mmap() (WTF???).  Not questioning the sanity
of the higher-level logic here, which is ABI by now, but the right fix
is to tag each entry with what is being mmapped (DMA_ALLOC vs
uncached BAR vs WC BAR) and remove the guessing logic there.

While we're at it, pgprot_writecombine() is generally available,
so t4_pgprot_wc() should go away as well.
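
A rough sketch of the tagging Christoph suggests, assuming a new type
field is set on each c4iw_mm_entry at insert_mmap() time (enum and
field names are made up for illustration):

enum c4iw_mmap_type {
	C4IW_MMAP_DMA_ALLOC,	/* WQ/CQ memory from dma_alloc_coherent() */
	C4IW_MMAP_BAR_UC,	/* uncached BAR pages (GTS, MA_SYNC) */
	C4IW_MMAP_BAR_WC,	/* write-combined BAR2 doorbell pages */
};

/* in c4iw_mmap(), after looking up the entry and before freeing it: */
switch (mm->type) {
case C4IW_MMAP_DMA_ALLOC:
	vma->vm_pgoff = 0;
	ret = dma_mmap_coherent(&rdev->lldi.pdev->dev, vma,
				mm->vaddr, mm->dma_addr, mm->len);
	break;
case C4IW_MMAP_BAR_WC:
	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
	ret = io_remap_pfn_range(vma, vma->vm_start,
				 mm->addr >> PAGE_SHIFT, mm->len,
				 vma->vm_page_prot);
	break;
case C4IW_MMAP_BAR_UC:
	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	ret = io_remap_pfn_range(vma, vma->vm_start,
				 mm->addr >> PAGE_SHIFT, mm->len,
				 vma->vm_page_prot);
	break;
}

With the type recorded up front, c4iw_mmap() no longer needs to compare
the address against the PCI BAR ranges at all.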

Patch

diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 5111421f9473..81cfc876fa89 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -1127,12 +1127,16 @@  int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 
 		mm->key = uresp.key;
 		mm->addr = virt_to_phys(chp->cq.queue);
+		mm->vaddr = chp->cq.queue;
+		mm->dma_addr = chp->cq.dma_addr;
 		mm->len = chp->cq.memsize;
 		insert_mmap(ucontext, mm);
 
 		mm2->key = uresp.gts_key;
 		mm2->addr = chp->cq.bar2_pa;
 		mm2->len = PAGE_SIZE;
+		mm2->vaddr = NULL;
+		mm2->dma_addr = 0;
 		insert_mmap(ucontext, mm2);
 	}
 
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index f838bb6718af..5eedc6cf0f8c 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -536,6 +536,8 @@  struct c4iw_mm_entry {
 	struct list_head entry;
 	u64 addr;
 	u32 key;
+	void *vaddr;
+	dma_addr_t dma_addr;
 	unsigned len;
 };
 
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 246b739ddb2b..6227775970c9 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -131,6 +131,10 @@  static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 	struct c4iw_mm_entry *mm;
 	struct c4iw_ucontext *ucontext;
 	u64 addr;
+	size_t size;
+	void *vaddr;
+	unsigned long vm_pgoff;
+	dma_addr_t dma_addr;
 
 	pr_debug("pgoff 0x%lx key 0x%x len %d\n", vma->vm_pgoff,
 		 key, len);
@@ -145,6 +149,9 @@  static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 	if (!mm)
 		return -EINVAL;
 	addr = mm->addr;
+	vaddr = mm->vaddr;
+	dma_addr = mm->dma_addr;
+	size = mm->len;
 	kfree(mm);
 
 	if ((addr >= pci_resource_start(rdev->lldi.pdev, 0)) &&
@@ -155,9 +162,17 @@  static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 		 * MA_SYNC register...
 		 */
 		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-		ret = io_remap_pfn_range(vma, vma->vm_start,
-					 addr >> PAGE_SHIFT,
-					 len, vma->vm_page_prot);
+		if (vaddr && is_vmalloc_addr(vaddr)) {
+			vm_pgoff = vma->vm_pgoff;
+			vma->vm_pgoff = 0;
+			ret = dma_mmap_coherent(&rdev->lldi.pdev->dev, vma,
+						vaddr, dma_addr, size);
+			vma->vm_pgoff = vm_pgoff;
+		} else {
+			ret = io_remap_pfn_range(vma, vma->vm_start,
+						 addr >> PAGE_SHIFT,
+						 len, vma->vm_page_prot);
+		}
 	} else if ((addr >= pci_resource_start(rdev->lldi.pdev, 2)) &&
 		   (addr < (pci_resource_start(rdev->lldi.pdev, 2) +
 		    pci_resource_len(rdev->lldi.pdev, 2)))) {
@@ -175,17 +190,32 @@  static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 				vma->vm_page_prot =
 					pgprot_noncached(vma->vm_page_prot);
 		}
-		ret = io_remap_pfn_range(vma, vma->vm_start,
-					 addr >> PAGE_SHIFT,
-					 len, vma->vm_page_prot);
+		if (vaddr && is_vmalloc_addr(vaddr)) {
+			vm_pgoff = vma->vm_pgoff;
+			vma->vm_pgoff = 0;
+			ret = dma_mmap_coherent(&rdev->lldi.pdev->dev, vma,
+						vaddr, dma_addr, size);
+			vma->vm_pgoff = vm_pgoff;
+		} else {
+			ret = io_remap_pfn_range(vma, vma->vm_start,
+						 addr >> PAGE_SHIFT,
+						 len, vma->vm_page_prot);
+		}
 	} else {
 
 		/*
 		 * Map WQ or CQ contig dma memory...
 		 */
-		ret = remap_pfn_range(vma, vma->vm_start,
-				      addr >> PAGE_SHIFT,
-				      len, vma->vm_page_prot);
+		if (vaddr && is_vmalloc_addr(vaddr)) {
+			vm_pgoff = vma->vm_pgoff;
+			vma->vm_pgoff = 0;
+			ret = dma_mmap_coherent(&rdev->lldi.pdev->dev, vma,
+						vaddr, dma_addr, size);
+		} else {
+			ret = remap_pfn_range(vma, vma->vm_start,
+					      addr >> PAGE_SHIFT,
+					      len, vma->vm_page_prot);
+		}
 	}
 
 	return ret;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index d16d8eaa1415..3f6fb4b34d5a 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -2282,16 +2282,22 @@  int c4iw_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *attrs,
 			goto err_free_ma_sync_key;
 		sq_key_mm->key = uresp.sq_key;
 		sq_key_mm->addr = qhp->wq.sq.phys_addr;
+		sq_key_mm->vaddr = qhp->wq.sq.queue;
+		sq_key_mm->dma_addr = qhp->wq.sq.dma_addr;
 		sq_key_mm->len = PAGE_ALIGN(qhp->wq.sq.memsize);
 		insert_mmap(ucontext, sq_key_mm);
 		if (!attrs->srq) {
 			rq_key_mm->key = uresp.rq_key;
 			rq_key_mm->addr = virt_to_phys(qhp->wq.rq.queue);
+			rq_key_mm->vaddr = qhp->wq.rq.queue;
+			rq_key_mm->dma_addr = qhp->wq.rq.dma_addr;
 			rq_key_mm->len = PAGE_ALIGN(qhp->wq.rq.memsize);
 			insert_mmap(ucontext, rq_key_mm);
 		}
 		sq_db_key_mm->key = uresp.sq_db_gts_key;
 		sq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.sq.bar2_pa;
+		sq_db_key_mm->vaddr = NULL;
+		sq_db_key_mm->dma_addr = 0;
 		sq_db_key_mm->len = PAGE_SIZE;
 		insert_mmap(ucontext, sq_db_key_mm);
 		if (!attrs->srq) {
@@ -2299,6 +2305,8 @@  int c4iw_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *attrs,
 			rq_db_key_mm->addr =
 				(u64)(unsigned long)qhp->wq.rq.bar2_pa;
 			rq_db_key_mm->len = PAGE_SIZE;
+			rq_db_key_mm->vaddr = NULL;
+			rq_db_key_mm->dma_addr = 0;
 			insert_mmap(ucontext, rq_db_key_mm);
 		}
 		if (ma_sync_key_mm) {
@@ -2307,6 +2315,8 @@  int c4iw_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *attrs,
 				(pci_resource_start(rhp->rdev.lldi.pdev, 0) +
 				PCIE_MA_SYNC_A) & PAGE_MASK;
 			ma_sync_key_mm->len = PAGE_SIZE;
+			ma_sync_key_mm->vaddr = NULL;
+			ma_sync_key_mm->dma_addr = 0;
 			insert_mmap(ucontext, ma_sync_key_mm);
 		}
 
@@ -2763,10 +2773,14 @@  int c4iw_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *attrs,
 		srq_key_mm->key = uresp.srq_key;
 		srq_key_mm->addr = virt_to_phys(srq->wq.queue);
 		srq_key_mm->len = PAGE_ALIGN(srq->wq.memsize);
+		srq_key_mm->vaddr = srq->wq.queue;
+		srq_key_mm->dma_addr = srq->wq.dma_addr;
 		insert_mmap(ucontext, srq_key_mm);
 		srq_db_key_mm->key = uresp.srq_db_gts_key;
 		srq_db_key_mm->addr = (u64)(unsigned long)srq->wq.bar2_pa;
 		srq_db_key_mm->len = PAGE_SIZE;
+		srq_db_key_mm->vaddr = NULL;
+		srq_db_key_mm->dma_addr = 0;
 		insert_mmap(ucontext, srq_db_key_mm);
 	}