RDMA/rxe: Replace page_address() with kmap_local_page()

Message ID 1668136765-34-1-git-send-email-yangx.jy@fujitsu.com (mailing list archive)
State Superseded
Series RDMA/rxe: Replace page_address() with kmap_local_page()

Commit Message

Xiao Yang Nov. 11, 2022, 3:19 a.m. UTC
1) Use kmap_local_page() for new in-kernel memory protection schemes.
2) Do some cleanup (e.g. remove struct rxe_phys_buf).

Signed-off-by: Xiao Yang <yangx.jy@fujitsu.com>
---
 drivers/infiniband/sw/rxe/rxe_loc.h   |  2 +
 drivers/infiniband/sw/rxe/rxe_mr.c    | 84 +++++++++++++--------------
 drivers/infiniband/sw/rxe/rxe_resp.c  |  1 +
 drivers/infiniband/sw/rxe/rxe_verbs.c |  6 +-
 drivers/infiniband/sw/rxe/rxe_verbs.h |  9 +--
 5 files changed, 45 insertions(+), 57 deletions(-)

Comments

Yunsheng Lin Nov. 11, 2022, 3:34 a.m. UTC | #1
On 2022/11/11 11:19, Xiao Yang wrote:
> 1) Use kmap_local_page() for new in-kernel memory protection schemes.
> 2) Do some cleanup (e.g. remove struct rxe_phys_buf).

Going by the commit log above, it seems better to split it into two patches:
Patch 1: Do some cleanup
Patch 2: Use kmap_local_page()


Also, doesn't pin_user_pages_fast() in ib_umem_get() ensure the
user memory is accessible in kernel space, which would mean we
can use page_address() safely?


Zhijian Li (Fujitsu) Nov. 11, 2022, 3:52 a.m. UTC | #2
Xiao,

What a coincidence!! I also have a similar patch set doing the same thing,
which may be easier to review; I will post it as well. You are
free to use any of it or ignore it.


Thanks
Zhijian




On 11/11/2022 11:19, Xiao Yang wrote:
> 1) Use kmap_local_page() for new in-kernel memory protection schemes.
> 2) Do some cleanup (e.g. remove struct rxe_phys_buf).
Xiao Yang Nov. 11, 2022, 6:07 a.m. UTC | #3
On 2022/11/11 11:34, Yunsheng Lin wrote:
> On 2022/11/11 11:19, Xiao Yang wrote:
>> 1) Use kmap_local_page() for new in-kernel memory protection schemes.
>> 2) Do some cleanup (e.g. remove struct rxe_phys_buf).
> 
> Going by the commit log above, it seems better to split it into two patches:
> Patch 1: Do some cleanup
> Patch 2: Use kmap_local_page()

Hi Lin,

It's fine for me to split the patchset. I will send v2 soon.

> 
> 
> Also, doesn't pin_user_pages_fast() in ib_umem_get() ensure the
> user memory is accessible in kernel space, which would mean we
> can use page_address() safely?

It's safe to call page_address() here now, but it will break when new
in-kernel memory protection schemes (e.g. PKS [1]) are applied in the future.

[1]: 
https://lore.kernel.org/lkml/20220419170649.1022246-1-ira.weiny@intel.com/

Jason suggested that we should replace page_address() with
kmap_local_page(); please see the detailed discussion [2][3].

[2]: 
https://lore.kernel.org/linux-rdma/20220121160654.GC773547@iweiny-DESK2.sc.intel.com/
[3]: https://lore.kernel.org/linux-rdma/Y1bFG%2FOM5zSOoWcr@nvidia.com/
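
To illustrate the difference (a minimal sketch, not the rxe code itself;
copy_one_page() is a made-up helper):

#include <linux/highmem.h>
#include <linux/string.h>

/* Sketch: write a buffer into one page, old pattern vs. new pattern. */
static void copy_one_page(struct page *page, size_t off,
			  const void *src, size_t len)
{
	void *vaddr;

	/*
	 * Old pattern:
	 *
	 *	vaddr = page_address(page);
	 *
	 * This relies on a permanent kernel mapping that stays
	 * accessible; a scheme like PKS can make such an access fault.
	 */

	/* New pattern: short-lived local mapping around each access. */
	vaddr = kmap_local_page(page);
	memcpy(vaddr + off, src, len);
	kunmap_local(vaddr);
}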

Best Regards,
Xiao Yang
> 
> 
>>
>> Signed-off-by: Xiao Yang <yangx.jy@fujitsu.com>
>> ---
>>   drivers/infiniband/sw/rxe/rxe_loc.h   |  2 +
>>   drivers/infiniband/sw/rxe/rxe_mr.c    | 84 +++++++++++++--------------
>>   drivers/infiniband/sw/rxe/rxe_resp.c  |  1 +
>>   drivers/infiniband/sw/rxe/rxe_verbs.c |  6 +-
>>   drivers/infiniband/sw/rxe/rxe_verbs.h |  9 +--
>>   5 files changed, 45 insertions(+), 57 deletions(-)
>>
>> diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
>> index c2a5c8814a48..a63d29156a66 100644
>> --- a/drivers/infiniband/sw/rxe/rxe_loc.h
>> +++ b/drivers/infiniband/sw/rxe/rxe_loc.h
>> @@ -68,6 +68,8 @@ void rxe_mr_init_dma(int access, struct rxe_mr *mr);
>>   int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
>>   		     int access, struct rxe_mr *mr);
>>   int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr);
>> +void *rxe_map_to_vaddr(struct rxe_mr *mr, int map_index, int addr_index, size_t offset);
>> +void rxe_unmap_vaddr(struct rxe_mr *mr, void *vaddr);
>>   int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
>>   		enum rxe_mr_copy_dir dir);
>>   int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma,
>> diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
>> index bc081002bddc..4246b7f34a29 100644
>> --- a/drivers/infiniband/sw/rxe/rxe_mr.c
>> +++ b/drivers/infiniband/sw/rxe/rxe_mr.c
>> @@ -115,13 +115,10 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
>>   		     int access, struct rxe_mr *mr)
>>   {
>>   	struct rxe_map		**map;
>> -	struct rxe_phys_buf	*buf = NULL;
>>   	struct ib_umem		*umem;
>>   	struct sg_page_iter	sg_iter;
>>   	int			num_buf;
>> -	void			*vaddr;
>>   	int err;
>> -	int i;
>>   
>>   	umem = ib_umem_get(&rxe->ib_dev, start, length, access);
>>   	if (IS_ERR(umem)) {
>> @@ -144,32 +141,19 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
>>   
>>   	mr->page_shift = PAGE_SHIFT;
>>   	mr->page_mask = PAGE_SIZE - 1;
>> +	mr->ibmr.page_size = PAGE_SIZE;
>>   
>> -	num_buf			= 0;
>> +	num_buf = 0;
>>   	map = mr->map;
>>   	if (length > 0) {
>> -		buf = map[0]->buf;
>> -
>>   		for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) {
>>   			if (num_buf >= RXE_BUF_PER_MAP) {
>>   				map++;
>> -				buf = map[0]->buf;
>>   				num_buf = 0;
>>   			}
>>   
>> -			vaddr = page_address(sg_page_iter_page(&sg_iter));
>> -			if (!vaddr) {
>> -				pr_warn("%s: Unable to get virtual address\n",
>> -						__func__);
>> -				err = -ENOMEM;
>> -				goto err_cleanup_map;
>> -			}
>> -
>> -			buf->addr = (uintptr_t)vaddr;
>> -			buf->size = PAGE_SIZE;
>> +			map[0]->addrs[num_buf] = (uintptr_t)sg_page_iter_page(&sg_iter);
>>   			num_buf++;
>> -			buf++;
>> -
>>   		}
>>   	}
>>   
>> @@ -181,10 +165,6 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
>>   
>>   	return 0;
>>   
>> -err_cleanup_map:
>> -	for (i = 0; i < mr->num_map; i++)
>> -		kfree(mr->map[i]);
>> -	kfree(mr->map);
>>   err_release_umem:
>>   	ib_umem_release(umem);
>>   err_out:
>> @@ -216,9 +196,9 @@ static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
>>   			size_t *offset_out)
>>   {
>>   	size_t offset = iova - mr->ibmr.iova + mr->offset;
>> +	u64 length = mr->ibmr.page_size;
>>   	int			map_index;
>> -	int			buf_index;
>> -	u64			length;
>> +	int			addr_index;
>>   
>>   	if (likely(mr->page_shift)) {
>>   		*offset_out = offset & mr->page_mask;
>> @@ -227,27 +207,46 @@ static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
>>   		*m_out = offset >> mr->map_shift;
>>   	} else {
>>   		map_index = 0;
>> -		buf_index = 0;
>> -
>> -		length = mr->map[map_index]->buf[buf_index].size;
>> +		addr_index = 0;
>>   
>>   		while (offset >= length) {
>>   			offset -= length;
>> -			buf_index++;
>> +			addr_index++;
>>   
>> -			if (buf_index == RXE_BUF_PER_MAP) {
>> +			if (addr_index == RXE_BUF_PER_MAP) {
>>   				map_index++;
>> -				buf_index = 0;
>> +				addr_index = 0;
>>   			}
>> -			length = mr->map[map_index]->buf[buf_index].size;
>>   		}
>>   
>>   		*m_out = map_index;
>> -		*n_out = buf_index;
>> +		*n_out = addr_index;
>>   		*offset_out = offset;
>>   	}
>>   }
>>   
>> +void *rxe_map_to_vaddr(struct rxe_mr *mr, int map_index, int addr_index, size_t offset)
>> +{
>> +	void *vaddr = NULL;
>> +
>> +	if (mr->ibmr.type == IB_MR_TYPE_USER) {
>> +		vaddr = kmap_local_page((struct page *)mr->map[map_index]->addrs[addr_index]);
>> +		if (vaddr == NULL) {
>> +			pr_warn("Failed to map page");
>> +			return NULL;
>> +		}
>> +	} else
>> +		vaddr = (void *)(uintptr_t)mr->map[map_index]->addrs[addr_index];
>> +
>> +	return vaddr + offset;
>> +}
>> +
>> +void rxe_unmap_vaddr(struct rxe_mr *mr, void *vaddr)
>> +{
>> +	if (mr->ibmr.type == IB_MR_TYPE_USER)
>> +		kunmap_local(vaddr);
>> +}
>> +
>>   void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
>>   {
>>   	size_t offset;
>> @@ -273,13 +272,13 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
>>   
>>   	lookup_iova(mr, iova, &m, &n, &offset);
>>   
>> -	if (offset + length > mr->map[m]->buf[n].size) {
>> +	if (offset + length > mr->ibmr.page_size) {
>>   		pr_warn("crosses page boundary\n");
>>   		addr = NULL;
>>   		goto out;
>>   	}
>>   
>> -	addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset;
>> +	addr = rxe_map_to_vaddr(mr, m, n, offset);
>>   
>>   out:
>>   	return addr;
>> @@ -294,8 +293,6 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
>>   	int			err;
>>   	int			bytes;
>>   	u8			*va;
>> -	struct rxe_map		**map;
>> -	struct rxe_phys_buf	*buf;
>>   	int			m;
>>   	int			i;
>>   	size_t			offset;
>> @@ -325,17 +322,14 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
>>   
>>   	lookup_iova(mr, iova, &m, &i, &offset);
>>   
>> -	map = mr->map + m;
>> -	buf	= map[0]->buf + i;
>> -
>>   	while (length > 0) {
>>   		u8 *src, *dest;
>>   
>> -		va	= (u8 *)(uintptr_t)buf->addr + offset;
>> +		va = (u8 *)rxe_map_to_vaddr(mr, m, i, offset);
>>   		src = (dir == RXE_TO_MR_OBJ) ? addr : va;
>>   		dest = (dir == RXE_TO_MR_OBJ) ? va : addr;
>>   
>> -		bytes	= buf->size - offset;
>> +		bytes = mr->ibmr.page_size - offset;
>>   
>>   		if (bytes > length)
>>   			bytes = length;
>> @@ -346,14 +340,14 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
>>   		addr	+= bytes;
>>   
>>   		offset	= 0;
>> -		buf++;
>>   		i++;
>>   
>>   		if (i == RXE_BUF_PER_MAP) {
>>   			i = 0;
>> -			map++;
>> -			buf = map[0]->buf;
>> +			m++;
>>   		}
>> +
>> +		rxe_unmap_vaddr(mr, va);
>>   	}
>>   
>>   	return 0;
>> diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
>> index c32bc12cc82f..31f9ba11a921 100644
>> --- a/drivers/infiniband/sw/rxe/rxe_resp.c
>> +++ b/drivers/infiniband/sw/rxe/rxe_resp.c
>> @@ -652,6 +652,7 @@ static enum resp_states atomic_reply(struct rxe_qp *qp,
>>   
>>   	ret = RESPST_ACKNOWLEDGE;
>>   out:
>> +	rxe_unmap_vaddr(mr, vaddr);
>>   	return ret;
>>   }
>>   
>> diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
>> index bcdfdadaebbc..13e4d660cb02 100644
>> --- a/drivers/infiniband/sw/rxe/rxe_verbs.c
>> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
>> @@ -948,16 +948,12 @@ static int rxe_set_page(struct ib_mr *ibmr, u64 addr)
>>   {
>>   	struct rxe_mr *mr = to_rmr(ibmr);
>>   	struct rxe_map *map;
>> -	struct rxe_phys_buf *buf;
>>   
>>   	if (unlikely(mr->nbuf == mr->num_buf))
>>   		return -ENOMEM;
>>   
>>   	map = mr->map[mr->nbuf / RXE_BUF_PER_MAP];
>> -	buf = &map->buf[mr->nbuf % RXE_BUF_PER_MAP];
>> -
>> -	buf->addr = addr;
>> -	buf->size = ibmr->page_size;
>> +	map->addrs[mr->nbuf % RXE_BUF_PER_MAP] = addr;
>>   	mr->nbuf++;
>>   
>>   	return 0;
>> diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
>> index 22a299b0a9f0..d136f02d5b56 100644
>> --- a/drivers/infiniband/sw/rxe/rxe_verbs.h
>> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
>> @@ -277,15 +277,10 @@ enum rxe_mr_lookup_type {
>>   	RXE_LOOKUP_REMOTE,
>>   };
>>   
>> -#define RXE_BUF_PER_MAP		(PAGE_SIZE / sizeof(struct rxe_phys_buf))
>> -
>> -struct rxe_phys_buf {
>> -	u64      addr;
>> -	u64      size;
>> -};
>> +#define RXE_BUF_PER_MAP		(PAGE_SIZE / sizeof(u64))
>>   
>>   struct rxe_map {
>> -	struct rxe_phys_buf	buf[RXE_BUF_PER_MAP];
>> +	u64 addrs[RXE_BUF_PER_MAP];
>>   };
>>   
>>   static inline int rkey_is_mw(u32 rkey)
>>
Xiao Yang Nov. 11, 2022, 8:10 a.m. UTC | #4
On 2022/11/11 11:52, Li, Zhijian/李 智坚 wrote:
> Xiao,
> 
> What a coincidence!! I also have a similar patch set doing the same thing,
> which may be easier to review; I will post it as well. You are
> free to use any of it or ignore it.

Hi Zhijian,

Thanks a lot for your reference.

Li Zhijian (5):
   RDMA/rxe: Remove rxe_phys_buf.size
   -> It has been included in my first patch [1].
   RDMA/rxe: use iova_to_vaddr to transform iova for rxe_mr_copy
   -> I don't want to do the check (e.g. mr_check_range()) in loops.
   RDMA/rxe: iova_to_vaddr cleanup
   RDMA/rxe: refactor iova_to_vaddr
   RDMA/rxe: Rename iova_to_vaddr to rxe_map_iova
   -> I think these three are similar to my second patch [2].

[1]: 
https://lore.kernel.org/linux-rdma/1668153085-15-1-git-send-email-yangx.jy@fujitsu.com/T/#t
[2]: 
https://lore.kernel.org/linux-rdma/1668153085-15-2-git-send-email-yangx.jy@fujitsu.com/T/#u

Best Regards,
Xiao Yang


Patch

diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index c2a5c8814a48..a63d29156a66 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -68,6 +68,8 @@  void rxe_mr_init_dma(int access, struct rxe_mr *mr);
 int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
 		     int access, struct rxe_mr *mr);
 int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr);
+void *rxe_map_to_vaddr(struct rxe_mr *mr, int map_index, int addr_index, size_t offset);
+void rxe_unmap_vaddr(struct rxe_mr *mr, void *vaddr);
 int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
 		enum rxe_mr_copy_dir dir);
 int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma,
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index bc081002bddc..4246b7f34a29 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -115,13 +115,10 @@  int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
 		     int access, struct rxe_mr *mr)
 {
 	struct rxe_map		**map;
-	struct rxe_phys_buf	*buf = NULL;
 	struct ib_umem		*umem;
 	struct sg_page_iter	sg_iter;
 	int			num_buf;
-	void			*vaddr;
 	int err;
-	int i;
 
 	umem = ib_umem_get(&rxe->ib_dev, start, length, access);
 	if (IS_ERR(umem)) {
@@ -144,32 +141,19 @@  int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
 
 	mr->page_shift = PAGE_SHIFT;
 	mr->page_mask = PAGE_SIZE - 1;
+	mr->ibmr.page_size = PAGE_SIZE;
 
-	num_buf			= 0;
+	num_buf = 0;
 	map = mr->map;
 	if (length > 0) {
-		buf = map[0]->buf;
-
 		for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) {
 			if (num_buf >= RXE_BUF_PER_MAP) {
 				map++;
-				buf = map[0]->buf;
 				num_buf = 0;
 			}
 
-			vaddr = page_address(sg_page_iter_page(&sg_iter));
-			if (!vaddr) {
-				pr_warn("%s: Unable to get virtual address\n",
-						__func__);
-				err = -ENOMEM;
-				goto err_cleanup_map;
-			}
-
-			buf->addr = (uintptr_t)vaddr;
-			buf->size = PAGE_SIZE;
+			map[0]->addrs[num_buf] = (uintptr_t)sg_page_iter_page(&sg_iter);
 			num_buf++;
-			buf++;
-
 		}
 	}
 
@@ -181,10 +165,6 @@  int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
 
 	return 0;
 
-err_cleanup_map:
-	for (i = 0; i < mr->num_map; i++)
-		kfree(mr->map[i]);
-	kfree(mr->map);
 err_release_umem:
 	ib_umem_release(umem);
 err_out:
@@ -216,9 +196,9 @@  static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
 			size_t *offset_out)
 {
 	size_t offset = iova - mr->ibmr.iova + mr->offset;
+	u64 length = mr->ibmr.page_size;
 	int			map_index;
-	int			buf_index;
-	u64			length;
+	int			addr_index;
 
 	if (likely(mr->page_shift)) {
 		*offset_out = offset & mr->page_mask;
@@ -227,27 +207,46 @@  static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
 		*m_out = offset >> mr->map_shift;
 	} else {
 		map_index = 0;
-		buf_index = 0;
-
-		length = mr->map[map_index]->buf[buf_index].size;
+		addr_index = 0;
 
 		while (offset >= length) {
 			offset -= length;
-			buf_index++;
+			addr_index++;
 
-			if (buf_index == RXE_BUF_PER_MAP) {
+			if (addr_index == RXE_BUF_PER_MAP) {
 				map_index++;
-				buf_index = 0;
+				addr_index = 0;
 			}
-			length = mr->map[map_index]->buf[buf_index].size;
 		}
 
 		*m_out = map_index;
-		*n_out = buf_index;
+		*n_out = addr_index;
 		*offset_out = offset;
 	}
 }
 
+void *rxe_map_to_vaddr(struct rxe_mr *mr, int map_index, int addr_index, size_t offset)
+{
+	void *vaddr = NULL;
+
+	if (mr->ibmr.type == IB_MR_TYPE_USER) {
+		vaddr = kmap_local_page((struct page *)mr->map[map_index]->addrs[addr_index]);
+		if (vaddr == NULL) {
+			pr_warn("Failed to map page");
+			return NULL;
+		}
+	} else
+		vaddr = (void *)(uintptr_t)mr->map[map_index]->addrs[addr_index];
+
+	return vaddr + offset;
+}
+
+void rxe_unmap_vaddr(struct rxe_mr *mr, void *vaddr)
+{
+	if (mr->ibmr.type == IB_MR_TYPE_USER)
+		kunmap_local(vaddr);
+}
+
 void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
 {
 	size_t offset;
@@ -273,13 +272,13 @@  void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
 
 	lookup_iova(mr, iova, &m, &n, &offset);
 
-	if (offset + length > mr->map[m]->buf[n].size) {
+	if (offset + length > mr->ibmr.page_size) {
 		pr_warn("crosses page boundary\n");
 		addr = NULL;
 		goto out;
 	}
 
-	addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset;
+	addr = rxe_map_to_vaddr(mr, m, n, offset);
 
 out:
 	return addr;
@@ -294,8 +293,6 @@  int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
 	int			err;
 	int			bytes;
 	u8			*va;
-	struct rxe_map		**map;
-	struct rxe_phys_buf	*buf;
 	int			m;
 	int			i;
 	size_t			offset;
@@ -325,17 +322,14 @@  int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
 
 	lookup_iova(mr, iova, &m, &i, &offset);
 
-	map = mr->map + m;
-	buf	= map[0]->buf + i;
-
 	while (length > 0) {
 		u8 *src, *dest;
 
-		va	= (u8 *)(uintptr_t)buf->addr + offset;
+		va = (u8 *)rxe_map_to_vaddr(mr, m, i, offset);
 		src = (dir == RXE_TO_MR_OBJ) ? addr : va;
 		dest = (dir == RXE_TO_MR_OBJ) ? va : addr;
 
-		bytes	= buf->size - offset;
+		bytes = mr->ibmr.page_size - offset;
 
 		if (bytes > length)
 			bytes = length;
@@ -346,14 +340,14 @@  int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
 		addr	+= bytes;
 
 		offset	= 0;
-		buf++;
 		i++;
 
 		if (i == RXE_BUF_PER_MAP) {
 			i = 0;
-			map++;
-			buf = map[0]->buf;
+			m++;
 		}
+
+		rxe_unmap_vaddr(mr, va);
 	}
 
 	return 0;
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index c32bc12cc82f..31f9ba11a921 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -652,6 +652,7 @@  static enum resp_states atomic_reply(struct rxe_qp *qp,
 
 	ret = RESPST_ACKNOWLEDGE;
 out:
+	rxe_unmap_vaddr(mr, vaddr);
 	return ret;
 }
 
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index bcdfdadaebbc..13e4d660cb02 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -948,16 +948,12 @@  static int rxe_set_page(struct ib_mr *ibmr, u64 addr)
 {
 	struct rxe_mr *mr = to_rmr(ibmr);
 	struct rxe_map *map;
-	struct rxe_phys_buf *buf;
 
 	if (unlikely(mr->nbuf == mr->num_buf))
 		return -ENOMEM;
 
 	map = mr->map[mr->nbuf / RXE_BUF_PER_MAP];
-	buf = &map->buf[mr->nbuf % RXE_BUF_PER_MAP];
-
-	buf->addr = addr;
-	buf->size = ibmr->page_size;
+	map->addrs[mr->nbuf % RXE_BUF_PER_MAP] = addr;
 	mr->nbuf++;
 
 	return 0;
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 22a299b0a9f0..d136f02d5b56 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -277,15 +277,10 @@  enum rxe_mr_lookup_type {
 	RXE_LOOKUP_REMOTE,
 };
 
-#define RXE_BUF_PER_MAP		(PAGE_SIZE / sizeof(struct rxe_phys_buf))
-
-struct rxe_phys_buf {
-	u64      addr;
-	u64      size;
-};
+#define RXE_BUF_PER_MAP		(PAGE_SIZE / sizeof(u64))
 
 struct rxe_map {
-	struct rxe_phys_buf	buf[RXE_BUF_PER_MAP];
+	u64 addrs[RXE_BUF_PER_MAP];
 };
 
 static inline int rkey_is_mw(u32 rkey)
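
A condensed sketch of how the two new helpers pair up, following the
rxe_mr_copy() hunk above (illustrative only; copy_one_chunk() is a
made-up wrapper, assumes the rxe headers, and error handling is trimmed):

/* Copy `bytes` out of one MR page; must not cross a page boundary. */
static int copy_one_chunk(struct rxe_mr *mr, int m, int i, size_t offset,
			  void *dest, int bytes)
{
	u8 *va = rxe_map_to_vaddr(mr, m, i, offset);

	if (!va)
		return -EFAULT;

	memcpy(dest, va, bytes);
	rxe_unmap_vaddr(mr, va);	/* kunmap_local() for user MRs */
	return 0;
}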