Message ID | 1668136765-34-1-git-send-email-yangx.jy@fujitsu.com (mailing list archive)
---|---
State | Superseded
Series | RDMA/rxe: Replace page_address() with kmap_local_page()
On 2022/11/11 11:19, Xiao Yang wrote:
> 1) Use kmap_local_page() for new in-kernel memory protection schemes.
> 2) Do some cleanup(e.g. remove struct rxe_phys_buf).

As the commit log above says, it seems better to split this into two patches:

Patch 1: Do some cleanup
Patch 2: Use kmap_local_page()

Also, doesn't pin_user_pages_fast() in ib_umem_get() ensure that the user
memory is accessible in the kernel space, which would mean we can use
page_address() safely?

> Signed-off-by: Xiao Yang <yangx.jy@fujitsu.com>
>
> [... full patch snipped; see the complete patch at the bottom of this page ...]
Xiao,

What a coincidence!! I also had a similar patch set doing the same thing,
split up so that it would be easier to review. I will post it as well; you
are free to use any of it or ignore it.

Thanks
Zhijian

On 11/11/2022 11:19, Xiao Yang wrote:
> 1) Use kmap_local_page() for new in-kernel memory protection schemes.
> 2) Do some cleanup(e.g. remove struct rxe_phys_buf).
>
> Signed-off-by: Xiao Yang <yangx.jy@fujitsu.com>
>
> [... full patch snipped; see the complete patch at the bottom of this page ...]
On 2022/11/11 11:34, Yunsheng Lin wrote:
> On 2022/11/11 11:19, Xiao Yang wrote:
>> 1) Use kmap_local_page() for new in-kernel memory protection schemes.
>> 2) Do some cleanup(e.g. remove struct rxe_phys_buf).
>
> As the commit log above says, it seems better to split this into two patches:
> Patch 1: Do some cleanup
> Patch 2: Use kmap_local_page()

Hi Lin,

It's fine for me to split the patchset. I will send v2 soon.

> Also, doesn't pin_user_pages_fast() in ib_umem_get() ensure that the user
> memory is accessible in the kernel space, which would mean we can use
> page_address() safely?

For now it is safe to call page_address() here, but it will break when new
in-kernel memory protection schemes (e.g. PKS [1]) are applied in the future.

[1]: https://lore.kernel.org/lkml/20220419170649.1022246-1-ira.weiny@intel.com/

Jason suggested that we should replace page_address() with kmap_local_page();
please see the detailed discussion in [2][3].

[2]: https://lore.kernel.org/linux-rdma/20220121160654.GC773547@iweiny-DESK2.sc.intel.com/
[3]: https://lore.kernel.org/linux-rdma/Y1bFG%2FOM5zSOoWcr@nvidia.com/

Best Regards,
Xiao Yang

>> [... full patch snipped; see the complete patch at the bottom of this page ...]
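[Editor's note: to make the page_address() vs kmap_local_page() distinction concrete, here is a minimal sketch of the access pattern the series moves to. This is an illustration for this archive, not code from the thread; sketch_copy_from_page() is a made-up name, and in the real driver the page would come from the MR's addrs[] array.]

/*
 * Minimal sketch of the kmap_local_page() pattern discussed above.
 * The function name and parameters are illustrative only.
 */
#include <linux/highmem.h>
#include <linux/string.h>
#include <linux/types.h>

static void sketch_copy_from_page(struct page *page, size_t offset,
				  void *dst, size_t len)
{
	/* Thread-local mapping; unlike a bare page_address(), this also
	 * covers highmem pages and, per the discussion above, keeps
	 * working under future protection schemes such as PKS. */
	u8 *vaddr = kmap_local_page(page);

	memcpy(dst, vaddr + offset, len);

	/* Drop the mapping as soon as the access is done (LIFO nesting). */
	kunmap_local(vaddr);
}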
On 2022/11/11 11:52, Li, Zhijian wrote:
> Xiao,
>
> What a coincidence!! I also had a similar patch set doing the same thing,
> split up so that it would be easier to review. I will post it as well; you
> are free to use any of it or ignore it.

Hi Zhijian,

Thanks a lot for your reference.

Li Zhijian (5):
  RDMA/rxe: Remove rxe_phys_buf.size
  -> It has been included in my first patch [1].
  RDMA/rxe: use iova_to_vaddr to transform iova for rxe_mr_copy
  -> I don't want to do the check (e.g. mr_check_range()) in loops.
  RDMA/rxe: iova_to_vaddr cleanup
  RDMA/rxe: refactor iova_to_vaddr
  RDMA/rxe: Rename iova_to_vaddr to rxe_map_iova
  -> I think these three are similar to my second patch [2].

[1]: https://lore.kernel.org/linux-rdma/1668153085-15-1-git-send-email-yangx.jy@fujitsu.com/T/#t
[2]: https://lore.kernel.org/linux-rdma/1668153085-15-2-git-send-email-yangx.jy@fujitsu.com/T/#u

Best Regards,
Xiao Yang

> Thanks
> Zhijian
>
> On 11/11/2022 11:19, Xiao Yang wrote:
>> 1) Use kmap_local_page() for new in-kernel memory protection schemes.
>> 2) Do some cleanup(e.g. remove struct rxe_phys_buf).
>>
>> Signed-off-by: Xiao Yang <yangx.jy@fujitsu.com>
>>
>> [... full patch snipped; see the complete patch at the bottom of this page ...]
1) Use kmap_local_page() for new in-kernel memory protection schemes.
2) Do some cleanup(e.g. remove struct rxe_phys_buf).

Signed-off-by: Xiao Yang <yangx.jy@fujitsu.com>
---
 drivers/infiniband/sw/rxe/rxe_loc.h   |  2 +
 drivers/infiniband/sw/rxe/rxe_mr.c    | 84 +++++++++++++--------------
 drivers/infiniband/sw/rxe/rxe_resp.c  |  1 +
 drivers/infiniband/sw/rxe/rxe_verbs.c |  6 +-
 drivers/infiniband/sw/rxe/rxe_verbs.h |  9 +--
 5 files changed, 45 insertions(+), 57 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index c2a5c8814a48..a63d29156a66 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -68,6 +68,8 @@ void rxe_mr_init_dma(int access, struct rxe_mr *mr);
 int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
 		     int access, struct rxe_mr *mr);
 int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr);
+void *rxe_map_to_vaddr(struct rxe_mr *mr, int map_index, int addr_index, size_t offset);
+void rxe_unmap_vaddr(struct rxe_mr *mr, void *vaddr);
 int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
 		enum rxe_mr_copy_dir dir);
 int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma,
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index bc081002bddc..4246b7f34a29 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -115,13 +115,10 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
 		     int access, struct rxe_mr *mr)
 {
 	struct rxe_map **map;
-	struct rxe_phys_buf *buf = NULL;
 	struct ib_umem *umem;
 	struct sg_page_iter sg_iter;
 	int num_buf;
-	void *vaddr;
 	int err;
-	int i;
 
 	umem = ib_umem_get(&rxe->ib_dev, start, length, access);
 	if (IS_ERR(umem)) {
@@ -144,32 +141,19 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
 
 	mr->page_shift = PAGE_SHIFT;
 	mr->page_mask = PAGE_SIZE - 1;
+	mr->ibmr.page_size = PAGE_SIZE;
 
-	num_buf = 0;
+	num_buf = 0;
 	map = mr->map;
 	if (length > 0) {
-		buf = map[0]->buf;
-
 		for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) {
 			if (num_buf >= RXE_BUF_PER_MAP) {
 				map++;
-				buf = map[0]->buf;
 				num_buf = 0;
 			}
 
-			vaddr = page_address(sg_page_iter_page(&sg_iter));
-			if (!vaddr) {
-				pr_warn("%s: Unable to get virtual address\n",
-					__func__);
-				err = -ENOMEM;
-				goto err_cleanup_map;
-			}
-
-			buf->addr = (uintptr_t)vaddr;
-			buf->size = PAGE_SIZE;
+			map[0]->addrs[num_buf] = (uintptr_t)sg_page_iter_page(&sg_iter);
 			num_buf++;
-			buf++;
-
 		}
 	}
 
@@ -181,10 +165,6 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
 
 	return 0;
 
-err_cleanup_map:
-	for (i = 0; i < mr->num_map; i++)
-		kfree(mr->map[i]);
-	kfree(mr->map);
 err_release_umem:
 	ib_umem_release(umem);
 err_out:
@@ -216,9 +196,9 @@ static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
 			size_t *offset_out)
 {
 	size_t offset = iova - mr->ibmr.iova + mr->offset;
+	u64 length = mr->ibmr.page_size;
 	int map_index;
-	int buf_index;
-	u64 length;
+	int addr_index;
 
 	if (likely(mr->page_shift)) {
 		*offset_out = offset & mr->page_mask;
@@ -227,27 +207,46 @@ static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
 		*m_out = offset >> mr->map_shift;
 	} else {
 		map_index = 0;
-		buf_index = 0;
-
-		length = mr->map[map_index]->buf[buf_index].size;
+		addr_index = 0;
 
 		while (offset >= length) {
 			offset -= length;
-			buf_index++;
+			addr_index++;
 
-			if (buf_index == RXE_BUF_PER_MAP) {
+			if (addr_index == RXE_BUF_PER_MAP) {
 				map_index++;
-				buf_index = 0;
+				addr_index = 0;
 			}
-			length = mr->map[map_index]->buf[buf_index].size;
 		}
 
 		*m_out = map_index;
-		*n_out = buf_index;
+		*n_out = addr_index;
 		*offset_out = offset;
 	}
 }
 
+void *rxe_map_to_vaddr(struct rxe_mr *mr, int map_index, int addr_index, size_t offset)
+{
+	void *vaddr = NULL;
+
+	if (mr->ibmr.type == IB_MR_TYPE_USER) {
+		vaddr = kmap_local_page((struct page *)mr->map[map_index]->addrs[addr_index]);
+		if (vaddr == NULL) {
+			pr_warn("Failed to map page");
+			return NULL;
+		}
+	} else
+		vaddr = (void *)(uintptr_t)mr->map[map_index]->addrs[addr_index];
+
+	return vaddr + offset;
+}
+
+void rxe_unmap_vaddr(struct rxe_mr *mr, void *vaddr)
+{
+	if (mr->ibmr.type == IB_MR_TYPE_USER)
+		kunmap_local(vaddr);
+}
+
 void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
 {
 	size_t offset;
@@ -273,13 +272,13 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
 
 	lookup_iova(mr, iova, &m, &n, &offset);
 
-	if (offset + length > mr->map[m]->buf[n].size) {
+	if (offset + length > mr->ibmr.page_size) {
 		pr_warn("crosses page boundary\n");
 		addr = NULL;
 		goto out;
 	}
 
-	addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset;
+	addr = rxe_map_to_vaddr(mr, m, n, offset);
 
 out:
 	return addr;
@@ -294,8 +293,6 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
 	int err;
 	int bytes;
 	u8 *va;
-	struct rxe_map **map;
-	struct rxe_phys_buf *buf;
 	int m;
 	int i;
 	size_t offset;
@@ -325,17 +322,14 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
 
 	lookup_iova(mr, iova, &m, &i, &offset);
 
-	map = mr->map + m;
-	buf = map[0]->buf + i;
-
 	while (length > 0) {
 		u8 *src, *dest;
 
-		va = (u8 *)(uintptr_t)buf->addr + offset;
+		va = (u8 *)rxe_map_to_vaddr(mr, m, i, offset);
 		src = (dir == RXE_TO_MR_OBJ) ? addr : va;
 		dest = (dir == RXE_TO_MR_OBJ) ? va : addr;
 
-		bytes = buf->size - offset;
+		bytes = mr->ibmr.page_size - offset;
 
 		if (bytes > length)
 			bytes = length;
@@ -346,14 +340,14 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
 		addr += bytes;
 
 		offset = 0;
-		buf++;
 		i++;
 
 		if (i == RXE_BUF_PER_MAP) {
 			i = 0;
-			map++;
-			buf = map[0]->buf;
+			m++;
 		}
+
+		rxe_unmap_vaddr(mr, va);
 	}
 
 	return 0;
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index c32bc12cc82f..31f9ba11a921 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -652,6 +652,7 @@ static enum resp_states atomic_reply(struct rxe_qp *qp,
 
 	ret = RESPST_ACKNOWLEDGE;
 out:
+	rxe_unmap_vaddr(mr, vaddr);
 	return ret;
 }
 
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index bcdfdadaebbc..13e4d660cb02 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -948,16 +948,12 @@ static int rxe_set_page(struct ib_mr *ibmr, u64 addr)
 {
 	struct rxe_mr *mr = to_rmr(ibmr);
 	struct rxe_map *map;
-	struct rxe_phys_buf *buf;
 
 	if (unlikely(mr->nbuf == mr->num_buf))
 		return -ENOMEM;
 
 	map = mr->map[mr->nbuf / RXE_BUF_PER_MAP];
-	buf = &map->buf[mr->nbuf % RXE_BUF_PER_MAP];
-
-	buf->addr = addr;
-	buf->size = ibmr->page_size;
+	map->addrs[mr->nbuf % RXE_BUF_PER_MAP] = addr;
 	mr->nbuf++;
 
 	return 0;
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 22a299b0a9f0..d136f02d5b56 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -277,15 +277,10 @@ enum rxe_mr_lookup_type {
 	RXE_LOOKUP_REMOTE,
 };
 
-#define RXE_BUF_PER_MAP		(PAGE_SIZE / sizeof(struct rxe_phys_buf))
-
-struct rxe_phys_buf {
-	u64	addr;
-	u64	size;
-};
+#define RXE_BUF_PER_MAP		(PAGE_SIZE / sizeof(u64))
 
 struct rxe_map {
-	struct rxe_phys_buf	buf[RXE_BUF_PER_MAP];
+	u64	addrs[RXE_BUF_PER_MAP];
 };
 
 static inline int rkey_is_mw(u32 rkey)
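[Editor's note: for readers skimming the diff, a condensed sketch of how the new helper pair is meant to be called. rxe_map_to_vaddr(), rxe_unmap_vaddr(), IB_MR_TYPE_USER and the (m, i) map/addr indexing come from the patch above; the wrapper function itself is hypothetical and assumes it lives inside the patched driver.]

/*
 * Hypothetical caller of the helpers added by this patch; everything
 * except rxe_map_to_vaddr()/rxe_unmap_vaddr() is illustrative.
 */
static int sketch_copy_one_page(struct rxe_mr *mr, int m, int i,
				size_t offset, void *dst, size_t bytes)
{
	/* For IB_MR_TYPE_USER this kmaps the page; otherwise it just
	 * returns the stored kernel virtual address plus offset. */
	u8 *va = rxe_map_to_vaddr(mr, m, i, offset);

	if (!va)
		return -EFAULT;

	memcpy(dst, va, bytes);

	/* kunmap_local() for IB_MR_TYPE_USER MRs; a no-op otherwise. */
	rxe_unmap_vaddr(mr, va);
	return 0;
}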