Message ID | 20190528124618.77918-5-galpress@amazon.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | EFA updates 2019-05-28 | expand |
On Tue, May 28, 2019 at 03:46:16PM +0300, Gal Pressman wrote: > @@ -1500,13 +1443,17 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, > params.iova = virt_addr; > params.mr_length_in_bytes = length; > params.permissions = access_flags & 0x1; > - max_page_shift = fls64(dev->dev_attr.page_size_cap); > > - efa_cont_pages(mr->umem, start, max_page_shift, &npages, > - ¶ms.page_shift, ¶ms.page_num); > + pg_sz = ib_umem_find_best_pgsz(mr->umem, > + dev->dev_attr.page_size_cap, > + virt_addr); I think this needs to check pg_sz is not zero.. Jason
> Subject: [PATCH for-rc 4/6] RDMA/efa: Use API to get contiguous memory blocks > aligned to device supported page size > > Use the ib_umem_find_best_pgsz() and rdma_for_each_block() API when > registering an MR instead of coding it in the driver. > > ib_umem_find_best_pgsz() is used to find the best suitable page size which > replaces the existing efa_cont_pages() implementation. > rdma_for_each_block() is used to iterate the umem in aligned contiguous memory > blocks. > > Cc: Shiraz Saleem <shiraz.saleem@intel.com> > Reviewed-by: Firas JahJah <firasj@amazon.com> > Signed-off-by: Gal Pressman <galpress@amazon.com> > --- > drivers/infiniband/hw/efa/efa_verbs.c | 81 +++++---------------------- > 1 file changed, 14 insertions(+), 67 deletions(-) > > diff --git a/drivers/infiniband/hw/efa/efa_verbs.c > b/drivers/infiniband/hw/efa/efa_verbs.c > index 0640c2435f67..c1246c39f234 100644 > --- a/drivers/infiniband/hw/efa/efa_verbs.c > +++ b/drivers/infiniband/hw/efa/efa_verbs.c > @@ -1054,21 +1054,15 @@ static int umem_to_page_list(struct efa_dev *dev, > u8 hp_shift) > { > u32 pages_in_hp = BIT(hp_shift - PAGE_SHIFT); > - struct sg_dma_page_iter sg_iter; > - unsigned int page_idx = 0; > + struct ib_block_iter biter; > unsigned int hp_idx = 0; > > ibdev_dbg(&dev->ibdev, "hp_cnt[%u], pages_in_hp[%u]\n", > hp_cnt, pages_in_hp); > > - for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) > { > - if (page_idx % pages_in_hp == 0) { > - page_list[hp_idx] = sg_page_iter_dma_address(&sg_iter); > - hp_idx++; > - } > - > - page_idx++; > - } > + rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap, > + BIT(hp_shift)) > + page_list[hp_idx++] = rdma_block_iter_dma_address(&biter); > > return 0; > } > @@ -1402,56 +1396,6 @@ static int efa_create_pbl(struct efa_dev *dev, > return 0; > } > > -static void efa_cont_pages(struct ib_umem *umem, u64 addr, > - unsigned long max_page_shift, > - int *count, u8 *shift, u32 *ncont) > -{ > - struct scatterlist *sg; > - u64 base = ~0, p = 0; > - unsigned long tmp; > - unsigned long m; > - u64 len, pfn; > - int i = 0; > - int entry; > - > - addr = addr >> PAGE_SHIFT; > - tmp = (unsigned long)addr; > - m = find_first_bit(&tmp, BITS_PER_LONG); > - if (max_page_shift) > - m = min_t(unsigned long, max_page_shift - PAGE_SHIFT, m); > - > - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { > - len = DIV_ROUND_UP(sg_dma_len(sg), PAGE_SIZE); > - pfn = sg_dma_address(sg) >> PAGE_SHIFT; > - if (base + p != pfn) { > - /* > - * If either the offset or the new > - * base are unaligned update m > - */ > - tmp = (unsigned long)(pfn | p); > - if (!IS_ALIGNED(tmp, 1 << m)) > - m = find_first_bit(&tmp, BITS_PER_LONG); > - > - base = pfn; > - p = 0; > - } > - > - p += len; > - i += len; > - } > - > - if (i) { > - m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m); > - *ncont = DIV_ROUND_UP(i, (1 << m)); > - } else { > - m = 0; > - *ncont = 0; > - } > - > - *shift = PAGE_SHIFT + m; > - *count = i; > -} > - Leon - perhaps mlx5_ib_cont_pages() can also be replaced with the new core helper? Reviewed-by: Shiraz Saleem <shiraz.saleem@intel.com>
> Subject: Re: [PATCH for-rc 4/6] RDMA/efa: Use API to get contiguous memory > blocks aligned to device supported page size > > On Tue, May 28, 2019 at 03:46:16PM +0300, Gal Pressman wrote: > > @@ -1500,13 +1443,17 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 > start, u64 length, > > params.iova = virt_addr; > > params.mr_length_in_bytes = length; > > params.permissions = access_flags & 0x1; > > - max_page_shift = fls64(dev->dev_attr.page_size_cap); > > > > - efa_cont_pages(mr->umem, start, max_page_shift, &npages, > > - ¶ms.page_shift, ¶ms.page_num); > > + pg_sz = ib_umem_find_best_pgsz(mr->umem, > > + dev->dev_attr.page_size_cap, > > + virt_addr); > > I think this needs to check pg_sz is not zero.. > What is the smallest page size this driver supports?
On 29/05/2019 20:20, Saleem, Shiraz wrote: >> Subject: Re: [PATCH for-rc 4/6] RDMA/efa: Use API to get contiguous memory >> blocks aligned to device supported page size >> >> On Tue, May 28, 2019 at 03:46:16PM +0300, Gal Pressman wrote: >>> @@ -1500,13 +1443,17 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 >> start, u64 length, >>> params.iova = virt_addr; >>> params.mr_length_in_bytes = length; >>> params.permissions = access_flags & 0x1; >>> - max_page_shift = fls64(dev->dev_attr.page_size_cap); >>> >>> - efa_cont_pages(mr->umem, start, max_page_shift, &npages, >>> - ¶ms.page_shift, ¶ms.page_num); >>> + pg_sz = ib_umem_find_best_pgsz(mr->umem, >>> + dev->dev_attr.page_size_cap, >>> + virt_addr); >> >> I think this needs to check pg_sz is not zero.. >> > > What is the smallest page size this driver supports? The page size capability is queried from the device, the smallest page size is currently 4k. Isn't PAGE_SIZE always supported? What did drivers do before ib_umem_find_best_pgsz() existed in case they didn't support PAGE_SIZE? I doubt there is/will be a real use-case where it matters for EFA, but I can add the check to be on the safe side.
On Wed, May 29, 2019 at 10:35:58PM +0300, Gal Pressman wrote: > On 29/05/2019 20:20, Saleem, Shiraz wrote: > >> Subject: Re: [PATCH for-rc 4/6] RDMA/efa: Use API to get contiguous memory > >> blocks aligned to device supported page size > >> > >> On Tue, May 28, 2019 at 03:46:16PM +0300, Gal Pressman wrote: > >>> @@ -1500,13 +1443,17 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 > >> start, u64 length, > >>> params.iova = virt_addr; > >>> params.mr_length_in_bytes = length; > >>> params.permissions = access_flags & 0x1; > >>> - max_page_shift = fls64(dev->dev_attr.page_size_cap); > >>> > >>> - efa_cont_pages(mr->umem, start, max_page_shift, &npages, > >>> - ¶ms.page_shift, ¶ms.page_num); > >>> + pg_sz = ib_umem_find_best_pgsz(mr->umem, > >>> + dev->dev_attr.page_size_cap, > >>> + virt_addr); > >> > >> I think this needs to check pg_sz is not zero.. > >> > > > > What is the smallest page size this driver supports? > > The page size capability is queried from the device, the smallest page size is > currently 4k. > > Isn't PAGE_SIZE always supported? No, PAGE_SIZE is only supported if the device supports PAGE_SIZE or smaller blocks. > What did drivers do before ib_umem_find_best_pgsz() existed in case > they didn't support PAGE_SIZE? Most malfunctioned. > I doubt there is/will be a real use-case where it matters for EFA, > but I can add the check to be on the safe side. Please, I want the reference implementations of this API to be correct for reference by others. Jason
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 0640c2435f67..c1246c39f234 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -1054,21 +1054,15 @@ static int umem_to_page_list(struct efa_dev *dev, u8 hp_shift) { u32 pages_in_hp = BIT(hp_shift - PAGE_SHIFT); - struct sg_dma_page_iter sg_iter; - unsigned int page_idx = 0; + struct ib_block_iter biter; unsigned int hp_idx = 0; ibdev_dbg(&dev->ibdev, "hp_cnt[%u], pages_in_hp[%u]\n", hp_cnt, pages_in_hp); - for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { - if (page_idx % pages_in_hp == 0) { - page_list[hp_idx] = sg_page_iter_dma_address(&sg_iter); - hp_idx++; - } - - page_idx++; - } + rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap, + BIT(hp_shift)) + page_list[hp_idx++] = rdma_block_iter_dma_address(&biter); return 0; } @@ -1402,56 +1396,6 @@ static int efa_create_pbl(struct efa_dev *dev, return 0; } -static void efa_cont_pages(struct ib_umem *umem, u64 addr, - unsigned long max_page_shift, - int *count, u8 *shift, u32 *ncont) -{ - struct scatterlist *sg; - u64 base = ~0, p = 0; - unsigned long tmp; - unsigned long m; - u64 len, pfn; - int i = 0; - int entry; - - addr = addr >> PAGE_SHIFT; - tmp = (unsigned long)addr; - m = find_first_bit(&tmp, BITS_PER_LONG); - if (max_page_shift) - m = min_t(unsigned long, max_page_shift - PAGE_SHIFT, m); - - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - len = DIV_ROUND_UP(sg_dma_len(sg), PAGE_SIZE); - pfn = sg_dma_address(sg) >> PAGE_SHIFT; - if (base + p != pfn) { - /* - * If either the offset or the new - * base are unaligned update m - */ - tmp = (unsigned long)(pfn | p); - if (!IS_ALIGNED(tmp, 1 << m)) - m = find_first_bit(&tmp, BITS_PER_LONG); - - base = pfn; - p = 0; - } - - p += len; - i += len; - } - - if (i) { - m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m); - *ncont = DIV_ROUND_UP(i, (1 << m)); - } else { - m = 0; - *ncont = 0; - } - - *shift = PAGE_SHIFT + m; - *count = i; -} - struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata) @@ -1459,11 +1403,10 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, struct efa_dev *dev = to_edev(ibpd->device); struct efa_com_reg_mr_params params = {}; struct efa_com_reg_mr_result result = {}; - unsigned long max_page_shift; struct pbl_context pbl; + unsigned int pg_sz; struct efa_mr *mr; int inline_size; - int npages; int err; if (udata->inlen && @@ -1500,13 +1443,17 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, params.iova = virt_addr; params.mr_length_in_bytes = length; params.permissions = access_flags & 0x1; - max_page_shift = fls64(dev->dev_attr.page_size_cap); - efa_cont_pages(mr->umem, start, max_page_shift, &npages, - ¶ms.page_shift, ¶ms.page_num); + pg_sz = ib_umem_find_best_pgsz(mr->umem, + dev->dev_attr.page_size_cap, + virt_addr); + params.page_shift = __ffs(pg_sz); + params.page_num = DIV_ROUND_UP(length + (start & (pg_sz - 1)), + pg_sz); + ibdev_dbg(&dev->ibdev, - "start %#llx length %#llx npages %d params.page_shift %u params.page_num %u\n", - start, length, npages, params.page_shift, params.page_num); + "start %#llx length %#llx params.page_shift %u params.page_num %u\n", + start, length, params.page_shift, params.page_num); inline_size = ARRAY_SIZE(params.pbl.inline_pbl_array); if (params.page_num <= inline_size) {