Message ID | 20181224223227.18016-4-shiraz.saleem@intel.com (mailing list archive)
---|---
State | Changes Requested
Series | Introduce APIs to get DMA addresses aligned to a HW supported page size

On Mon, Dec 24, 2018 at 04:32:24PM -0600, Shiraz Saleem wrote:
> This helper iterates the SG list and returns suitable
> HW aligned DMA addresses within a driver supported
> page size. The implementation is intended to work for
> HW that support single page sizes or mixed page sizes
> in an MR. This avoids the need for having driver specific
> algorithms to achieve the same thing and redundant walks
> of the SG list.
>
> Suggested-by: Jason Gunthorpe <jgg@ziepe.ca>
> Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
> Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
>  drivers/infiniband/core/umem.c | 78 ++++++++++++++++++++++++++++++++++++++++++
>  include/rdma/ib_umem.h         | 23 +++++++++++++
>  2 files changed, 101 insertions(+)
>
> diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
> index b2f2d75..8f8a1f6 100644
> +++ b/drivers/infiniband/core/umem.c
> @@ -204,6 +204,84 @@ unsigned long ib_umem_find_single_pg_size(struct ib_umem *umem,
>  }
>  EXPORT_SYMBOL(ib_umem_find_single_pg_size);
>
> +static unsigned int ib_umem_find_mixed_pg_bit(struct scatterlist *sgl_head,
> +                                              struct sg_phys_iter *sg_phys_iter)
> +{
> +        unsigned long dma_addr_start, dma_addr_end;
> +
> +        dma_addr_start = sg_dma_address(sg_phys_iter->sg);
> +        dma_addr_end = sg_dma_address(sg_phys_iter->sg) +
> +                       sg_dma_len(sg_phys_iter->sg);
> +
> +        if (sg_phys_iter->sg == sgl_head)
> +                return ib_umem_find_pg_bit(dma_addr_end,
> +                                           sg_phys_iter->supported_pgsz);
> +        else if (sg_is_last(sg_phys_iter->sg))
> +                return ib_umem_find_pg_bit(dma_addr_start,
> +                                           sg_phys_iter->supported_pgsz);
> +        else
> +                return ib_umem_find_pg_bit(sg_phys_iter->phyaddr,
> +                                           sg_phys_iter->supported_pgsz);
> +}
> +void ib_umem_start_phys_iter(struct ib_umem *umem,
> +                             struct sg_phys_iter *sg_phys_iter,
> +                             unsigned long supported_pgsz)
> +{
> +        memset(sg_phys_iter, 0, sizeof(struct sg_phys_iter));
> +        sg_phys_iter->sg = umem->sg_head.sgl;
> +        sg_phys_iter->supported_pgsz = supported_pgsz;
> +
> +        /* Single page support in MR */
> +        if (hweight_long(supported_pgsz) == 1)
> +                sg_phys_iter->pg_bit = fls64(supported_pgsz) - 1;
> +        else
> +                sg_phys_iter->mixed_pg_support = true;
> +}
> +EXPORT_SYMBOL(ib_umem_start_phys_iter);
> +
> +/**
> + * ib_umem_next_phys_iter - SG list iterator that returns aligned HW address
> + * @umem: umem struct
> + * @sg_phys_iter: SG HW address iterator
> + * @supported_pgsz: bitmask of HW supported page sizes
> + *
> + * This helper iterates over the SG list and returns the HW
> + * address aligned to a supported HW page size.

I think more documentation is appropriate here..

Each true result returns a contiguous aligned block of memory such
that:

- pg_bit indicates the alignment of this block such that
  phyaddr & ((1 << pg_bit) - 1) == 0

- All blocks except the starting block have a zero offset.
  For the starting block, offset indicates the first valid byte in the
  MR; HW should not permit access to bytes earlier than offset.

  [And how is this being done in i40iw? A side effect of the user
  virtual base address?]

- For all blocks except the last, len + offset equals 1 << pg_bit

False is returned when iteration is completed and all blocks have been
seen.

> + */
> +bool ib_umem_next_phys_iter(struct ib_umem *umem,
> +                            struct sg_phys_iter *sg_phys_iter)
> +{
> +        unsigned long pg_mask;
> +
> +        if (!sg_phys_iter->sg || !sg_phys_iter->supported_pgsz)
> +                return false;
> +
> +        if (sg_phys_iter->remaining) {
> +                sg_phys_iter->phyaddr += (sg_phys_iter->len + sg_phys_iter->offset);
> +        } else {
> +                sg_phys_iter->phyaddr = sg_dma_address(sg_phys_iter->sg);
> +                sg_phys_iter->remaining = sg_dma_len(sg_phys_iter->sg);
> +        }
> +
> +        /* Mixed page support in MR*/
> +        if (sg_phys_iter->mixed_pg_support)
> +                sg_phys_iter->pg_bit = ib_umem_find_mixed_pg_bit(umem->sg_head.sgl,
> +                                                                 sg_phys_iter);
> +
> +        pg_mask = ~(BIT_ULL(sg_phys_iter->pg_bit) - 1);
> +
> +        sg_phys_iter->offset = sg_phys_iter->phyaddr & ~pg_mask;
> +        sg_phys_iter->phyaddr = sg_phys_iter->phyaddr & pg_mask;
> +        sg_phys_iter->len = min_t(unsigned long, sg_phys_iter->remaining,
> +                                  BIT_ULL(sg_phys_iter->pg_bit) - sg_phys_iter->offset);

Maybe drop len and offset? Nothing uses them in this series, right?

Seems fine otherwise

Jason
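
For illustration, a minimal sketch of how a driver might consume the
proposed iterator to build a physical buffer list. my_drv_fill_pbl(),
the pbl array and the SZ_4K | SZ_2M mask are hypothetical; only the
ib_umem_*() calls and the sg_phys_iter fields come from the patch under
review:

#include <linux/sizes.h>
#include <rdma/ib_umem.h>

/* Hypothetical consumer of the proposed iterator API (sketch only). */
static int my_drv_fill_pbl(struct ib_umem *umem, u64 *pbl, u32 max_entries)
{
        struct sg_phys_iter iter;
        u32 nents = 0;

        /* e.g. HW that can mix 4K and 2M pages within one MR */
        ib_umem_start_phys_iter(umem, &iter, SZ_4K | SZ_2M);

        while (ib_umem_next_phys_iter(umem, &iter)) {
                if (nents == max_entries)
                        return -ENOMEM;
                /* iter.phyaddr is aligned to 1 << iter.pg_bit; with mixed
                 * page support, pg_bit may differ from block to block.
                 */
                pbl[nents++] = iter.phyaddr;
        }

        return nents;
}
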
On Wed, Jan 02, 2019 at 05:19:55PM -0700, Jason Gunthorpe wrote:
> On Mon, Dec 24, 2018 at 04:32:24PM -0600, Shiraz Saleem wrote:
> > This helper iterates the SG list and returns suitable
> > HW aligned DMA addresses within a driver supported
> > page size. The implementation is intended to work for
> > HW that support single page sizes or mixed page sizes
> > in an MR. This avoids the need for having driver specific
> > algorithms to achieve the same thing and redundant walks
> > of the SG list.

[..]

> > + * ib_umem_next_phys_iter - SG list iterator that returns aligned HW address
> > + * @umem: umem struct
> > + * @sg_phys_iter: SG HW address iterator
> > + * @supported_pgsz: bitmask of HW supported page sizes
> > + *
> > + * This helper iterates over the SG list and returns the HW
> > + * address aligned to a supported HW page size.
>
> I think more documentation is appropriate here..

OK.

> Each true result returns a contiguous aligned block of memory such
> that:
>
> - pg_bit indicates the alignment of this block such that
>   phyaddr & ((1 << pg_bit) - 1) == 0
>
> - All blocks except the starting block have a zero offset.
>   For the starting block, offset indicates the first valid byte in the
>   MR; HW should not permit access to bytes earlier than offset.
>
>   [And how is this being done in i40iw? A side effect of the user
>   virtual base address?]

Yes, we have that restriction that the start offsets align for the
user-VA buf and the physical buf, and we use the least significant bits
of the VA.

> - For all blocks except the last, len + offset equals 1 << pg_bit
>
> False is returned when iteration is completed and all blocks have been
> seen.
>
> > + */
> > +bool ib_umem_next_phys_iter(struct ib_umem *umem,
> > +                            struct sg_phys_iter *sg_phys_iter)
> > +{
> > +        unsigned long pg_mask;
> > +
> > +        if (!sg_phys_iter->sg || !sg_phys_iter->supported_pgsz)
> > +                return false;
> > +
> > +        if (sg_phys_iter->remaining) {
> > +                sg_phys_iter->phyaddr += (sg_phys_iter->len + sg_phys_iter->offset);
> > +        } else {
> > +                sg_phys_iter->phyaddr = sg_dma_address(sg_phys_iter->sg);
> > +                sg_phys_iter->remaining = sg_dma_len(sg_phys_iter->sg);
> > +        }
> > +
> > +        /* Mixed page support in MR*/
> > +        if (sg_phys_iter->mixed_pg_support)
> > +                sg_phys_iter->pg_bit = ib_umem_find_mixed_pg_bit(umem->sg_head.sgl,
> > +                                                                 sg_phys_iter);
> > +
> > +        pg_mask = ~(BIT_ULL(sg_phys_iter->pg_bit) - 1);
> > +
> > +        sg_phys_iter->offset = sg_phys_iter->phyaddr & ~pg_mask;
> > +        sg_phys_iter->phyaddr = sg_phys_iter->phyaddr & pg_mask;
> > +        sg_phys_iter->len = min_t(unsigned long, sg_phys_iter->remaining,
> > +                                  BIT_ULL(sg_phys_iter->pg_bit) - sg_phys_iter->offset);
>
> Maybe drop len and offset? Nothing uses them in this series, right?

The individual drivers don't use them in the series, but they could in
the future to get the len/offset of each aligned block. They are
currently just internal tracking fields that, in my view, improve
readability in this function.

Shiraz
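
To make the offset/len bookkeeping discussed above concrete, here is a
small worked example that mirrors the masking in
ib_umem_next_phys_iter(). The addresses, lengths and the 2M block size
are arbitrary assumptions; the snippet is a sketch, not part of the
patch:

#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/sizes.h>

/* Arbitrary first-SGE values: a DMA address 4K into a 2M block. */
static void example_first_block(void)
{
        unsigned int pg_bit = 21;                      /* HW picked a 2M block */
        unsigned long phyaddr = 0x100201000UL;         /* sg_dma_address() */
        unsigned long remaining = SZ_4M;               /* sg_dma_len() */
        unsigned long pg_mask = ~(BIT_ULL(pg_bit) - 1);
        unsigned long offset = phyaddr & ~pg_mask;     /* 0x1000 */
        unsigned long aligned = phyaddr & pg_mask;     /* 0x100200000 */
        unsigned long len = min_t(unsigned long, remaining,
                                  BIT_ULL(pg_bit) - offset);  /* 2M - 4K */

        /* HW is programmed with 'aligned'; the first 'offset' bytes of that
         * block are not part of the MR, which the driver enforces via the
         * low bits of the user VA as described above.
         */
        pr_debug("aligned=%#lx offset=%#lx len=%#lx\n", aligned, offset, len);
}
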
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index b2f2d75..8f8a1f6 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -204,6 +204,84 @@ unsigned long ib_umem_find_single_pg_size(struct ib_umem *umem,
 }
 EXPORT_SYMBOL(ib_umem_find_single_pg_size);
 
+static unsigned int ib_umem_find_mixed_pg_bit(struct scatterlist *sgl_head,
+                                              struct sg_phys_iter *sg_phys_iter)
+{
+        unsigned long dma_addr_start, dma_addr_end;
+
+        dma_addr_start = sg_dma_address(sg_phys_iter->sg);
+        dma_addr_end = sg_dma_address(sg_phys_iter->sg) +
+                       sg_dma_len(sg_phys_iter->sg);
+
+        if (sg_phys_iter->sg == sgl_head)
+                return ib_umem_find_pg_bit(dma_addr_end,
+                                           sg_phys_iter->supported_pgsz);
+        else if (sg_is_last(sg_phys_iter->sg))
+                return ib_umem_find_pg_bit(dma_addr_start,
+                                           sg_phys_iter->supported_pgsz);
+        else
+                return ib_umem_find_pg_bit(sg_phys_iter->phyaddr,
+                                           sg_phys_iter->supported_pgsz);
+}
+void ib_umem_start_phys_iter(struct ib_umem *umem,
+                             struct sg_phys_iter *sg_phys_iter,
+                             unsigned long supported_pgsz)
+{
+        memset(sg_phys_iter, 0, sizeof(struct sg_phys_iter));
+        sg_phys_iter->sg = umem->sg_head.sgl;
+        sg_phys_iter->supported_pgsz = supported_pgsz;
+
+        /* Single page support in MR */
+        if (hweight_long(supported_pgsz) == 1)
+                sg_phys_iter->pg_bit = fls64(supported_pgsz) - 1;
+        else
+                sg_phys_iter->mixed_pg_support = true;
+}
+EXPORT_SYMBOL(ib_umem_start_phys_iter);
+
+/**
+ * ib_umem_next_phys_iter - SG list iterator that returns aligned HW address
+ * @umem: umem struct
+ * @sg_phys_iter: SG HW address iterator
+ * @supported_pgsz: bitmask of HW supported page sizes
+ *
+ * This helper iterates over the SG list and returns the HW
+ * address aligned to a supported HW page size.
+ */
+bool ib_umem_next_phys_iter(struct ib_umem *umem,
+                            struct sg_phys_iter *sg_phys_iter)
+{
+        unsigned long pg_mask;
+
+        if (!sg_phys_iter->sg || !sg_phys_iter->supported_pgsz)
+                return false;
+
+        if (sg_phys_iter->remaining) {
+                sg_phys_iter->phyaddr += (sg_phys_iter->len + sg_phys_iter->offset);
+        } else {
+                sg_phys_iter->phyaddr = sg_dma_address(sg_phys_iter->sg);
+                sg_phys_iter->remaining = sg_dma_len(sg_phys_iter->sg);
+        }
+
+        /* Mixed page support in MR*/
+        if (sg_phys_iter->mixed_pg_support)
+                sg_phys_iter->pg_bit = ib_umem_find_mixed_pg_bit(umem->sg_head.sgl,
+                                                                 sg_phys_iter);
+
+        pg_mask = ~(BIT_ULL(sg_phys_iter->pg_bit) - 1);
+
+        sg_phys_iter->offset = sg_phys_iter->phyaddr & ~pg_mask;
+        sg_phys_iter->phyaddr = sg_phys_iter->phyaddr & pg_mask;
+        sg_phys_iter->len = min_t(unsigned long, sg_phys_iter->remaining,
+                                  BIT_ULL(sg_phys_iter->pg_bit) - sg_phys_iter->offset);
+        sg_phys_iter->remaining -= sg_phys_iter->len;
+        if (!sg_phys_iter->remaining)
+                sg_phys_iter->sg = sg_next(sg_phys_iter->sg);
+
+        return true;
+}
+EXPORT_SYMBOL(ib_umem_next_phys_iter);
+
 /**
  * ib_umem_get - Pin and DMA map userspace memory.
  *
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 3e8e1ed..57cc621 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -55,6 +55,17 @@ struct ib_umem {
         int npages;
 };
 
+struct sg_phys_iter {
+        struct scatterlist *sg;
+        unsigned long phyaddr;
+        unsigned long len;
+        unsigned long offset;
+        unsigned long supported_pgsz;
+        unsigned long remaining;
+        unsigned int pg_bit;
+        u8 mixed_pg_support;
+};
+
 /* Returns the offset of the umem start relative to the first page. */
 static inline int ib_umem_offset(struct ib_umem *umem)
 {
@@ -89,6 +100,11 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
 unsigned long ib_umem_find_single_pg_size(struct ib_umem *umem,
                                           unsigned long supported_pgsz,
                                           unsigned long uvirt_addr);
+void ib_umem_start_phys_iter(struct ib_umem *umem,
+                             struct sg_phys_iter *sg_phys_iter,
+                             unsigned long supported_pgsz);
+bool ib_umem_next_phys_iter(struct ib_umem *umem,
+                            struct sg_phys_iter *sg_phys_iter);
 
 #else /* CONFIG_INFINIBAND_USER_MEM */
 
@@ -110,6 +126,13 @@ static inline int ib_umem_find_single_pg_size(struct ib_umem *umem,
                                               unsigned long uvirt_addr) {
         return -EINVAL;
 }
+static inline void ib_umem_start_phys_iter(struct ib_umem *umem,
+                                           struct sg_phys_iter *sg_phys_iter,
+                                           unsigned long supported_pgsz) { }
+static inline bool ib_umem_next_phys_iter(struct ib_umem *umem,
+                                          struct sg_phys_iter *sg_phys_iter) {
+        return false;
+}
 
 #endif /* CONFIG_INFINIBAND_USER_MEM */
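
As a quick standalone sanity check of the single-vs-mixed page-size
branch in ib_umem_start_phys_iter() above, the following plain C
program (the masks are assumed example values; __builtin_popcountl()
and __builtin_clzll() stand in for the kernel's hweight_long() and
fls64()) prints pg_bit = 21 for a 2M-only mask:

#include <stdio.h>

int main(void)
{
        unsigned long single = 0x200000;           /* SZ_2M only */
        unsigned long mixed  = 0x1000 | 0x200000;  /* SZ_4K | SZ_2M */

        /* One supported size: pg_bit is fixed up front, fls64(pgsz) - 1. */
        if (__builtin_popcountl(single) == 1)
                printf("single: pg_bit = %d\n", 63 - __builtin_clzll(single));

        /* Several supported sizes: mixed_pg_support is set instead and
         * pg_bit is chosen per block by ib_umem_find_mixed_pg_bit().
         */
        if (__builtin_popcountl(mixed) != 1)
                printf("mixed: pg_bit chosen per block\n");

        return 0;
}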