Message ID | 20200619103636.11974-14-m.szyprowski@samsung.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | None | expand |
On Fri, Jun 19, 2020 at 3:37 AM Marek Szyprowski <m.szyprowski@samsung.com> wrote: > > The Documentation/DMA-API-HOWTO.txt states that the dma_map_sg() function > returns the number of the created entries in the DMA address space. > However the subsequent calls to the dma_sync_sg_for_{device,cpu}() and > dma_unmap_sg must be called with the original number of the entries > passed to the dma_map_sg(). > > struct sg_table is a common structure used for describing a non-contiguous > memory buffer, used commonly in the DRM and graphics subsystems. It > consists of a scatterlist with memory pages and DMA addresses (sgl entry), > as well as the number of scatterlist entries: CPU pages (orig_nents entry) > and DMA mapped pages (nents entry). > > It turned out that it was a common mistake to misuse nents and orig_nents > entries, calling DMA-mapping functions with a wrong number of entries or > ignoring the number of mapped entries returned by the dma_map_sg() > function. > > To avoid such issues, lets use a common dma-mapping wrappers operating > directly on the struct sg_table objects and use scatterlist page > iterators where possible. This, almost always, hides references to the > nents and orig_nents entries, making the code robust, easier to follow > and copy/paste safe. > > Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com> Acked-by: Rob Clark <robdclark@gmail.com> (let me know if you want me to take this one in via msm-next or if the plan is to take the series via drm-misc) > --- > drivers/gpu/drm/msm/msm_gem.c | 13 +++++-------- > drivers/gpu/drm/msm/msm_gpummu.c | 14 ++++++-------- > drivers/gpu/drm/msm/msm_iommu.c | 2 +- > 3 files changed, 12 insertions(+), 17 deletions(-) > > diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c > index 38b0c0e1f83e..e0d5fd36ea8f 100644 > --- a/drivers/gpu/drm/msm/msm_gem.c > +++ b/drivers/gpu/drm/msm/msm_gem.c > @@ -53,11 +53,10 @@ static void sync_for_device(struct msm_gem_object *msm_obj) > struct device *dev = msm_obj->base.dev->dev; > > if (get_dma_ops(dev) && IS_ENABLED(CONFIG_ARM64)) { > - dma_sync_sg_for_device(dev, msm_obj->sgt->sgl, > - msm_obj->sgt->nents, DMA_BIDIRECTIONAL); > + dma_sync_sgtable_for_device(dev, msm_obj->sgt, > + DMA_BIDIRECTIONAL); > } else { > - dma_map_sg(dev, msm_obj->sgt->sgl, > - msm_obj->sgt->nents, DMA_BIDIRECTIONAL); > + dma_map_sgtable(dev, msm_obj->sgt, DMA_BIDIRECTIONAL, 0); > } > } > > @@ -66,11 +65,9 @@ static void sync_for_cpu(struct msm_gem_object *msm_obj) > struct device *dev = msm_obj->base.dev->dev; > > if (get_dma_ops(dev) && IS_ENABLED(CONFIG_ARM64)) { > - dma_sync_sg_for_cpu(dev, msm_obj->sgt->sgl, > - msm_obj->sgt->nents, DMA_BIDIRECTIONAL); > + dma_sync_sgtable_for_cpu(dev, msm_obj->sgt, DMA_BIDIRECTIONAL); > } else { > - dma_unmap_sg(dev, msm_obj->sgt->sgl, > - msm_obj->sgt->nents, DMA_BIDIRECTIONAL); > + dma_unmap_sgtable(dev, msm_obj->sgt, DMA_BIDIRECTIONAL, 0); > } > } > > diff --git a/drivers/gpu/drm/msm/msm_gpummu.c b/drivers/gpu/drm/msm/msm_gpummu.c > index 310a31b05faa..319f06c28235 100644 > --- a/drivers/gpu/drm/msm/msm_gpummu.c > +++ b/drivers/gpu/drm/msm/msm_gpummu.c > @@ -30,21 +30,19 @@ static int msm_gpummu_map(struct msm_mmu *mmu, uint64_t iova, > { > struct msm_gpummu *gpummu = to_msm_gpummu(mmu); > unsigned idx = (iova - GPUMMU_VA_START) / GPUMMU_PAGE_SIZE; > - struct scatterlist *sg; > + struct sg_dma_page_iter dma_iter; > unsigned prot_bits = 0; > - unsigned i, j; > > if (prot & IOMMU_WRITE) > prot_bits |= 1; > if (prot & IOMMU_READ) > prot_bits |= 2; > > - for_each_sg(sgt->sgl, sg, sgt->nents, i) { > - dma_addr_t addr = sg->dma_address; > - for (j = 0; j < sg->length / GPUMMU_PAGE_SIZE; j++, idx++) { > - gpummu->table[idx] = addr | prot_bits; > - addr += GPUMMU_PAGE_SIZE; > - } > + for_each_sgtable_dma_page(sgt, &dma_iter, 0) { > + dma_addr_t addr = sg_page_iter_dma_address(&dma_iter); > + > + BUILD_BUG_ON(GPUMMU_PAGE_SIZE != PAGE_SIZE); > + gpummu->table[idx++] = addr | prot_bits; > } > > /* we can improve by deferring flush for multiple map() */ > diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c > index 3a381a9674c9..6c31e65834c6 100644 > --- a/drivers/gpu/drm/msm/msm_iommu.c > +++ b/drivers/gpu/drm/msm/msm_iommu.c > @@ -36,7 +36,7 @@ static int msm_iommu_map(struct msm_mmu *mmu, uint64_t iova, > struct msm_iommu *iommu = to_msm_iommu(mmu); > size_t ret; > > - ret = iommu_map_sg(iommu->domain, iova, sgt->sgl, sgt->nents, prot); > + ret = iommu_map_sgtable(iommu->domain, iova, sgt, prot); > WARN_ON(!ret); > > return (ret == len) ? 0 : -EINVAL; > -- > 2.17.1 >
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 38b0c0e1f83e..e0d5fd36ea8f 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -53,11 +53,10 @@ static void sync_for_device(struct msm_gem_object *msm_obj) struct device *dev = msm_obj->base.dev->dev; if (get_dma_ops(dev) && IS_ENABLED(CONFIG_ARM64)) { - dma_sync_sg_for_device(dev, msm_obj->sgt->sgl, - msm_obj->sgt->nents, DMA_BIDIRECTIONAL); + dma_sync_sgtable_for_device(dev, msm_obj->sgt, + DMA_BIDIRECTIONAL); } else { - dma_map_sg(dev, msm_obj->sgt->sgl, - msm_obj->sgt->nents, DMA_BIDIRECTIONAL); + dma_map_sgtable(dev, msm_obj->sgt, DMA_BIDIRECTIONAL, 0); } } @@ -66,11 +65,9 @@ static void sync_for_cpu(struct msm_gem_object *msm_obj) struct device *dev = msm_obj->base.dev->dev; if (get_dma_ops(dev) && IS_ENABLED(CONFIG_ARM64)) { - dma_sync_sg_for_cpu(dev, msm_obj->sgt->sgl, - msm_obj->sgt->nents, DMA_BIDIRECTIONAL); + dma_sync_sgtable_for_cpu(dev, msm_obj->sgt, DMA_BIDIRECTIONAL); } else { - dma_unmap_sg(dev, msm_obj->sgt->sgl, - msm_obj->sgt->nents, DMA_BIDIRECTIONAL); + dma_unmap_sgtable(dev, msm_obj->sgt, DMA_BIDIRECTIONAL, 0); } } diff --git a/drivers/gpu/drm/msm/msm_gpummu.c b/drivers/gpu/drm/msm/msm_gpummu.c index 310a31b05faa..319f06c28235 100644 --- a/drivers/gpu/drm/msm/msm_gpummu.c +++ b/drivers/gpu/drm/msm/msm_gpummu.c @@ -30,21 +30,19 @@ static int msm_gpummu_map(struct msm_mmu *mmu, uint64_t iova, { struct msm_gpummu *gpummu = to_msm_gpummu(mmu); unsigned idx = (iova - GPUMMU_VA_START) / GPUMMU_PAGE_SIZE; - struct scatterlist *sg; + struct sg_dma_page_iter dma_iter; unsigned prot_bits = 0; - unsigned i, j; if (prot & IOMMU_WRITE) prot_bits |= 1; if (prot & IOMMU_READ) prot_bits |= 2; - for_each_sg(sgt->sgl, sg, sgt->nents, i) { - dma_addr_t addr = sg->dma_address; - for (j = 0; j < sg->length / GPUMMU_PAGE_SIZE; j++, idx++) { - gpummu->table[idx] = addr | prot_bits; - addr += GPUMMU_PAGE_SIZE; - } + for_each_sgtable_dma_page(sgt, &dma_iter, 0) { + dma_addr_t addr = sg_page_iter_dma_address(&dma_iter); + + BUILD_BUG_ON(GPUMMU_PAGE_SIZE != PAGE_SIZE); + gpummu->table[idx++] = addr | prot_bits; } /* we can improve by deferring flush for multiple map() */ diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index 3a381a9674c9..6c31e65834c6 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -36,7 +36,7 @@ static int msm_iommu_map(struct msm_mmu *mmu, uint64_t iova, struct msm_iommu *iommu = to_msm_iommu(mmu); size_t ret; - ret = iommu_map_sg(iommu->domain, iova, sgt->sgl, sgt->nents, prot); + ret = iommu_map_sgtable(iommu->domain, iova, sgt, prot); WARN_ON(!ret); return (ret == len) ? 0 : -EINVAL;
The Documentation/DMA-API-HOWTO.txt states that the dma_map_sg() function returns the number of the created entries in the DMA address space. However the subsequent calls to the dma_sync_sg_for_{device,cpu}() and dma_unmap_sg must be called with the original number of the entries passed to the dma_map_sg(). struct sg_table is a common structure used for describing a non-contiguous memory buffer, used commonly in the DRM and graphics subsystems. It consists of a scatterlist with memory pages and DMA addresses (sgl entry), as well as the number of scatterlist entries: CPU pages (orig_nents entry) and DMA mapped pages (nents entry). It turned out that it was a common mistake to misuse nents and orig_nents entries, calling DMA-mapping functions with a wrong number of entries or ignoring the number of mapped entries returned by the dma_map_sg() function. To avoid such issues, lets use a common dma-mapping wrappers operating directly on the struct sg_table objects and use scatterlist page iterators where possible. This, almost always, hides references to the nents and orig_nents entries, making the code robust, easier to follow and copy/paste safe. Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com> --- drivers/gpu/drm/msm/msm_gem.c | 13 +++++-------- drivers/gpu/drm/msm/msm_gpummu.c | 14 ++++++-------- drivers/gpu/drm/msm/msm_iommu.c | 2 +- 3 files changed, 12 insertions(+), 17 deletions(-)