
[v2,1/1] drm: msm: Replace dma_map_sg with dma_sync_sg*

Message ID 20181126213710.3084-1-vivek.gautam@codeaurora.org (mailing list archive)
State New, archived
Series [v2,1/1] drm: msm: Replace dma_map_sg with dma_sync_sg*

Commit Message

Vivek Gautam Nov. 26, 2018, 9:37 p.m. UTC
dma_map_sg() expects a DMA domain. However, the drm devices
have traditionally been using an unmanaged iommu domain, which
is not a DMA-type domain. Using the dma mapping APIs with that
domain is incorrect.

Replace dma_map_sg() calls with dma_sync_sg_for_device{|cpu}()
to do the cache maintenance.

Signed-off-by: Vivek Gautam <vivek.gautam@codeaurora.org>
Suggested-by: Tomasz Figa <tfiga@chromium.org>
Cc: Rob Clark <robdclark@gmail.com>
Cc: Jordan Crouse <jcrouse@codeaurora.org> 
Cc: Sean Paul <seanpaul@chromium.org>
---

Changes since v1:
 - Addressed Jordan's and Tomasz's comments:
   - Moved the sg dma address preparation out of the conditional
     check into the main path, so it is done irrespective of whether
     the buffer is cached or uncached.
   - Enhanced the comment to explain this dma address preparation.

 drivers/gpu/drm/msm/msm_gem.c | 31 ++++++++++++++++++++++---------
 1 file changed, 22 insertions(+), 9 deletions(-)

Comments

Tomasz Figa Nov. 28, 2018, 3:09 a.m. UTC | #1
Hi Vivek,

On Tue, Nov 27, 2018 at 6:37 AM Vivek Gautam
<vivek.gautam@codeaurora.org> wrote:
>
> dma_map_sg() expects a DMA domain. However, the drm devices
> have traditionally been using an unmanaged iommu domain, which
> is not a DMA-type domain. Using the dma mapping APIs with that
> domain is incorrect.
>
> Replace dma_map_sg() calls with dma_sync_sg_for_device{|cpu}()
> to do the cache maintenance.
>
> Signed-off-by: Vivek Gautam <vivek.gautam@codeaurora.org>
> Suggested-by: Tomasz Figa <tfiga@chromium.org>
> Cc: Rob Clark <robdclark@gmail.com>
> Cc: Jordan Crouse <jcrouse@codeaurora.org>
> Cc: Sean Paul <seanpaul@chromium.org>
> ---
>
> Changes since v1:
>  - Addressed Jordan's and Tomasz's comments:
>    - Moved the sg dma address preparation out of the conditional
>      check into the main path, so it is done irrespective of whether
>      the buffer is cached or uncached.
>    - Enhanced the comment to explain this dma address preparation.
>

Thanks for the patch. Some comments inline.

>  drivers/gpu/drm/msm/msm_gem.c | 31 ++++++++++++++++++++++---------
>  1 file changed, 22 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
> index 00c795ced02c..1811ac23a31c 100644
> --- a/drivers/gpu/drm/msm/msm_gem.c
> +++ b/drivers/gpu/drm/msm/msm_gem.c
> @@ -81,6 +81,8 @@ static struct page **get_pages(struct drm_gem_object *obj)
>                 struct drm_device *dev = obj->dev;
>                 struct page **p;
>                 int npages = obj->size >> PAGE_SHIFT;
> +               struct scatterlist *s;
> +               int i;
>
>                 if (use_pages(obj))
>                         p = drm_gem_get_pages(obj);
> @@ -104,12 +106,21 @@ static struct page **get_pages(struct drm_gem_object *obj)
>                         return ptr;
>                 }
>
> -               /* For non-cached buffers, ensure the new pages are clean
> +               /*
> +                * dma_sync_sg_*() flush the physical pages, so point
> +                * sg->dma_address to the physical ones for the right behavior.

The two halves of the sequence don't really relate to each other. An
sglist has the `page` field for the purpose of pointing to physical
pages. The `dma_address` field is for DMA addresses, which are not
equivalent to physical addresses. I'd rewrite it like this:

/*
 * Some implementations of the DMA mapping ops expect
 * physical addresses of the pages to be stored as DMA
 * addresses of the sglist entries. To work around it,
 * set them here explicitly.
 */
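As an aside, a tiny sketch of the distinction above (purely illustrative,
not part of the patch; the helper name is made up):

#include <linux/printk.h>
#include <linux/scatterlist.h>

/* Walk an sg_table and show where each kind of address lives. */
static void example_dump_sgt(struct sg_table *sgt)
{
	struct scatterlist *s;
	int i;

	for_each_sg(sgt->sgl, s, sgt->nents, i) {
		/* Physical location, derived from the page field plus offset. */
		phys_addr_t phys = sg_phys(s);

		/* DMA address, only meaningful once dma_map_sg() has set it. */
		dma_addr_t dma = sg_dma_address(s);

		pr_info("entry %d: phys=%pa dma=%pad len=%u\n",
			i, &phys, &dma, s->length);
	}
}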

> +                */
> +               for_each_sg(msm_obj->sgt->sgl, s, msm_obj->sgt->nents, i)
> +                       sg_dma_address(s) = sg_phys(s);
> +
> +               /*
> +                * For non-cached buffers, ensure the new pages are clean
>                  * because display controller, GPU, etc. are not coherent:
>                  */
> -               if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED))
> -                       dma_map_sg(dev->dev, msm_obj->sgt->sgl,
> -                                       msm_obj->sgt->nents, DMA_BIDIRECTIONAL);
> +               if (msm_obj->flags & (MSM_BO_WC | MSM_BO_UNCACHED))
> +                       dma_sync_sg_for_device(dev->dev, msm_obj->sgt->sgl,
> +                                              msm_obj->sgt->nents,
> +                                              DMA_TO_DEVICE);

Why the change from DMA_BIDIRECTIONAL?

>         }
>
>         return msm_obj->pages;
> @@ -133,14 +144,16 @@ static void put_pages(struct drm_gem_object *obj)
>
>         if (msm_obj->pages) {
>                 if (msm_obj->sgt) {
> -                       /* For non-cached buffers, ensure the new
> +                       /*
> +                        * For non-cached buffers, ensure the new
>                          * pages are clean because display controller,
>                          * GPU, etc. are not coherent:
>                          */
> -                       if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED))
> -                               dma_unmap_sg(obj->dev->dev, msm_obj->sgt->sgl,
> -                                            msm_obj->sgt->nents,
> -                                            DMA_BIDIRECTIONAL);
> +                       if (msm_obj->flags & (MSM_BO_WC | MSM_BO_UNCACHED))
> +                               dma_sync_sg_for_cpu(obj->dev->dev,
> +                                                   msm_obj->sgt->sgl,
> +                                                   msm_obj->sgt->nents,
> +                                                   DMA_FROM_DEVICE);

Ditto.

Best regards,
Tomasz
Christoph Hellwig Nov. 28, 2018, 7:39 a.m. UTC | #2
> +		/*
> +		 * dma_sync_sg_*() flush the physical pages, so point
> +		 * sg->dma_address to the physical ones for the right behavior.
> +		 */
> +		for_each_sg(msm_obj->sgt->sgl, s, msm_obj->sgt->nents, i)
> +			sg_dma_address(s) = sg_phys(s);
> +

I'm sorry, but this is completely bogus and not acceptable.

The only place that is allowed to initialize sg_dma_address is
dma_map_sg.  If the default dma ops don't work for your setup we have
a major problem and need to fix the dma api / iommu integration instead
of hacking around it.
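
For reference, a minimal sketch of the contract described above
(illustrative only; the function is made up): sg_dma_address()/sg_dma_len()
become valid only after dma_map_sg() succeeds, and are invalidated again
by dma_unmap_sg().

#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>

static int example_map_use_unmap(struct device *dev, struct sg_table *sgt)
{
	struct scatterlist *s;
	int i, nents;

	/* dma_map_sg() is the only operation allowed to set the DMA addresses. */
	nents = dma_map_sg(dev, sgt->sgl, sgt->nents, DMA_BIDIRECTIONAL);
	if (nents <= 0)
		return -ENOMEM;

	for_each_sg(sgt->sgl, s, nents, i) {
		/* Only between map and unmap are these values defined. */
		dma_addr_t dma = sg_dma_address(s);
		unsigned int len = sg_dma_len(s);

		(void)dma;
		(void)len;
	}

	dma_unmap_sg(dev, sgt->sgl, sgt->nents, DMA_BIDIRECTIONAL);
	return 0;
}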
Vivek Gautam Nov. 28, 2018, 9:02 a.m. UTC | #3
Hi Christoph,

On Wed, Nov 28, 2018 at 1:10 PM Christoph Hellwig <hch@infradead.org> wrote:
>
> > +             /*
> > +              * dma_sync_sg_*() flush the physical pages, so point
> > +              * sg->dma_address to the physical ones for the right behavior.
> > +              */
> > +             for_each_sg(msm_obj->sgt->sgl, s, msm_obj->sgt->nents, i)
> > +                     sg_dma_address(s) = sg_phys(s);
> > +
>
> I'm sorry, but this is completely bogus and not acceptable.
>
> The only place that is allowed to initialize sg_dma_address is
> dma_map_sg.  If the default dma ops don't work for your setup we have
> a major problem and need to fix the dma api / iommu integration instead
> of hacking around it.

Thanks for reviewing this change.
From what I understand of drm, we don't use the default iommu-dma
domain for drm devices. Rather, we allocate a new iommu domain, and
therefore we can't use the default dma ops. Hence we need separate
dma_sync_sg_*() calls to flush/invalidate the cache, and for that we
need to initialize the sg's dma addresses.
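
Roughly, the pattern I am describing looks like the sketch below
(illustrative only, not the actual msm code; the function name, iova
handling and error paths are simplified):

#include <linux/dma-mapping.h>
#include <linux/iommu.h>
#include <linux/platform_device.h>

static int example_own_domain_map(struct device *dev, struct sg_table *sgt,
				  unsigned long iova)
{
	struct iommu_domain *domain;
	size_t mapped;
	int ret;

	/* The driver owns an unmanaged domain instead of the default DMA domain. */
	domain = iommu_domain_alloc(&platform_bus_type);
	if (!domain)
		return -ENOMEM;

	ret = iommu_attach_device(domain, dev);
	if (ret)
		goto free_domain;

	/* The driver programs its own IOMMU page tables directly ... */
	mapped = iommu_map_sg(domain, iova, sgt->sgl, sgt->nents,
			      IOMMU_READ | IOMMU_WRITE);
	if (!mapped) {
		ret = -ENOMEM;
		goto detach;
	}

	/* ... so the only thing needed from the DMA API is cache maintenance. */
	dma_sync_sg_for_device(dev, sgt->sgl, sgt->nents, DMA_BIDIRECTIONAL);
	return 0;

detach:
	iommu_detach_device(domain, dev);
free_domain:
	iommu_domain_free(domain);
	return ret;
}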

Please correct me if my understanding is incomplete.

Best regards
Vivek



--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation
Rob Clark Nov. 28, 2018, 12:38 p.m. UTC | #4
On Wed, Nov 28, 2018 at 2:39 AM Christoph Hellwig <hch@infradead.org> wrote:
>
> > +             /*
> > +              * dma_sync_sg_*() flush the physical pages, so point
> > +              * sg->dma_address to the physical ones for the right behavior.
> > +              */
> > +             for_each_sg(msm_obj->sgt->sgl, s, msm_obj->sgt->nents, i)
> > +                     sg_dma_address(s) = sg_phys(s);
> > +
>
> I'm sorry, but this is completely bogus and not acceptable.
>
> The only place that is allowed to initialize sg_dma_address is
> dma_map_sg.  If the default dma ops don't work for your setup we have
> a major problem and need to fix the dma api / iommu integration instead
> of hacking around it.

I agree that the dma/iommu integration is very problematic for drm (in
particular, gpu drivers that use the iommu as a gpu mmu)..  Really we
need a way that a driver can opt-out of this, and access the cpu cache
APIs directly, skipping the dma API entirely.  But as it is, we've had
to hack around the dma API.  I'm not really sure this hack is any
worse than abusing dma_(un)map_sg() for doing cache operations.

I probably should have paid more attention and nak'd the dma/iommu
integration before it landed.  But given that now we are stuck in this
situation, while I'm certainly interested if anyone has some ideas
about how to let drivers opt out of the dma/iommu integration and
bypass the dma API layer, I'm ok with replacing a hack with a less-bad
hack.

BR,
-R
Lukas Wunner Nov. 28, 2018, 2:19 p.m. UTC | #5
On Tue, Nov 27, 2018 at 11:39:40PM -0800, Christoph Hellwig wrote:
> > +		/*
> > +		 * dma_sync_sg_*() flush the physical pages, so point
> > +		 * sg->dma_address to the physical ones for the right behavior.
> > +		 */
> > +		for_each_sg(msm_obj->sgt->sgl, s, msm_obj->sgt->nents, i)
> > +			sg_dma_address(s) = sg_phys(s);
> > +
> 
> I'm sorry, but this is completely bogus and not acceptable.
> 
> The only place that is allowed to initialize sg_dma_address is
> dma_map_sg.  If the default dma ops don't work for your setup we have
> a major problem and need to fix the dma api / iommu integration instead
> of hacking around it.

Somewhat related, I recently submitted a patch for the spi-bcm2835.c
driver to overcome a limitation of the DMA engine and the spi0 master
on the Raspberry Pi:

http://lists.infradead.org/pipermail/linux-rpi-kernel/2018-November/008275.html

If a buffer needs to be sent or received which was vmalloc'ed (hence is
not contiguous in physical memory) and starts in the middle of a page
with an offset not a multiple of 4, the buffer could not be transmitted
with DMA so far because the spi0 master requires 4 byte writes to the
FIFO and the DMA engine is incapable of combining multiple sglist
entries of arbitrary length into a continuous stream of 4 byte chunks.

The solution I've found is to transfer the first few bytes of the first
sglist entry with programmed I/O such that the remainder has a length
which is a multiple of 4.  For this to work, I have to massage the first
sglist entry, i.e. I have to add the number of bytes transmitted with
PIO to the DMA address and subtract it from the sglist entry's length.
After the DMA engine has done its job, I undo those changes to the
sglist.
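
Concretely, the first-entry adjustment looks roughly like the sketch below
(simplified and illustrative, not the actual patch; the helper names are
made up and the computation of 'head' is omitted):

#include <linux/scatterlist.h>

/*
 * 'head' is the number of bytes of the first sglist entry that were
 * already pushed into the FIFO by PIO, chosen so that the remaining
 * DMA length is a multiple of 4.
 */
static void example_trim_first_entry(struct scatterlist *first, unsigned int head)
{
	sg_dma_address(first) += head;
	sg_dma_len(first) -= head;
}

/* After the DMA transfer completes, the entry is restored for unmapping. */
static void example_restore_first_entry(struct scatterlist *first, unsigned int head)
{
	sg_dma_address(first) -= head;
	sg_dma_len(first) += head;
}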

Could you comment on the acceptability of this approach?  If you do not
deem it acceptable, could you suggest how the dma-mapping API should be
amended to support such use cases?

(I sure hope the approach *is* acceptable; getting this to work at all
required endless fiddling.  The BCM2835 is in some ways a not so great
SoC and could do with better documentation, to put it mildly.)

(Note: I've since amended the above-linked patch to sync only the modified
cachelines back to memory, not the full first sglist entry.  I've also
switched to the sg_dma_address() and sg_dma_len() syntax instead of
referencing the sglist entry members directly, this appears to be more
common in the tree even for l-values.  I was going to post these changes
as v2 shortly.)

Thanks,

Lukas
Vivek Gautam Nov. 29, 2018, 9:43 a.m. UTC | #6
Hi Tomasz,

On Wed, Nov 28, 2018 at 8:39 AM Tomasz Figa <tfiga@chromium.org> wrote:
>
> Hi Vivek,
>
> On Tue, Nov 27, 2018 at 6:37 AM Vivek Gautam
> <vivek.gautam@codeaurora.org> wrote:
> >
> > dma_map_sg() expects a DMA domain. However, the drm devices
> > have traditionally been using an unmanaged iommu domain, which
> > is not a DMA-type domain. Using the dma mapping APIs with that
> > domain is incorrect.
> >
> > Replace dma_map_sg() calls with dma_sync_sg_for_device{|cpu}()
> > to do the cache maintenance.
> >
> > Signed-off-by: Vivek Gautam <vivek.gautam@codeaurora.org>
> > Suggested-by: Tomasz Figa <tfiga@chromium.org>
> > Cc: Rob Clark <robdclark@gmail.com>
> > Cc: Jordan Crouse <jcrouse@codeaurora.org>
> > Cc: Sean Paul <seanpaul@chromium.org>
> > ---
> >
> > Changes since v1:
> >  - Addressed Jordan's and Tomasz's comments:
> >    - Moved the sg dma address preparation out of the conditional
> >      check into the main path, so it is done irrespective of whether
> >      the buffer is cached or uncached.
> >    - Enhanced the comment to explain this dma address preparation.
> >
>
> Thanks for the patch. Some comments inline.
>
> >  drivers/gpu/drm/msm/msm_gem.c | 31 ++++++++++++++++++++++---------
> >  1 file changed, 22 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
> > index 00c795ced02c..1811ac23a31c 100644
> > --- a/drivers/gpu/drm/msm/msm_gem.c
> > +++ b/drivers/gpu/drm/msm/msm_gem.c
> > @@ -81,6 +81,8 @@ static struct page **get_pages(struct drm_gem_object *obj)
> >                 struct drm_device *dev = obj->dev;
> >                 struct page **p;
> >                 int npages = obj->size >> PAGE_SHIFT;
> > +               struct scatterlist *s;
> > +               int i;
> >
> >                 if (use_pages(obj))
> >                         p = drm_gem_get_pages(obj);
> > @@ -104,12 +106,21 @@ static struct page **get_pages(struct drm_gem_object *obj)
> >                         return ptr;
> >                 }
> >
> > -               /* For non-cached buffers, ensure the new pages are clean
> > +               /*
> > +                * dma_sync_sg_*() flush the physical pages, so point
> > +                * sg->dma_address to the physical ones for the right behavior.
>
> The two halves of the sequence don't really relate to each other. An
> sglist has the `page` field for the purpose of pointing to physical
> pages. The `dma_address` field is for DMA addresses, which are not
> equivalent to physical addresses. I'd rewrite it like this:

I guess I was being loose with the terms 'physical pages' and 'dma address'.

>
> /*
>  * Some implementations of the DMA mapping ops expect
>  * physical addresses of the pages to be stored as DMA
>  * addresses of the sglist entries. To work around it,
>  * set them here explicitly.
>  */

Will update as suggested.

> > +                */
> > +               for_each_sg(msm_obj->sgt->sgl, s, msm_obj->sgt->nents, i)
> > +                       sg_dma_address(s) = sg_phys(s);
> > +
> > +               /*
> > +                * For non-cached buffers, ensure the new pages are clean
> >                  * because display controller, GPU, etc. are not coherent:
> >                  */
> > -               if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED))
> > -                       dma_map_sg(dev->dev, msm_obj->sgt->sgl,
> > -                                       msm_obj->sgt->nents, DMA_BIDIRECTIONAL);
> > +               if (msm_obj->flags & (MSM_BO_WC | MSM_BO_UNCACHED))
> > +                       dma_sync_sg_for_device(dev->dev, msm_obj->sgt->sgl,
> > +                                              msm_obj->sgt->nents,
> > +                                              DMA_TO_DEVICE);
>
> Why the change from DMA_BIDIRECTIONAL?

Yes, I went back and checked; we want this for both directions.
Will keep DMA_BIDIRECTIONAL intact.

>
> >         }
> >
> >         return msm_obj->pages;
> > @@ -133,14 +144,16 @@ static void put_pages(struct drm_gem_object *obj)
> >
> >         if (msm_obj->pages) {
> >                 if (msm_obj->sgt) {
> > -                       /* For non-cached buffers, ensure the new
> > +                       /*
> > +                        * For non-cached buffers, ensure the new
> >                          * pages are clean because display controller,
> >                          * GPU, etc. are not coherent:
> >                          */
> > -                       if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED))
> > -                               dma_unmap_sg(obj->dev->dev, msm_obj->sgt->sgl,
> > -                                            msm_obj->sgt->nents,
> > -                                            DMA_BIDIRECTIONAL);
> > +                       if (msm_obj->flags & (MSM_BO_WC | MSM_BO_UNCACHED))
> > +                               dma_sync_sg_for_cpu(obj->dev->dev,
> > +                                                   msm_obj->sgt->sgl,
> > +                                                   msm_obj->sgt->nents,
> > +                                                   DMA_FROM_DEVICE);
>
> Ditto.

Sure, will change this as well.

Thanks & Regards
Vivek

>
> Best regards,
> Tomasz



--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation
Vivek Gautam Nov. 29, 2018, 9:44 a.m. UTC | #7
On Wed, Nov 28, 2018 at 6:09 PM Rob Clark <robdclark@gmail.com> wrote:
>
> On Wed, Nov 28, 2018 at 2:39 AM Christoph Hellwig <hch@infradead.org> wrote:
> >
> > > +             /*
> > > +              * dma_sync_sg_*() flush the physical pages, so point
> > > +              * sg->dma_address to the physical ones for the right behavior.
> > > +              */
> > > +             for_each_sg(msm_obj->sgt->sgl, s, msm_obj->sgt->nents, i)
> > > +                     sg_dma_address(s) = sg_phys(s);
> > > +
> >
> > I'm sorry, but this is completely bogus and not acceptable.
> >
> > The only place that is allowed to initialize sg_dma_address is
> > dma_map_sg.  If the default dma ops don't work for your setup we have
> > a major problem and need to fix the dma api / iommu integration instead
> > of hacking around it.
>
> I agree that the dma/iommu integration is very problematic for drm (in
> particular, gpu drivers that use the iommu as a gpu mmu)..  Really we
> need a way that a driver can opt-out of this, and access the cpu cache
> APIs directly, skipping the dma API entirely.  But as it is, we've had
> to hack around the dma API.  I'm not really sure this hack is any
> worse than abusing dma_(un)map_sg() for doing cache operations.
>
> I probably should have paid more attention and nak'd the dma/iommu
> integration before it landed.  But given that now we are stuck in this
> situation, while I'm certainly interested if anyone has some ideas
> about how to let drivers opt out of the dma/iommu integration and
> bypass the dma API layer, I'm ok with replacing a hack with a less-bad
> hack.

May I take it as a positive nod to respin the next version?

Regards
Vivek

>
> BR,
> -R

Patch

diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 00c795ced02c..1811ac23a31c 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -81,6 +81,8 @@  static struct page **get_pages(struct drm_gem_object *obj)
 		struct drm_device *dev = obj->dev;
 		struct page **p;
 		int npages = obj->size >> PAGE_SHIFT;
+		struct scatterlist *s;
+		int i;
 
 		if (use_pages(obj))
 			p = drm_gem_get_pages(obj);
@@ -104,12 +106,21 @@  static struct page **get_pages(struct drm_gem_object *obj)
 			return ptr;
 		}
 
-		/* For non-cached buffers, ensure the new pages are clean
+		/*
+		 * dma_sync_sg_*() flush the physical pages, so point
+		 * sg->dma_address to the physical ones for the right behavior.
+		 */
+		for_each_sg(msm_obj->sgt->sgl, s, msm_obj->sgt->nents, i)
+			sg_dma_address(s) = sg_phys(s);
+
+		/*
+		 * For non-cached buffers, ensure the new pages are clean
 		 * because display controller, GPU, etc. are not coherent:
 		 */
-		if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED))
-			dma_map_sg(dev->dev, msm_obj->sgt->sgl,
-					msm_obj->sgt->nents, DMA_BIDIRECTIONAL);
+		if (msm_obj->flags & (MSM_BO_WC | MSM_BO_UNCACHED))
+			dma_sync_sg_for_device(dev->dev, msm_obj->sgt->sgl,
+					       msm_obj->sgt->nents,
+					       DMA_TO_DEVICE);
 	}
 
 	return msm_obj->pages;
@@ -133,14 +144,16 @@  static void put_pages(struct drm_gem_object *obj)
 
 	if (msm_obj->pages) {
 		if (msm_obj->sgt) {
-			/* For non-cached buffers, ensure the new
+			/*
+			 * For non-cached buffers, ensure the new
 			 * pages are clean because display controller,
 			 * GPU, etc. are not coherent:
 			 */
-			if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED))
-				dma_unmap_sg(obj->dev->dev, msm_obj->sgt->sgl,
-					     msm_obj->sgt->nents,
-					     DMA_BIDIRECTIONAL);
+			if (msm_obj->flags & (MSM_BO_WC | MSM_BO_UNCACHED))
+				dma_sync_sg_for_cpu(obj->dev->dev,
+						    msm_obj->sgt->sgl,
+						    msm_obj->sgt->nents,
+						    DMA_FROM_DEVICE);
 
 			sg_free_table(msm_obj->sgt);
 			kfree(msm_obj->sgt);