diff mbox series

drm/panfrost: DMA map all pages shared with the GPU

Message ID 20191014151616.14099-1-steven.price@arm.com (mailing list archive)
State New, archived
Headers show
Series drm/panfrost: DMA map all pages shared with the GPU | expand

Commit Message

Steven Price Oct. 14, 2019, 3:16 p.m. UTC
Pages shared with the GPU are (often) not cache coherent with the CPU so
cache maintenance is required to flush the CPU's caches. This was
already done when mapping pages on fault, but wasn't previously done
when mapping a freshly allocated page.

Fix this by moving the call to dma_map_sg() into mmu_map_sg() ensuring
that it is always called when pages are mapped onto the GPU. Since
mmu_map_sg() can now fail the code also now has to handle an error
return.

Not performing this cache maintenance can cause errors in the GPU output
(CPU caches are later flushed and overwrite the GPU output). In theory
it also allows the GPU (and by extension user space) to observe the
memory contents prior to sanitization.

Fixes: f3ba91228e8e ("drm/panfrost: Add initial panfrost driver")
Signed-off-by: Steven Price <steven.price@arm.com>
---
 drivers/gpu/drm/panfrost/panfrost_mmu.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

Comments

Robin Murphy Oct. 14, 2019, 3:46 p.m. UTC | #1
On 14/10/2019 16:16, Steven Price wrote:
> Pages shared with the GPU are (often) not cache coherent with the CPU so
> cache maintenance is required to flush the CPU's caches. This was
> already done when mapping pages on fault, but wasn't previously done
> when mapping a freshly allocated page.
> 
> Fix this by moving the call to dma_map_sg() into mmu_map_sg() ensuring
> that it is always called when pages are mapped onto the GPU. Since
> mmu_map_sg() can now fail the code also now has to handle an error
> return.
> 
> Not performing this cache maintenance can cause errors in the GPU output
> (CPU caches are later flushed and overwrite the GPU output). In theory
> it also allows the GPU (and by extension user space) to observe the
> memory contents prior to sanitization.

For the non-heap case, aren't the pages already supposed to be mapped by 
drm_gem_shmem_get_pages_sgt()?

(Hmm, maybe I should try hooking up the GPU SMMU on my Juno to serve as 
a cheeky DMA-API-mishap detector...)

Robin.

> Fixes: f3ba91228e8e ("drm/panfrost: Add initial panfrost driver")
> Signed-off-by: Steven Price <steven.price@arm.com>
> ---
>   drivers/gpu/drm/panfrost/panfrost_mmu.c | 20 ++++++++++++--------
>   1 file changed, 12 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
> index bdd990568476..0495e48c238d 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
> @@ -248,6 +248,9 @@ static int mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu,
>   	struct io_pgtable_ops *ops = mmu->pgtbl_ops;
>   	u64 start_iova = iova;
>   
> +	if (!dma_map_sg(pfdev->dev, sgt->sgl, sgt->nents, DMA_BIDIRECTIONAL))
> +		return -EINVAL;
> +
>   	for_each_sg(sgt->sgl, sgl, sgt->nents, count) {
>   		unsigned long paddr = sg_dma_address(sgl);
>   		size_t len = sg_dma_len(sgl);
> @@ -275,6 +278,7 @@ int panfrost_mmu_map(struct panfrost_gem_object *bo)
>   	struct panfrost_device *pfdev = to_panfrost_device(obj->dev);
>   	struct sg_table *sgt;
>   	int prot = IOMMU_READ | IOMMU_WRITE;
> +	int ret;
>   
>   	if (WARN_ON(bo->is_mapped))
>   		return 0;
> @@ -286,10 +290,12 @@ int panfrost_mmu_map(struct panfrost_gem_object *bo)
>   	if (WARN_ON(IS_ERR(sgt)))
>   		return PTR_ERR(sgt);
>   
> -	mmu_map_sg(pfdev, bo->mmu, bo->node.start << PAGE_SHIFT, prot, sgt);
> -	bo->is_mapped = true;
> +	ret = mmu_map_sg(pfdev, bo->mmu, bo->node.start << PAGE_SHIFT,
> +			 prot, sgt);
> +	if (ret == 0)
> +		bo->is_mapped = true;
>   
> -	return 0;
> +	return ret;
>   }
>   
>   void panfrost_mmu_unmap(struct panfrost_gem_object *bo)
> @@ -503,12 +509,10 @@ int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, u64 addr)
>   	if (ret)
>   		goto err_pages;
>   
> -	if (!dma_map_sg(pfdev->dev, sgt->sgl, sgt->nents, DMA_BIDIRECTIONAL)) {
> -		ret = -EINVAL;
> +	ret = mmu_map_sg(pfdev, bo->mmu, addr,
> +			 IOMMU_WRITE | IOMMU_READ | IOMMU_NOEXEC, sgt);
> +	if (ret)
>   		goto err_map;
> -	}
> -
> -	mmu_map_sg(pfdev, bo->mmu, addr, IOMMU_WRITE | IOMMU_READ | IOMMU_NOEXEC, sgt);
>   
>   	bo->is_mapped = true;
>   
>
Steven Price Oct. 14, 2019, 3:55 p.m. UTC | #2
On 14/10/2019 16:46, Robin Murphy wrote:
> On 14/10/2019 16:16, Steven Price wrote:
>> Pages shared with the GPU are (often) not cache coherent with the CPU so
>> cache maintenance is required to flush the CPU's caches. This was
>> already done when mapping pages on fault, but wasn't previously done
>> when mapping a freshly allocated page.
>>
>> Fix this by moving the call to dma_map_sg() into mmu_map_sg() ensuring
>> that it is always called when pages are mapped onto the GPU. Since
>> mmu_map_sg() can now fail the code also now has to handle an error
>> return.
>>
>> Not performing this cache maintenance can cause errors in the GPU output
>> (CPU caches are later flushed and overwrite the GPU output). In theory
>> it also allows the GPU (and by extension user space) to observe the
>> memory contents prior to sanitization.
> 
> For the non-heap case, aren't the pages already supposed to be mapped by
> drm_gem_shmem_get_pages_sgt()?

Hmm, well yes - it looks like it *should* work - but I was definitely
seeing cache artefacts until I did this change... let me do some more
testing. It's possible that this is actually only affecting buffers
imported from another driver. Perhaps it's
drm_gem_shmem_prime_import_sg_table() that needs fixing.

> (Hmm, maybe I should try hooking up the GPU SMMU on my Juno to serve as
> a cheeky DMA-API-mishap detector...)

Yes I was wondering about that - it would certainly give some confidence
there's no missing cases.

Steve

> Robin.
> 
>> Fixes: f3ba91228e8e ("drm/panfrost: Add initial panfrost driver")
>> Signed-off-by: Steven Price <steven.price@arm.com>
>> ---
>>   drivers/gpu/drm/panfrost/panfrost_mmu.c | 20 ++++++++++++--------
>>   1 file changed, 12 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c
>> b/drivers/gpu/drm/panfrost/panfrost_mmu.c
>> index bdd990568476..0495e48c238d 100644
>> --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
>> +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
>> @@ -248,6 +248,9 @@ static int mmu_map_sg(struct panfrost_device
>> *pfdev, struct panfrost_mmu *mmu,
>>       struct io_pgtable_ops *ops = mmu->pgtbl_ops;
>>       u64 start_iova = iova;
>>   +    if (!dma_map_sg(pfdev->dev, sgt->sgl, sgt->nents,
>> DMA_BIDIRECTIONAL))
>> +        return -EINVAL;
>> +
>>       for_each_sg(sgt->sgl, sgl, sgt->nents, count) {
>>           unsigned long paddr = sg_dma_address(sgl);
>>           size_t len = sg_dma_len(sgl);
>> @@ -275,6 +278,7 @@ int panfrost_mmu_map(struct panfrost_gem_object *bo)
>>       struct panfrost_device *pfdev = to_panfrost_device(obj->dev);
>>       struct sg_table *sgt;
>>       int prot = IOMMU_READ | IOMMU_WRITE;
>> +    int ret;
>>         if (WARN_ON(bo->is_mapped))
>>           return 0;
>> @@ -286,10 +290,12 @@ int panfrost_mmu_map(struct panfrost_gem_object
>> *bo)
>>       if (WARN_ON(IS_ERR(sgt)))
>>           return PTR_ERR(sgt);
>>   -    mmu_map_sg(pfdev, bo->mmu, bo->node.start << PAGE_SHIFT, prot,
>> sgt);
>> -    bo->is_mapped = true;
>> +    ret = mmu_map_sg(pfdev, bo->mmu, bo->node.start << PAGE_SHIFT,
>> +             prot, sgt);
>> +    if (ret == 0)
>> +        bo->is_mapped = true;
>>   -    return 0;
>> +    return ret;
>>   }
>>     void panfrost_mmu_unmap(struct panfrost_gem_object *bo)
>> @@ -503,12 +509,10 @@ int panfrost_mmu_map_fault_addr(struct
>> panfrost_device *pfdev, int as, u64 addr)
>>       if (ret)
>>           goto err_pages;
>>   -    if (!dma_map_sg(pfdev->dev, sgt->sgl, sgt->nents,
>> DMA_BIDIRECTIONAL)) {
>> -        ret = -EINVAL;
>> +    ret = mmu_map_sg(pfdev, bo->mmu, addr,
>> +             IOMMU_WRITE | IOMMU_READ | IOMMU_NOEXEC, sgt);
>> +    if (ret)
>>           goto err_map;
>> -    }
>> -
>> -    mmu_map_sg(pfdev, bo->mmu, addr, IOMMU_WRITE | IOMMU_READ |
>> IOMMU_NOEXEC, sgt);
>>         bo->is_mapped = true;
>>  
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Steven Price Oct. 14, 2019, 4:28 p.m. UTC | #3
On 14/10/2019 16:55, Steven Price wrote:
> On 14/10/2019 16:46, Robin Murphy wrote:
>> On 14/10/2019 16:16, Steven Price wrote:
>>> Pages shared with the GPU are (often) not cache coherent with the CPU so
>>> cache maintenance is required to flush the CPU's caches. This was
>>> already done when mapping pages on fault, but wasn't previously done
>>> when mapping a freshly allocated page.
>>>
>>> Fix this by moving the call to dma_map_sg() into mmu_map_sg() ensuring
>>> that it is always called when pages are mapped onto the GPU. Since
>>> mmu_map_sg() can now fail the code also now has to handle an error
>>> return.
>>>
>>> Not performing this cache maintenance can cause errors in the GPU output
>>> (CPU caches are later flushed and overwrite the GPU output). In theory
>>> it also allows the GPU (and by extension user space) to observe the
>>> memory contents prior to sanitization.
>>
>> For the non-heap case, aren't the pages already supposed to be mapped by
>> drm_gem_shmem_get_pages_sgt()?
> 
> Hmm, well yes - it looks like it *should* work - but I was definitely
> seeing cache artefacts until I did this change... let me do some more
> testing. It's possible that this is actually only affecting buffers
> imported from another driver. Perhaps it's
> drm_gem_shmem_prime_import_sg_table() that needs fixing.

Yes this does appear to only affect imported buffers from what I can
tell. Looking through the code there is something suspicious in
drm_gem_map_dma_buf(). The following "fixes" the problem. But I'm not
sure the reasoning behind DMA_ATTR_SKIP_CPU_SYNC being specified -
presumably someone though it was a good idea! I'm not sure which driver's
responsibility it is to ensure the caches are flushed.

---8<----
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 0a2316e0e812..1f7353abd255 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -625,7 +625,7 @@ struct sg_table *drm_gem_map_dma_buf(struct dma_buf_attachment *attach,
 		sgt = obj->dev->driver->gem_prime_get_sg_table(obj);
 
 	if (!dma_map_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir,
-			      DMA_ATTR_SKIP_CPU_SYNC)) {
+			      0)) {
 		sg_free_table(sgt);
 		kfree(sgt);
 		sgt = ERR_PTR(-ENOMEM);
diff mbox series

Patch

diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
index bdd990568476..0495e48c238d 100644
--- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
@@ -248,6 +248,9 @@  static int mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu,
 	struct io_pgtable_ops *ops = mmu->pgtbl_ops;
 	u64 start_iova = iova;
 
+	if (!dma_map_sg(pfdev->dev, sgt->sgl, sgt->nents, DMA_BIDIRECTIONAL))
+		return -EINVAL;
+
 	for_each_sg(sgt->sgl, sgl, sgt->nents, count) {
 		unsigned long paddr = sg_dma_address(sgl);
 		size_t len = sg_dma_len(sgl);
@@ -275,6 +278,7 @@  int panfrost_mmu_map(struct panfrost_gem_object *bo)
 	struct panfrost_device *pfdev = to_panfrost_device(obj->dev);
 	struct sg_table *sgt;
 	int prot = IOMMU_READ | IOMMU_WRITE;
+	int ret;
 
 	if (WARN_ON(bo->is_mapped))
 		return 0;
@@ -286,10 +290,12 @@  int panfrost_mmu_map(struct panfrost_gem_object *bo)
 	if (WARN_ON(IS_ERR(sgt)))
 		return PTR_ERR(sgt);
 
-	mmu_map_sg(pfdev, bo->mmu, bo->node.start << PAGE_SHIFT, prot, sgt);
-	bo->is_mapped = true;
+	ret = mmu_map_sg(pfdev, bo->mmu, bo->node.start << PAGE_SHIFT,
+			 prot, sgt);
+	if (ret == 0)
+		bo->is_mapped = true;
 
-	return 0;
+	return ret;
 }
 
 void panfrost_mmu_unmap(struct panfrost_gem_object *bo)
@@ -503,12 +509,10 @@  int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, u64 addr)
 	if (ret)
 		goto err_pages;
 
-	if (!dma_map_sg(pfdev->dev, sgt->sgl, sgt->nents, DMA_BIDIRECTIONAL)) {
-		ret = -EINVAL;
+	ret = mmu_map_sg(pfdev, bo->mmu, addr,
+			 IOMMU_WRITE | IOMMU_READ | IOMMU_NOEXEC, sgt);
+	if (ret)
 		goto err_map;
-	}
-
-	mmu_map_sg(pfdev, bo->mmu, addr, IOMMU_WRITE | IOMMU_READ | IOMMU_NOEXEC, sgt);
 
 	bo->is_mapped = true;