diff mbox series

drm/gem-shmem: Optimize DMA mapping for exported buffers

Message ID 20250325133744.23805-1-jacek.lawrynowicz@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series drm/gem-shmem: Optimize DMA mapping for exported buffers | expand

Commit Message

Jacek Lawrynowicz March 25, 2025, 1:37 p.m. UTC
Use DMA_ATTR_SKIP_CPU_SYNC flag for exported buffers during DMA mapping.
The same flag is already used by drm_gem_map_dma_buf() for imported
buffers.

Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
---
 drivers/gpu/drm/drm_gem_shmem_helper.c |  8 ++++++--
 include/drm/drm_gem.h                  | 12 ++++++++++++
 2 files changed, 18 insertions(+), 2 deletions(-)

Comments

Christian König March 25, 2025, 1:53 p.m. UTC | #1
Am 25.03.25 um 14:37 schrieb Jacek Lawrynowicz:
> Use DMA_ATTR_SKIP_CPU_SYNC flag for exported buffers during DMA mapping.
> The same flag is already used by drm_gem_map_dma_buf() for imported
> buffers.
>
> Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
> ---
>  drivers/gpu/drm/drm_gem_shmem_helper.c |  8 ++++++--
>  include/drm/drm_gem.h                  | 12 ++++++++++++
>  2 files changed, 18 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
> index d99dee67353a1..8938d8e3de52f 100644
> --- a/drivers/gpu/drm/drm_gem_shmem_helper.c
> +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
> @@ -699,7 +699,7 @@ EXPORT_SYMBOL_GPL(drm_gem_shmem_get_sg_table);
>  static struct sg_table *drm_gem_shmem_get_pages_sgt_locked(struct drm_gem_shmem_object *shmem)
>  {
>  	struct drm_gem_object *obj = &shmem->base;
> -	int ret;
> +	int ret, flags = 0;
>  	struct sg_table *sgt;
>  
>  	if (shmem->sgt)
> @@ -716,8 +716,12 @@ static struct sg_table *drm_gem_shmem_get_pages_sgt_locked(struct drm_gem_shmem_
>  		ret = PTR_ERR(sgt);
>  		goto err_put_pages;
>  	}
> +
> +	if (drm_gem_is_exported())
> +		flags |= DMA_ATTR_SKIP_CPU_SYNC;

We should probably just unconditionally set this flag or not at all.

Otherwise we could run into quite some surprises.

Regards,
Christian.

> +
>  	/* Map the pages for use by the h/w. */
> -	ret = dma_map_sgtable(obj->dev->dev, sgt, DMA_BIDIRECTIONAL, 0);
> +	ret = dma_map_sgtable(obj->dev->dev, sgt, DMA_BIDIRECTIONAL, flags);
>  	if (ret)
>  		goto err_free_sgt;
>  
> diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h
> index 2bf893eabb4b2..7c355a44d0a49 100644
> --- a/include/drm/drm_gem.h
> +++ b/include/drm/drm_gem.h
> @@ -589,6 +589,18 @@ static inline bool drm_gem_is_imported(const struct drm_gem_object *obj)
>  	return obj->dma_buf && (obj->dma_buf->priv != obj);
>  }
>  
> +/**
> + * drm_gem_is_exported() - Tests if GEM object's buffer has been exported
> + * @obj: the GEM object
> + *
> + * Returns:
> + * True if the GEM object's buffer has been exported, false otherwise
> + */
> +static inline bool drm_gem_is_exported(const struct drm_gem_object *obj)
> +{
> +	return obj->dma_buf && (obj->dma_buf->priv == obj);
> +}
> +
>  #ifdef CONFIG_LOCKDEP
>  /**
>   * drm_gem_gpuva_set_lock() - Set the lock protecting accesses to the gpuva list.
Thomas Zimmermann March 25, 2025, 2:14 p.m. UTC | #2
Hi

Am 25.03.25 um 14:53 schrieb Christian König:
> Am 25.03.25 um 14:37 schrieb Jacek Lawrynowicz:
>> Use DMA_ATTR_SKIP_CPU_SYNC flag for exported buffers during DMA mapping.
>> The same flag is already used by drm_gem_map_dma_buf() for imported
>> buffers.
>>
>> Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
>> ---
>>   drivers/gpu/drm/drm_gem_shmem_helper.c |  8 ++++++--
>>   include/drm/drm_gem.h                  | 12 ++++++++++++
>>   2 files changed, 18 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
>> index d99dee67353a1..8938d8e3de52f 100644
>> --- a/drivers/gpu/drm/drm_gem_shmem_helper.c
>> +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
>> @@ -699,7 +699,7 @@ EXPORT_SYMBOL_GPL(drm_gem_shmem_get_sg_table);
>>   static struct sg_table *drm_gem_shmem_get_pages_sgt_locked(struct drm_gem_shmem_object *shmem)
>>   {
>>   	struct drm_gem_object *obj = &shmem->base;
>> -	int ret;
>> +	int ret, flags = 0;
>>   	struct sg_table *sgt;
>>   
>>   	if (shmem->sgt)
>> @@ -716,8 +716,12 @@ static struct sg_table *drm_gem_shmem_get_pages_sgt_locked(struct drm_gem_shmem_
>>   		ret = PTR_ERR(sgt);
>>   		goto err_put_pages;
>>   	}
>> +
>> +	if (drm_gem_is_exported())
>> +		flags |= DMA_ATTR_SKIP_CPU_SYNC;
> We should probably just unconditionally set this flag or not at all.

A question to both of you: does this have an effect on performance? I'm 
asking because I have a bug report where someone exports a buffer from 
gem-shmem with miserable performance. Would this flag make a difference?

Best regards
Thomas

>
> Otherwise we could run into quite some surprises.
>
> Regards,
> Christian.
>
>> +
>>   	/* Map the pages for use by the h/w. */
>> -	ret = dma_map_sgtable(obj->dev->dev, sgt, DMA_BIDIRECTIONAL, 0);
>> +	ret = dma_map_sgtable(obj->dev->dev, sgt, DMA_BIDIRECTIONAL, flags);
>>   	if (ret)
>>   		goto err_free_sgt;
>>   
>> diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h
>> index 2bf893eabb4b2..7c355a44d0a49 100644
>> --- a/include/drm/drm_gem.h
>> +++ b/include/drm/drm_gem.h
>> @@ -589,6 +589,18 @@ static inline bool drm_gem_is_imported(const struct drm_gem_object *obj)
>>   	return obj->dma_buf && (obj->dma_buf->priv != obj);
>>   }
>>   
>> +/**
>> + * drm_gem_is_exported() - Tests if GEM object's buffer has been exported
>> + * @obj: the GEM object
>> + *
>> + * Returns:
>> + * True if the GEM object's buffer has been exported, false otherwise
>> + */
>> +static inline bool drm_gem_is_exported(const struct drm_gem_object *obj)
>> +{
>> +	return obj->dma_buf && (obj->dma_buf->priv == obj);
>> +}
>> +
>>   #ifdef CONFIG_LOCKDEP
>>   /**
>>    * drm_gem_gpuva_set_lock() - Set the lock protecting accesses to the gpuva list.
Christian König March 25, 2025, 2:35 p.m. UTC | #3
Am 25.03.25 um 15:14 schrieb Thomas Zimmermann:
> Hi
>
> Am 25.03.25 um 14:53 schrieb Christian König:
>> Am 25.03.25 um 14:37 schrieb Jacek Lawrynowicz:
>>> Use DMA_ATTR_SKIP_CPU_SYNC flag for exported buffers during DMA mapping.
>>> The same flag is already used by drm_gem_map_dma_buf() for imported
>>> buffers.
>>>
>>> Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
>>> ---
>>>   drivers/gpu/drm/drm_gem_shmem_helper.c |  8 ++++++--
>>>   include/drm/drm_gem.h                  | 12 ++++++++++++
>>>   2 files changed, 18 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
>>> index d99dee67353a1..8938d8e3de52f 100644
>>> --- a/drivers/gpu/drm/drm_gem_shmem_helper.c
>>> +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
>>> @@ -699,7 +699,7 @@ EXPORT_SYMBOL_GPL(drm_gem_shmem_get_sg_table);
>>>   static struct sg_table *drm_gem_shmem_get_pages_sgt_locked(struct drm_gem_shmem_object *shmem)
>>>   {
>>>       struct drm_gem_object *obj = &shmem->base;
>>> -    int ret;
>>> +    int ret, flags = 0;
>>>       struct sg_table *sgt;
>>>         if (shmem->sgt)
>>> @@ -716,8 +716,12 @@ static struct sg_table *drm_gem_shmem_get_pages_sgt_locked(struct drm_gem_shmem_
>>>           ret = PTR_ERR(sgt);
>>>           goto err_put_pages;
>>>       }
>>> +
>>> +    if (drm_gem_is_exported())
>>> +        flags |= DMA_ATTR_SKIP_CPU_SYNC;
>> We should probably just unconditionally set this flag or not at all.
>
> A question to both of you: does this have an effect on performance? I'm asking because i have a bug report where someone exports a buffer from gem-shmem with miserable performance. Would this flag make a difference?

You can usually skip the CPU sync when you are sure your device is coherent to the CPU cache. And that saves time since you don't need to wait for cache flushes to complete.

But that is completely independent of if a buffer is exported or not.

Regards,
Christian.

>
> Best regards
> Thomas
>
>>
>> Otherwise we could run into quite some surprises.
>>
>> Regards,
>> Christian.
>>
>>> +
>>>       /* Map the pages for use by the h/w. */
>>> -    ret = dma_map_sgtable(obj->dev->dev, sgt, DMA_BIDIRECTIONAL, 0);
>>> +    ret = dma_map_sgtable(obj->dev->dev, sgt, DMA_BIDIRECTIONAL, flags);
>>>       if (ret)
>>>           goto err_free_sgt;
>>>   diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h
>>> index 2bf893eabb4b2..7c355a44d0a49 100644
>>> --- a/include/drm/drm_gem.h
>>> +++ b/include/drm/drm_gem.h
>>> @@ -589,6 +589,18 @@ static inline bool drm_gem_is_imported(const struct drm_gem_object *obj)
>>>       return obj->dma_buf && (obj->dma_buf->priv != obj);
>>>   }
>>>   +/**
>>> + * drm_gem_is_exported() - Tests if GEM object's buffer has been exported
>>> + * @obj: the GEM object
>>> + *
>>> + * Returns:
>>> + * True if the GEM object's buffer has been exported, false otherwise
>>> + */
>>> +static inline bool drm_gem_is_exported(const struct drm_gem_object *obj)
>>> +{
>>> +    return obj->dma_buf && (obj->dma_buf->priv == obj);
>>> +}
>>> +
>>>   #ifdef CONFIG_LOCKDEP
>>>   /**
>>>    * drm_gem_gpuva_set_lock() - Set the lock protecting accesses to the gpuva list.
>
Jacek Lawrynowicz March 25, 2025, 3:33 p.m. UTC | #4
Hi,

On 3/25/2025 2:53 PM, Christian König wrote:
> Am 25.03.25 um 14:37 schrieb Jacek Lawrynowicz:
>> Use DMA_ATTR_SKIP_CPU_SYNC flag for exported buffers during DMA mapping.
>> The same flag is already used by drm_gem_map_dma_buf() for imported
>> buffers.
>>
>> Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
>> ---
>>  drivers/gpu/drm/drm_gem_shmem_helper.c |  8 ++++++--
>>  include/drm/drm_gem.h                  | 12 ++++++++++++
>>  2 files changed, 18 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
>> index d99dee67353a1..8938d8e3de52f 100644
>> --- a/drivers/gpu/drm/drm_gem_shmem_helper.c
>> +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
>> @@ -699,7 +699,7 @@ EXPORT_SYMBOL_GPL(drm_gem_shmem_get_sg_table);
>>  static struct sg_table *drm_gem_shmem_get_pages_sgt_locked(struct drm_gem_shmem_object *shmem)
>>  {
>>  	struct drm_gem_object *obj = &shmem->base;
>> -	int ret;
>> +	int ret, flags = 0;
>>  	struct sg_table *sgt;
>>  
>>  	if (shmem->sgt)
>> @@ -716,8 +716,12 @@ static struct sg_table *drm_gem_shmem_get_pages_sgt_locked(struct drm_gem_shmem_
>>  		ret = PTR_ERR(sgt);
>>  		goto err_put_pages;
>>  	}
>> +
>> +	if (drm_gem_is_exported())
>> +		flags |= DMA_ATTR_SKIP_CPU_SYNC;
> 
> We should probably just unconditionally set this flag or not at all.
> 
> Otherwise we could run into quite some surprises.

I see that this flag is usually set in drm_gem_map_dma_buf() and similar callbacks across drm drivers.
Shouldn't it be the default on x86?

Regards,
Jacek
Jacek Lawrynowicz March 25, 2025, 3:36 p.m. UTC | #5
Hi,


On 3/25/2025 3:14 PM, Thomas Zimmermann wrote:
> Hi
> 
> Am 25.03.25 um 14:53 schrieb Christian König:
>> Am 25.03.25 um 14:37 schrieb Jacek Lawrynowicz:
>>> Use DMA_ATTR_SKIP_CPU_SYNC flag for exported buffers during DMA mapping.
>>> The same flag is already used by drm_gem_map_dma_buf() for imported
>>> buffers.
>>>
>>> Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
>>> ---
>>>   drivers/gpu/drm/drm_gem_shmem_helper.c |  8 ++++++--
>>>   include/drm/drm_gem.h                  | 12 ++++++++++++
>>>   2 files changed, 18 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
>>> index d99dee67353a1..8938d8e3de52f 100644
>>> --- a/drivers/gpu/drm/drm_gem_shmem_helper.c
>>> +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
>>> @@ -699,7 +699,7 @@ EXPORT_SYMBOL_GPL(drm_gem_shmem_get_sg_table);
>>>   static struct sg_table *drm_gem_shmem_get_pages_sgt_locked(struct drm_gem_shmem_object *shmem)
>>>   {
>>>       struct drm_gem_object *obj = &shmem->base;
>>> -    int ret;
>>> +    int ret, flags = 0;
>>>       struct sg_table *sgt;
>>>         if (shmem->sgt)
>>> @@ -716,8 +716,12 @@ static struct sg_table *drm_gem_shmem_get_pages_sgt_locked(struct drm_gem_shmem_
>>>           ret = PTR_ERR(sgt);
>>>           goto err_put_pages;
>>>       }
>>> +
>>> +    if (drm_gem_is_exported())
>>> +        flags |= DMA_ATTR_SKIP_CPU_SYNC;
>> We should probably just unconditionally set this flag or not at all.
> 
> A question to both of you: does this have an effect on performance? I'm asking because i have a bug report where someone exports a buffer from gem-shmem with miserable performance. Would this flag make a difference?

On x86 this has no effect on performance. I'm not sure about other archs.

Regards,
Jacek
Christian König March 25, 2025, 3:38 p.m. UTC | #6
Am 25.03.25 um 16:33 schrieb Jacek Lawrynowicz:
> Hi,
>
> On 3/25/2025 2:53 PM, Christian König wrote:
>> Am 25.03.25 um 14:37 schrieb Jacek Lawrynowicz:
>>> Use DMA_ATTR_SKIP_CPU_SYNC flag for exported buffers during DMA mapping.
>>> The same flag is already used by drm_gem_map_dma_buf() for imported
>>> buffers.
>>>
>>> Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
>>> ---
>>>  drivers/gpu/drm/drm_gem_shmem_helper.c |  8 ++++++--
>>>  include/drm/drm_gem.h                  | 12 ++++++++++++
>>>  2 files changed, 18 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
>>> index d99dee67353a1..8938d8e3de52f 100644
>>> --- a/drivers/gpu/drm/drm_gem_shmem_helper.c
>>> +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
>>> @@ -699,7 +699,7 @@ EXPORT_SYMBOL_GPL(drm_gem_shmem_get_sg_table);
>>>  static struct sg_table *drm_gem_shmem_get_pages_sgt_locked(struct drm_gem_shmem_object *shmem)
>>>  {
>>>  	struct drm_gem_object *obj = &shmem->base;
>>> -	int ret;
>>> +	int ret, flags = 0;
>>>  	struct sg_table *sgt;
>>>  
>>>  	if (shmem->sgt)
>>> @@ -716,8 +716,12 @@ static struct sg_table *drm_gem_shmem_get_pages_sgt_locked(struct drm_gem_shmem_
>>>  		ret = PTR_ERR(sgt);
>>>  		goto err_put_pages;
>>>  	}
>>> +
>>> +	if (drm_gem_is_exported())
>>> +		flags |= DMA_ATTR_SKIP_CPU_SYNC;
>> We should probably just unconditionally set this flag or not at all.
>>
>> Otherwise we could run into quite some surprises.
> I see that this flag is usually set in drm_gem_map_dma_buf() and similar callbacks across drm drivers.
> Shouldn't it be the default on x86?

Not really, no. If you can skip CPU sync depends on the platform.

On x86 it's correct that almost all devices are coherent with the CPU cache, but that isn't always the case.

The device driver using the GEM shmem helper needs to decide if that is ok or not.

Regards,
Christian.

>
> Regards,
> Jacek
diff mbox series

Patch

diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
index d99dee67353a1..8938d8e3de52f 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -699,7 +699,7 @@  EXPORT_SYMBOL_GPL(drm_gem_shmem_get_sg_table);
 static struct sg_table *drm_gem_shmem_get_pages_sgt_locked(struct drm_gem_shmem_object *shmem)
 {
 	struct drm_gem_object *obj = &shmem->base;
-	int ret;
+	int ret, flags = 0;
 	struct sg_table *sgt;
 
 	if (shmem->sgt)
@@ -716,8 +716,12 @@  static struct sg_table *drm_gem_shmem_get_pages_sgt_locked(struct drm_gem_shmem_
 		ret = PTR_ERR(sgt);
 		goto err_put_pages;
 	}
+
+	if (drm_gem_is_exported(obj))
+		flags |= DMA_ATTR_SKIP_CPU_SYNC;
+
 	/* Map the pages for use by the h/w. */
-	ret = dma_map_sgtable(obj->dev->dev, sgt, DMA_BIDIRECTIONAL, 0);
+	ret = dma_map_sgtable(obj->dev->dev, sgt, DMA_BIDIRECTIONAL, flags);
 	if (ret)
 		goto err_free_sgt;
 
diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h
index 2bf893eabb4b2..7c355a44d0a49 100644
--- a/include/drm/drm_gem.h
+++ b/include/drm/drm_gem.h
@@ -589,6 +589,18 @@  static inline bool drm_gem_is_imported(const struct drm_gem_object *obj)
 	return obj->dma_buf && (obj->dma_buf->priv != obj);
 }
 
+/**
+ * drm_gem_is_exported() - Tests if GEM object's buffer has been exported
+ * @obj: the GEM object
+ *
+ * Returns:
+ * True if the GEM object's buffer has been exported, false otherwise
+ */
+static inline bool drm_gem_is_exported(const struct drm_gem_object *obj)
+{
+	return obj->dma_buf && (obj->dma_buf->priv == obj);
+}
+
 #ifdef CONFIG_LOCKDEP
 /**
  * drm_gem_gpuva_set_lock() - Set the lock protecting accesses to the gpuva list.