diff mbox series

[RFC,3/5] drm/ttm: Use drm_memcpy_from_wc for TTM bo moves

Message ID 20210520150947.803891-4-thomas.hellstrom@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series Core TTM changes for i915 TTM enabling | expand

Commit Message

Thomas Hellstrom May 20, 2021, 3:09 p.m. UTC
Use fast wc memcpy for reading out of wc memory for TTM bo moves.

Cc: Dave Airlie <airlied@gmail.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/ttm/ttm_bo_util.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

Comments

Christian König May 21, 2021, 8:10 a.m. UTC | #1
Am 20.05.21 um 17:09 schrieb Thomas Hellström:
> Use fast wc memcpy for reading out of wc memory for TTM bo moves.
>
> Cc: Dave Airlie <airlied@gmail.com>
> Cc: Christian König <christian.koenig@amd.com>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>

Oh, yes I really wanted to have that in TTM for quite some time.

But I'm wondering if we shouldn't fix the memremap stuff first.

Christian.

> ---
>   drivers/gpu/drm/ttm/ttm_bo_util.c | 18 +++++++++++++++++-
>   1 file changed, 17 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
> index bad9b16e96ba..919ee03f7eb3 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> @@ -31,6 +31,7 @@
>   
>   #include <drm/ttm/ttm_bo_driver.h>
>   #include <drm/ttm/ttm_placement.h>
> +#include <drm/drm_memcpy.h>
>   #include <drm/drm_vma_manager.h>
>   #include <linux/dma-buf-map.h>
>   #include <linux/io.h>
> @@ -185,6 +186,7 @@ void ttm_move_memcpy(struct ttm_buffer_object *bo,
>   	struct ttm_resource *old_mem = &bo->mem;
>   	struct ttm_resource_manager *old_man = ttm_manager_type(bdev, old_mem->mem_type);
>   	struct dma_buf_map old_map, new_map;
> +	bool wc_memcpy;
>   	pgoff_t i;
>   
>   	/* Single TTM move. NOP */
> @@ -208,11 +210,25 @@ void ttm_move_memcpy(struct ttm_buffer_object *bo,
>   		return;
>   	}
>   
> +	wc_memcpy = ((!old_man->use_tt || bo->ttm->caching != ttm_cached) &&
> +		     drm_has_memcpy_from_wc());
> +
> +	/*
> +	 * We use some nasty aliasing for drm_memcpy_from_wc, but assuming
> +	 * that we can move to memremapping in the not too distant future,
> +	 * reduce the fragility for now with a build assert.
> +	 */
> +	BUILD_BUG_ON(offsetof(typeof(old_map), vaddr) !=
> +		     offsetof(typeof(old_map), vaddr_iomem));
> +
>   	for (i = 0; i < new_mem->num_pages; ++i) {
>   		new_iter->ops->kmap_local(new_iter, &new_map, i);
>   		old_iter->ops->kmap_local(old_iter, &old_map, i);
>   
> -		if (!old_map.is_iomem && !new_map.is_iomem) {
> +		if (wc_memcpy) {
> +			drm_memcpy_from_wc(new_map.vaddr, old_map.vaddr,
> +					   PAGE_SIZE);
> +		} else if (!old_map.is_iomem && !new_map.is_iomem) {
>   			memcpy(new_map.vaddr, old_map.vaddr, PAGE_SIZE);
>   		} else if (!old_map.is_iomem) {
>   			dma_buf_map_memcpy_to(&new_map, old_map.vaddr,
Thomas Hellstrom May 21, 2021, 8:30 a.m. UTC | #2
On 5/21/21 10:10 AM, Christian König wrote:
> Am 20.05.21 um 17:09 schrieb Thomas Hellström:
>> Use fast wc memcpy for reading out of wc memory for TTM bo moves.
>>
>> Cc: Dave Airlie <airlied@gmail.com>
>> Cc: Christian König <christian.koenig@amd.com>
>> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
>> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>
> Oh, yes I really wanted to have that in TTM for quite some time.
IMO we should use it for swap copies from WC memory as well. That is a
TODO task for somebody.
>
> But I'm wondering if we shouldn't fix the memremap stuff first.

Using memremap all over is a fairly big change, probably with lots of
opinions involved all over the place.
For now, could I instead add a dma_buf_map interface to the memcpy
itself, moving the aliasing out of TTM and into the arch-specific code
that knows what it's doing?

/Thomas


>
> Christian.
>
>> ---
>>   drivers/gpu/drm/ttm/ttm_bo_util.c | 18 +++++++++++++++++-
>>   1 file changed, 17 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
>> b/drivers/gpu/drm/ttm/ttm_bo_util.c
>> index bad9b16e96ba..919ee03f7eb3 100644
>> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
>> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
>> @@ -31,6 +31,7 @@
>>     #include <drm/ttm/ttm_bo_driver.h>
>>   #include <drm/ttm/ttm_placement.h>
>> +#include <drm/drm_memcpy.h>
>>   #include <drm/drm_vma_manager.h>
>>   #include <linux/dma-buf-map.h>
>>   #include <linux/io.h>
>> @@ -185,6 +186,7 @@ void ttm_move_memcpy(struct ttm_buffer_object *bo,
>>       struct ttm_resource *old_mem = &bo->mem;
>>       struct ttm_resource_manager *old_man = ttm_manager_type(bdev, 
>> old_mem->mem_type);
>>       struct dma_buf_map old_map, new_map;
>> +    bool wc_memcpy;
>>       pgoff_t i;
>>         /* Single TTM move. NOP */
>> @@ -208,11 +210,25 @@ void ttm_move_memcpy(struct ttm_buffer_object *bo,
>>           return;
>>       }
>>   +    wc_memcpy = ((!old_man->use_tt || bo->ttm->caching != 
>> ttm_cached) &&
>> +             drm_has_memcpy_from_wc());
>> +
>> +    /*
>> +     * We use some nasty aliasing for drm_memcpy_from_wc, but assuming
>> +     * that we can move to memremapping in the not too distant future,
>> +     * reduce the fragility for now with a build assert.
>> +     */
>> +    BUILD_BUG_ON(offsetof(typeof(old_map), vaddr) !=
>> +             offsetof(typeof(old_map), vaddr_iomem));
>> +
>>       for (i = 0; i < new_mem->num_pages; ++i) {
>>           new_iter->ops->kmap_local(new_iter, &new_map, i);
>>           old_iter->ops->kmap_local(old_iter, &old_map, i);
>>   -        if (!old_map.is_iomem && !new_map.is_iomem) {
>> +        if (wc_memcpy) {
>> +            drm_memcpy_from_wc(new_map.vaddr, old_map.vaddr,
>> +                       PAGE_SIZE);
>> +        } else if (!old_map.is_iomem && !new_map.is_iomem) {
>>               memcpy(new_map.vaddr, old_map.vaddr, PAGE_SIZE);
>>           } else if (!old_map.is_iomem) {
>>               dma_buf_map_memcpy_to(&new_map, old_map.vaddr,
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index bad9b16e96ba..919ee03f7eb3 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -31,6 +31,7 @@ 
 
 #include <drm/ttm/ttm_bo_driver.h>
 #include <drm/ttm/ttm_placement.h>
+#include <drm/drm_memcpy.h>
 #include <drm/drm_vma_manager.h>
 #include <linux/dma-buf-map.h>
 #include <linux/io.h>
@@ -185,6 +186,7 @@  void ttm_move_memcpy(struct ttm_buffer_object *bo,
 	struct ttm_resource *old_mem = &bo->mem;
 	struct ttm_resource_manager *old_man = ttm_manager_type(bdev, old_mem->mem_type);
 	struct dma_buf_map old_map, new_map;
+	bool wc_memcpy;
 	pgoff_t i;
 
 	/* Single TTM move. NOP */
@@ -208,11 +210,25 @@  void ttm_move_memcpy(struct ttm_buffer_object *bo,
 		return;
 	}
 
+	wc_memcpy = ((!old_man->use_tt || bo->ttm->caching != ttm_cached) &&
+		     drm_has_memcpy_from_wc());
+
+	/*
+	 * We use some nasty aliasing for drm_memcpy_from_wc, but assuming
+	 * that we can move to memremapping in the not too distant future,
+	 * reduce the fragility for now with a build assert.
+	 */
+	BUILD_BUG_ON(offsetof(typeof(old_map), vaddr) !=
+		     offsetof(typeof(old_map), vaddr_iomem));
+
 	for (i = 0; i < new_mem->num_pages; ++i) {
 		new_iter->ops->kmap_local(new_iter, &new_map, i);
 		old_iter->ops->kmap_local(old_iter, &old_map, i);
 
-		if (!old_map.is_iomem && !new_map.is_iomem) {
+		if (wc_memcpy) {
+			drm_memcpy_from_wc(new_map.vaddr, old_map.vaddr,
+					   PAGE_SIZE);
+		} else if (!old_map.is_iomem && !new_map.is_iomem) {
 			memcpy(new_map.vaddr, old_map.vaddr, PAGE_SIZE);
 		} else if (!old_map.is_iomem) {
 			dma_buf_map_memcpy_to(&new_map, old_map.vaddr,