diff mbox

[7/7] drm/ttm: optimize ttm_mem_evict_first v2

Message ID 20171109085909.1653-7-christian.koenig@amd.com (mailing list archive)
State New, archived
Headers show

Commit Message

Christian König Nov. 9, 2017, 8:59 a.m. UTC
Deleted BOs with the same reservation object can be reaped even if they
can't be reserved.

v2: rebase and we still need to remove/add the BO from/to the LRU.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 39 +++++++++++++++++++++++++++++++--------
 1 file changed, 31 insertions(+), 8 deletions(-)

Comments

Michel Dänzer Nov. 9, 2017, 6:04 p.m. UTC | #1
On 09/11/17 09:59 AM, Christian König wrote:
> Deleted BOs with the same reservation object can be reaped even if they
> can't be reserved.
> 
> v2: rebase and we still need to remove/add the BO from/to the LRU.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/ttm/ttm_bo.c | 39 +++++++++++++++++++++++++++++++--------
>  1 file changed, 31 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 50a678b504f3..6545c4344684 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -735,20 +735,37 @@ bool ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
>  EXPORT_SYMBOL(ttm_bo_eviction_valuable);
>  
>  static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
> -				uint32_t mem_type,
> -				const struct ttm_place *place,
> -				bool interruptible,
> -				bool no_wait_gpu)
> +			       struct reservation_object *resv,
> +			       uint32_t mem_type,
> +			       const struct ttm_place *place,
> +			       bool interruptible,
> +			       bool no_wait_gpu)
>  {
>  	struct ttm_bo_global *glob = bdev->glob;
>  	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
>  	struct ttm_buffer_object *bo;
>  	int ret = -EBUSY;
> +	bool locked;
>  	unsigned i;
>  
>  	spin_lock(&glob->lru_lock);
>  	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>  		list_for_each_entry(bo, &man->lru[i], lru) {
> +			if (bo->resv == resv) {
> +				if (list_empty(&bo->ddestroy))
> +					continue;
> +
> +				if (place &&
> +				    !bdev->driver->eviction_valuable(bo, place))
> +					continue;
> +
> +				ttm_bo_del_from_lru(bo);

Is this necessary, despite the existing ttm_bo_del_from_lru call before
unlocking the LRU lock? If yes, why isn't this necessary in the bo->resv
!= resv case?
Chunming Zhou Nov. 10, 2017, 7:22 a.m. UTC | #2
On 2017年11月09日 16:59, Christian König wrote:
> Deleted BOs with the same reservation object can be reaped even if they
> can't be reserved.
>
> v2: rebase and we still need to remove/add the BO from/to the LRU.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/ttm/ttm_bo.c | 39 +++++++++++++++++++++++++++++++--------
>   1 file changed, 31 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 50a678b504f3..6545c4344684 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -735,20 +735,37 @@ bool ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
>   EXPORT_SYMBOL(ttm_bo_eviction_valuable);
>   
>   static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
> -				uint32_t mem_type,
> -				const struct ttm_place *place,
> -				bool interruptible,
> -				bool no_wait_gpu)
> +			       struct reservation_object *resv,
> +			       uint32_t mem_type,
> +			       const struct ttm_place *place,
> +			       bool interruptible,
> +			       bool no_wait_gpu)
>   {
>   	struct ttm_bo_global *glob = bdev->glob;
>   	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
>   	struct ttm_buffer_object *bo;
>   	int ret = -EBUSY;
> +	bool locked;
>   	unsigned i;
>   
>   	spin_lock(&glob->lru_lock);
>   	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>   		list_for_each_entry(bo, &man->lru[i], lru) {
> +			if (bo->resv == resv) {
> +				if (list_empty(&bo->ddestroy))
> +					continue;
I don't think only destroying BO can be evicted under per-vm-bo case, 
but also normal BO should as well.
I'd give an example:
1. vm-A allocates all vram;
2. vm-B also try to allocate full vram, so the BOs of vram in vm-A will 
be evicted to GTT.
3. vm-A is trying to allocate all GTT, if we don't allow eviction or 
swap, then will fail here.

As above, we shouldn't disallow eviction and swap during allocation, we 
aren't able to predict what case happen.
For over limit allocation, at worst, they will be returned with failed 
status while doing its CS.
If you think the allocation shouldn't be over limitation of memory, we 
can add the checking condition before allocation every time, but not 
disallow eviction and swap in allocation, which really breaks the used TTM.

Regards,
David Zhou

> +
> +				if (place &&
> +				    !bdev->driver->eviction_valuable(bo, place))
> +					continue;
> +
> +				ttm_bo_del_from_lru(bo);
> +
> +				ret = 0;
> +				locked = false;
> +				break;
> +			}
> +
>   			ret = reservation_object_trylock(bo->resv) ? 0 : -EBUSY;
>   			if (ret)
>   				continue;
> @@ -760,6 +777,7 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>   				continue;
>   			}
>   
> +			locked = true;
>   			break;
>   		}
>   
> @@ -775,7 +793,8 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>   	kref_get(&bo->list_kref);
>   
>   	if (!list_empty(&bo->ddestroy)) {
> -		ret = ttm_bo_cleanup_refs(bo, interruptible, no_wait_gpu, true);
> +		ret = ttm_bo_cleanup_refs(bo, interruptible, no_wait_gpu,
> +					  locked);
>   		kref_put(&bo->list_kref, ttm_bo_release_list);
>   		return ret;
>   	}
> @@ -786,7 +805,10 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>   	BUG_ON(ret != 0);
>   
>   	ret = ttm_bo_evict(bo, interruptible, no_wait_gpu);
> -	ttm_bo_unreserve(bo);
> +	if (locked)
> +		ttm_bo_unreserve(bo);
> +	else
> +		ttm_bo_add_to_lru(bo);
>   
>   	kref_put(&bo->list_kref, ttm_bo_release_list);
>   	return ret;
> @@ -850,7 +872,7 @@ static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo,
>   			return ret;
>   		if (mem->mm_node)
>   			break;
> -		ret = ttm_mem_evict_first(bdev, mem_type, place,
> +		ret = ttm_mem_evict_first(bdev, bo->resv, mem_type, place,
>   					  interruptible, no_wait_gpu);
>   		if (unlikely(ret != 0))
>   			return ret;
> @@ -1353,7 +1375,8 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
>   	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>   		while (!list_empty(&man->lru[i])) {
>   			spin_unlock(&glob->lru_lock);
> -			ret = ttm_mem_evict_first(bdev, mem_type, NULL, false, false);
> +			ret = ttm_mem_evict_first(bdev, NULL, mem_type, NULL,
> +						  false, false);
>   			if (ret)
>   				return ret;
>   			spin_lock(&glob->lru_lock);
Christian König Nov. 12, 2017, 9:08 a.m. UTC | #3
Am 10.11.2017 um 08:22 schrieb Chunming Zhou:
>
>
> On 2017年11月09日 16:59, Christian König wrote:
>> Deleted BOs with the same reservation object can be reaped even if they
>> can't be reserved.
>>
>> v2: rebase and we still need to remove/add the BO from/to the LRU.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/ttm/ttm_bo.c | 39 
>> +++++++++++++++++++++++++++++++--------
>>   1 file changed, 31 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>> index 50a678b504f3..6545c4344684 100644
>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>> @@ -735,20 +735,37 @@ bool ttm_bo_eviction_valuable(struct 
>> ttm_buffer_object *bo,
>>   EXPORT_SYMBOL(ttm_bo_eviction_valuable);
>>     static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>> -                uint32_t mem_type,
>> -                const struct ttm_place *place,
>> -                bool interruptible,
>> -                bool no_wait_gpu)
>> +                   struct reservation_object *resv,
>> +                   uint32_t mem_type,
>> +                   const struct ttm_place *place,
>> +                   bool interruptible,
>> +                   bool no_wait_gpu)
>>   {
>>       struct ttm_bo_global *glob = bdev->glob;
>>       struct ttm_mem_type_manager *man = &bdev->man[mem_type];
>>       struct ttm_buffer_object *bo;
>>       int ret = -EBUSY;
>> +    bool locked;
>>       unsigned i;
>>         spin_lock(&glob->lru_lock);
>>       for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>           list_for_each_entry(bo, &man->lru[i], lru) {
>> +            if (bo->resv == resv) {
>> +                if (list_empty(&bo->ddestroy))
>> +                    continue;
> I don't think only destroying BO can be evicted under per-vm-bo case, 
> but also normal BO should as well.
> I'd give an example:
> 1. vm-A allocates all vram;
> 2. vm-B also try to allocate full vram, so the BOs of vram in vm-A 
> will be evicted to GTT.
> 3. vm-A is trying to allocate all GTT, if we don't allow eviction or 
> swap, then will fail here.

That is a really good example, thanks.

>
> As above, we shouldn't disallow eviction and swap during allocation, 
> we aren't able to predict what case happen.
> For over limit allocation, at worst, they will be returned with failed 
> status while doing its CS.
> If you think the allocation shouldn't be over limitation of memory, we 
> can add the checking condition before allocation every time, but not 
> disallow eviction and swap in allocation, which really breaks the used 
> TTM.

Ok, you convinced me. The case above indeed needs a better handling.

I will reactivate my operation context patch set for TTM. Shouldn't be 
to much work to get this going.

Regards,
Christian.

>
> Regards,
> David Zhou
>
>> +
>> +                if (place &&
>> +                    !bdev->driver->eviction_valuable(bo, place))
>> +                    continue;
>> +
>> +                ttm_bo_del_from_lru(bo);
>> +
>> +                ret = 0;
>> +                locked = false;
>> +                break;
>> +            }
>> +
>>               ret = reservation_object_trylock(bo->resv) ? 0 : -EBUSY;
>>               if (ret)
>>                   continue;
>> @@ -760,6 +777,7 @@ static int ttm_mem_evict_first(struct 
>> ttm_bo_device *bdev,
>>                   continue;
>>               }
>>   +            locked = true;
>>               break;
>>           }
>>   @@ -775,7 +793,8 @@ static int ttm_mem_evict_first(struct 
>> ttm_bo_device *bdev,
>>       kref_get(&bo->list_kref);
>>         if (!list_empty(&bo->ddestroy)) {
>> -        ret = ttm_bo_cleanup_refs(bo, interruptible, no_wait_gpu, 
>> true);
>> +        ret = ttm_bo_cleanup_refs(bo, interruptible, no_wait_gpu,
>> +                      locked);
>>           kref_put(&bo->list_kref, ttm_bo_release_list);
>>           return ret;
>>       }
>> @@ -786,7 +805,10 @@ static int ttm_mem_evict_first(struct 
>> ttm_bo_device *bdev,
>>       BUG_ON(ret != 0);
>>         ret = ttm_bo_evict(bo, interruptible, no_wait_gpu);
>> -    ttm_bo_unreserve(bo);
>> +    if (locked)
>> +        ttm_bo_unreserve(bo);
>> +    else
>> +        ttm_bo_add_to_lru(bo);
>>         kref_put(&bo->list_kref, ttm_bo_release_list);
>>       return ret;
>> @@ -850,7 +872,7 @@ static int ttm_bo_mem_force_space(struct 
>> ttm_buffer_object *bo,
>>               return ret;
>>           if (mem->mm_node)
>>               break;
>> -        ret = ttm_mem_evict_first(bdev, mem_type, place,
>> +        ret = ttm_mem_evict_first(bdev, bo->resv, mem_type, place,
>>                         interruptible, no_wait_gpu);
>>           if (unlikely(ret != 0))
>>               return ret;
>> @@ -1353,7 +1375,8 @@ static int ttm_bo_force_list_clean(struct 
>> ttm_bo_device *bdev,
>>       for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>           while (!list_empty(&man->lru[i])) {
>>               spin_unlock(&glob->lru_lock);
>> -            ret = ttm_mem_evict_first(bdev, mem_type, NULL, false, 
>> false);
>> +            ret = ttm_mem_evict_first(bdev, NULL, mem_type, NULL,
>> +                          false, false);
>>               if (ret)
>>                   return ret;
>>               spin_lock(&glob->lru_lock);
>
diff mbox

Patch

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 50a678b504f3..6545c4344684 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -735,20 +735,37 @@  bool ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 EXPORT_SYMBOL(ttm_bo_eviction_valuable);
 
 static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
-				uint32_t mem_type,
-				const struct ttm_place *place,
-				bool interruptible,
-				bool no_wait_gpu)
+			       struct reservation_object *resv,
+			       uint32_t mem_type,
+			       const struct ttm_place *place,
+			       bool interruptible,
+			       bool no_wait_gpu)
 {
 	struct ttm_bo_global *glob = bdev->glob;
 	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
 	struct ttm_buffer_object *bo;
 	int ret = -EBUSY;
+	bool locked;
 	unsigned i;
 
 	spin_lock(&glob->lru_lock);
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
 		list_for_each_entry(bo, &man->lru[i], lru) {
+			if (bo->resv == resv) {
+				if (list_empty(&bo->ddestroy))
+					continue;
+
+				if (place &&
+				    !bdev->driver->eviction_valuable(bo, place))
+					continue;
+
+				ttm_bo_del_from_lru(bo);
+
+				ret = 0;
+				locked = false;
+				break;
+			}
+
 			ret = reservation_object_trylock(bo->resv) ? 0 : -EBUSY;
 			if (ret)
 				continue;
@@ -760,6 +777,7 @@  static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
 				continue;
 			}
 
+			locked = true;
 			break;
 		}
 
@@ -775,7 +793,8 @@  static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
 	kref_get(&bo->list_kref);
 
 	if (!list_empty(&bo->ddestroy)) {
-		ret = ttm_bo_cleanup_refs(bo, interruptible, no_wait_gpu, true);
+		ret = ttm_bo_cleanup_refs(bo, interruptible, no_wait_gpu,
+					  locked);
 		kref_put(&bo->list_kref, ttm_bo_release_list);
 		return ret;
 	}
@@ -786,7 +805,10 @@  static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
 	BUG_ON(ret != 0);
 
 	ret = ttm_bo_evict(bo, interruptible, no_wait_gpu);
-	ttm_bo_unreserve(bo);
+	if (locked)
+		ttm_bo_unreserve(bo);
+	else
+		ttm_bo_add_to_lru(bo);
 
 	kref_put(&bo->list_kref, ttm_bo_release_list);
 	return ret;
@@ -850,7 +872,7 @@  static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo,
 			return ret;
 		if (mem->mm_node)
 			break;
-		ret = ttm_mem_evict_first(bdev, mem_type, place,
+		ret = ttm_mem_evict_first(bdev, bo->resv, mem_type, place,
 					  interruptible, no_wait_gpu);
 		if (unlikely(ret != 0))
 			return ret;
@@ -1353,7 +1375,8 @@  static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
 		while (!list_empty(&man->lru[i])) {
 			spin_unlock(&glob->lru_lock);
-			ret = ttm_mem_evict_first(bdev, mem_type, NULL, false, false);
+			ret = ttm_mem_evict_first(bdev, NULL, mem_type, NULL,
+						  false, false);
 			if (ret)
 				return ret;
 			spin_lock(&glob->lru_lock);