diff mbox series

[v2,09/15] drm/ttm, drm/amdgpu: Allow the driver some control over swapping

Message ID 20210518082701.997251-10-thomas.hellstrom@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915: Move LMEM (VRAM) management over to TTM | expand

Commit Message

Thomas Hellstrom May 18, 2021, 8:26 a.m. UTC
We are calling the eviction_valuable driver callback at eviction time to
determine whether we actually can evict a buffer object.
The upcoming i915 TTM backend needs the same functionality for swapout,
and that might actually be beneficial to other drivers as well.

Add an eviction_valuable call also in the swapout path. Try to keep the
current behaviour for all drivers by returning true if the buffer object
is already in the TTM_PL_SYSTEM placement. We change behaviour for the
case where a buffer object is in a TT backed placement when swapped out,
in which case the driver's normal eviction_valuable path is run.

Finally export ttm_tt_unpopulate() and don't swap out bos
that are not populated. This allows a driver to purge a bo at
swapout time if its content is no longer valuable rather than to
have TTM swap the contents out.

Cc: Christian König <christian.koenig@amd.com>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  4 +++
 drivers/gpu/drm/ttm/ttm_bo.c            | 41 +++++++++++++++----------
 drivers/gpu/drm/ttm/ttm_tt.c            |  4 +++
 3 files changed, 33 insertions(+), 16 deletions(-)

Comments

Maarten Lankhorst May 18, 2021, 12:19 p.m. UTC | #1
Op 18-05-2021 om 10:26 schreef Thomas Hellström:
> We are calling the eviction_valuable driver callback at eviction time to
> determine whether we actually can evict a buffer object.
> The upcoming i915 TTM backend needs the same functionality for swapout,
> and that might actually be beneficial to other drivers as well.
>
> Add an eviction_valuable call also in the swapout path. Try to keep the
> current behaviour for all drivers by returning true if the buffer object
> is already in the TTM_PL_SYSTEM placement. We change behaviour for the
> case where a buffer object is in a TT backed placement when swapped out,
> in which case the drivers normal eviction_valuable path is run.
>
> Finally export ttm_tt_unpopulate() and don't swap out bos
> that are not populated. This allows a driver to purge a bo at
> swapout time if its content is no longer valuable rather than to
> have TTM swap the contents out.
>
> Cc: Christian König <christian.koenig@amd.com>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  4 +++
>  drivers/gpu/drm/ttm/ttm_bo.c            | 41 +++++++++++++++----------
>  drivers/gpu/drm/ttm/ttm_tt.c            |  4 +++
>  3 files changed, 33 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 8c7ec09eb1a4..d5a9d7a88315 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1399,6 +1399,10 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
>  	struct dma_fence *f;
>  	int i;
>  
> +	/* Swapout? */
> +	if (bo->mem.mem_type == TTM_PL_SYSTEM)
> +		return true;
> +
>  	if (bo->type == ttm_bo_type_kernel &&
>  	    !amdgpu_vm_evictable(ttm_to_amdgpu_bo(bo)))
>  		return false;
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 4479c55aaa1d..6a3f3112f62a 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -531,6 +531,10 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo,
>  bool ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
>  			      const struct ttm_place *place)
>  {
> +	dma_resv_assert_held(bo->base.resv);
> +	if (bo->mem.mem_type == TTM_PL_SYSTEM)
> +		return true;
> +
>  	/* Don't evict this BO if it's outside of the
>  	 * requested placement range
>  	 */
> @@ -553,7 +557,9 @@ EXPORT_SYMBOL(ttm_bo_eviction_valuable);
>   * b. Otherwise, trylock it.
>   */
>  static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
> -			struct ttm_operation_ctx *ctx, bool *locked, bool *busy)
> +					   struct ttm_operation_ctx *ctx,
> +					   const struct ttm_place *place,
> +					   bool *locked, bool *busy)
>  {
>  	bool ret = false;
>  
> @@ -571,6 +577,12 @@ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
>  			*busy = !ret;
>  	}
>  
> +	if (ret && place && !bo->bdev->funcs->eviction_valuable(bo, place)) {
> +		ret = false;
> +		if (locked)
> +			dma_resv_unlock(bo->base.resv);
> +	}

Probably meant to check and clear *locked here?

With that fixed:

Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>

> +
>  	return ret;
>  }
>  
> @@ -625,20 +637,14 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>  		list_for_each_entry(bo, &man->lru[i], lru) {
>  			bool busy;
>  
> -			if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
> -							    &busy)) {
> +			if (!ttm_bo_evict_swapout_allowable(bo, ctx, place,
> +							    &locked, &busy)) {
>  				if (busy && !busy_bo && ticket !=
>  				    dma_resv_locking_ctx(bo->base.resv))
>  					busy_bo = bo;
>  				continue;
>  			}
>  
> -			if (place && !bdev->funcs->eviction_valuable(bo,
> -								      place)) {
> -				if (locked)
> -					dma_resv_unlock(bo->base.resv);
> -				continue;
> -			}
>  			if (!ttm_bo_get_unless_zero(bo)) {
>  				if (locked)
>  					dma_resv_unlock(bo->base.resv);
> @@ -1138,10 +1144,18 @@ EXPORT_SYMBOL(ttm_bo_wait);
>  int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>  		   gfp_t gfp_flags)
>  {
> +	struct ttm_place place = {};
>  	bool locked;
>  	int ret;
>  
> -	if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, NULL))
> +	/*
> +	 * While the bo may already reside in SYSTEM placement, set
> +	 * SYSTEM as new placement to cover also the move further below.
> +	 * The driver may use the fact that we're moving from SYSTEM
> +	 * as an indication that we're about to swap out.
> +	 */
> +	place.mem_type = TTM_PL_SYSTEM;
> +	if (!ttm_bo_evict_swapout_allowable(bo, ctx, &place, &locked, NULL))
>  		return -EBUSY;
>  
>  	if (!ttm_bo_get_unless_zero(bo)) {
> @@ -1166,12 +1180,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>  	if (bo->mem.mem_type != TTM_PL_SYSTEM) {
>  		struct ttm_operation_ctx ctx = { false, false };
>  		struct ttm_resource evict_mem;
> -		struct ttm_place place, hop;
> -
> -		memset(&place, 0, sizeof(place));
> -		memset(&hop, 0, sizeof(hop));
> -
> -		place.mem_type = TTM_PL_SYSTEM;
> +		struct ttm_place hop = {};
>  
>  		ret = ttm_resource_alloc(bo, &place, &evict_mem);
>  		if (unlikely(ret))
> diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
> index 539e0232cb3b..7878ca4876c5 100644
> --- a/drivers/gpu/drm/ttm/ttm_tt.c
> +++ b/drivers/gpu/drm/ttm/ttm_tt.c
> @@ -258,6 +258,9 @@ int ttm_tt_swapout(struct ttm_device *bdev, struct ttm_tt *ttm,
>  	struct page *to_page;
>  	int i, ret;
>  
> +	if (!ttm_tt_is_populated(ttm))
> +		return 0;
> +
>  	swap_storage = shmem_file_setup("ttm swap", size, 0);
>  	if (IS_ERR(swap_storage)) {
>  		pr_err("Failed allocating swap storage\n");
> @@ -399,6 +402,7 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm)
>  
>  	ttm->page_flags &= ~TTM_PAGE_FLAG_PRIV_POPULATED;
>  }
> +EXPORT_SYMBOL(ttm_tt_unpopulate);
>  
>  #ifdef CONFIG_DEBUG_FS
>
Thomas Hellstrom May 18, 2021, 3:15 p.m. UTC | #2
On 5/18/21 10:26 AM, Thomas Hellström wrote:
> We are calling the eviction_valuable driver callback at eviction time to
> determine whether we actually can evict a buffer object.
> The upcoming i915 TTM backend needs the same functionality for swapout,
> and that might actually be beneficial to other drivers as well.
>
> Add an eviction_valuable call also in the swapout path. Try to keep the
> current behaviour for all drivers by returning true if the buffer object
> is already in the TTM_PL_SYSTEM placement. We change behaviour for the
> case where a buffer object is in a TT backed placement when swapped out,
> in which case the drivers normal eviction_valuable path is run.
>
> Finally export ttm_tt_unpopulate() and don't swap out bos
> that are not populated. This allows a driver to purge a bo at
> swapout time if its content is no longer valuable rather than to
> have TTM swap the contents out.
>
> Cc: Christian König <christian.koenig@amd.com>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>

Christian,

Here we have a ttm_tt_unpopulate() export as well at the end. I figure 
you will push back on that one. What we really need is functionality 
to just drop the bo contents and end up in system memory unpopulated. 
Should I perhaps add a utility function to do that instead, like 
ttm_bo_purge()?

Thanks,

Thomas
Christian König May 18, 2021, 3:18 p.m. UTC | #3
Am 18.05.21 um 17:15 schrieb Thomas Hellström:
>
> On 5/18/21 10:26 AM, Thomas Hellström wrote:
>> We are calling the eviction_valuable driver callback at eviction time to
>> determine whether we actually can evict a buffer object.
>> The upcoming i915 TTM backend needs the same functionality for swapout,
>> and that might actually be beneficial to other drivers as well.
>>
>> Add an eviction_valuable call also in the swapout path. Try to keep the
>> current behaviour for all drivers by returning true if the buffer object
>> is already in the TTM_PL_SYSTEM placement. We change behaviour for the
>> case where a buffer object is in a TT backed placement when swapped out,
>> in which case the drivers normal eviction_valuable path is run.
>>
>> Finally export ttm_tt_unpopulate() and don't swap out bos
>> that are not populated. This allows a driver to purge a bo at
>> swapout time if its content is no longer valuable rather than to
>> have TTM swap the contents out.
>>
>> Cc: Christian König <christian.koenig@amd.com>
>> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>
> Christian,
>
> Here we have a ttm_tt_unpopulate() export as well at the end. I figure 
> you will push back on that one. What we really need is a functionality 
> to just drop the bo contents and end up in system memory unpopulated. 
> Should I perhaps add a utility function to do that instead? like 
> ttm_bo_purge()?

We already have that. Just call ttm_bo_validate() without any place to 
put the buffer.

See how ttm_bo_pipeline_gutting() is used.

Christian.

>
> Thanks,
>
> Thomas
>
>
Thomas Hellstrom May 18, 2021, 3:20 p.m. UTC | #4
On 5/18/21 5:18 PM, Christian König wrote:
>
>
> Am 18.05.21 um 17:15 schrieb Thomas Hellström:
>>
>> On 5/18/21 10:26 AM, Thomas Hellström wrote:
>>> We are calling the eviction_valuable driver callback at eviction 
>>> time to
>>> determine whether we actually can evict a buffer object.
>>> The upcoming i915 TTM backend needs the same functionality for swapout,
>>> and that might actually be beneficial to other drivers as well.
>>>
>>> Add an eviction_valuable call also in the swapout path. Try to keep the
>>> current behaviour for all drivers by returning true if the buffer 
>>> object
>>> is already in the TTM_PL_SYSTEM placement. We change behaviour for the
>>> case where a buffer object is in a TT backed placement when swapped 
>>> out,
>>> in which case the drivers normal eviction_valuable path is run.
>>>
>>> Finally export ttm_tt_unpopulate() and don't swap out bos
>>> that are not populated. This allows a driver to purge a bo at
>>> swapout time if its content is no longer valuable rather than to
>>> have TTM swap the contents out.
>>>
>>> Cc: Christian König <christian.koenig@amd.com>
>>> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>>
>> Christian,
>>
>> Here we have a ttm_tt_unpopulate() export as well at the end. I 
>> figure you will push back on that one. What we really need is a 
>> functionality to just drop the bo contents and end up in system 
>> memory unpopulated. Should I perhaps add a utility function to do 
>> that instead? like ttm_bo_purge()?
>
> We already have that. Just call ttm_bo_validate() without any place to 
> put the buffer.
>
> See how ttm_bo_pipeline_gutting() is used.
>
> Christian.

OK, so is that reentrant from the move() or swap_notify() callback?

/Thomas



>
>>
>> Thanks,
>>
>> Thomas
>>
>>
>
Christian König May 18, 2021, 3:28 p.m. UTC | #5
Am 18.05.21 um 17:20 schrieb Thomas Hellström:
>
> On 5/18/21 5:18 PM, Christian König wrote:
>>
>>
>> Am 18.05.21 um 17:15 schrieb Thomas Hellström:
>>>
>>> On 5/18/21 10:26 AM, Thomas Hellström wrote:
>>>> We are calling the eviction_valuable driver callback at eviction 
>>>> time to
>>>> determine whether we actually can evict a buffer object.
>>>> The upcoming i915 TTM backend needs the same functionality for 
>>>> swapout,
>>>> and that might actually be beneficial to other drivers as well.
>>>>
>>>> Add an eviction_valuable call also in the swapout path. Try to keep 
>>>> the
>>>> current behaviour for all drivers by returning true if the buffer 
>>>> object
>>>> is already in the TTM_PL_SYSTEM placement. We change behaviour for the
>>>> case where a buffer object is in a TT backed placement when swapped 
>>>> out,
>>>> in which case the drivers normal eviction_valuable path is run.
>>>>
>>>> Finally export ttm_tt_unpopulate() and don't swap out bos
>>>> that are not populated. This allows a driver to purge a bo at
>>>> swapout time if its content is no longer valuable rather than to
>>>> have TTM swap the contents out.
>>>>
>>>> Cc: Christian König <christian.koenig@amd.com>
>>>> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>>>
>>> Christian,
>>>
>>> Here we have a ttm_tt_unpopulate() export as well at the end. I 
>>> figure you will push back on that one. What we really need is a 
>>> functionality to just drop the bo contents and end up in system 
>>> memory unpopulated. Should I perhaps add a utility function to do 
>>> that instead? like ttm_bo_purge()?
>>
>> We already have that. Just call ttm_bo_validate() without any place 
>> to put the buffer.
>>
>> See how ttm_bo_pipeline_gutting() is used.
>>
>> Christian.
>
> OK, so is that reentrant from the move() or swap_notify() callback.

That sounds like a design bug to me since you should never need to do this.

When you want to destroy the backing store of a buffer during eviction 
you should just do this by returning an empty placement from the 
evict_flags callback.

It is TTM's job to deal with the buffer placement and drivers are no 
longer allowed to mess with that.

Regards,
Christian.

>
> /Thomas
>
>
>
>>
>>>
>>> Thanks,
>>>
>>> Thomas
>>>
>>>
>>
Thomas Hellstrom May 18, 2021, 3:38 p.m. UTC | #6
On 5/18/21 5:28 PM, Christian König wrote:
> Am 18.05.21 um 17:20 schrieb Thomas Hellström:
>>
>> On 5/18/21 5:18 PM, Christian König wrote:
>>>
>>>
>>> Am 18.05.21 um 17:15 schrieb Thomas Hellström:
>>>>
>>>> On 5/18/21 10:26 AM, Thomas Hellström wrote:
>>>>> We are calling the eviction_valuable driver callback at eviction 
>>>>> time to
>>>>> determine whether we actually can evict a buffer object.
>>>>> The upcoming i915 TTM backend needs the same functionality for 
>>>>> swapout,
>>>>> and that might actually be beneficial to other drivers as well.
>>>>>
>>>>> Add an eviction_valuable call also in the swapout path. Try to 
>>>>> keep the
>>>>> current behaviour for all drivers by returning true if the buffer 
>>>>> object
>>>>> is already in the TTM_PL_SYSTEM placement. We change behaviour for 
>>>>> the
>>>>> case where a buffer object is in a TT backed placement when 
>>>>> swapped out,
>>>>> in which case the drivers normal eviction_valuable path is run.
>>>>>
>>>>> Finally export ttm_tt_unpopulate() and don't swap out bos
>>>>> that are not populated. This allows a driver to purge a bo at
>>>>> swapout time if its content is no longer valuable rather than to
>>>>> have TTM swap the contents out.
>>>>>
>>>>> Cc: Christian König <christian.koenig@amd.com>
>>>>> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>>>>
>>>> Christian,
>>>>
>>>> Here we have a ttm_tt_unpopulate() export as well at the end. I 
>>>> figure you will push back on that one. What we really need is a 
>>>> functionality to just drop the bo contents and end up in system 
>>>> memory unpopulated. Should I perhaps add a utility function to do 
>>>> that instead? like ttm_bo_purge()?
>>>
>>> We already have that. Just call ttm_bo_validate() without any place 
>>> to put the buffer.
>>>
>>> See how ttm_bo_pipeline_gutting() is used.
>>>
>>> Christian.
>>
>> OK, so is that reentrant from the move() or swap_notify() callback.
>
> That sounds like a design bug to me since you should never need to do 
> this.
>
> When you want to destroy the backing store of a buffer during eviction 
> you should just do this by returning an empty placement from the 
> evict_flags callback.

So this is for the functionality where the user has indicated that the 
contents are no longer of value, but the buffer itself
is cached until evicted or swapped out for performance reasons. So the 
above would work for eviction, but what about swapout? Could we add some 
similar functionality there?

/Thomas

>
> Regards,
> Christian.
>
>>
>> /Thomas
>>
>>
>>
>>>
>>>>
>>>> Thanks,
>>>>
>>>> Thomas
>>>>
>>>>
>>>
>
Christian König May 18, 2021, 3:42 p.m. UTC | #7
Am 18.05.21 um 17:38 schrieb Thomas Hellström:
>
> On 5/18/21 5:28 PM, Christian König wrote:
>> Am 18.05.21 um 17:20 schrieb Thomas Hellström:
>>>
>>> On 5/18/21 5:18 PM, Christian König wrote:
>>>>
>>>>
>>>> Am 18.05.21 um 17:15 schrieb Thomas Hellström:
>>>>>
>>>>> On 5/18/21 10:26 AM, Thomas Hellström wrote:
>>>>>> We are calling the eviction_valuable driver callback at eviction 
>>>>>> time to
>>>>>> determine whether we actually can evict a buffer object.
>>>>>> The upcoming i915 TTM backend needs the same functionality for 
>>>>>> swapout,
>>>>>> and that might actually be beneficial to other drivers as well.
>>>>>>
>>>>>> Add an eviction_valuable call also in the swapout path. Try to 
>>>>>> keep the
>>>>>> current behaviour for all drivers by returning true if the buffer 
>>>>>> object
>>>>>> is already in the TTM_PL_SYSTEM placement. We change behaviour 
>>>>>> for the
>>>>>> case where a buffer object is in a TT backed placement when 
>>>>>> swapped out,
>>>>>> in which case the drivers normal eviction_valuable path is run.
>>>>>>
>>>>>> Finally export ttm_tt_unpopulate() and don't swap out bos
>>>>>> that are not populated. This allows a driver to purge a bo at
>>>>>> swapout time if its content is no longer valuable rather than to
>>>>>> have TTM swap the contents out.
>>>>>>
>>>>>> Cc: Christian König <christian.koenig@amd.com>
>>>>>> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>>>>>
>>>>> Christian,
>>>>>
>>>>> Here we have a ttm_tt_unpopulate() export as well at the end. I 
>>>>> figure you will push back on that one. What we really need is a 
>>>>> functionality to just drop the bo contents and end up in system 
>>>>> memory unpopulated. Should I perhaps add a utility function to do 
>>>>> that instead? like ttm_bo_purge()?
>>>>
>>>> We already have that. Just call ttm_bo_validate() without any place 
>>>> to put the buffer.
>>>>
>>>> See how ttm_bo_pipeline_gutting() is used.
>>>>
>>>> Christian.
>>>
>>> OK, so is that reentrant from the move() or swap_notify() callback.
>>
>> That sounds like a design bug to me since you should never need to do 
>> this.
>>
>> When you want to destroy the backing store of a buffer during 
>> eviction you should just do this by returning an empty placement from 
>> the evict_flags callback.
>
> So this is for the functionality where the user has indicated that the 
> contents is no longer of value, but the buffer itself
> is cached until evicted or swapped out for performance reasons. So the 
> above would work for eviction, but what about swapout. Could we add 
> some similar functionality there?

Amdgpu has the same functionality and you don't need to handle swap at all.

Just return from the evict_flags that you want to drop the backing store 
as soon as the BO leaves the GTT domain.

Christian.

>
> /Thomas
>
>>
>> Regards,
>> Christian.
>>
>>>
>>> /Thomas
>>>
>>>
>>>
>>>>
>>>>>
>>>>> Thanks,
>>>>>
>>>>> Thomas
>>>>>
>>>>>
>>>>
>>
Thomas Hellstrom May 18, 2021, 4:07 p.m. UTC | #8
On 5/18/21 5:42 PM, Christian König wrote:
> Am 18.05.21 um 17:38 schrieb Thomas Hellström:
>>
>> On 5/18/21 5:28 PM, Christian König wrote:
>>> Am 18.05.21 um 17:20 schrieb Thomas Hellström:
>>>>
>>>> On 5/18/21 5:18 PM, Christian König wrote:
>>>>>
>>>>>
>>>>> Am 18.05.21 um 17:15 schrieb Thomas Hellström:
>>>>>>
>>>>>> On 5/18/21 10:26 AM, Thomas Hellström wrote:
>>>>>>> We are calling the eviction_valuable driver callback at eviction 
>>>>>>> time to
>>>>>>> determine whether we actually can evict a buffer object.
>>>>>>> The upcoming i915 TTM backend needs the same functionality for 
>>>>>>> swapout,
>>>>>>> and that might actually be beneficial to other drivers as well.
>>>>>>>
>>>>>>> Add an eviction_valuable call also in the swapout path. Try to 
>>>>>>> keep the
>>>>>>> current behaviour for all drivers by returning true if the 
>>>>>>> buffer object
>>>>>>> is already in the TTM_PL_SYSTEM placement. We change behaviour 
>>>>>>> for the
>>>>>>> case where a buffer object is in a TT backed placement when 
>>>>>>> swapped out,
>>>>>>> in which case the drivers normal eviction_valuable path is run.
>>>>>>>
>>>>>>> Finally export ttm_tt_unpopulate() and don't swap out bos
>>>>>>> that are not populated. This allows a driver to purge a bo at
>>>>>>> swapout time if its content is no longer valuable rather than to
>>>>>>> have TTM swap the contents out.
>>>>>>>
>>>>>>> Cc: Christian König <christian.koenig@amd.com>
>>>>>>> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>>>>>>
>>>>>> Christian,
>>>>>>
>>>>>> Here we have a ttm_tt_unpopulate() export as well at the end. I 
>>>>>> figure you will push back on that one. What we really need is a 
>>>>>> functionality to just drop the bo contents and end up in system 
>>>>>> memory unpopulated. Should I perhaps add a utility function to do 
>>>>>> that instead? like ttm_bo_purge()?
>>>>>
>>>>> We already have that. Just call ttm_bo_validate() without any 
>>>>> place to put the buffer.
>>>>>
>>>>> See how ttm_bo_pipeline_gutting() is used.
>>>>>
>>>>> Christian.
>>>>
>>>> OK, so is that reentrant from the move() or swap_notify() callback.
>>>
>>> That sounds like a design bug to me since you should never need to 
>>> do this.
>>>
>>> When you want to destroy the backing store of a buffer during 
>>> eviction you should just do this by returning an empty placement 
>>> from the evict_flags callback.
>>
>> So this is for the functionality where the user has indicated that 
>> the contents is no longer of value, but the buffer itself
>> is cached until evicted or swapped out for performance reasons. So 
>> the above would work for eviction, but what about swapout. Could we 
>> add some similar functionality there?
>
> Amdgpu has the same functionality and you don't need to handle swap at 
> all.
>
> Just return from the evict_flags that you want to drop the backing 
> store as soon as the BO leaves the GTT domain.

Hmm, the pipeline_gutting function seems ok, but overly complex if the 
bo is already idle. Am I allowed to optimize it slightly for the latter 
case?

/Thomas


>
> Christian.
>
>>
>> /Thomas
>>
>>>
>>> Regards,
>>> Christian.
>>>
>>>>
>>>> /Thomas
>>>>
>>>>
>>>>
>>>>>
>>>>>>
>>>>>> Thanks,
>>>>>>
>>>>>> Thomas
>>>>>>
>>>>>>
>>>>>
>>>
>
Christian König May 18, 2021, 4:30 p.m. UTC | #9
Am 18.05.21 um 18:07 schrieb Thomas Hellström:
>
> On 5/18/21 5:42 PM, Christian König wrote:
>> Am 18.05.21 um 17:38 schrieb Thomas Hellström:
>>>
>>> On 5/18/21 5:28 PM, Christian König wrote:
>>>> Am 18.05.21 um 17:20 schrieb Thomas Hellström:
>>>>>
>>>>> On 5/18/21 5:18 PM, Christian König wrote:
>>>>>>
>>>>>>
>>>>>> Am 18.05.21 um 17:15 schrieb Thomas Hellström:
>>>>>>>
>>>>>>> On 5/18/21 10:26 AM, Thomas Hellström wrote:
>>>>>>>> We are calling the eviction_valuable driver callback at 
>>>>>>>> eviction time to
>>>>>>>> determine whether we actually can evict a buffer object.
>>>>>>>> The upcoming i915 TTM backend needs the same functionality for 
>>>>>>>> swapout,
>>>>>>>> and that might actually be beneficial to other drivers as well.
>>>>>>>>
>>>>>>>> Add an eviction_valuable call also in the swapout path. Try to 
>>>>>>>> keep the
>>>>>>>> current behaviour for all drivers by returning true if the 
>>>>>>>> buffer object
>>>>>>>> is already in the TTM_PL_SYSTEM placement. We change behaviour 
>>>>>>>> for the
>>>>>>>> case where a buffer object is in a TT backed placement when 
>>>>>>>> swapped out,
>>>>>>>> in which case the drivers normal eviction_valuable path is run.
>>>>>>>>
>>>>>>>> Finally export ttm_tt_unpopulate() and don't swap out bos
>>>>>>>> that are not populated. This allows a driver to purge a bo at
>>>>>>>> swapout time if its content is no longer valuable rather than to
>>>>>>>> have TTM swap the contents out.
>>>>>>>>
>>>>>>>> Cc: Christian König <christian.koenig@amd.com>
>>>>>>>> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>>>>>>>
>>>>>>> Christian,
>>>>>>>
>>>>>>> Here we have a ttm_tt_unpopulate() export as well at the end. I 
>>>>>>> figure you will push back on that one. What we really need is a 
>>>>>>> functionality to just drop the bo contents and end up in system 
>>>>>>> memory unpopulated. Should I perhaps add a utility function to 
>>>>>>> do that instead? like ttm_bo_purge()?
>>>>>>
>>>>>> We already have that. Just call ttm_bo_validate() without any 
>>>>>> place to put the buffer.
>>>>>>
>>>>>> See how ttm_bo_pipeline_gutting() is used.
>>>>>>
>>>>>> Christian.
>>>>>
>>>>> OK, so is that reentrant from the move() or swap_notify() callback.
>>>>
>>>> That sounds like a design bug to me since you should never need to 
>>>> do this.
>>>>
>>>> When you want to destroy the backing store of a buffer during 
>>>> eviction you should just do this by returning an empty placement 
>>>> from the evict_flags callback.
>>>
>>> So this is for the functionality where the user has indicated that 
>>> the contents is no longer of value, but the buffer itself
>>> is cached until evicted or swapped out for performance reasons. So 
>>> the above would work for eviction, but what about swapout. Could we 
>>> add some similar functionality there?
>>
>> Amdgpu has the same functionality and you don't need to handle swap 
>> at all.
>>
>> Just return from the evict_flags that you want to drop the backing 
>> store as soon as the BO leaves the GTT domain.
>
> Hmm, the pipeline_gutting function seems ok, but overly complex if the 
> bo is already idle, Am I allowed to optimize it slightly for the 
> latter case?

Yeah, sure. We just never had that use case so far.

Christian.

>
> /Thomas
>
>
>>
>> Christian.
>>
>>>
>>> /Thomas
>>>
>>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>>>
>>>>> /Thomas
>>>>>
>>>>>
>>>>>
>>>>>>
>>>>>>>
>>>>>>> Thanks,
>>>>>>>
>>>>>>> Thomas
>>>>>>>
>>>>>>>
>>>>>>
>>>>
>>
Thomas Hellstrom May 19, 2021, 6:27 a.m. UTC | #10
On 5/18/21 6:30 PM, Christian König wrote:
> Am 18.05.21 um 18:07 schrieb Thomas Hellström:
>>
>> On 5/18/21 5:42 PM, Christian König wrote:
>>> Am 18.05.21 um 17:38 schrieb Thomas Hellström:
>>>>
>>>> On 5/18/21 5:28 PM, Christian König wrote:
>>>>> Am 18.05.21 um 17:20 schrieb Thomas Hellström:
>>>>>>
>>>>>> On 5/18/21 5:18 PM, Christian König wrote:
>>>>>>>
>>>>>>>
>>>>>>> Am 18.05.21 um 17:15 schrieb Thomas Hellström:
>>>>>>>>
>>>>>>>> On 5/18/21 10:26 AM, Thomas Hellström wrote:
>>>>>>>>> We are calling the eviction_valuable driver callback at 
>>>>>>>>> eviction time to
>>>>>>>>> determine whether we actually can evict a buffer object.
>>>>>>>>> The upcoming i915 TTM backend needs the same functionality for 
>>>>>>>>> swapout,
>>>>>>>>> and that might actually be beneficial to other drivers as well.
>>>>>>>>>
>>>>>>>>> Add an eviction_valuable call also in the swapout path. Try to 
>>>>>>>>> keep the
>>>>>>>>> current behaviour for all drivers by returning true if the 
>>>>>>>>> buffer object
>>>>>>>>> is already in the TTM_PL_SYSTEM placement. We change behaviour 
>>>>>>>>> for the
>>>>>>>>> case where a buffer object is in a TT backed placement when 
>>>>>>>>> swapped out,
>>>>>>>>> in which case the drivers normal eviction_valuable path is run.
>>>>>>>>>
>>>>>>>>> Finally export ttm_tt_unpopulate() and don't swap out bos
>>>>>>>>> that are not populated. This allows a driver to purge a bo at
>>>>>>>>> swapout time if its content is no longer valuable rather than to
>>>>>>>>> have TTM swap the contents out.
>>>>>>>>>
>>>>>>>>> Cc: Christian König <christian.koenig@amd.com>
>>>>>>>>> Signed-off-by: Thomas Hellström 
>>>>>>>>> <thomas.hellstrom@linux.intel.com>
>>>>>>>>
>>>>>>>> Christian,
>>>>>>>>
>>>>>>>> Here we have a ttm_tt_unpopulate() export as well at the end. I 
>>>>>>>> figure you will push back on that one. What we really need is a 
>>>>>>>> functionality to just drop the bo contents and end up in system 
>>>>>>>> memory unpopulated. Should I perhaps add a utility function to 
>>>>>>>> do that instead? like ttm_bo_purge()?
>>>>>>>
>>>>>>> We already have that. Just call ttm_bo_validate() without any 
>>>>>>> place to put the buffer.
>>>>>>>
>>>>>>> See how ttm_bo_pipeline_gutting() is used.
>>>>>>>
>>>>>>> Christian.
>>>>>>
>>>>>> OK, so is that reentrant from the move() or swap_notify() callback.
>>>>>
>>>>> That sounds like a design bug to me since you should never need to 
>>>>> do this.
>>>>>
>>>>> When you want to destroy the backing store of a buffer during 
>>>>> eviction you should just do this by returning an empty placement 
>>>>> from the evict_flags callback.
>>>>
>>>> So this is for the functionality where the user has indicated that 
>>>> the contents is no longer of value, but the buffer itself
>>>> is cached until evicted or swapped out for performance reasons. So 
>>>> the above would work for eviction, but what about swapout. Could we 
>>>> add some similar functionality there?
>>>
>>> Amdgpu has the same functionality and you don't need to handle swap 
>>> at all.
>>>
>>> Just return from the evict_flags that you want to drop the backing 
>>> store as soon as the BO leaves the GTT domain.
>>
>> Hmm, the pipeline_gutting function seems ok, but overly complex if 
>> the bo is already idle, Am I allowed to optimize it slightly for the 
>> latter case?
>
> Yeah, sure. We just never hat that use case so far.

One thing about the code here that makes me worried is that the 
"destination" ttm_tt is allocated *after* pipeline_gutting. We're not 
really allowed to fail here because that would leave the BO in a state 
where codepaths (fault for example) try to access a NULL ttm_tt. While 
the idle case can get away with ttm_tt_unpopulate, for the async case, 
ttm_tt really needs to be pre-allocated, so that we can leave the bo in 
a consistent state.

/Thomas


>
> Christian.
>
>>
>> /Thomas
>>
>>
>>>
>>> Christian.
>>>
>>>>
>>>> /Thomas
>>>>
>>>>>
>>>>> Regards,
>>>>> Christian.
>>>>>
>>>>>>
>>>>>> /Thomas
>>>>>>
>>>>>>
>>>>>>
>>>>>>>
>>>>>>>>
>>>>>>>> Thanks,
>>>>>>>>
>>>>>>>> Thomas
>>>>>>>>
>>>>>>>>
>>>>>>>
>>>>>
>>>
>
Christian König May 19, 2021, 10:43 a.m. UTC | #11
Am 19.05.21 um 08:27 schrieb Thomas Hellström:
>
> On 5/18/21 6:30 PM, Christian König wrote:
>> Am 18.05.21 um 18:07 schrieb Thomas Hellström:
>>>
>>> On 5/18/21 5:42 PM, Christian König wrote:
>>>> Am 18.05.21 um 17:38 schrieb Thomas Hellström:
>>>>>
>>>>> On 5/18/21 5:28 PM, Christian König wrote:
>>>>>> Am 18.05.21 um 17:20 schrieb Thomas Hellström:
>>>>>>>
>>>>>>> On 5/18/21 5:18 PM, Christian König wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>> Am 18.05.21 um 17:15 schrieb Thomas Hellström:
>>>>>>>>>
>>>>>>>>> On 5/18/21 10:26 AM, Thomas Hellström wrote:
>>>>>>>>>> We are calling the eviction_valuable driver callback at 
>>>>>>>>>> eviction time to
>>>>>>>>>> determine whether we actually can evict a buffer object.
>>>>>>>>>> The upcoming i915 TTM backend needs the same functionality 
>>>>>>>>>> for swapout,
>>>>>>>>>> and that might actually be beneficial to other drivers as well.
>>>>>>>>>>
>>>>>>>>>> Add an eviction_valuable call also in the swapout path. Try 
>>>>>>>>>> to keep the
>>>>>>>>>> current behaviour for all drivers by returning true if the 
>>>>>>>>>> buffer object
>>>>>>>>>> is already in the TTM_PL_SYSTEM placement. We change 
>>>>>>>>>> behaviour for the
>>>>>>>>>> case where a buffer object is in a TT backed placement when 
>>>>>>>>>> swapped out,
>>>>>>>>>> in which case the driver's normal eviction_valuable path is run.
>>>>>>>>>>
>>>>>>>>>> Finally export ttm_tt_unpopulate() and don't swap out bos
>>>>>>>>>> that are not populated. This allows a driver to purge a bo at
>>>>>>>>>> swapout time if its content is no longer valuable rather than to
>>>>>>>>>> have TTM swap the contents out.
>>>>>>>>>>
>>>>>>>>>> Cc: Christian König <christian.koenig@amd.com>
>>>>>>>>>> Signed-off-by: Thomas Hellström 
>>>>>>>>>> <thomas.hellstrom@linux.intel.com>
>>>>>>>>>
>>>>>>>>> Christian,
>>>>>>>>>
>>>>>>>>> Here we have a ttm_tt_unpopulate() export as well at the end. 
>>>>>>>>> I figure you will push back on that one. What we really need 
>>>>>>>>> is a functionality to just drop the bo contents and end up in 
>>>>>>>>> system memory unpopulated. Should I perhaps add a utility 
>>>>>>>>> function to do that instead? like ttm_bo_purge()?
>>>>>>>>
>>>>>>>> We already have that. Just call ttm_bo_validate() without any 
>>>>>>>> place to put the buffer.
>>>>>>>>
>>>>>>>> See how ttm_bo_pipeline_gutting() is used.
>>>>>>>>
>>>>>>>> Christian.
>>>>>>>
>>>>>>> OK, so is that reentrant from the move() or swap_notify() callback?
>>>>>>
>>>>>> That sounds like a design bug to me since you should never need 
>>>>>> to do this.
>>>>>>
>>>>>> When you want to destroy the backing store of a buffer during 
>>>>>> eviction you should just do this by returning an empty placement 
>>>>>> from the evict_flags callback.
>>>>>
>>>>> So this is for the functionality where the user has indicated that 
>>>>> the contents are no longer of value, but the buffer itself
>>>>> is cached until evicted or swapped out for performance reasons. So 
>>>>> the above would work for eviction, but what about swapout? Could 
>>>>> we add some similar functionality there?
>>>>
>>>> Amdgpu has the same functionality and you don't need to handle swap 
>>>> at all.
>>>>
>>>> Just return from the evict_flags that you want to drop the backing 
>>>> store as soon as the BO leaves the GTT domain.
>>>
>>> Hmm, the pipeline_gutting function seems ok, but overly complex if 
>>> the bo is already idle. Am I allowed to optimize it slightly for the 
>>> latter case?
>>
>> Yeah, sure. We just never had that use case so far.
>
> One thing about the code here that makes me worried is that the 
> "destination" ttm_tt is allocated *after* pipeline_gutting. We're not 
> really allowed to fail here because that would leave the BO in a state 
> where codepaths (fault for example) try to access a NULL ttm_tt. While 
> the idle case can get away with ttm_tt_unpopulate, for the async case, 
> ttm_tt really needs to be pre-allocated, so that we can leave the bo 
> in a consistent state.

Well the original plan was to make tt allocation purely optional.

But I haven't had the time so far to actually fix that.

Christian.

>
> /Thomas
>
>
>>
>> Christian.
>>
>>>
>>> /Thomas
>>>
>>>
>>>>
>>>> Christian.
>>>>
>>>>>
>>>>> /Thomas
>>>>>
>>>>>>
>>>>>> Regards,
>>>>>> Christian.
>>>>>>
>>>>>>>
>>>>>>> /Thomas
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>>
>>>>>>>>>
>>>>>>>>> Thanks,
>>>>>>>>>
>>>>>>>>> Thomas
>>>>>>>>>
>>>>>>>>>
>>>>>>>>
>>>>>>
>>>>
>>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 8c7ec09eb1a4..d5a9d7a88315 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1399,6 +1399,10 @@  static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 	struct dma_fence *f;
 	int i;
 
+	/* Swapout? */
+	if (bo->mem.mem_type == TTM_PL_SYSTEM)
+		return true;
+
 	if (bo->type == ttm_bo_type_kernel &&
 	    !amdgpu_vm_evictable(ttm_to_amdgpu_bo(bo)))
 		return false;
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 4479c55aaa1d..6a3f3112f62a 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -531,6 +531,10 @@  static int ttm_bo_evict(struct ttm_buffer_object *bo,
 bool ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 			      const struct ttm_place *place)
 {
+	dma_resv_assert_held(bo->base.resv);
+	if (bo->mem.mem_type == TTM_PL_SYSTEM)
+		return true;
+
 	/* Don't evict this BO if it's outside of the
 	 * requested placement range
 	 */
@@ -553,7 +557,9 @@  EXPORT_SYMBOL(ttm_bo_eviction_valuable);
  * b. Otherwise, trylock it.
  */
 static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
-			struct ttm_operation_ctx *ctx, bool *locked, bool *busy)
+					   struct ttm_operation_ctx *ctx,
+					   const struct ttm_place *place,
+					   bool *locked, bool *busy)
 {
 	bool ret = false;
 
@@ -571,6 +577,12 @@  static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
 			*busy = !ret;
 	}
 
+	if (ret && place && !bo->bdev->funcs->eviction_valuable(bo, place)) {
+		ret = false;
+		if (locked)
+			dma_resv_unlock(bo->base.resv);
+	}
+
 	return ret;
 }
 
@@ -625,20 +637,14 @@  int ttm_mem_evict_first(struct ttm_device *bdev,
 		list_for_each_entry(bo, &man->lru[i], lru) {
 			bool busy;
 
-			if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
-							    &busy)) {
+			if (!ttm_bo_evict_swapout_allowable(bo, ctx, place,
+							    &locked, &busy)) {
 				if (busy && !busy_bo && ticket !=
 				    dma_resv_locking_ctx(bo->base.resv))
 					busy_bo = bo;
 				continue;
 			}
 
-			if (place && !bdev->funcs->eviction_valuable(bo,
-								      place)) {
-				if (locked)
-					dma_resv_unlock(bo->base.resv);
-				continue;
-			}
 			if (!ttm_bo_get_unless_zero(bo)) {
 				if (locked)
 					dma_resv_unlock(bo->base.resv);
@@ -1138,10 +1144,18 @@  EXPORT_SYMBOL(ttm_bo_wait);
 int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
 		   gfp_t gfp_flags)
 {
+	struct ttm_place place = {};
 	bool locked;
 	int ret;
 
-	if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, NULL))
+	/*
+	 * While the bo may already reside in SYSTEM placement, set
+	 * SYSTEM as new placement to cover also the move further below.
+	 * The driver may use the fact that we're moving from SYSTEM
+	 * as an indication that we're about to swap out.
+	 */
+	place.mem_type = TTM_PL_SYSTEM;
+	if (!ttm_bo_evict_swapout_allowable(bo, ctx, &place, &locked, NULL))
 		return -EBUSY;
 
 	if (!ttm_bo_get_unless_zero(bo)) {
@@ -1166,12 +1180,7 @@  int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
 	if (bo->mem.mem_type != TTM_PL_SYSTEM) {
 		struct ttm_operation_ctx ctx = { false, false };
 		struct ttm_resource evict_mem;
-		struct ttm_place place, hop;
-
-		memset(&place, 0, sizeof(place));
-		memset(&hop, 0, sizeof(hop));
-
-		place.mem_type = TTM_PL_SYSTEM;
+		struct ttm_place hop = {};
 
 		ret = ttm_resource_alloc(bo, &place, &evict_mem);
 		if (unlikely(ret))
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 539e0232cb3b..7878ca4876c5 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -258,6 +258,9 @@  int ttm_tt_swapout(struct ttm_device *bdev, struct ttm_tt *ttm,
 	struct page *to_page;
 	int i, ret;
 
+	if (!ttm_tt_is_populated(ttm))
+		return 0;
+
 	swap_storage = shmem_file_setup("ttm swap", size, 0);
 	if (IS_ERR(swap_storage)) {
 		pr_err("Failed allocating swap storage\n");
@@ -399,6 +402,7 @@  void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm)
 
 	ttm->page_flags &= ~TTM_PAGE_FLAG_PRIV_POPULATED;
 }
+EXPORT_SYMBOL(ttm_tt_unpopulate);
 
 #ifdef CONFIG_DEBUG_FS