dma-buf: Fix missing excl fence waiting

Message ID B737F1D5-292E-4FE2-89A5-6EF72CB3EED1@amd.com (mailing list archive)

Commit Message

Pan, Xinhui Feb. 23, 2020, 11:56 a.m. UTC
If the shared fence list is not empty, the exclusive fence is ignored even when we want to test all fences.
That is obviously wrong, so fix it.

Signed-off-by: xinhui pan <xinhui.pan@amd.com>
---
 drivers/dma-buf/dma-resv.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)
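
[Editor's note] To make the reported bug concrete, a minimal sketch of the unpatched behavior; the reservation-object state described in the comment is assumed for illustration, not taken from the report:

/*
 * Assume obj holds one shared fence that has already signaled and an
 * exclusive fence that has NOT signaled yet.
 *
 * Unpatched flow with test_all = true: shared_count = 1, the loop
 * finds the shared fence signaled, and because "if (!shared_count)"
 * is then false, fence_excl is never examined.
 */
bool all_done = dma_resv_test_signaled_rcu(obj, true);
/* all_done is true here despite the pending exclusive fence. */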

Comments

Pan, Xinhui Feb. 23, 2020, noon UTC | #1
Sorry, I pasted the wrong patch; I will send out a v2.


> On Feb 23, 2020, at 19:56, Pan, Xinhui <Xinhui.Pan@amd.com> wrote:
> 
> If the shared fence list is not empty, the exclusive fence is ignored even when we want to test all fences.
> That is obviously wrong, so fix it.
> 
> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
> [patch snipped]
Christian König Feb. 23, 2020, 12:04 p.m. UTC | #2
On 23.02.20 at 12:56, Pan, Xinhui wrote:
> If the shared fence list is not empty, the exclusive fence is ignored even when we want to test all fences.
> That is obviously wrong, so fix it.

Yeah, that is a known issue and I completely agree with you, but others
disagree.

See the shared fences are meant to depend on the exclusive fence. So all 
shared fences must finish only after the exclusive one has finished as well.

The problem now is that for error handling this isn't necessarily true. In
other words, when a shared fence completes with an error it is perfectly
possible that it does this before the exclusive fence is finished.

I've been trying to convince Daniel for years that this is a problem :)

Regards,
Christian.
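
[Editor's note] A sketch of the ordering invariant described above; the helper name is hypothetical, only dma_fence_is_signaled() is a real kernel API:

/*
 * Assumed invariant: a shared fence may signal only after the
 * exclusive fence has signaled. Under that assumption,
 * "all shared fences signaled" implies "exclusive fence signaled",
 * which is why dma_resv_test_signaled_rcu() skips fence_excl
 * whenever the shared list is non-empty.
 */
if (all_shared_fences_signaled(obj))	/* hypothetical helper */
	return true; /* implies dma_fence_is_signaled(obj->fence_excl) */

As the error-handling caveat above notes, a fence that completes with an error can break this implication.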

>
> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
> [patch snipped]
Christian König Feb. 23, 2020, 12:12 p.m. UTC | #3
On 23.02.20 at 13:21, Pan, Xinhui wrote:
>
>> On Feb 23, 2020, at 20:04, Koenig, Christian <Christian.Koenig@amd.com> wrote:
>>
>> On 23.02.20 at 12:56, Pan, Xinhui wrote:
>>> If the shared fence list is not empty, the exclusive fence is ignored even when we want to test all fences.
>>> That is obviously wrong, so fix it.
>> Yeah, that is a known issue and I completely agree with you, but others disagree.
>>
>> See the shared fences are meant to depend on the exclusive fence. So all shared fences must finish only after the exclusive one has finished as well.
> Fair enough.
>
>> The problem now is that for error handling this isn't necessarily true. In other words, when a shared fence completes with an error it is perfectly possible that it does this before the exclusive fence is finished.
>>
>> I've been trying to convince Daniel for years that this is a problem :)
>>
> I have hit this problem: eviction races with bo release. System memory is overwritten by sDMA. The kernel is 4.19, a stable one, LOL.

Ok, sounds like we add some shared fence which doesn't depend on the
exclusive one to finish. That is of course highly problematic for the
current handling.

It might be that this happens with the TTM move fence, but I don't
offhand know how to prevent that either.

Question to Daniel and others: can we finally drop this assumption that
all shared fences must finish after the exclusive one?

Thanks for pointing this out, Xinhui,
Christian.

>
> amdgpu adds an excl fence to the bo to move system memory, which is done by the drm scheduler.
> After sDMA finishes the moving job, the memory might have already been released, as dma_resv_test_signaled_rcu did not check the excl fence.
>
> Our local customer reported this issue. It took me 4 days to track down. Sigh.
>
> thanks
> xinhui
>
>> Regards,
>> Christian.
>>
>>> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
>>> [patch snipped]
Pan, Xinhui Feb. 23, 2020, 12:21 p.m. UTC | #4
> On Feb 23, 2020, at 20:04, Koenig, Christian <Christian.Koenig@amd.com> wrote:
> 
> On 23.02.20 at 12:56, Pan, Xinhui wrote:
>> If the shared fence list is not empty, the exclusive fence is ignored even when we want to test all fences.
>> That is obviously wrong, so fix it.
> 
> Yeah, that is a known issue and I completely agree with you, but others disagree.
> 
> See the shared fences are meant to depend on the exclusive fence. So all shared fences must finish only after the exclusive one has finished as well.
Fair enough.

> The problem now is that for error handling this isn't necessarily true. In other words, when a shared fence completes with an error it is perfectly possible that it does this before the exclusive fence is finished.
> 
> I've been trying to convince Daniel for years that this is a problem :)
> 

I have hit this problem: eviction races with bo release. System memory is overwritten by sDMA. The kernel is 4.19, a stable one, LOL.

amdgpu adds an excl fence to the bo to move system memory, which is done by the drm scheduler.
After sDMA finishes the moving job, the memory might have already been released, as dma_resv_test_signaled_rcu did not check the excl fence.

Our local customer reported this issue. It took me 4 days to track down. Sigh.

thanks
xinhui

> Regards,
> Christian.
> 
>> 
>> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
>> [patch snipped]
>
Daniel Vetter Feb. 25, 2020, 5:23 p.m. UTC | #5
On Sun, Feb 23, 2020 at 01:04:15PM +0100, Christian König wrote:
> Am 23.02.20 um 12:56 schrieb Pan, Xinhui:
> > If the shared fence list is not empty, the exclusive fence is ignored even when we want to test all fences.
> > That is obviously wrong, so fix it.
> 
> Yeah, that is a known issue and I completely agree with you, but others
> disagree.
> 
> See the shared fences are meant to depend on the exclusive fence. So all
> shared fences must finish only after the exclusive one has finished as well.
> 
> The problem now is that for error handling this isn't necessarily true. In
> other words, when a shared fence completes with an error it is perfectly
> possible that it does this before the exclusive fence is finished.
> 
> I've been trying to convince Daniel for years that this is a problem :)

I thought the consensus is that reasonable gpu schedulers and gpu reset
code should try to make really, really sure it only completes stuff in
sequence? That's at least my take away from the syncobj timeline
discussion, where you convinced me we shouldn't just crash&burn.

I think as long as your scheduler is competent and your gpu reset tries to
limit damage (i.e. kill offending ctx terminally, mark everything else
that didn't complete for re-running) we should end up with everything
completing in sequence. I guess if you do kill a lot more stuff, then
you'd have to push these through your scheduler as dummy jobs, i.e. they
still wait for their dependencies, but then all they do is set the
dma_fence error and complete it. Maybe something the common scheduler
could do.
-Daniel
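
[Editor's note] A minimal sketch of the dummy-job idea above, assuming a drm_sched-style run_job callback; get_hw_fence() and the surrounding wiring are hypothetical, while dma_fence_set_error()/dma_fence_signal() are real kernel APIs:

static struct dma_fence *dummy_run_job(struct drm_sched_job *sched_job)
{
	/* The scheduler has already waited for the job's dependencies
	 * before calling run_job, so completion order is preserved. */
	struct dma_fence *fence = get_hw_fence(sched_job); /* hypothetical */

	/* Record the error before signaling; the order matters. */
	dma_fence_set_error(fence, -ECANCELED);
	dma_fence_signal(fence);

	return fence;
}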

> 
> Regards,
> Christian.
> 
> > 
> > Signed-off-by: xinhui pan <xinhui.pan@amd.com>
> > [patch snipped]
Christian König Feb. 25, 2020, 7:11 p.m. UTC | #6
On 25.02.20 at 18:23, Daniel Vetter wrote:
> On Sun, Feb 23, 2020 at 01:04:15PM +0100, Christian König wrote:
>> On 23.02.20 at 12:56, Pan, Xinhui wrote:
>>> If the shared fence list is not empty, the exclusive fence is ignored even when we want to test all fences.
>>> That is obviously wrong, so fix it.
>> Yeah, that is a known issue and I completely agree with you, but others
>> disagree.
>>
>> See the shared fences are meant to depend on the exclusive fence. So all
>> shared fences must finish only after the exclusive one has finished as well.
>>
>> The problem now is that for error handling this isn't necessarily true. In
>> other words, when a shared fence completes with an error it is perfectly
>> possible that it does this before the exclusive fence is finished.
>>
>> I've been trying to convince Daniel for years that this is a problem :)
> I thought the consensus is that reasonable gpu schedulers and gpu reset
> code should try to make really, really sure it only completes stuff in
> sequence? That's at least my take away from the syncobj timeline
> discussion, where you convinced me we shouldn't just crash&burn.
>
> I think as long as your scheduler is competent and your gpu reset tries to
> limit damage (i.e. kill offending ctx terminally, mark everything else
> that didn't complete for re-running) we should end up with everything
> completing in sequence. I guess if you do kill a lot more stuff, then
> you'd have to push these through your scheduler as dummy jobs, i.e. they
> still wait for their dependencies, but then all they do is set the
> dma_fence error and complete it. Maybe something the common scheduler
> could do.

Yes, that's exactly how we currently implement it. But I still think
that this is not necessarily the best approach :)

Anyway, Xinhui's problem turned out to be deeper. We somehow add an old
stale fence to the dma_resv object sometimes, and that can result in
quite a bunch of problems.

I'm currently trying to hunt down what's going wrong here in more detail.

Regards,
Christian.

> -Daniel
>
>> Regards,
>> Christian.
>>
>>> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
>>> [patch snipped]
Pan, Xinhui Feb. 28, 2020, 5:45 a.m. UTC | #7
> On Feb 26, 2020, at 03:11, Koenig, Christian <Christian.Koenig@amd.com> wrote:
> 
> On 25.02.20 at 18:23, Daniel Vetter wrote:
>> On Sun, Feb 23, 2020 at 01:04:15PM +0100, Christian König wrote:
>>> On 23.02.20 at 12:56, Pan, Xinhui wrote:
>>>> If the shared fence list is not empty, the exclusive fence is ignored even when we want to test all fences.
>>>> That is obviously wrong, so fix it.
>>> Yeah, that is a known issue and I completely agree with you, but others
>>> disagree.
>>> 
>>> See the shared fences are meant to depend on the exclusive fence. So all
>>> shared fences must finish only after the exclusive one has finished as well.
>>> 
>>> The problem now is that for error handling this isn't necessarily true. In
>>> other words, when a shared fence completes with an error it is perfectly
>>> possible that it does this before the exclusive fence is finished.
>>> 
>>> I've been trying to convince Daniel for years that this is a problem :)
>> I thought the consensus is that reasonable gpu schedulers and gpu reset
>> code should try to make really, really sure it only completes stuff in
>> sequence? That's at least my take away from the syncobj timeline
>> discussion, where you convinced me we shouldn't just crash&burn.
>> 
>> I think as long as your scheduler is competent and your gpu reset tries to
>> limit damage (i.e. kill offending ctx terminally, mark everything else
>> that didn't complete for re-running) we should end up with everything
>> completing in sequence. I guess if you do kill a lot more stuff, then
>> you'd have to push these through your scheduler as dummy jobs, i.e. they
>> still wait for their dependencies, but then all they do is set the
>> dma_fence error and complete it. Maybe something the common scheduler
>> could do.
> 
> Yes, that's exactly how we currently implement it. But I still think that this is not necessarily the best approach :)
> 
> Anyway, Xinhui's problem turned out to be deeper. We somehow add an old stale fence to the dma_resv object sometimes, and that can result in quite a bunch of problems.
> 
> I'm currently trying to hunt down what's going wrong here in more detail.

I got some backtraces below.

add excl fence:

<4>[ 1203.904748]          ttm_bo_pipeline_move+0x74/0x368 [ttm]
<4>[ 1203.904809]          amdgpu_move_blit.isra.8+0xf4/0x108 [amdgpu]
<4>[ 1203.904870]          amdgpu_bo_move+0x88/0x208 [amdgpu]
<4>[ 1203.904881]          ttm_bo_handle_move_mem+0x250/0x498 [ttm]
<4>[ 1203.904888]          ttm_bo_evict+0x12c/0x1c8 [ttm]
<4>[ 1203.904895]          ttm_mem_evict_first+0x1d0/0x2c8 [ttm]
<4>[ 1203.904903]          ttm_bo_mem_space+0x2f4/0x498 [ttm]
<4>[ 1203.904913]          ttm_bo_validate+0xdc/0x168 [ttm]
<4>[ 1203.904975]          amdgpu_cs_bo_validate+0xb0/0x230 [amdgpu]
<4>[ 1203.905038]          amdgpu_cs_validate+0x60/0x2b8 [amdgpu]
<4>[ 1203.905099]          amdgpu_cs_list_validate+0xb8/0x1a8 [amdgpu]
<4>[ 1203.905161]          amdgpu_cs_ioctl+0x12b0/0x1598 [amdgpu]
<4>[ 1203.905186]          drm_ioctl_kernel+0x94/0x118 [drm]
<4>[ 1203.905210]          drm_ioctl+0x1f0/0x438 [drm]
<4>[ 1203.905271]          amdgpu_drm_ioctl+0x58/0x90 [amdgpu]
<4>[ 1203.905275]          do_vfs_ioctl+0xc4/0x8c0
<4>[ 1203.905279]          ksys_ioctl+0x8c/0xa0

add shared fence:

<4>[ 1203.905349]          amdgpu_bo_fence+0x6c/0x80 [amdgpu]
<4>[ 1203.905410]          amdgpu_gem_object_close+0x194/0x1d0 [amdgpu]
<4>[ 1203.905435]          drm_gem_object_release_handle+0x3c/0x98 [drm]
<4>[ 1203.905438]          idr_for_each+0x70/0x128
<4>[ 1203.905463]          drm_gem_release+0x30/0x48 [drm]
<4>[ 1203.905486]          drm_file_free.part.0+0x258/0x2f0 [drm]
<4>[ 1203.905511]          drm_release+0x9c/0xe0 [drm]
<4>[ 1203.905514]          __fput+0xac/0x218
<4>[ 1203.905518]          ____fput+0x20/0x30
<4>[ 1203.905521]          task_work_run+0xb8/0xf0
<4>[ 1203.905523]          do_exit+0x398/0xaf8
<4>[ 1203.905525]          do_group_exit+0x3c/0xd0
<4>[ 1203.905527]          get_signal+0xec/0x740
<4>[ 1203.905529]          do_signal+0x88/0x288
<4>[ 1203.905531]          do_notify_resume+0xd8/0x130
<4>[ 1203.905533]          work_pending+0x8/0x10

We are using kernel 4.19.104.

The problem is that eviction of a PT/PD submits one job and adds an excl fence to bo->resv.

And if the application gets killed, amdgpu_gem_object_close will try to clear the PT/PD and submit one job.
I took a look at the code: it will sync root.base.bo->resv and add the fence to the bo as shared.

So the fence used to clear the PT/PD does not actually sync with bo->resv.

amdgpu_vm_bo_update_mapping takes the excl fence as a parameter for sync,
but amdgpu_vm_clear_freed does not.


thanks
xinhui
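
[Editor's note] A sketch of the synchronization that appears to be missing, using the generic dma_resv wait helper of that era; pt_resv stands in for the page-table BO's reservation object, and whether this is the appropriate fix for amdgpu_vm_clear_freed is an assumption:

/*
 * Wait for the BO's exclusive fence (e.g. the pipelined move fence)
 * before submitting the clear job, so the clear cannot run while
 * the move is still in flight. wait_all = false waits on the
 * exclusive fence only.
 */
long r = dma_resv_wait_timeout_rcu(pt_resv, false /* wait_all */,
				   false /* intr */,
				   MAX_SCHEDULE_TIMEOUT);
if (r <= 0)
	return r ? r : -EBUSY;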


> 
> Regards,
> Christian.
> 
>> -Daniel
>> 
>>> Regards,
>>> Christian.
>>> 
>>>> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
>>>> [patch snipped]
>
Pan, Xinhui Feb. 28, 2020, 6:08 a.m. UTC | #8
> On Feb 28, 2020, at 13:45, panxinhui <xpan@amd.com> wrote:
> 
> 
> 
>> On Feb 26, 2020, at 03:11, Koenig, Christian <Christian.Koenig@amd.com> wrote:
>> 
>> On 25.02.20 at 18:23, Daniel Vetter wrote:
>>> On Sun, Feb 23, 2020 at 01:04:15PM +0100, Christian König wrote:
>>>> On 23.02.20 at 12:56, Pan, Xinhui wrote:
>>>>> If the shared fence list is not empty, the exclusive fence is ignored even when we want to test all fences.
>>>>> That is obviously wrong, so fix it.
>>>> Yeah, that is a known issue and I completely agree with you, but others
>>>> disagree.
>>>> 
>>>> See the shared fences are meant to depend on the exclusive fence. So all
>>>> shared fences must finish only after the exclusive one has finished as well.
>>>> 
>>>> The problem now is that for error handling this isn't necessarily true. In
>>>> other words, when a shared fence completes with an error it is perfectly
>>>> possible that it does this before the exclusive fence is finished.
>>>> 
>>>> I've been trying to convince Daniel for years that this is a problem :)
>>> I thought the consensus is that reasonable gpu schedulers and gpu reset
>>> code should try to make really, really sure it only completes stuff in
>>> sequence? That's at least my take away from the syncobj timeline
>>> discussion, where you convinced me we shouldn't just crash&burn.
>>> 
>>> I think as long as your scheduler is competent and your gpu reset tries to
>>> limit damage (i.e. kill offending ctx terminally, mark everything else
>>> that didn't complete for re-running) we should end up with everything
>>> completing in sequence. I guess if you do kill a lot more stuff, then
>>> you'd have to push these through your scheduler as dummy jobs, i.e. they
>>> still wait for their dependencies, but then all they do is set the
>>> dma_fence error and complete it. Maybe something the common scheduler
>>> could do.
>> 
>> Yes, that's exactly how we currently implement it. But I still think that this is not necessarily the best approach :)
>> 
>> Anyway, Xinhui's problem turned out to be deeper. We somehow add an old stale fence to the dma_resv object sometimes, and that can result in quite a bunch of problems.
>> 
>> I'm currently trying to hunt down what's going wrong here in more detail.
> 
> I got some backtraces below.
> 
> add excl fence:
> 
> <4>[ 1203.904748]          ttm_bo_pipeline_move+0x74/0x368 [ttm]
> <4>[ 1203.904809]          amdgpu_move_blit.isra.8+0xf4/0x108 [amdgpu]
> <4>[ 1203.904870]          amdgpu_bo_move+0x88/0x208 [amdgpu]
> <4>[ 1203.904881]          ttm_bo_handle_move_mem+0x250/0x498 [ttm]
> <4>[ 1203.904888]          ttm_bo_evict+0x12c/0x1c8 [ttm]
> <4>[ 1203.904895]          ttm_mem_evict_first+0x1d0/0x2c8 [ttm]
> <4>[ 1203.904903]          ttm_bo_mem_space+0x2f4/0x498 [ttm]
> <4>[ 1203.904913]          ttm_bo_validate+0xdc/0x168 [ttm]
> <4>[ 1203.904975]          amdgpu_cs_bo_validate+0xb0/0x230 [amdgpu]
> <4>[ 1203.905038]          amdgpu_cs_validate+0x60/0x2b8 [amdgpu]
> <4>[ 1203.905099]          amdgpu_cs_list_validate+0xb8/0x1a8 [amdgpu]
> <4>[ 1203.905161]          amdgpu_cs_ioctl+0x12b0/0x1598 [amdgpu]
> <4>[ 1203.905186]          drm_ioctl_kernel+0x94/0x118 [drm]
> <4>[ 1203.905210]          drm_ioctl+0x1f0/0x438 [drm]
> <4>[ 1203.905271]          amdgpu_drm_ioctl+0x58/0x90 [amdgpu]
> <4>[ 1203.905275]          do_vfs_ioctl+0xc4/0x8c0
> <4>[ 1203.905279]          ksys_ioctl+0x8c/0xa0
> 
> add shared fence:
> 
> <4>[ 1203.905349]          amdgpu_bo_fence+0x6c/0x80 [amdgpu]
> <4>[ 1203.905410]          amdgpu_gem_object_close+0x194/0x1d0 [amdgpu]
> <4>[ 1203.905435]          drm_gem_object_release_handle+0x3c/0x98 [drm]
> <4>[ 1203.905438]          idr_for_each+0x70/0x128
> <4>[ 1203.905463]          drm_gem_release+0x30/0x48 [drm]
> <4>[ 1203.905486]          drm_file_free.part.0+0x258/0x2f0 [drm]
> <4>[ 1203.905511]          drm_release+0x9c/0xe0 [drm]
> <4>[ 1203.905514]          __fput+0xac/0x218
> <4>[ 1203.905518]          ____fput+0x20/0x30
> <4>[ 1203.905521]          task_work_run+0xb8/0xf0
> <4>[ 1203.905523]          do_exit+0x398/0xaf8
> <4>[ 1203.905525]          do_group_exit+0x3c/0xd0
> <4>[ 1203.905527]          get_signal+0xec/0x740
> <4>[ 1203.905529]          do_signal+0x88/0x288
> <4>[ 1203.905531]          do_notify_resume+0xd8/0x130
> <4>[ 1203.905533]          work_pending+0x8/0x10
> 
> We are using kernel 4.19.104.
> 
> The problem is that eviction of a PT/PD submits one job and adds an excl fence to bo->resv.
> 
> And if the application gets killed, amdgpu_gem_object_close will try to clear the PT/PD and submit one job.
> I took a look at the code: it will sync root.base.bo->resv and add the fence to the bo as shared.
> 
> So the fence used to clear the PT/PD does not actually sync with bo->resv.

Wait, gem_object_open will check bo->resv == root.base.bo->resv. So is there a race like below?

amdgpu_vm_bo_update_mapping 			ttm_bo_pipeline_move

sync_resv
										submit job
										add excl fence
submit job
add shared fence


In the latest code, I notice amdgpu_vm_bo_update_mapping will hold the eviction lock, so there is no race there.

thanks
xinhui

> 
> amdgpu_vm_bo_update_mapping takes the excl fence as a parameter for sync,
> but amdgpu_vm_clear_freed does not.
> 
> 
> thanks
> xinhui
> 
> 
>> 
>> Regards,
>> Christian.
>> 
>>> -Daniel
>>> 
>>>> Regards,
>>>> Christian.
>>>> 
>>>>> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
>>>>> [patch snipped]
>

Patch

diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index 4264e64788c4..44dc64c547c6 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -632,14 +632,14 @@  static inline int dma_resv_test_signaled_single(struct dma_fence *passed_fence)
  */
 bool dma_resv_test_signaled_rcu(struct dma_resv *obj, bool test_all)
 {
-	unsigned seq, shared_count;
+	unsigned int seq, shared_count, left;
 	int ret;
 
 	rcu_read_lock();
 retry:
 	ret = true;
 	shared_count = 0;
-	seq = read_seqcount_begin(&obj->seq);
+	left = seq = read_seqcount_begin(&obj->seq);
 
 	if (test_all) {
 		unsigned i;
@@ -647,7 +647,7 @@  bool dma_resv_test_signaled_rcu(struct dma_resv *obj, bool test_all)
 		struct dma_resv_list *fobj = rcu_dereference(obj->fence);
 
 		if (fobj)
-			shared_count = fobj->shared_count;
+			left = shared_count = fobj->shared_count;
 
 		for (i = 0; i < shared_count; ++i) {
 			struct dma_fence *fence = rcu_dereference(fobj->shared[i]);
@@ -657,13 +657,14 @@  bool dma_resv_test_signaled_rcu(struct dma_resv *obj, bool test_all)
 				goto retry;
 			else if (!ret)
 				break;
+			left--;
 		}
 
 		if (read_seqcount_retry(&obj->seq, seq))
 			goto retry;
 	}
 
-	if (!shared_count) {
+	if (!left) {
 		struct dma_fence *fence_excl = rcu_dereference(obj->fence_excl);
 
 		if (fence_excl) {