diff mbox series

[3/6] drm/scheduler: Job timeout handler returns status

Message ID 20201125031708.6433-4-luben.tuikov@amd.com (mailing list archive)
State New, archived
Headers show
Series Allow to extend the timeout without jobs disappearing | expand

Commit Message

Luben Tuikov Nov. 25, 2020, 3:17 a.m. UTC
The job timeout handler now returns status
indicating back to the DRM layer whether the job
was successfully cancelled or whether more time
should be given to the job to complete.

Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c |  6 ++++--
 include/drm/gpu_scheduler.h             | 13 ++++++++++---
 2 files changed, 14 insertions(+), 5 deletions(-)

Comments

kernel test robot Nov. 25, 2020, 4:41 a.m. UTC | #1
Hi Luben,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v5.10-rc5 next-20201124]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Luben-Tuikov/Allow-to-extend-the-timeout-without-jobs-disappearing/20201125-111945
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 127c501a03d5db8b833e953728d3bcf53c8832a9
config: nds32-randconfig-s032-20201125 (attached as .config)
compiler: nds32le-linux-gcc (GCC) 9.3.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # apt-get install sparse
        # sparse version: v0.6.3-151-g540c2c4b-dirty
        # https://github.com/0day-ci/linux/commit/14b618148200370c3b43498550534c17d50218fc
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Luben-Tuikov/Allow-to-extend-the-timeout-without-jobs-disappearing/20201125-111945
        git checkout 14b618148200370c3b43498550534c17d50218fc
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=nds32 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

>> drivers/gpu/drm/etnaviv/etnaviv_sched.c:140:18: error: initialization of 'int (*)(struct drm_sched_job *)' from incompatible pointer type 'void (*)(struct drm_sched_job *)' [-Werror=incompatible-pointer-types]
     140 |  .timedout_job = etnaviv_sched_timedout_job,
         |                  ^~~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/gpu/drm/etnaviv/etnaviv_sched.c:140:18: note: (near initialization for 'etnaviv_sched_ops.timedout_job')
   cc1: some warnings being treated as errors
--
   drivers/gpu/drm/lima/lima_sched.c: In function 'lima_sched_run_job':
   drivers/gpu/drm/lima/lima_sched.c:226:20: warning: variable 'ret' set but not used [-Wunused-but-set-variable]
     226 |  struct dma_fence *ret;
         |                    ^~~
   drivers/gpu/drm/lima/lima_sched.c: At top level:
>> drivers/gpu/drm/lima/lima_sched.c:472:18: error: initialization of 'int (*)(struct drm_sched_job *)' from incompatible pointer type 'void (*)(struct drm_sched_job *)' [-Werror=incompatible-pointer-types]
     472 |  .timedout_job = lima_sched_timedout_job,
         |                  ^~~~~~~~~~~~~~~~~~~~~~~
   drivers/gpu/drm/lima/lima_sched.c:472:18: note: (near initialization for 'lima_sched_ops.timedout_job')
   cc1: some warnings being treated as errors

vim +140 drivers/gpu/drm/etnaviv/etnaviv_sched.c

e93b6deeb45a78 Lucas Stach 2017-12-04  136  
e93b6deeb45a78 Lucas Stach 2017-12-04  137  static const struct drm_sched_backend_ops etnaviv_sched_ops = {
e93b6deeb45a78 Lucas Stach 2017-12-04  138  	.dependency = etnaviv_sched_dependency,
e93b6deeb45a78 Lucas Stach 2017-12-04  139  	.run_job = etnaviv_sched_run_job,
e93b6deeb45a78 Lucas Stach 2017-12-04 @140  	.timedout_job = etnaviv_sched_timedout_job,
e93b6deeb45a78 Lucas Stach 2017-12-04  141  	.free_job = etnaviv_sched_free_job,
e93b6deeb45a78 Lucas Stach 2017-12-04  142  };
e93b6deeb45a78 Lucas Stach 2017-12-04  143  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Christian König Nov. 25, 2020, 9:50 a.m. UTC | #2
Am 25.11.20 um 04:17 schrieb Luben Tuikov:
> The job timeout handler now returns status
> indicating back to the DRM layer whether the job
> was successfully cancelled or whether more time
> should be given to the job to complete.
>
> Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_job.c |  6 ++++--
>   include/drm/gpu_scheduler.h             | 13 ++++++++++---
>   2 files changed, 14 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index ff48101bab55..81b73790ecc6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -28,7 +28,7 @@
>   #include "amdgpu.h"
>   #include "amdgpu_trace.h"
>   
> -static void amdgpu_job_timedout(struct drm_sched_job *s_job)
> +static int amdgpu_job_timedout(struct drm_sched_job *s_job)
>   {
>   	struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
>   	struct amdgpu_job *job = to_amdgpu_job(s_job);
> @@ -41,7 +41,7 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
>   	    amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
>   		DRM_ERROR("ring %s timeout, but soft recovered\n",
>   			  s_job->sched->name);
> -		return;
> +		return 0;
>   	}
>   
>   	amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti);
> @@ -53,10 +53,12 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
>   
>   	if (amdgpu_device_should_recover_gpu(ring->adev)) {
>   		amdgpu_device_gpu_recover(ring->adev, job);
> +		return 0;
>   	} else {
>   		drm_sched_suspend_timeout(&ring->sched);
>   		if (amdgpu_sriov_vf(adev))
>   			adev->virt.tdr_debug = true;
> +		return 1;
>   	}
>   }
>   
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index 2e0c368e19f6..61f7121e1c19 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -230,10 +230,17 @@ struct drm_sched_backend_ops {
>   	struct dma_fence *(*run_job)(struct drm_sched_job *sched_job);
>   
>   	/**
> -         * @timedout_job: Called when a job has taken too long to execute,
> -         * to trigger GPU recovery.
> +	 * @timedout_job: Called when a job has taken too long to execute,
> +	 * to trigger GPU recovery.
> +	 *
> +	 * Return 0, if the job has been aborted successfully and will
> +	 * never be heard of from the device. Return non-zero if the
> +	 * job wasn't able to be aborted, i.e. if more time should be
> +	 * given to this job. The result is not "bool" as this
> +	 * function is not a predicate, although its result may seem
> +	 * as one.

I think the whole approach of timing out a job needs to be rethinked. 
What's timing out here is the hardware engine, not the job.

So we should also not have the job as parameter here. Maybe we should 
make that the fence we are waiting for instead.

>   	 */
> -	void (*timedout_job)(struct drm_sched_job *sched_job);
> +	int (*timedout_job)(struct drm_sched_job *sched_job);

I would either return an error code, boolean or enum here. But not use a 
number without a define.

Regards,
Christian.

>   
>   	/**
>            * @free_job: Called once the job's finished fence has been signaled
Steven Price Nov. 25, 2020, 11:04 a.m. UTC | #3
On 25/11/2020 03:17, Luben Tuikov wrote:
> The job timeout handler now returns status
> indicating back to the DRM layer whether the job
> was successfully cancelled or whether more time
> should be given to the job to complete.

I'm not sure I understand in what circumstances you would want to give 
the job more time to complete. Could you expand on that?

One thing we're missing at the moment in Panfrost is the ability to 
suspend ("soft stop" is the Mali jargon) a job and pick something else 
to run. The propitiatory driver stack uses this to avoid timing out long 
running jobs while still allowing other processes to have time on the 
GPU. But this interface as it stands doesn't seem to provide that.

As the kernel test robot has already pointed out - you'll need to at the 
very least update the other uses of this interface.

Steve

> 
> Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_job.c |  6 ++++--
>   include/drm/gpu_scheduler.h             | 13 ++++++++++---
>   2 files changed, 14 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index ff48101bab55..81b73790ecc6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -28,7 +28,7 @@
>   #include "amdgpu.h"
>   #include "amdgpu_trace.h"
>   
> -static void amdgpu_job_timedout(struct drm_sched_job *s_job)
> +static int amdgpu_job_timedout(struct drm_sched_job *s_job)
>   {
>   	struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
>   	struct amdgpu_job *job = to_amdgpu_job(s_job);
> @@ -41,7 +41,7 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
>   	    amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
>   		DRM_ERROR("ring %s timeout, but soft recovered\n",
>   			  s_job->sched->name);
> -		return;
> +		return 0;
>   	}
>   
>   	amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti);
> @@ -53,10 +53,12 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
>   
>   	if (amdgpu_device_should_recover_gpu(ring->adev)) {
>   		amdgpu_device_gpu_recover(ring->adev, job);
> +		return 0;
>   	} else {
>   		drm_sched_suspend_timeout(&ring->sched);
>   		if (amdgpu_sriov_vf(adev))
>   			adev->virt.tdr_debug = true;
> +		return 1;
>   	}
>   }
>   
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index 2e0c368e19f6..61f7121e1c19 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -230,10 +230,17 @@ struct drm_sched_backend_ops {
>   	struct dma_fence *(*run_job)(struct drm_sched_job *sched_job);
>   
>   	/**
> -         * @timedout_job: Called when a job has taken too long to execute,
> -         * to trigger GPU recovery.
> +	 * @timedout_job: Called when a job has taken too long to execute,
> +	 * to trigger GPU recovery.
> +	 *
> +	 * Return 0, if the job has been aborted successfully and will
> +	 * never be heard of from the device. Return non-zero if the
> +	 * job wasn't able to be aborted, i.e. if more time should be
> +	 * given to this job. The result is not "bool" as this
> +	 * function is not a predicate, although its result may seem
> +	 * as one.
>   	 */
> -	void (*timedout_job)(struct drm_sched_job *sched_job);
> +	int (*timedout_job)(struct drm_sched_job *sched_job);
>   
>   	/**
>            * @free_job: Called once the job's finished fence has been signaled
>
Lucas Stach Nov. 25, 2020, 11:15 a.m. UTC | #4
Am Mittwoch, den 25.11.2020, 11:04 +0000 schrieb Steven Price:
> On 25/11/2020 03:17, Luben Tuikov wrote:
> > The job timeout handler now returns status
> > indicating back to the DRM layer whether the job
> > was successfully cancelled or whether more time
> > should be given to the job to complete.
> 
> I'm not sure I understand in what circumstances you would want to give 
> the job more time to complete. Could you expand on that?

On etnaviv we don't have the ability to preempt a running job, but we
can look at the GPU state to determine if it's still making progress
with the current job, so we want to extend the timeout in that case to
not kill a long running but valid job.

Regards,
Lucas

> One thing we're missing at the moment in Panfrost is the ability to 
> suspend ("soft stop" is the Mali jargon) a job and pick something else 
> to run. The propitiatory driver stack uses this to avoid timing out long 
> running jobs while still allowing other processes to have time on the 
> GPU. But this interface as it stands doesn't seem to provide that.
> 
> As the kernel test robot has already pointed out - you'll need to at the 
> very least update the other uses of this interface.
> 
> Steve
> 
> > Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_job.c |  6 ++++--
> >   include/drm/gpu_scheduler.h             | 13 ++++++++++---
> >   2 files changed, 14 insertions(+), 5 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> > index ff48101bab55..81b73790ecc6 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> > @@ -28,7 +28,7 @@
> >   #include "amdgpu.h"
> >   #include "amdgpu_trace.h"
> >   
> > -static void amdgpu_job_timedout(struct drm_sched_job *s_job)
> > +static int amdgpu_job_timedout(struct drm_sched_job *s_job)
> >   {
> >   	struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
> >   	struct amdgpu_job *job = to_amdgpu_job(s_job);
> > @@ -41,7 +41,7 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
> >   	    amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
> >   		DRM_ERROR("ring %s timeout, but soft recovered\n",
> >   			  s_job->sched->name);
> > -		return;
> > +		return 0;
> >   	}
> >   
> >   	amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti);
> > @@ -53,10 +53,12 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
> >   
> >   	if (amdgpu_device_should_recover_gpu(ring->adev)) {
> >   		amdgpu_device_gpu_recover(ring->adev, job);
> > +		return 0;
> >   	} else {
> >   		drm_sched_suspend_timeout(&ring->sched);
> >   		if (amdgpu_sriov_vf(adev))
> >   			adev->virt.tdr_debug = true;
> > +		return 1;
> >   	}
> >   }
> >   
> > diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> > index 2e0c368e19f6..61f7121e1c19 100644
> > --- a/include/drm/gpu_scheduler.h
> > +++ b/include/drm/gpu_scheduler.h
> > @@ -230,10 +230,17 @@ struct drm_sched_backend_ops {
> >   	struct dma_fence *(*run_job)(struct drm_sched_job *sched_job);
> >   
> >   	/**
> > -         * @timedout_job: Called when a job has taken too long to execute,
> > -         * to trigger GPU recovery.
> > +	 * @timedout_job: Called when a job has taken too long to execute,
> > +	 * to trigger GPU recovery.
> > +	 *
> > +	 * Return 0, if the job has been aborted successfully and will
> > +	 * never be heard of from the device. Return non-zero if the
> > +	 * job wasn't able to be aborted, i.e. if more time should be
> > +	 * given to this job. The result is not "bool" as this
> > +	 * function is not a predicate, although its result may seem
> > +	 * as one.
> >   	 */
> > -	void (*timedout_job)(struct drm_sched_job *sched_job);
> > +	int (*timedout_job)(struct drm_sched_job *sched_job);
> >   
> >   	/**
> >            * @free_job: Called once the job's finished fence has been signaled
> >
Steven Price Nov. 25, 2020, 11:22 a.m. UTC | #5
On 25/11/2020 11:15, Lucas Stach wrote:
> Am Mittwoch, den 25.11.2020, 11:04 +0000 schrieb Steven Price:
>> On 25/11/2020 03:17, Luben Tuikov wrote:
>>> The job timeout handler now returns status
>>> indicating back to the DRM layer whether the job
>>> was successfully cancelled or whether more time
>>> should be given to the job to complete.
>>
>> I'm not sure I understand in what circumstances you would want to give
>> the job more time to complete. Could you expand on that?
> 
> On etnaviv we don't have the ability to preempt a running job, but we
> can look at the GPU state to determine if it's still making progress
> with the current job, so we want to extend the timeout in that case to
> not kill a long running but valid job.

Ok, fair enough. Although from my experience (on Mali) jobs very rarely 
"get stuck" it's just that their run time can be excessive[1] causing 
other processes to not make forward progress. So I'd expect the timeout 
to be set based on how long a job can run before you need to stop it to 
allow other processes to run their jobs.

But I'm not familiar with etnaviv so perhaps stuck jobs are actually a 
thing there.

Thanks,

Steve

[1] Also on Mali it's quite possible to create an infinite duration job 
which appears to be making forward progress, so in that case our measure 
of progress isn't useful against these malicious jobs.

> Regards,
> Lucas
> 
>> One thing we're missing at the moment in Panfrost is the ability to
>> suspend ("soft stop" is the Mali jargon) a job and pick something else
>> to run. The propitiatory driver stack uses this to avoid timing out long
>> running jobs while still allowing other processes to have time on the
>> GPU. But this interface as it stands doesn't seem to provide that.
>>
>> As the kernel test robot has already pointed out - you'll need to at the
>> very least update the other uses of this interface.
>>
>> Steve
>>
>>> Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_job.c |  6 ++++--
>>>    include/drm/gpu_scheduler.h             | 13 ++++++++++---
>>>    2 files changed, 14 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>>> index ff48101bab55..81b73790ecc6 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>>> @@ -28,7 +28,7 @@
>>>    #include "amdgpu.h"
>>>    #include "amdgpu_trace.h"
>>>    
>>> -static void amdgpu_job_timedout(struct drm_sched_job *s_job)
>>> +static int amdgpu_job_timedout(struct drm_sched_job *s_job)
>>>    {
>>>    	struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
>>>    	struct amdgpu_job *job = to_amdgpu_job(s_job);
>>> @@ -41,7 +41,7 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
>>>    	    amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
>>>    		DRM_ERROR("ring %s timeout, but soft recovered\n",
>>>    			  s_job->sched->name);
>>> -		return;
>>> +		return 0;
>>>    	}
>>>    
>>>    	amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti);
>>> @@ -53,10 +53,12 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
>>>    
>>>    	if (amdgpu_device_should_recover_gpu(ring->adev)) {
>>>    		amdgpu_device_gpu_recover(ring->adev, job);
>>> +		return 0;
>>>    	} else {
>>>    		drm_sched_suspend_timeout(&ring->sched);
>>>    		if (amdgpu_sriov_vf(adev))
>>>    			adev->virt.tdr_debug = true;
>>> +		return 1;
>>>    	}
>>>    }
>>>    
>>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>>> index 2e0c368e19f6..61f7121e1c19 100644
>>> --- a/include/drm/gpu_scheduler.h
>>> +++ b/include/drm/gpu_scheduler.h
>>> @@ -230,10 +230,17 @@ struct drm_sched_backend_ops {
>>>    	struct dma_fence *(*run_job)(struct drm_sched_job *sched_job);
>>>    
>>>    	/**
>>> -         * @timedout_job: Called when a job has taken too long to execute,
>>> -         * to trigger GPU recovery.
>>> +	 * @timedout_job: Called when a job has taken too long to execute,
>>> +	 * to trigger GPU recovery.
>>> +	 *
>>> +	 * Return 0, if the job has been aborted successfully and will
>>> +	 * never be heard of from the device. Return non-zero if the
>>> +	 * job wasn't able to be aborted, i.e. if more time should be
>>> +	 * given to this job. The result is not "bool" as this
>>> +	 * function is not a predicate, although its result may seem
>>> +	 * as one.
>>>    	 */
>>> -	void (*timedout_job)(struct drm_sched_job *sched_job);
>>> +	int (*timedout_job)(struct drm_sched_job *sched_job);
>>>    
>>>    	/**
>>>             * @free_job: Called once the job's finished fence has been signaled
>>>
>
Lucas Stach Nov. 25, 2020, 11:47 a.m. UTC | #6
Am Mittwoch, den 25.11.2020, 11:22 +0000 schrieb Steven Price:
> On 25/11/2020 11:15, Lucas Stach wrote:
> > Am Mittwoch, den 25.11.2020, 11:04 +0000 schrieb Steven Price:
> > > On 25/11/2020 03:17, Luben Tuikov wrote:
> > > > The job timeout handler now returns status
> > > > indicating back to the DRM layer whether the job
> > > > was successfully cancelled or whether more time
> > > > should be given to the job to complete.
> > > 
> > > I'm not sure I understand in what circumstances you would want to give
> > > the job more time to complete. Could you expand on that?
> > 
> > On etnaviv we don't have the ability to preempt a running job, but we
> > can look at the GPU state to determine if it's still making progress
> > with the current job, so we want to extend the timeout in that case to
> > not kill a long running but valid job.
> 
> Ok, fair enough. Although from my experience (on Mali) jobs very rarely 
> "get stuck" it's just that their run time can be excessive[1] causing 
> other processes to not make forward progress. So I'd expect the timeout 
> to be set based on how long a job can run before you need to stop it to 
> allow other processes to run their jobs.

Yea, we might want to kill the job eventually, but people tend to get
very angry if their use-case gets broken just because the userspace
driver manages to put enough blits in one job to run over the 500ms
timeout we allow for a job and the kernel then just hard-kills the job.

In an ideal world we would just preempt the job and allow something
else to run for a while, but without proper preemption support in HW
that's not an option right now.

> But I'm not familiar with etnaviv so perhaps stuck jobs are actually a 
> thing there.

It happens from time to time when our understanding of the HW isn't
complete and the userspace driver manages to create command streams
with missing semaphores between HW engines. ;)

Regards,
Lucas

> Thanks,
> 
> Steve
> 
> [1] Also on Mali it's quite possible to create an infinite duration job 
> which appears to be making forward progress, so in that case our measure 
> of progress isn't useful against these malicious jobs.
> 
> > Regards,
> > Lucas
> > 
> > > One thing we're missing at the moment in Panfrost is the ability to
> > > suspend ("soft stop" is the Mali jargon) a job and pick something else
> > > to run. The propitiatory driver stack uses this to avoid timing out long
> > > running jobs while still allowing other processes to have time on the
> > > GPU. But this interface as it stands doesn't seem to provide that.
> > > 
> > > As the kernel test robot has already pointed out - you'll need to at the
> > > very least update the other uses of this interface.
> > > 
> > > Steve
> > > 
> > > > Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
> > > > ---
> > > >    drivers/gpu/drm/amd/amdgpu/amdgpu_job.c |  6 ++++--
> > > >    include/drm/gpu_scheduler.h             | 13 ++++++++++---
> > > >    2 files changed, 14 insertions(+), 5 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> > > > index ff48101bab55..81b73790ecc6 100644
> > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> > > > @@ -28,7 +28,7 @@
> > > >    #include "amdgpu.h"
> > > >    #include "amdgpu_trace.h"
> > > >    
> > > > -static void amdgpu_job_timedout(struct drm_sched_job *s_job)
> > > > +static int amdgpu_job_timedout(struct drm_sched_job *s_job)
> > > >    {
> > > >    	struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
> > > >    	struct amdgpu_job *job = to_amdgpu_job(s_job);
> > > > @@ -41,7 +41,7 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
> > > >    	    amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
> > > >    		DRM_ERROR("ring %s timeout, but soft recovered\n",
> > > >    			  s_job->sched->name);
> > > > -		return;
> > > > +		return 0;
> > > >    	}
> > > >    
> > > >    	amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti);
> > > > @@ -53,10 +53,12 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
> > > >    
> > > >    	if (amdgpu_device_should_recover_gpu(ring->adev)) {
> > > >    		amdgpu_device_gpu_recover(ring->adev, job);
> > > > +		return 0;
> > > >    	} else {
> > > >    		drm_sched_suspend_timeout(&ring->sched);
> > > >    		if (amdgpu_sriov_vf(adev))
> > > >    			adev->virt.tdr_debug = true;
> > > > +		return 1;
> > > >    	}
> > > >    }
> > > >    
> > > > diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> > > > index 2e0c368e19f6..61f7121e1c19 100644
> > > > --- a/include/drm/gpu_scheduler.h
> > > > +++ b/include/drm/gpu_scheduler.h
> > > > @@ -230,10 +230,17 @@ struct drm_sched_backend_ops {
> > > >    	struct dma_fence *(*run_job)(struct drm_sched_job *sched_job);
> > > >    
> > > >    	/**
> > > > -         * @timedout_job: Called when a job has taken too long to execute,
> > > > -         * to trigger GPU recovery.
> > > > +	 * @timedout_job: Called when a job has taken too long to execute,
> > > > +	 * to trigger GPU recovery.
> > > > +	 *
> > > > +	 * Return 0, if the job has been aborted successfully and will
> > > > +	 * never be heard of from the device. Return non-zero if the
> > > > +	 * job wasn't able to be aborted, i.e. if more time should be
> > > > +	 * given to this job. The result is not "bool" as this
> > > > +	 * function is not a predicate, although its result may seem
> > > > +	 * as one.
> > > >    	 */
> > > > -	void (*timedout_job)(struct drm_sched_job *sched_job);
> > > > +	int (*timedout_job)(struct drm_sched_job *sched_job);
> > > >    
> > > >    	/**
> > > >             * @free_job: Called once the job's finished fence has been signaled
> > > >
Christian König Nov. 25, 2020, 12:41 p.m. UTC | #7
Am 25.11.20 um 12:04 schrieb Steven Price:
> On 25/11/2020 03:17, Luben Tuikov wrote:
>> The job timeout handler now returns status
>> indicating back to the DRM layer whether the job
>> was successfully cancelled or whether more time
>> should be given to the job to complete.
>
> I'm not sure I understand in what circumstances you would want to give 
> the job more time to complete. Could you expand on that?
>
> One thing we're missing at the moment in Panfrost is the ability to 
> suspend ("soft stop" is the Mali jargon) a job and pick something else 
> to run. The propitiatory driver stack uses this to avoid timing out 
> long running jobs while still allowing other processes to have time on 
> the GPU. But this interface as it stands doesn't seem to provide that.

On AMD hardware we call this IB preemption and it is indeed not handled 
very well by the scheduler at the moment.

See how the amdgpu code messes with the preempted IBs to restart them 
for example.

Christian.

>
> As the kernel test robot has already pointed out - you'll need to at 
> the very least update the other uses of this interface.
>
> Steve
>
>>
>> Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_job.c |  6 ++++--
>>   include/drm/gpu_scheduler.h             | 13 ++++++++++---
>>   2 files changed, 14 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>> index ff48101bab55..81b73790ecc6 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>> @@ -28,7 +28,7 @@
>>   #include "amdgpu.h"
>>   #include "amdgpu_trace.h"
>>   -static void amdgpu_job_timedout(struct drm_sched_job *s_job)
>> +static int amdgpu_job_timedout(struct drm_sched_job *s_job)
>>   {
>>       struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
>>       struct amdgpu_job *job = to_amdgpu_job(s_job);
>> @@ -41,7 +41,7 @@ static void amdgpu_job_timedout(struct 
>> drm_sched_job *s_job)
>>           amdgpu_ring_soft_recovery(ring, job->vmid, 
>> s_job->s_fence->parent)) {
>>           DRM_ERROR("ring %s timeout, but soft recovered\n",
>>                 s_job->sched->name);
>> -        return;
>> +        return 0;
>>       }
>>         amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti);
>> @@ -53,10 +53,12 @@ static void amdgpu_job_timedout(struct 
>> drm_sched_job *s_job)
>>         if (amdgpu_device_should_recover_gpu(ring->adev)) {
>>           amdgpu_device_gpu_recover(ring->adev, job);
>> +        return 0;
>>       } else {
>>           drm_sched_suspend_timeout(&ring->sched);
>>           if (amdgpu_sriov_vf(adev))
>>               adev->virt.tdr_debug = true;
>> +        return 1;
>>       }
>>   }
>>   diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>> index 2e0c368e19f6..61f7121e1c19 100644
>> --- a/include/drm/gpu_scheduler.h
>> +++ b/include/drm/gpu_scheduler.h
>> @@ -230,10 +230,17 @@ struct drm_sched_backend_ops {
>>       struct dma_fence *(*run_job)(struct drm_sched_job *sched_job);
>>         /**
>> -         * @timedout_job: Called when a job has taken too long to 
>> execute,
>> -         * to trigger GPU recovery.
>> +     * @timedout_job: Called when a job has taken too long to execute,
>> +     * to trigger GPU recovery.
>> +     *
>> +     * Return 0, if the job has been aborted successfully and will
>> +     * never be heard of from the device. Return non-zero if the
>> +     * job wasn't able to be aborted, i.e. if more time should be
>> +     * given to this job. The result is not "bool" as this
>> +     * function is not a predicate, although its result may seem
>> +     * as one.
>>        */
>> -    void (*timedout_job)(struct drm_sched_job *sched_job);
>> +    int (*timedout_job)(struct drm_sched_job *sched_job);
>>         /**
>>            * @free_job: Called once the job's finished fence has been 
>> signaled
>>
>
Luben Tuikov Nov. 25, 2020, 4:48 p.m. UTC | #8
On 2020-11-25 04:50, Christian König wrote:
> Am 25.11.20 um 04:17 schrieb Luben Tuikov:
>> The job timeout handler now returns status
>> indicating back to the DRM layer whether the job
>> was successfully cancelled or whether more time
>> should be given to the job to complete.
>>
>> Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_job.c |  6 ++++--
>>   include/drm/gpu_scheduler.h             | 13 ++++++++++---
>>   2 files changed, 14 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>> index ff48101bab55..81b73790ecc6 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>> @@ -28,7 +28,7 @@
>>   #include "amdgpu.h"
>>   #include "amdgpu_trace.h"
>>   
>> -static void amdgpu_job_timedout(struct drm_sched_job *s_job)
>> +static int amdgpu_job_timedout(struct drm_sched_job *s_job)
>>   {
>>   	struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
>>   	struct amdgpu_job *job = to_amdgpu_job(s_job);
>> @@ -41,7 +41,7 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
>>   	    amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
>>   		DRM_ERROR("ring %s timeout, but soft recovered\n",
>>   			  s_job->sched->name);
>> -		return;
>> +		return 0;
>>   	}
>>   
>>   	amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti);
>> @@ -53,10 +53,12 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
>>   
>>   	if (amdgpu_device_should_recover_gpu(ring->adev)) {
>>   		amdgpu_device_gpu_recover(ring->adev, job);
>> +		return 0;
>>   	} else {
>>   		drm_sched_suspend_timeout(&ring->sched);
>>   		if (amdgpu_sriov_vf(adev))
>>   			adev->virt.tdr_debug = true;
>> +		return 1;
>>   	}
>>   }
>>   
>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>> index 2e0c368e19f6..61f7121e1c19 100644
>> --- a/include/drm/gpu_scheduler.h
>> +++ b/include/drm/gpu_scheduler.h
>> @@ -230,10 +230,17 @@ struct drm_sched_backend_ops {
>>   	struct dma_fence *(*run_job)(struct drm_sched_job *sched_job);
>>   
>>   	/**
>> -         * @timedout_job: Called when a job has taken too long to execute,
>> -         * to trigger GPU recovery.
>> +	 * @timedout_job: Called when a job has taken too long to execute,
>> +	 * to trigger GPU recovery.
>> +	 *
>> +	 * Return 0, if the job has been aborted successfully and will
>> +	 * never be heard of from the device. Return non-zero if the
>> +	 * job wasn't able to be aborted, i.e. if more time should be
>> +	 * given to this job. The result is not "bool" as this
>> +	 * function is not a predicate, although its result may seem
>> +	 * as one.
> 
> I think the whole approach of timing out a job needs to be rethinked. 
> What's timing out here is the hardware engine, not the job.
> 
> So we should also not have the job as parameter here. Maybe we should 
> make that the fence we are waiting for instead.

Yes, I wanted this patch to be minimal, and not to disrupt
too many things.

Yes, in the future we can totally revamp this, but this
is a minimal patch.

> 
>>   	 */
>> -	void (*timedout_job)(struct drm_sched_job *sched_job);
>> +	int (*timedout_job)(struct drm_sched_job *sched_job);
> 
> I would either return an error code, boolean or enum here. But not use a 
> number without a define.

Yes, that's a great idea--I'll make the change now, and resubmit.

Regards,
Luben

> 
> Regards,
> Christian.
> 
>>   
>>   	/**
>>            * @free_job: Called once the job's finished fence has been signaled
>
Andrey Grodzovsky Nov. 26, 2020, 3:06 p.m. UTC | #9
On 11/24/20 10:17 PM, Luben Tuikov wrote:
> The job timeout handler now returns status
> indicating back to the DRM layer whether the job
> was successfully cancelled or whether more time
> should be given to the job to complete.
>
> Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_job.c |  6 ++++--
>   include/drm/gpu_scheduler.h             | 13 ++++++++++---
>   2 files changed, 14 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index ff48101bab55..81b73790ecc6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -28,7 +28,7 @@
>   #include "amdgpu.h"
>   #include "amdgpu_trace.h"
>   
> -static void amdgpu_job_timedout(struct drm_sched_job *s_job)
> +static int amdgpu_job_timedout(struct drm_sched_job *s_job)
>   {
>   	struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
>   	struct amdgpu_job *job = to_amdgpu_job(s_job);
> @@ -41,7 +41,7 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
>   	    amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
>   		DRM_ERROR("ring %s timeout, but soft recovered\n",
>   			  s_job->sched->name);
> -		return;
> +		return 0;
>   	}
>   
>   	amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti);
> @@ -53,10 +53,12 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
>   
>   	if (amdgpu_device_should_recover_gpu(ring->adev)) {
>   		amdgpu_device_gpu_recover(ring->adev, job);
> +		return 0;


For amdgpu specifically - not that amdgpu_device_gpu_recover returns a value 
which is 0 for successful GPU reset
meaning we reset the GPU and resubmitted to HW the job that triggered the 
timeout to HW (guilty).
It means the job is still should be considered part of pending list and so a non 
zero value
should be returned. I think only if we reset the GPU and don't submit back the 
guilty job then
it can be considered 'aborted' - but I don't think we even do this.

Andrey


>   	} else {
>   		drm_sched_suspend_timeout(&ring->sched);
>   		if (amdgpu_sriov_vf(adev))
>   			adev->virt.tdr_debug = true;
> +		return 1;
>   	}
>   }
>   
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index 2e0c368e19f6..61f7121e1c19 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -230,10 +230,17 @@ struct drm_sched_backend_ops {
>   	struct dma_fence *(*run_job)(struct drm_sched_job *sched_job);
>   
>   	/**
> -         * @timedout_job: Called when a job has taken too long to execute,
> -         * to trigger GPU recovery.
> +	 * @timedout_job: Called when a job has taken too long to execute,
> +	 * to trigger GPU recovery.
> +	 *
> +	 * Return 0, if the job has been aborted successfully and will
> +	 * never be heard of from the device. Return non-zero if the
> +	 * job wasn't able to be aborted, i.e. if more time should be
> +	 * given to this job. The result is not "bool" as this
> +	 * function is not a predicate, although its result may seem
> +	 * as one.
>   	 */
> -	void (*timedout_job)(struct drm_sched_job *sched_job);
> +	int (*timedout_job)(struct drm_sched_job *sched_job);
>   
>   	/**
>            * @free_job: Called once the job's finished fence has been signaled
diff mbox series

Patch

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index ff48101bab55..81b73790ecc6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -28,7 +28,7 @@ 
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 
-static void amdgpu_job_timedout(struct drm_sched_job *s_job)
+static int amdgpu_job_timedout(struct drm_sched_job *s_job)
 {
 	struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
 	struct amdgpu_job *job = to_amdgpu_job(s_job);
@@ -41,7 +41,7 @@  static void amdgpu_job_timedout(struct drm_sched_job *s_job)
 	    amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
 		DRM_ERROR("ring %s timeout, but soft recovered\n",
 			  s_job->sched->name);
-		return;
+		return 0;
 	}
 
 	amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti);
@@ -53,10 +53,12 @@  static void amdgpu_job_timedout(struct drm_sched_job *s_job)
 
 	if (amdgpu_device_should_recover_gpu(ring->adev)) {
 		amdgpu_device_gpu_recover(ring->adev, job);
+		return 0;
 	} else {
 		drm_sched_suspend_timeout(&ring->sched);
 		if (amdgpu_sriov_vf(adev))
 			adev->virt.tdr_debug = true;
+		return 1;
 	}
 }
 
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 2e0c368e19f6..61f7121e1c19 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -230,10 +230,17 @@  struct drm_sched_backend_ops {
 	struct dma_fence *(*run_job)(struct drm_sched_job *sched_job);
 
 	/**
-         * @timedout_job: Called when a job has taken too long to execute,
-         * to trigger GPU recovery.
+	 * @timedout_job: Called when a job has taken too long to execute,
+	 * to trigger GPU recovery.
+	 *
+	 * Return 0, if the job has been aborted successfully and will
+	 * never be heard of from the device. Return non-zero if the
+	 * job wasn't able to be aborted, i.e. if more time should be
+	 * given to this job. The result is not "bool" as this
+	 * function is not a predicate, although its result may seem
+	 * as one.
 	 */
-	void (*timedout_job)(struct drm_sched_job *sched_job);
+	int (*timedout_job)(struct drm_sched_job *sched_job);
 
 	/**
          * @free_job: Called once the job's finished fence has been signaled