diff mbox series

drm/v3d: fix sched job resources cleanup when a job is aborted

Message ID 20210916212726.2u2psq2egwy2mdva@mail.igalia.com (mailing list archive)
State New, archived
Headers show
Series drm/v3d: fix sched job resources cleanup when a job is aborted | expand

Commit Message

Melissa Wen Sept. 16, 2021, 9:27 p.m. UTC
In a cl submission, when bin job initialization fails, sched job resources
were already allocated for the render job. At this point,
drm_sched_job_init(render) was done in v3d_job_init but the render job is
aborted before drm_sched_job_arm (in v3d_job_push) happens; therefore, not
only v3d_job_put but also drm_sched_job_cleanup should be called (by
v3d_job_cleanup). A similar issue is addressed for csd and tfu submissions.

The issue was noticed from a review by Iago Toral in a patch that touches
the same part of the code.

Fixes: 916044fac8623 ("drm/v3d: Move drm_sched_job_init to v3d_job_init")
Signed-off-by: Melissa Wen <mwen@igalia.com>
---
 drivers/gpu/drm/v3d/v3d_gem.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

Comments

Iago Toral Sept. 17, 2021, 6:43 a.m. UTC | #1
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>

With that said, I don't like how we are doing error handling here, I
think we want to simplify this and try to make it so we centralize
error handling in one place instead of having multiple error exits
paths, each one trying to do the right thing at that point. This is
error prone, as this patch is showing.

Here is a proposal to make this better:

Make job memory allocation part of v3d_job_init. v3d_job init already
handles error conditions nicely. If we do this, we no longer need to
handle allocation errors in ioctls one by one and for any job we only
have two scenarios: v3d_job_init was successul or it failed (in which
case we know it already cleaned up after itself and we should have a
NULL job as a result). If v3d_job_init failed, then we *always* jump to
the fail tag and there we call v3d_job_cleanup for all jobs that can be
created in that ioctl. If a job is NULL then v3d_job_cleanup returns
early, otherwise, we know it is a fully initialized job, so it does 	
drm_sched_job_cleanup + v3d_job_put.

I think that should make error handling in these functions a lot
easier.

Iago


On Thu, 2021-09-16 at 22:27 +0100, Melissa Wen wrote:
> In a cl submission, when bin job initialization fails, sched job
> resources
> were already allocated for the render job. At this point,
> drm_sched_job_init(render) was done in v3d_job_init but the render
> job is
> aborted before drm_sched_job_arm (in v3d_job_push) happens;
> therefore, not
> only v3d_job_put but also drm_sched_job_cleanup should be called (by
> v3d_job_cleanup). A similar issue is addressed for csd and tfu
> submissions.
> 
> The issue was noticed from a review by Iago Toral in a patch that
> touches
> the same part of the code.
> 
> Fixes: 916044fac8623 ("drm/v3d: Move drm_sched_job_init to
> v3d_job_init")
> Signed-off-by: Melissa Wen <mwen@igalia.com>
> ---
>  drivers/gpu/drm/v3d/v3d_gem.c | 11 +++++------
>  1 file changed, 5 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/v3d/v3d_gem.c
> b/drivers/gpu/drm/v3d/v3d_gem.c
> index 1953706bdaeb..ead0be8d48a7 100644
> --- a/drivers/gpu/drm/v3d/v3d_gem.c
> +++ b/drivers/gpu/drm/v3d/v3d_gem.c
> @@ -567,14 +567,14 @@ v3d_submit_cl_ioctl(struct drm_device *dev,
> void *data,
>  	if (args->bcl_start != args->bcl_end) {
>  		bin = kcalloc(1, sizeof(*bin), GFP_KERNEL);
>  		if (!bin) {
> -			v3d_job_put(&render->base);
> +			v3d_job_cleanup(&render->base);
> 
> >  			return -ENOMEM;
>  		}
>  
>  		ret = v3d_job_init(v3d, file_priv, &bin->base,
>  				   v3d_job_free, args->in_sync_bcl,
> V3D_BIN);
>  		if (ret) {
> -			v3d_job_put(&render->base);
> +			v3d_job_cleanup(&render->base);
>  			kfree(bin);
>  			return ret;
>  		}
> @@ -716,7 +716,7 @@ v3d_submit_tfu_ioctl(struct drm_device *dev, void
> *data,
>  	job->base.bo = kcalloc(ARRAY_SIZE(args->bo_handles),
>  			       sizeof(*job->base.bo), GFP_KERNEL);
>  	if (!job->base.bo) {
> -		v3d_job_put(&job->base);
> +		v3d_job_cleanup(&job->base);
>  		return -ENOMEM;
>  	}
>  
> @@ -810,14 +810,13 @@ v3d_submit_csd_ioctl(struct drm_device *dev,
> void *data,
>  
>  	clean_job = kcalloc(1, sizeof(*clean_job), GFP_KERNEL);
>  	if (!clean_job) {
> -		v3d_job_put(&job->base);
> -		kfree(job);
> +		v3d_job_cleanup(&job->base);
>  		return -ENOMEM;
>  	}
>  
>  	ret = v3d_job_init(v3d, file_priv, clean_job, v3d_job_free, 0,
> V3D_CACHE_CLEAN);
>  	if (ret) {
> -		v3d_job_put(&job->base);
> +		v3d_job_cleanup(&job->base);
>  		kfree(clean_job);
>  		return ret;
>  	}
Melissa Wen Sept. 17, 2021, 9:27 a.m. UTC | #2
On 09/17, Iago Toral wrote:
> Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
> 
> With that said, I don't like how we are doing error handling here, I
> think we want to simplify this and try to make it so we centralize
> error handling in one place instead of having multiple error exits
> paths, each one trying to do the right thing at that point. This is
> error prone, as this patch is showing.
> 
> Here is a proposal to make this better:
> 
> Make job memory allocation part of v3d_job_init. v3d_job init already
> handles error conditions nicely. If we do this, we no longer need to
> handle allocation errors in ioctls one by one and for any job we only
> have two scenarios: v3d_job_init was successul or it failed (in which
> case we know it already cleaned up after itself and we should have a
> NULL job as a result). If v3d_job_init failed, then we *always* jump to
> the fail tag and there we call v3d_job_cleanup for all jobs that can be
> created in that ioctl. If a job is NULL then v3d_job_cleanup returns
> early, otherwise, we know it is a fully initialized job, so it does 	
> drm_sched_job_cleanup + v3d_job_put.
> 
> I think that should make error handling in these functions a lot
> easier.

Hi,

yes, sounds good. I can include this refactoring in the v2 of the
multisync patchset, since there is a prep work there (first patch).

Thanks for reviewing both,

Melissa

> 
> Iago
> 
> 
> On Thu, 2021-09-16 at 22:27 +0100, Melissa Wen wrote:
> > In a cl submission, when bin job initialization fails, sched job
> > resources
> > were already allocated for the render job. At this point,
> > drm_sched_job_init(render) was done in v3d_job_init but the render
> > job is
> > aborted before drm_sched_job_arm (in v3d_job_push) happens;
> > therefore, not
> > only v3d_job_put but also drm_sched_job_cleanup should be called (by
> > v3d_job_cleanup). A similar issue is addressed for csd and tfu
> > submissions.
> > 
> > The issue was noticed from a review by Iago Toral in a patch that
> > touches
> > the same part of the code.
> > 
> > Fixes: 916044fac8623 ("drm/v3d: Move drm_sched_job_init to
> > v3d_job_init")
> > Signed-off-by: Melissa Wen <mwen@igalia.com>
> > ---
> >  drivers/gpu/drm/v3d/v3d_gem.c | 11 +++++------
> >  1 file changed, 5 insertions(+), 6 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/v3d/v3d_gem.c
> > b/drivers/gpu/drm/v3d/v3d_gem.c
> > index 1953706bdaeb..ead0be8d48a7 100644
> > --- a/drivers/gpu/drm/v3d/v3d_gem.c
> > +++ b/drivers/gpu/drm/v3d/v3d_gem.c
> > @@ -567,14 +567,14 @@ v3d_submit_cl_ioctl(struct drm_device *dev,
> > void *data,
> >  	if (args->bcl_start != args->bcl_end) {
> >  		bin = kcalloc(1, sizeof(*bin), GFP_KERNEL);
> >  		if (!bin) {
> > -			v3d_job_put(&render->base);
> > +			v3d_job_cleanup(&render->base);
> > 
> > >  			return -ENOMEM;
> >  		}
> >  
> >  		ret = v3d_job_init(v3d, file_priv, &bin->base,
> >  				   v3d_job_free, args->in_sync_bcl,
> > V3D_BIN);
> >  		if (ret) {
> > -			v3d_job_put(&render->base);
> > +			v3d_job_cleanup(&render->base);
> >  			kfree(bin);
> >  			return ret;
> >  		}
> > @@ -716,7 +716,7 @@ v3d_submit_tfu_ioctl(struct drm_device *dev, void
> > *data,
> >  	job->base.bo = kcalloc(ARRAY_SIZE(args->bo_handles),
> >  			       sizeof(*job->base.bo), GFP_KERNEL);
> >  	if (!job->base.bo) {
> > -		v3d_job_put(&job->base);
> > +		v3d_job_cleanup(&job->base);
> >  		return -ENOMEM;
> >  	}
> >  
> > @@ -810,14 +810,13 @@ v3d_submit_csd_ioctl(struct drm_device *dev,
> > void *data,
> >  
> >  	clean_job = kcalloc(1, sizeof(*clean_job), GFP_KERNEL);
> >  	if (!clean_job) {
> > -		v3d_job_put(&job->base);
> > -		kfree(job);
> > +		v3d_job_cleanup(&job->base);
> >  		return -ENOMEM;
> >  	}
> >  
> >  	ret = v3d_job_init(v3d, file_priv, clean_job, v3d_job_free, 0,
> > V3D_CACHE_CLEAN);
> >  	if (ret) {
> > -		v3d_job_put(&job->base);
> > +		v3d_job_cleanup(&job->base);
> >  		kfree(clean_job);
> >  		return ret;
> >  	}
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index 1953706bdaeb..ead0be8d48a7 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -567,14 +567,14 @@  v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 	if (args->bcl_start != args->bcl_end) {
 		bin = kcalloc(1, sizeof(*bin), GFP_KERNEL);
 		if (!bin) {
-			v3d_job_put(&render->base);
+			v3d_job_cleanup(&render->base);
 			return -ENOMEM;
 		}
 
 		ret = v3d_job_init(v3d, file_priv, &bin->base,
 				   v3d_job_free, args->in_sync_bcl, V3D_BIN);
 		if (ret) {
-			v3d_job_put(&render->base);
+			v3d_job_cleanup(&render->base);
 			kfree(bin);
 			return ret;
 		}
@@ -716,7 +716,7 @@  v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
 	job->base.bo = kcalloc(ARRAY_SIZE(args->bo_handles),
 			       sizeof(*job->base.bo), GFP_KERNEL);
 	if (!job->base.bo) {
-		v3d_job_put(&job->base);
+		v3d_job_cleanup(&job->base);
 		return -ENOMEM;
 	}
 
@@ -810,14 +810,13 @@  v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
 
 	clean_job = kcalloc(1, sizeof(*clean_job), GFP_KERNEL);
 	if (!clean_job) {
-		v3d_job_put(&job->base);
-		kfree(job);
+		v3d_job_cleanup(&job->base);
 		return -ENOMEM;
 	}
 
 	ret = v3d_job_init(v3d, file_priv, clean_job, v3d_job_free, 0, V3D_CACHE_CLEAN);
 	if (ret) {
-		v3d_job_put(&job->base);
+		v3d_job_cleanup(&job->base);
 		kfree(clean_job);
 		return ret;
 	}