Message ID | 20241114063942.3448607-1-suhui@nfschina.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/xe/hw_engine_group: Fix bad free in xe_hw_engine_setup_groups() | expand |
On 14.11.2024 14:39, Su Hui wrote: > Clang static checker(scan-build) warning: > drivers/gpu/drm/xe/xe_hw_engine_group.c: line 134, column 2 > Argument to kfree() is a constant address (18446744073709551604), which > is not memory allocated by malloc(). > > There are some problems in hw_engine_group_alloc() and > xe_hw_engine_setup_groups(). First, kfree() can only handle NULL > pointers instead of negitave error codes. When hw_engine_group_alloc() > failed, there is a bad kfree call for negitave error codes in > xe_hw_engine_setup_groups(). Second, when drmm_add_action_or_reset() > failed, destroy_workqueue() should be called to free 'group->resume_wq'. > > Free 'group' and destroy 'group->resume_wq' when hw_engine_group_alloc() > failed to fix these problems. > > Fixes: d16ef1a18e39 ("drm/xe/exec: Switch hw engine group execution mode upon job submission") > Fixes: f784750c670f ("drm/xe/hw_engine_group: Introduce xe_hw_engine_group") > Signed-off-by: Su Hui <suhui@nfschina.com> > --- > drivers/gpu/drm/xe/xe_hw_engine_group.c | 31 ++++++++++++++----------- > 1 file changed, 17 insertions(+), 14 deletions(-) > > diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c > index 82750520a90a..ee2cb32817fa 100644 > --- a/drivers/gpu/drm/xe/xe_hw_engine_group.c > +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c > @@ -51,7 +51,7 @@ static struct xe_hw_engine_group * > hw_engine_group_alloc(struct xe_device *xe) > { > struct xe_hw_engine_group *group; > - int err; > + int err = -ENOMEM; > > group = kzalloc(sizeof(*group), GFP_KERNEL); > if (!group) > @@ -59,7 +59,7 @@ hw_engine_group_alloc(struct xe_device *xe) > > group->resume_wq = alloc_workqueue("xe-resume-lr-jobs-wq", 0, 0); > if (!group->resume_wq) > - return ERR_PTR(-ENOMEM); > + goto free_group; kfree can be directly called from here followed by return, instead of a goto. > > init_rwsem(&group->mode_sem); > INIT_WORK(&group->resume_work, hw_engine_group_resume_lr_jobs_func); > @@ -67,9 +67,15 @@ hw_engine_group_alloc(struct xe_device *xe) > > err = drmm_add_action_or_reset(&xe->drm, hw_engine_group_free, group); > if (err) > - return ERR_PTR(err); > + goto destroy_wq; There is no need to clear the resources on failure, because drmm_add_action_or_reset takes care of freeing the resources on failure. > > return group; > + > +destroy_wq: > + destroy_workqueue(group->resume_wq); > +free_group: > + kfree(group); > + return ERR_PTR(err); > } > > /** > @@ -87,21 +93,19 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt) > int err; > > group_rcs_ccs = hw_engine_group_alloc(xe); > - if (IS_ERR(group_rcs_ccs)) { > - err = PTR_ERR(group_rcs_ccs); > - goto err_group_rcs_ccs; > - } > + if (IS_ERR(group_rcs_ccs)) > + return PTR_ERR(group_rcs_ccs); > > group_bcs = hw_engine_group_alloc(xe); > if (IS_ERR(group_bcs)) { > err = PTR_ERR(group_bcs); > - goto err_group_bcs; > + goto free_group_rcs_ccs; No need of freeing the memory here and in the following lines as we have managed it through the drmm_add_action_or_reset call in hw_engine_group_alloc. We can simply return the error code. > } > > group_vcs_vecs = hw_engine_group_alloc(xe); > if (IS_ERR(group_vcs_vecs)) { > err = PTR_ERR(group_vcs_vecs); > - goto err_group_vcs_vecs; > + goto free_group_bcs; > } > > for_each_hw_engine(hwe, gt, id) { > @@ -126,13 +130,12 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt) > > return 0; > > -err_group_vcs_vecs: > - kfree(group_vcs_vecs); > -err_group_bcs: > +free_group_bcs: > + destroy_workqueue(group_bcs->resume_wq); > kfree(group_bcs); > -err_group_rcs_ccs: > +free_group_rcs_ccs: > + destroy_workqueue(group_rcs_ccs->resume_wq); > kfree(group_rcs_ccs); > - All these kfree statements are not required. Regards, Bala > return err; > } > > -- > 2.30.2 >
On 2024/11/14 15:45, Vivekanandan, Balasubramani wrote: > On 14.11.2024 14:39, Su Hui wrote: >> >> diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c >> index 82750520a90a..ee2cb32817fa 100644 >> --- a/drivers/gpu/drm/xe/xe_hw_engine_group.c >> +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c >> @@ -51,7 +51,7 @@ static struct xe_hw_engine_group * >> hw_engine_group_alloc(struct xe_device *xe) >> { >> struct xe_hw_engine_group *group; >> - int err; >> + int err = -ENOMEM; >> >> group = kzalloc(sizeof(*group), GFP_KERNEL); >> if (!group) >> @@ -59,7 +59,7 @@ hw_engine_group_alloc(struct xe_device *xe) >> >> group->resume_wq = alloc_workqueue("xe-resume-lr-jobs-wq", 0, 0); >> if (!group->resume_wq) >> - return ERR_PTR(-ENOMEM); >> + goto free_group; > kfree can be directly called from here followed by return, instead of a > goto. Agreed. >> >> init_rwsem(&group->mode_sem); >> INIT_WORK(&group->resume_work, hw_engine_group_resume_lr_jobs_func); >> @@ -67,9 +67,15 @@ hw_engine_group_alloc(struct xe_device *xe) >> >> err = drmm_add_action_or_reset(&xe->drm, hw_engine_group_free, group); >> if (err) >> - return ERR_PTR(err); >> + goto destroy_wq; > There is no need to clear the resources on failure, because > drmm_add_action_or_reset takes care of freeing the resources on > failure. Oh, my fault, I missed this function. >> >> >> /** >> @@ -87,21 +93,19 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt) >> int err; >> >> group_rcs_ccs = hw_engine_group_alloc(xe); >> - if (IS_ERR(group_rcs_ccs)) { >> - err = PTR_ERR(group_rcs_ccs); >> - goto err_group_rcs_ccs; >> - } >> + if (IS_ERR(group_rcs_ccs)) >> + return PTR_ERR(group_rcs_ccs); >> >> group_bcs = hw_engine_group_alloc(xe); >> if (IS_ERR(group_bcs)) { >> err = PTR_ERR(group_bcs); >> - goto err_group_bcs; >> + goto free_group_rcs_ccs; > No need of freeing the memory here and in the following lines as we have > managed it through the drmm_add_action_or_reset call in > hw_engine_group_alloc. > We can simply return the error code. Got it. > >> >> -err_group_vcs_vecs: >> - kfree(group_vcs_vecs); >> -err_group_bcs: >> +free_group_bcs: >> + destroy_workqueue(group_bcs->resume_wq); >> kfree(group_bcs); >> -err_group_rcs_ccs: >> +free_group_rcs_ccs: >> + destroy_workqueue(group_rcs_ccs->resume_wq); >> kfree(group_rcs_ccs); >> - > All these kfree statements are not required. Agreed too. Thanks for your review. I will send a v2 patch to remove these kfree if there are no further suggestions. Regards, Su Hui
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c index 82750520a90a..ee2cb32817fa 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_group.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c @@ -51,7 +51,7 @@ static struct xe_hw_engine_group * hw_engine_group_alloc(struct xe_device *xe) { struct xe_hw_engine_group *group; - int err; + int err = -ENOMEM; group = kzalloc(sizeof(*group), GFP_KERNEL); if (!group) @@ -59,7 +59,7 @@ hw_engine_group_alloc(struct xe_device *xe) group->resume_wq = alloc_workqueue("xe-resume-lr-jobs-wq", 0, 0); if (!group->resume_wq) - return ERR_PTR(-ENOMEM); + goto free_group; init_rwsem(&group->mode_sem); INIT_WORK(&group->resume_work, hw_engine_group_resume_lr_jobs_func); @@ -67,9 +67,15 @@ hw_engine_group_alloc(struct xe_device *xe) err = drmm_add_action_or_reset(&xe->drm, hw_engine_group_free, group); if (err) - return ERR_PTR(err); + goto destroy_wq; return group; + +destroy_wq: + destroy_workqueue(group->resume_wq); +free_group: + kfree(group); + return ERR_PTR(err); } /** @@ -87,21 +93,19 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt) int err; group_rcs_ccs = hw_engine_group_alloc(xe); - if (IS_ERR(group_rcs_ccs)) { - err = PTR_ERR(group_rcs_ccs); - goto err_group_rcs_ccs; - } + if (IS_ERR(group_rcs_ccs)) + return PTR_ERR(group_rcs_ccs); group_bcs = hw_engine_group_alloc(xe); if (IS_ERR(group_bcs)) { err = PTR_ERR(group_bcs); - goto err_group_bcs; + goto free_group_rcs_ccs; } group_vcs_vecs = hw_engine_group_alloc(xe); if (IS_ERR(group_vcs_vecs)) { err = PTR_ERR(group_vcs_vecs); - goto err_group_vcs_vecs; + goto free_group_bcs; } for_each_hw_engine(hwe, gt, id) { @@ -126,13 +130,12 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt) return 0; -err_group_vcs_vecs: - kfree(group_vcs_vecs); -err_group_bcs: +free_group_bcs: + destroy_workqueue(group_bcs->resume_wq); kfree(group_bcs); -err_group_rcs_ccs: +free_group_rcs_ccs: + destroy_workqueue(group_rcs_ccs->resume_wq); kfree(group_rcs_ccs); - return err; }
Clang static checker(scan-build) warning: drivers/gpu/drm/xe/xe_hw_engine_group.c: line 134, column 2 Argument to kfree() is a constant address (18446744073709551604), which is not memory allocated by malloc(). There are some problems in hw_engine_group_alloc() and xe_hw_engine_setup_groups(). First, kfree() can only handle NULL pointers instead of negitave error codes. When hw_engine_group_alloc() failed, there is a bad kfree call for negitave error codes in xe_hw_engine_setup_groups(). Second, when drmm_add_action_or_reset() failed, destroy_workqueue() should be called to free 'group->resume_wq'. Free 'group' and destroy 'group->resume_wq' when hw_engine_group_alloc() failed to fix these problems. Fixes: d16ef1a18e39 ("drm/xe/exec: Switch hw engine group execution mode upon job submission") Fixes: f784750c670f ("drm/xe/hw_engine_group: Introduce xe_hw_engine_group") Signed-off-by: Su Hui <suhui@nfschina.com> --- drivers/gpu/drm/xe/xe_hw_engine_group.c | 31 ++++++++++++++----------- 1 file changed, 17 insertions(+), 14 deletions(-)