Message ID | 20220323094329.56352-1-gwan-gyeong.mun@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/i915: Decouple engine->sanitize callback on removing the status page | expand |
On Wed, 23 Mar 2022 at 09:44, Gwan-gyeong Mun <gwan-gyeong.mun@intel.com> wrote: > > From: Chris Wilson <chris@chris-wilson.co.uk> > > We have to be careful not to call into the submission backend's sanitize > callback if we abort the module load and free the status page. Since we > are only using the sanitize callback to cleanup the status page when we > suspect its contents may have been lost (first load, upon resume etc) > let us move the callback to engine->status_page and remove it as we free > the status page. > > <3> [306.359604] BUG: KASAN: slab-out-of-bounds in xcs_sanitize+0x4a/0x110 [i915] > <3> [306.360346] Write of size 4096 at addr ffff88806d5e8000 by task i915_module_loa/1052 > <3> [306.360561] > <4> [306.360627] CPU: 1 PID: 1052 Comm: i915_module_loa Tainted: G U 5.12.0-rc2-g249f72def27bf-kasan_218+ #1 > <4> [306.360650] Hardware name: GIGABYTE GB-BXBT-1900/MZBAYAB-00, BIOS F6 02/17/2015 > <4> [306.360667] Call Trace: > <4> [306.360688] dump_stack+0xa4/0xe5 > <4> [306.360727] ? xcs_sanitize+0x4a/0x110 [i915] > <4> [306.361274] print_address_description.constprop.9+0x3a/0x60 > <4> [306.361311] ? xcs_sanitize+0x4a/0x110 [i915] > <4> [306.361855] ? xcs_sanitize+0x4a/0x110 [i915] > <4> [306.362408] kasan_report.cold.14+0x7c/0xd8 > <4> [306.362456] ? xcs_sanitize+0x4a/0x110 [i915] > <4> [306.363015] kasan_check_range+0x1c1/0x1e0 > <4> [306.363056] memset+0x1f/0x40 > <4> [306.363093] xcs_sanitize+0x4a/0x110 [i915] > <4> [306.363661] gt_sanitize+0x2f7/0x6d0 [i915] > <4> [306.364221] ? __pm_runtime_suspend+0x186/0x2e0 > <4> [306.364270] intel_gt_suspend_late+0x126/0x2c0 [i915] > <4> [306.364833] i915_gem_suspend_late+0x9d/0x470 [i915] > <4> [306.365402] ? intel_wakeref_auto+0x3ba/0x520 [i915] > <4> [306.365939] ? i915_gem_suspend+0x180/0x180 [i915] > <4> [306.366521] i915_gem_driver_remove+0x25/0x1f0 [i915] > <4> [306.367071] ? lockdep_hardirqs_on+0xbf/0x130 > <4> [306.367124] i915_driver_remove+0xba/0xf0 [i915] > <4> [306.367670] i915_pci_remove+0x34/0x70 [i915] > <4> [306.368224] pci_device_remove+0xa3/0x1e0 > <4> [306.368275] device_release_driver_internal+0x1e0/0x4a0 > <4> [306.368320] driver_detach+0xbc/0x180 > <4> [306.368364] bus_remove_driver+0x15e/0x2d0 > <4> [306.368404] pci_unregister_driver+0x28/0x220 > <4> [306.368456] i915_exit+0x1b/0x26 [i915] > <4> [306.369055] __x64_sys_delete_module+0x257/0x370 > <4> [306.369093] ? __ia32_sys_delete_module+0x370/0x370 > <4> [306.369146] ? lockdep_hardirqs_on+0xbf/0x130 > <4> [306.369185] do_syscall_64+0x33/0x80 > <4> [306.369212] entry_SYSCALL_64_after_hwframe+0x44/0xae > <4> [306.369237] RIP: 0033:0x7f7a7020fbcb > <4> [306.369259] Code: 73 01 c3 48 8b 0d c5 82 0c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa b8 b0 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 95 82 0c 00 f7 d8 64 89 01 48 > <4> [306.369281] RSP: 002b:00007ffc22d2ef78 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 > <4> [306.369315] RAX: ffffffffffffffda RBX: 000055724a324db0 RCX: 00007f7a7020fbcb > <4> [306.369334] RDX: 00007f7a702d8be0 RSI: 0000000000000800 RDI: 000055724a324e18 > <4> [306.369352] RBP: 00007f7a70392702 R08: 0000000000000000 R09: 00007f7a702d8da0 > <4> [306.369370] R10: 000055724a2ee010 R11: 0000000000000206 R12: 0000000000000000 > <4> [306.369388] R13: 00007ffc22d2f670 R14: 0000000000000000 R15: 0000000000000000 > Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/4105 > Fixes: b436a5f8b6c8 ("drm/i915/gt: Track all timelines created using the HWSP") > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Signed-off-by: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com> > Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com> > --- > drivers/gpu/drm/i915/gt/intel_engine_cs.c | 7 +++++-- > drivers/gpu/drm/i915/gt/intel_engine_types.h | 4 +++- > drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 4 +--- > drivers/gpu/drm/i915/gt/intel_gt_pm.c | 4 ++-- > drivers/gpu/drm/i915/gt/intel_ring_submission.c | 2 +- > drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 4 ++-- > 6 files changed, 14 insertions(+), 11 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c > index 98b61ff13c95..7e3a65b0661c 100644 > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c > @@ -844,6 +844,9 @@ static void cleanup_status_page(struct intel_engine_cs *engine) > > i915_gem_object_unpin_map(vma->obj); > i915_gem_object_put(vma->obj); > + > + /* no longer in control, nothing left to sanitize */ > + engine->status_page.sanitize = NULL; > } > > static int pin_ggtt_status_page(struct intel_engine_cs *engine, > @@ -1542,8 +1545,8 @@ void intel_engines_reset_default_submission(struct intel_gt *gt) > enum intel_engine_id id; > > for_each_engine(engine, gt, id) { > - if (engine->sanitize) > - engine->sanitize(engine); > + if (engine->status_page.sanitize) > + engine->status_page.sanitize(engine); > > engine->set_default_submission(engine); > } > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h > index eac20112709c..268249efd76c 100644 > --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h > @@ -53,6 +53,7 @@ struct intel_gt; > struct intel_ring; > struct intel_uncore; > struct intel_breadcrumbs; > +struct intel_engine_cs; > > typedef u32 intel_engine_mask_t; > #define ALL_ENGINES ((intel_engine_mask_t)~0ul) > @@ -61,6 +62,8 @@ struct intel_hw_status_page { > struct list_head timelines; > struct i915_vma *vma; > u32 *addr; > + > + void (*sanitize)(struct intel_engine_cs *engine); > }; > > struct intel_instdone { > @@ -445,7 +448,6 @@ struct intel_engine_cs { > void (*irq_disable)(struct intel_engine_cs *engine); > void (*irq_handler)(struct intel_engine_cs *engine, u16 iir); > > - void (*sanitize)(struct intel_engine_cs *engine); > int (*resume)(struct intel_engine_cs *engine); > > struct { > diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > index e23c1d0e980b..c3b850e23c4b 100644 > --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > @@ -3328,8 +3328,6 @@ static void execlists_shutdown(struct intel_engine_cs *engine) > > static void execlists_release(struct intel_engine_cs *engine) > { > - engine->sanitize = NULL; /* no longer in control, nothing to sanitize */ > - > execlists_shutdown(engine); > > intel_engine_cleanup_common(engine); > @@ -3520,7 +3518,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) > } > > /* Finally, take ownership and responsibility for cleanup! */ > - engine->sanitize = execlists_sanitize; > + engine->status_page.sanitize = execlists_sanitize; > engine->release = execlists_release; > > return 0; > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c > index eeead40485fb..a38124748a7d 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c > +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c > @@ -189,8 +189,8 @@ static void gt_sanitize(struct intel_gt *gt, bool force) > intel_uc_reset_prepare(>->uc); > > for_each_engine(engine, gt, id) > - if (engine->sanitize) > - engine->sanitize(engine); > + if (engine->status_page.sanitize) > + engine->status_page.sanitize(engine); > > if (reset_engines(gt) || force) { > for_each_engine(engine, gt, id) > diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c > index 5423bfd301ad..a8d4398ea024 100644 > --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c > +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c > @@ -1121,7 +1121,7 @@ static void setup_common(struct intel_engine_cs *engine) > setup_irq(engine); > > engine->resume = xcs_resume; > - engine->sanitize = xcs_sanitize; > + engine->status_page.sanitize = xcs_sanitize; > > engine->reset.prepare = reset_prepare; > engine->reset.rewind = reset_rewind; > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > index e1612c393781..156a5a9e6a55 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > @@ -3676,7 +3676,7 @@ static inline void guc_init_lrc_mapping(struct intel_guc *guc) > > static void guc_release(struct intel_engine_cs *engine) > { > - engine->sanitize = NULL; /* no longer in control, nothing to sanitize */ > + tasklet_kill(&engine->sched_engine->tasklet); > > intel_engine_cleanup_common(engine); > lrc_fini_wa_ctx(engine); > @@ -3809,7 +3809,7 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine) > lrc_init_wa_ctx(engine); > > /* Finally, take ownership and responsibility for cleanup! */ > - engine->sanitize = guc_sanitize; > + engine->status_page.sanitize = guc_sanitize; > engine->release = guc_release; > > return 0; > -- > 2.34.1 >
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 98b61ff13c95..7e3a65b0661c 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -844,6 +844,9 @@ static void cleanup_status_page(struct intel_engine_cs *engine) i915_gem_object_unpin_map(vma->obj); i915_gem_object_put(vma->obj); + + /* no longer in control, nothing left to sanitize */ + engine->status_page.sanitize = NULL; } static int pin_ggtt_status_page(struct intel_engine_cs *engine, @@ -1542,8 +1545,8 @@ void intel_engines_reset_default_submission(struct intel_gt *gt) enum intel_engine_id id; for_each_engine(engine, gt, id) { - if (engine->sanitize) - engine->sanitize(engine); + if (engine->status_page.sanitize) + engine->status_page.sanitize(engine); engine->set_default_submission(engine); } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index eac20112709c..268249efd76c 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -53,6 +53,7 @@ struct intel_gt; struct intel_ring; struct intel_uncore; struct intel_breadcrumbs; +struct intel_engine_cs; typedef u32 intel_engine_mask_t; #define ALL_ENGINES ((intel_engine_mask_t)~0ul) @@ -61,6 +62,8 @@ struct intel_hw_status_page { struct list_head timelines; struct i915_vma *vma; u32 *addr; + + void (*sanitize)(struct intel_engine_cs *engine); }; struct intel_instdone { @@ -445,7 +448,6 @@ struct intel_engine_cs { void (*irq_disable)(struct intel_engine_cs *engine); void (*irq_handler)(struct intel_engine_cs *engine, u16 iir); - void (*sanitize)(struct intel_engine_cs *engine); int (*resume)(struct intel_engine_cs *engine); struct { diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index e23c1d0e980b..c3b850e23c4b 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3328,8 +3328,6 @@ static void execlists_shutdown(struct intel_engine_cs *engine) static void execlists_release(struct intel_engine_cs *engine) { - engine->sanitize = NULL; /* no longer in control, nothing to sanitize */ - execlists_shutdown(engine); intel_engine_cleanup_common(engine); @@ -3520,7 +3518,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) } /* Finally, take ownership and responsibility for cleanup! */ - engine->sanitize = execlists_sanitize; + engine->status_page.sanitize = execlists_sanitize; engine->release = execlists_release; return 0; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index eeead40485fb..a38124748a7d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -189,8 +189,8 @@ static void gt_sanitize(struct intel_gt *gt, bool force) intel_uc_reset_prepare(>->uc); for_each_engine(engine, gt, id) - if (engine->sanitize) - engine->sanitize(engine); + if (engine->status_page.sanitize) + engine->status_page.sanitize(engine); if (reset_engines(gt) || force) { for_each_engine(engine, gt, id) diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 5423bfd301ad..a8d4398ea024 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -1121,7 +1121,7 @@ static void setup_common(struct intel_engine_cs *engine) setup_irq(engine); engine->resume = xcs_resume; - engine->sanitize = xcs_sanitize; + engine->status_page.sanitize = xcs_sanitize; engine->reset.prepare = reset_prepare; engine->reset.rewind = reset_rewind; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index e1612c393781..156a5a9e6a55 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -3676,7 +3676,7 @@ static inline void guc_init_lrc_mapping(struct intel_guc *guc) static void guc_release(struct intel_engine_cs *engine) { - engine->sanitize = NULL; /* no longer in control, nothing to sanitize */ + tasklet_kill(&engine->sched_engine->tasklet); intel_engine_cleanup_common(engine); lrc_fini_wa_ctx(engine); @@ -3809,7 +3809,7 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine) lrc_init_wa_ctx(engine); /* Finally, take ownership and responsibility for cleanup! */ - engine->sanitize = guc_sanitize; + engine->status_page.sanitize = guc_sanitize; engine->release = guc_release; return 0;