Message ID | 20190802184055.31988-5-michal.wajdeczko@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | add more probe failures | expand |
Quoting Michal Wajdeczko (2019-08-02 19:40:53) > When we fail to load GuC and want to abort probe, we hit: > > <7> [229.915779] i915 0000:00:02.0: [drm:intel_uc_init_hw [i915]] GuC initialization failed -6 > <7> [229.915813] i915 0000:00:02.0: [drm:i915_gem_init_hw [i915]] Enabling uc failed (-6) > <4> [229.953354] ------------[ cut here ]------------ > <4> [229.953355] WARN_ON(dev_priv->mm.shrink_count) > <4> [229.953406] WARNING: CPU: 9 PID: 3287 at drivers/gpu/drm/i915/i915_gem.c:1684 i915_gem_cleanup_early+0xfc/0x110 [i915] > <4> [229.953464] Call Trace: > <4> [229.953489] i915_driver_late_release+0x19/0x60 [i915] > <4> [229.953514] i915_driver_probe+0xb82/0x18a0 [i915] > <4> [229.953519] ? __pm_runtime_resume+0x4f/0x80 > <4> [229.953545] i915_pci_probe+0x43/0x1b0 [i915] > ... > <4> [229.962951] ------------[ cut here ]------------ > <4> [229.962956] DEBUG_LOCKS_WARN_ON(lock->magic != lock) > <4> [229.962959] WARNING: CPU: 8 PID: 2395 at kernel/locking/mutex.c:912 __mutex_lock+0x750/0x9b0 > <4> [229.963091] Call Trace: > <4> [229.963129] ? i915_vma_destroy+0x86/0x350 [i915] > <4> [229.963166] ? i915_vma_destroy+0x86/0x350 [i915] > <4> [229.963201] i915_vma_destroy+0x86/0x350 [i915] > <4> [229.963236] __i915_gem_free_objects+0xb8/0x510 [i915] > <4> [229.963270] __i915_gem_free_work+0x5a/0x90 [i915] > <4> [229.963275] process_one_work+0x245/0x610 > > as since commit 6f76098fe0f3 ("drm/i915/uc: Move uC early functions > inside the GT ones") we cleanup uc after gem. > > Move captured GuC load error log to uc struct and release it > in intel_uc_fini() instead of intel_uc_driver_late_release() > > Note that intel_uc_driver_late_release() is now empty, but > we can leave it as a placeholder for future code. > > Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com> > Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> > Cc: Chris Wilson <chris@chris-wilson.co.uk> Well done, I was looking for an incorrect onion and didn't spot it, so thought it was just the deferred free. Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> -Chris
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 6edb29b9ceaa..cc035c9781ae 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -47,9 +47,6 @@ struct intel_guc { struct intel_guc_log log; struct intel_guc_ct ct; - /* Log snapshot if GuC errors during load */ - struct drm_i915_gem_object *load_err_log; - /* intel_guc_recv interrupt related state */ spinlock_t irq_lock; unsigned int msg_enabled_mask; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index f24860a86d0e..e5421c0b9a25 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -31,8 +31,6 @@ #include "i915_drv.h" -static void guc_free_load_err_log(struct intel_guc *guc); - /* Reset GuC providing us with fresh state for both GuC and HuC. */ static int __intel_uc_reset_hw(struct intel_uc *uc) @@ -103,7 +101,6 @@ void intel_uc_init_early(struct intel_uc *uc) void intel_uc_driver_late_release(struct intel_uc *uc) { - guc_free_load_err_log(&uc->guc); } /** @@ -118,21 +115,20 @@ void intel_uc_init_mmio(struct intel_uc *uc) intel_guc_init_send_regs(&uc->guc); } -static void guc_capture_load_err_log(struct intel_guc *guc) +static void __uc_capture_load_err_log(struct intel_uc *uc) { - if (!guc->log.vma || !intel_guc_log_get_level(&guc->log)) - return; - - if (!guc->load_err_log) - guc->load_err_log = i915_gem_object_get(guc->log.vma->obj); + struct intel_guc *guc = &uc->guc; - return; + if (guc->log.vma && !uc->load_err_log) + uc->load_err_log = i915_gem_object_get(guc->log.vma->obj); } -static void guc_free_load_err_log(struct intel_guc *guc) +static void __uc_free_load_err_log(struct intel_uc *uc) { - if (guc->load_err_log) - i915_gem_object_put(guc->load_err_log); + struct drm_i915_gem_object *log = fetch_and_zero(&uc->load_err_log); + + if (log) + i915_gem_object_put(log); } /* @@ -338,6 +334,8 @@ void intel_uc_fini(struct intel_uc *uc) intel_huc_fini(&uc->huc); intel_guc_fini(guc); + + __uc_free_load_err_log(uc); } static int __uc_sanitize(struct intel_uc *uc) @@ -493,7 +491,7 @@ int intel_uc_init_hw(struct intel_uc *uc) err_communication: guc_disable_communication(guc); err_log_capture: - guc_capture_load_err_log(guc); + __uc_capture_load_err_log(uc); err_out: __uc_sanitize(uc); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h index 0cca839422e2..cf9ee3c27877 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h @@ -31,6 +31,9 @@ struct intel_uc { struct intel_guc guc; struct intel_huc huc; + + /* Snapshot of GuC log from last failed load */ + struct drm_i915_gem_object *load_err_log; }; void intel_uc_init_early(struct intel_uc *uc); diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 6dbd85b38759..461a8dd4cc47 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2066,7 +2066,7 @@ static int i915_guc_log_dump(struct seq_file *m, void *data) return -ENODEV; if (dump_load_err) - obj = dev_priv->gt.uc.guc.load_err_log; + obj = dev_priv->gt.uc.load_err_log; else if (dev_priv->gt.uc.guc.log.vma) obj = dev_priv->gt.uc.guc.log.vma->obj;
When we fail to load GuC and want to abort probe, we hit: <7> [229.915779] i915 0000:00:02.0: [drm:intel_uc_init_hw [i915]] GuC initialization failed -6 <7> [229.915813] i915 0000:00:02.0: [drm:i915_gem_init_hw [i915]] Enabling uc failed (-6) <4> [229.953354] ------------[ cut here ]------------ <4> [229.953355] WARN_ON(dev_priv->mm.shrink_count) <4> [229.953406] WARNING: CPU: 9 PID: 3287 at drivers/gpu/drm/i915/i915_gem.c:1684 i915_gem_cleanup_early+0xfc/0x110 [i915] <4> [229.953464] Call Trace: <4> [229.953489] i915_driver_late_release+0x19/0x60 [i915] <4> [229.953514] i915_driver_probe+0xb82/0x18a0 [i915] <4> [229.953519] ? __pm_runtime_resume+0x4f/0x80 <4> [229.953545] i915_pci_probe+0x43/0x1b0 [i915] ... <4> [229.962951] ------------[ cut here ]------------ <4> [229.962956] DEBUG_LOCKS_WARN_ON(lock->magic != lock) <4> [229.962959] WARNING: CPU: 8 PID: 2395 at kernel/locking/mutex.c:912 __mutex_lock+0x750/0x9b0 <4> [229.963091] Call Trace: <4> [229.963129] ? i915_vma_destroy+0x86/0x350 [i915] <4> [229.963166] ? i915_vma_destroy+0x86/0x350 [i915] <4> [229.963201] i915_vma_destroy+0x86/0x350 [i915] <4> [229.963236] __i915_gem_free_objects+0xb8/0x510 [i915] <4> [229.963270] __i915_gem_free_work+0x5a/0x90 [i915] <4> [229.963275] process_one_work+0x245/0x610 as since commit 6f76098fe0f3 ("drm/i915/uc: Move uC early functions inside the GT ones") we cleanup uc after gem. Move captured GuC load error log to uc struct and release it in intel_uc_fini() instead of intel_uc_driver_late_release() Note that intel_uc_driver_late_release() is now empty, but we can leave it as a placeholder for future code. Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 3 --- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 26 ++++++++++++-------------- drivers/gpu/drm/i915/gt/uc/intel_uc.h | 3 +++ drivers/gpu/drm/i915/i915_debugfs.c | 2 +- 4 files changed, 16 insertions(+), 18 deletions(-)