diff mbox series

[v7,4/6] drm/i915/uc: Move GuC error log to uc and release it on fini

Message ID 20190802184055.31988-5-michal.wajdeczko@intel.com (mailing list archive)
State New, archived
Headers show
Series add more probe failures | expand

Commit Message

Michal Wajdeczko Aug. 2, 2019, 6:40 p.m. UTC
When we fail to load GuC and want to abort probe, we hit:

<7> [229.915779] i915 0000:00:02.0: [drm:intel_uc_init_hw [i915]] GuC initialization failed -6
<7> [229.915813] i915 0000:00:02.0: [drm:i915_gem_init_hw [i915]] Enabling uc failed (-6)
<4> [229.953354] ------------[ cut here ]------------
<4> [229.953355] WARN_ON(dev_priv->mm.shrink_count)
<4> [229.953406] WARNING: CPU: 9 PID: 3287 at drivers/gpu/drm/i915/i915_gem.c:1684 i915_gem_cleanup_early+0xfc/0x110 [i915]
<4> [229.953464] Call Trace:
<4> [229.953489]  i915_driver_late_release+0x19/0x60 [i915]
<4> [229.953514]  i915_driver_probe+0xb82/0x18a0 [i915]
<4> [229.953519]  ? __pm_runtime_resume+0x4f/0x80
<4> [229.953545]  i915_pci_probe+0x43/0x1b0 [i915]
...
<4> [229.962951] ------------[ cut here ]------------
<4> [229.962956] DEBUG_LOCKS_WARN_ON(lock->magic != lock)
<4> [229.962959] WARNING: CPU: 8 PID: 2395 at kernel/locking/mutex.c:912 __mutex_lock+0x750/0x9b0
<4> [229.963091] Call Trace:
<4> [229.963129]  ? i915_vma_destroy+0x86/0x350 [i915]
<4> [229.963166]  ? i915_vma_destroy+0x86/0x350 [i915]
<4> [229.963201]  i915_vma_destroy+0x86/0x350 [i915]
<4> [229.963236]  __i915_gem_free_objects+0xb8/0x510 [i915]
<4> [229.963270]  __i915_gem_free_work+0x5a/0x90 [i915]
<4> [229.963275]  process_one_work+0x245/0x610

as since commit 6f76098fe0f3 ("drm/i915/uc: Move uC early functions
inside the GT ones") we cleanup uc after gem.

Move captured GuC load error log to uc struct and release it
in intel_uc_fini() instead of intel_uc_driver_late_release()

Note that intel_uc_driver_late_release() is now empty, but
we can leave it as a placeholder for future code.

Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/uc/intel_guc.h |  3 ---
 drivers/gpu/drm/i915/gt/uc/intel_uc.c  | 26 ++++++++++++--------------
 drivers/gpu/drm/i915/gt/uc/intel_uc.h  |  3 +++
 drivers/gpu/drm/i915/i915_debugfs.c    |  2 +-
 4 files changed, 16 insertions(+), 18 deletions(-)

Comments

Chris Wilson Aug. 2, 2019, 7:41 p.m. UTC | #1
Quoting Michal Wajdeczko (2019-08-02 19:40:53)
> When we fail to load GuC and want to abort probe, we hit:
> 
> <7> [229.915779] i915 0000:00:02.0: [drm:intel_uc_init_hw [i915]] GuC initialization failed -6
> <7> [229.915813] i915 0000:00:02.0: [drm:i915_gem_init_hw [i915]] Enabling uc failed (-6)
> <4> [229.953354] ------------[ cut here ]------------
> <4> [229.953355] WARN_ON(dev_priv->mm.shrink_count)
> <4> [229.953406] WARNING: CPU: 9 PID: 3287 at drivers/gpu/drm/i915/i915_gem.c:1684 i915_gem_cleanup_early+0xfc/0x110 [i915]
> <4> [229.953464] Call Trace:
> <4> [229.953489]  i915_driver_late_release+0x19/0x60 [i915]
> <4> [229.953514]  i915_driver_probe+0xb82/0x18a0 [i915]
> <4> [229.953519]  ? __pm_runtime_resume+0x4f/0x80
> <4> [229.953545]  i915_pci_probe+0x43/0x1b0 [i915]
> ...
> <4> [229.962951] ------------[ cut here ]------------
> <4> [229.962956] DEBUG_LOCKS_WARN_ON(lock->magic != lock)
> <4> [229.962959] WARNING: CPU: 8 PID: 2395 at kernel/locking/mutex.c:912 __mutex_lock+0x750/0x9b0
> <4> [229.963091] Call Trace:
> <4> [229.963129]  ? i915_vma_destroy+0x86/0x350 [i915]
> <4> [229.963166]  ? i915_vma_destroy+0x86/0x350 [i915]
> <4> [229.963201]  i915_vma_destroy+0x86/0x350 [i915]
> <4> [229.963236]  __i915_gem_free_objects+0xb8/0x510 [i915]
> <4> [229.963270]  __i915_gem_free_work+0x5a/0x90 [i915]
> <4> [229.963275]  process_one_work+0x245/0x610
> 
> as since commit 6f76098fe0f3 ("drm/i915/uc: Move uC early functions
> inside the GT ones") we cleanup uc after gem.
> 
> Move captured GuC load error log to uc struct and release it
> in intel_uc_fini() instead of intel_uc_driver_late_release()
> 
> Note that intel_uc_driver_late_release() is now empty, but
> we can leave it as a placeholder for future code.
> 
> Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>

Well done, I was looking for an incorrect onion and didn't spot it, so
thought it was just the deferred free.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 6edb29b9ceaa..cc035c9781ae 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -47,9 +47,6 @@  struct intel_guc {
 	struct intel_guc_log log;
 	struct intel_guc_ct ct;
 
-	/* Log snapshot if GuC errors during load */
-	struct drm_i915_gem_object *load_err_log;
-
 	/* intel_guc_recv interrupt related state */
 	spinlock_t irq_lock;
 	unsigned int msg_enabled_mask;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index f24860a86d0e..e5421c0b9a25 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -31,8 +31,6 @@ 
 
 #include "i915_drv.h"
 
-static void guc_free_load_err_log(struct intel_guc *guc);
-
 /* Reset GuC providing us with fresh state for both GuC and HuC.
  */
 static int __intel_uc_reset_hw(struct intel_uc *uc)
@@ -103,7 +101,6 @@  void intel_uc_init_early(struct intel_uc *uc)
 
 void intel_uc_driver_late_release(struct intel_uc *uc)
 {
-	guc_free_load_err_log(&uc->guc);
 }
 
 /**
@@ -118,21 +115,20 @@  void intel_uc_init_mmio(struct intel_uc *uc)
 	intel_guc_init_send_regs(&uc->guc);
 }
 
-static void guc_capture_load_err_log(struct intel_guc *guc)
+static void __uc_capture_load_err_log(struct intel_uc *uc)
 {
-	if (!guc->log.vma || !intel_guc_log_get_level(&guc->log))
-		return;
-
-	if (!guc->load_err_log)
-		guc->load_err_log = i915_gem_object_get(guc->log.vma->obj);
+	struct intel_guc *guc = &uc->guc;
 
-	return;
+	if (guc->log.vma && !uc->load_err_log)
+		uc->load_err_log = i915_gem_object_get(guc->log.vma->obj);
 }
 
-static void guc_free_load_err_log(struct intel_guc *guc)
+static void __uc_free_load_err_log(struct intel_uc *uc)
 {
-	if (guc->load_err_log)
-		i915_gem_object_put(guc->load_err_log);
+	struct drm_i915_gem_object *log = fetch_and_zero(&uc->load_err_log);
+
+	if (log)
+		i915_gem_object_put(log);
 }
 
 /*
@@ -338,6 +334,8 @@  void intel_uc_fini(struct intel_uc *uc)
 		intel_huc_fini(&uc->huc);
 
 	intel_guc_fini(guc);
+
+	__uc_free_load_err_log(uc);
 }
 
 static int __uc_sanitize(struct intel_uc *uc)
@@ -493,7 +491,7 @@  int intel_uc_init_hw(struct intel_uc *uc)
 err_communication:
 	guc_disable_communication(guc);
 err_log_capture:
-	guc_capture_load_err_log(guc);
+	__uc_capture_load_err_log(uc);
 err_out:
 	__uc_sanitize(uc);
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
index 0cca839422e2..cf9ee3c27877 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
@@ -31,6 +31,9 @@ 
 struct intel_uc {
 	struct intel_guc guc;
 	struct intel_huc huc;
+
+	/* Snapshot of GuC log from last failed load */
+	struct drm_i915_gem_object *load_err_log;
 };
 
 void intel_uc_init_early(struct intel_uc *uc);
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 6dbd85b38759..461a8dd4cc47 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2066,7 +2066,7 @@  static int i915_guc_log_dump(struct seq_file *m, void *data)
 		return -ENODEV;
 
 	if (dump_load_err)
-		obj = dev_priv->gt.uc.guc.load_err_log;
+		obj = dev_priv->gt.uc.load_err_log;
 	else if (dev_priv->gt.uc.guc.log.vma)
 		obj = dev_priv->gt.uc.guc.log.vma->obj;