From patchwork Mon May 15 16:35:30 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Daniele Ceraolo Spurio X-Patchwork-Id: 9727553 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork.web.codeaurora.org (Postfix) with ESMTP id 06C5460386 for ; Mon, 15 May 2017 16:35:41 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id F071028985 for ; Mon, 15 May 2017 16:35:40 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id E4CC22898C; Mon, 15 May 2017 16:35:40 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-4.2 required=2.0 tests=BAYES_00, RCVD_IN_DNSWL_MED autolearn=ham version=3.3.1 Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher DHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.wl.linuxfoundation.org (Postfix) with ESMTPS id 6B9CE28985 for ; Mon, 15 May 2017 16:35:40 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 5969C6E252; Mon, 15 May 2017 16:35:38 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from mga03.intel.com (mga03.intel.com [134.134.136.65]) by gabe.freedesktop.org (Postfix) with ESMTPS id 68FA16E252 for ; Mon, 15 May 2017 16:35:37 +0000 (UTC) Received: from fmsmga006.fm.intel.com ([10.253.24.20]) by orsmga103.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 15 May 2017 09:35:35 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.38,345,1491289200"; d="scan'208";a="102329503" Received: from relo-linux-1.fm.intel.com ([10.1.27.112]) by fmsmga006.fm.intel.com with ESMTP; 15 May 2017 09:35:35 -0700 From: Daniele Ceraolo Spurio To: intel-gfx@lists.freedesktop.org Date: Mon, 15 May 2017 09:35:30 -0700 Message-Id: <1494866130-28051-1-git-send-email-daniele.ceraolospurio@intel.com> X-Mailer: git-send-email 1.9.1 Subject: [Intel-gfx] [PATCH v4] drm/i915/guc: capture GuC logs if FW fails to load X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.18 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , MIME-Version: 1.0 Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" X-Virus-Scanned: ClamAV using ClamSMTP We're currently deleting the GuC logs if the FW fails to load, but those are still useful to understand why the loading failed. Keeping the object around allows us to access them after driver load is completed. v2: keep the object around instead of using kernel memory (chris) don't store the logs in the gpu_error struct (Chris) add a check on guc_log_level to avoid snapshotting empty logs v3: use separate debugfs for error log (Chris) v4: rebased Reviewed-by: Chris Wilson (v3) Cc: Chris Wilson Cc: Oscar Mateo Cc: Michal Wajdeczko Signed-off-by: Daniele Ceraolo Spurio --- drivers/gpu/drm/i915/i915_debugfs.c | 35 ++++++++++++++++++++++------------- drivers/gpu/drm/i915/i915_drv.c | 3 +++ drivers/gpu/drm/i915/intel_guc_log.c | 17 +++++++++++++++++ drivers/gpu/drm/i915/intel_uc.c | 7 +++++-- drivers/gpu/drm/i915/intel_uc.h | 5 +++++ 5 files changed, 52 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index bd9abef..d9a5bd8 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2607,27 +2607,35 @@ static int i915_guc_stage_pool(struct seq_file *m, void *data) static int i915_guc_log_dump(struct seq_file *m, void *data) { - struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_info_node *node = m->private; + struct drm_i915_private *dev_priv = node_to_i915(node); + bool dump_err_log = !!node->info_ent->data; struct drm_i915_gem_object *obj; - int i = 0, pg; + u32 *log; + int i = 0; - if (!dev_priv->guc.log.vma) + if (!dump_err_log && dev_priv->guc.log.vma) + obj = dev_priv->guc.log.vma->obj; + else if (dump_err_log && dev_priv->guc.err_load_log) + obj = dev_priv->guc.err_load_log; + else return 0; - obj = dev_priv->guc.log.vma->obj; - for (pg = 0; pg < obj->base.size / PAGE_SIZE; pg++) { - u32 *log = kmap_atomic(i915_gem_object_get_page(obj, pg)); - - for (i = 0; i < PAGE_SIZE / sizeof(u32); i += 4) - seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n", - *(log + i), *(log + i + 1), - *(log + i + 2), *(log + i + 3)); - - kunmap_atomic(log); + log = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(log)) { + DRM_ERROR("Failed to pin guc_log object\n"); + return PTR_ERR(log); } + for (i = 0; i < obj->base.size / sizeof(u32); i += 4) + seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n", + *(log + i), *(log + i + 1), + *(log + i + 2), *(log + i + 3)); + seq_putc(m, '\n'); + i915_gem_object_unpin_map(obj); + return 0; } @@ -4811,6 +4819,7 @@ static int i915_hpd_storm_ctl_open(struct inode *inode, struct file *file) {"i915_guc_info", i915_guc_info, 0}, {"i915_guc_load_status", i915_guc_load_status_info, 0}, {"i915_guc_log_dump", i915_guc_log_dump, 0}, + {"i915_guc_err_load_log_dump", i915_guc_log_dump, 0, (void *)1}, {"i915_guc_stage_pool", i915_guc_stage_pool, 0}, {"i915_huc_load_status", i915_huc_load_status_info, 0}, {"i915_frequency_info", i915_frequency_info, 0}, diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 72fb47a..d309165 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1363,6 +1363,9 @@ void i915_driver_unload(struct drm_device *dev) cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); i915_reset_error_state(dev_priv); + /* release GuC error log (if any) */ + i915_guc_load_error_log_free(&dev_priv->guc); + /* Flush any outstanding unpin_work. */ drain_workqueue(dev_priv->wq); diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c index 16d3b87..691da42 100644 --- a/drivers/gpu/drm/i915/intel_guc_log.c +++ b/drivers/gpu/drm/i915/intel_guc_log.c @@ -660,3 +660,20 @@ void i915_guc_log_unregister(struct drm_i915_private *dev_priv) guc_log_runtime_destroy(&dev_priv->guc); mutex_unlock(&dev_priv->drm.struct_mutex); } + +void i915_guc_load_error_log_capture(struct intel_guc *guc) +{ + if (!guc->log.vma || i915.guc_log_level < 0) + return; + + if (!guc->err_load_log) + guc->err_load_log = i915_gem_object_get(guc->log.vma->obj); + + return; +} + +void i915_guc_load_error_log_free(struct intel_guc *guc) +{ + if (guc->err_load_log) + i915_gem_object_put(guc->err_load_log); +} diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 07c5658..3669b57 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -309,6 +309,7 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) guc_disable_communication(guc); gen9_reset_guc_interrupts(dev_priv); + i915_guc_load_error_log_free(guc); /* We need to notify the guc whenever we change the GGTT */ i915_ggtt_enable_guc(dev_priv); @@ -355,11 +356,11 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) /* Did we succeded or run out of retries? */ if (ret) - goto err_submission; + goto err_log_capture; ret = guc_enable_communication(guc); if (ret) - goto err_submission; + goto err_log_capture; intel_guc_auth_huc(dev_priv); if (i915.enable_guc_submission) { @@ -385,6 +386,8 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) err_interrupts: guc_disable_communication(guc); gen9_disable_guc_interrupts(dev_priv); +err_log_capture: + i915_guc_load_error_log_capture(guc); err_submission: if (i915.enable_guc_submission) i915_guc_submission_fini(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index 7618b71..d2ed030 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -220,6 +220,9 @@ struct intel_guc { /* GuC's FW specific notify function */ void (*notify)(struct intel_guc *guc); + + /* Log snapshot if GuC errors during load */ + struct drm_i915_gem_object *err_load_log; }; struct intel_huc { @@ -272,6 +275,8 @@ static inline void intel_guc_notify(struct intel_guc *guc) int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val); void i915_guc_log_register(struct drm_i915_private *dev_priv); void i915_guc_log_unregister(struct drm_i915_private *dev_priv); +void i915_guc_load_error_log_capture(struct intel_guc *guc); +void i915_guc_load_error_log_free(struct intel_guc *guc); static inline u32 guc_ggtt_offset(struct i915_vma *vma) {