From patchwork Mon May 22 17:46:31 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Michel Thierry X-Patchwork-Id: 9741227 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork.web.codeaurora.org (Postfix) with ESMTP id E0F19601C2 for ; Mon, 22 May 2017 17:47:02 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id CF10A28723 for ; Mon, 22 May 2017 17:47:02 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id C3F892872A; Mon, 22 May 2017 17:47:02 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-4.2 required=2.0 tests=BAYES_00, RCVD_IN_DNSWL_MED autolearn=ham version=3.3.1 Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher DHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.wl.linuxfoundation.org (Postfix) with ESMTPS id 6BEE128724 for ; Mon, 22 May 2017 17:47:02 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 249606E265; Mon, 22 May 2017 17:46:57 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from mga11.intel.com (mga11.intel.com [192.55.52.93]) by gabe.freedesktop.org (Postfix) with ESMTPS id CD9746E259 for ; Mon, 22 May 2017 17:46:46 +0000 (UTC) Received: from fmsmga003.fm.intel.com ([10.253.24.29]) by fmsmga102.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 22 May 2017 10:46:46 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.38,378,1491289200"; d="scan'208";a="860055324" Received: from relo-linux-11.sc.intel.com ([10.3.160.214]) by FMSMGA003.fm.intel.com with ESMTP; 22 May 2017 10:46:43 
-0700
From: Michel Thierry
To: intel-gfx@lists.freedesktop.org
Date: Mon, 22 May 2017 10:46:31 -0700
Message-Id: <20170522174641.25354-11-michel.thierry@intel.com>
X-Mailer: git-send-email 2.11.0
In-Reply-To: <20170522174641.25354-1-michel.thierry@intel.com>
References: <20170522174641.25354-1-michel.thierry@intel.com>
Subject: [Intel-gfx] [PATCH v8 10/20] drm/i915/selftests: reset engine self tests
X-BeenThere: intel-gfx@lists.freedesktop.org
X-Mailman-Version: 2.1.18
Precedence: list
List-Id: Intel graphics driver community testing & development
List-Unsubscribe: ,
List-Archive:
List-Post:
List-Help:
List-Subscribe: ,
MIME-Version: 1.0
Errors-To: intel-gfx-bounces@lists.freedesktop.org
Sender: "Intel-gfx"
X-Virus-Scanned: ClamAV using ClamSMTP

Check that we can reset specific engines, also check the fallback to
full reset if something didn't work.

v2: rebase.
v3: use RESET_ENGINE_IN_PROGRESS flag.

Signed-off-by: Michel Thierry
---
 drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 173 +++++++++++++++++++++++
 1 file changed, 173 insertions(+)

diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index aa31d6c0cdfb..8a3edb8bd440 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -322,6 +322,64 @@ static int igt_global_reset(void *arg)
 	return err;
 }
 
+/*
+ * Live selftest: reset each engine in turn with i915_reset_engine() and
+ * check that only the per-engine reset counter advances.
+ *
+ * Returns 0 on success or when the device lacks GPU/engine reset support,
+ * the error from i915_reset_engine(), -EINVAL if the reset counters did not
+ * move as expected, or -EIO if the GPU ends up terminally wedged.
+ */
+static int igt_reset_engine(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	unsigned int reset_count, reset_engine_count;
+	int err = 0;
+
+	/* Check that we can reset each engine without a full GPU reset */
+
+	if (!intel_has_gpu_reset(i915))
+		return 0;
+
+	if (!intel_has_reset_engine(i915))
+		return 0;
+
+	set_bit(I915_RESET_ENGINE_IN_PROGRESS, &i915->gpu_error.flags);
+
+	for_each_engine(engine, i915, id) {
+		reset_count = i915_reset_count(&i915->gpu_error);
+		reset_engine_count = i915_reset_engine_count(&i915->gpu_error,
+							     engine);
+
+		err = i915_reset_engine(engine);
+		if (err) {
+			pr_err("i915_reset_engine failed\n");
+			break;
+		}
+
+		if (i915_reset_count(&i915->gpu_error) != reset_count) {
+			pr_err("Full GPU reset recorded! (engine reset expected)\n");
+			err = -EINVAL;
+			break;
+		}
+
+		if (i915_reset_engine_count(&i915->gpu_error, engine) ==
+		    reset_engine_count) {
+			pr_err("No %s engine reset recorded!\n", engine->name);
+			err = -EINVAL;
+			break;
+		}
+	}
+
+	clear_bit(I915_RESET_ENGINE_IN_PROGRESS, &i915->gpu_error.flags);
+	if (i915_terminally_wedged(&i915->gpu_error))
+		err = -EIO;
+
+	return err;
+}
+
 static u32 fake_hangcheck(struct drm_i915_gem_request *rq)
 {
 	u32 reset_count;
@@ -526,13 +584,128 @@ static int igt_reset_queue(void *arg)
 	return err;
 }
 
+/*
+ * Live selftest: make an engine reset of the render engine fail and check
+ * that i915_handle_error() falls back to a full GPU reset.
+ *
+ * Returns 0 on success or when the device lacks GPU/engine reset support,
+ * a negative error code if the hang could not be set up, -EINVAL if the
+ * reset counters did not move as expected, or -EIO if the GPU is still
+ * terminally wedged when the test finishes.
+ */
+static int igt_render_engine_reset_fallback(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *engine = i915->engine[RCS];
+	struct hang h;
+	struct drm_i915_gem_request *rq;
+	unsigned int reset_count, reset_engine_count;
+	int err = 0;
+
+	/* Check that a failed engine reset is promoted to a full GPU reset */
+
+	if (!intel_has_gpu_reset(i915))
+		return 0;
+
+	if (!intel_has_reset_engine(i915))
+		return 0;
+
+	set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
+	mutex_lock(&i915->drm.struct_mutex);
+
+	err = hang_init(&h, i915);
+	if (err)
+		goto unlock;
+
+	rq = hang_create_request(&h, engine, i915->kernel_context);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto fini;
+	}
+
+	i915_gem_request_get(rq);
+	__i915_add_request(rq, true);
+
+	/* make reset engine fail */
+	rq->fence.error = -EIO;
+
+	if (!wait_for_hang(&h, rq)) {
+		pr_err("Failed to start request %x\n", rq->fence.seqno);
+		err = -EIO;
+		goto out_rq;
+	}
+
+	reset_engine_count = i915_reset_engine_count(&i915->gpu_error, engine);
+	reset_count = fake_hangcheck(rq);
+
+	/* unlock since we'll call handle_error */
+	mutex_unlock(&i915->drm.struct_mutex);
+	clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
+
+	i915_handle_error(i915, intel_engine_flag(engine), "live test");
+
+	/*
+	 * Retake what was dropped above so that every exit path below reaches
+	 * the cleanup labels with struct_mutex held and the backoff bit set.
+	 */
+	set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
+	mutex_lock(&i915->drm.struct_mutex);
+
+	if (i915_reset_engine_count(&i915->gpu_error, engine) !=
+	    reset_engine_count) {
+		pr_err("render engine reset recorded! (full reset expected)\n");
+		err = -EINVAL;
+		goto out_rq;
+	}
+
+	if (i915_reset_count(&i915->gpu_error) == reset_count) {
+		pr_err("No full GPU reset recorded!\n");
+		err = -EINVAL;
+		goto out_rq;
+	}
+
+	/*
+	 * by using fence.error = -EIO, full reset sets the wedged flag, do one
+	 * more full reset to re-enable the hw.
+	 */
+	if (i915_terminally_wedged(&i915->gpu_error)) {
+		rq->fence.error = 0;
+
+		set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);
+		i915_reset(i915);
+		GEM_BUG_ON(test_bit(I915_RESET_HANDOFF,
+				    &i915->gpu_error.flags));
+
+		if (i915_reset_count(&i915->gpu_error) == reset_count) {
+			pr_err("No full GPU reset recorded!\n");
+			err = -EINVAL;
+			goto out_rq;
+		}
+	}
+
+out_rq:
+	i915_gem_request_put(rq);
+fini:
+	hang_fini(&h);
+unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+	clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
+
+	if (i915_terminally_wedged(&i915->gpu_error))
+		return -EIO;
+
+	return err;
+}
+
 int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(igt_hang_sanitycheck),
 		SUBTEST(igt_global_reset),
+		SUBTEST(igt_reset_engine),
 		SUBTEST(igt_wait_reset),
 		SUBTEST(igt_reset_queue),
+		SUBTEST(igt_render_engine_reset_fallback),
 	};
 
 	if (!intel_has_gpu_reset(i915))