diff mbox series

[v2,2/2] drm/i915/guc: Add a selftest for FAST_REQUEST errors

Message ID 20231114010016.234570-3-John.C.Harrison@Intel.com (mailing list archive)
State New, archived
Headers show
Series Selftest for FAST_REQUEST feature | expand

Commit Message

John Harrison Nov. 14, 2023, 1 a.m. UTC
From: John Harrison <John.C.Harrison@Intel.com>

There is a mechanism for reporting errors from fire-and-forget H2G
messages. This is the only way to find out about almost any error in
the GuC backend submission path, so it would be useful to have a test
that confirms it is working.

v2: Fix some dumb over-complications and a couple of typos - review
feedback from Daniele.

Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
---
 drivers/gpu/drm/i915/gt/uc/intel_guc.h    |   4 +
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c |   9 ++
 drivers/gpu/drm/i915/gt/uc/selftest_guc.c | 115 ++++++++++++++++++++++
 3 files changed, 128 insertions(+)

Comments

Daniele Ceraolo Spurio Nov. 29, 2023, 9:22 p.m. UTC | #1
On 11/13/2023 5:00 PM, John.C.Harrison@Intel.com wrote:
> From: John Harrison <John.C.Harrison@Intel.com>
>
> There is a mechanism for reporting errors from fire and forget H2G
> messages. This is the only way to find out about almost any error in
> the GuC backend submission path. So it would be useful to know that it
> is working.
>
> v2: Fix some dumb over-complications and a couple of typos - review
> feedback from Daniele.
>
> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>

Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>

Daniele

> ---
>   drivers/gpu/drm/i915/gt/uc/intel_guc.h    |   4 +
>   drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c |   9 ++
>   drivers/gpu/drm/i915/gt/uc/selftest_guc.c | 115 ++++++++++++++++++++++
>   3 files changed, 128 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> index 2b6dfe62c8f2a..e22c12ce245ad 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> @@ -297,6 +297,10 @@ struct intel_guc {
>   	 * @number_guc_id_stolen: The number of guc_ids that have been stolen
>   	 */
>   	int number_guc_id_stolen;
> +	/**
> +	 * @fast_response_selftest: Backdoor to CT handler for fast response selftest
> +	 */
> +	u32 fast_response_selftest;
>   #endif
>   };
>   
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> index 89e314b3756bb..ed6ce73ef3b07 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> @@ -1076,6 +1076,15 @@ static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r
>   		found = true;
>   		break;
>   	}
> +
> +#ifdef CONFIG_DRM_I915_SELFTEST
> +	if (!found && ct_to_guc(ct)->fast_response_selftest) {
> +		CT_DEBUG(ct, "Assuming unsolicited response due to FAST_REQUEST selftest\n");
> +		ct_to_guc(ct)->fast_response_selftest++;
> +		found = true;
> +	}
> +#endif
> +
>   	if (!found) {
>   		CT_ERROR(ct, "Unsolicited response message: len %u, data %#x (fence %u, last %u)\n",
>   			 len, hxg[0], fence, ct->requests.last_fence);
> diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
> index bfb72143566f6..c900aac85adba 100644
> --- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
> @@ -286,11 +286,126 @@ static int intel_guc_steal_guc_ids(void *arg)
>   	return ret;
>   }
>   
> +/*
> + * Send a context schedule H2G message with an invalid context id.
> + * This should generate a GUC_RESULT_INVALID_CONTEXT response.
> + */
> +static int bad_h2g(struct intel_guc *guc)
> +{
> +	u32 action[] = {
> +	   INTEL_GUC_ACTION_SCHED_CONTEXT,
> +	   0x12345678,
> +	};
> +
> +	return intel_guc_send_nb(guc, action, ARRAY_SIZE(action), 0);
> +}
> +
> +/*
> + * Set a spinner running to make sure the system is alive and active,
> + * then send a bad but asynchronous H2G command and wait to see if an
> + * error response is returned. If no response is received or if the
> + * spinner dies then the test will fail.
> + */
> +#define FAST_RESPONSE_TIMEOUT_MS	1000
> +static int intel_guc_fast_request(void *arg)
> +{
> +	struct intel_gt *gt = arg;
> +	struct intel_context *ce;
> +	struct igt_spinner spin;
> +	struct i915_request *rq;
> +	intel_wakeref_t wakeref;
> +	struct intel_engine_cs *engine = intel_selftest_find_any_engine(gt);
> +	bool spinning = false;
> +	int ret = 0;
> +
> +	if (!engine)
> +		return 0;
> +
> +	wakeref = intel_runtime_pm_get(gt->uncore->rpm);
> +
> +	ce = intel_context_create(engine);
> +	if (IS_ERR(ce)) {
> +		ret = PTR_ERR(ce);
> +		gt_err(gt, "Failed to create spinner request: %pe\n", ce);
> +		goto err_pm;
> +	}
> +
> +	ret = igt_spinner_init(&spin, engine->gt);
> +	if (ret) {
> +		gt_err(gt, "Failed to create spinner: %pe\n", ERR_PTR(ret));
> +		goto err_pm;
> +	}
> +	spinning = true;
> +
> +	rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
> +	intel_context_put(ce);
> +	if (IS_ERR(rq)) {
> +		ret = PTR_ERR(rq);
> +		gt_err(gt, "Failed to create spinner request: %pe\n", rq);
> +		goto err_spin;
> +	}
> +
> +	ret = request_add_spin(rq, &spin);
> +	if (ret) {
> +		gt_err(gt, "Failed to add Spinner request: %pe\n", ERR_PTR(ret));
> +		goto err_rq;
> +	}
> +
> +	gt->uc.guc.fast_response_selftest = 1;
> +
> +	ret = bad_h2g(&gt->uc.guc);
> +	if (ret) {
> +		gt_err(gt, "Failed to send H2G: %pe\n", ERR_PTR(ret));
> +		goto err_rq;
> +	}
> +
> +	ret = wait_for(gt->uc.guc.fast_response_selftest != 1 || i915_request_completed(rq),
> +		       FAST_RESPONSE_TIMEOUT_MS);
> +	if (ret) {
> +		gt_err(gt, "Request wait failed: %pe\n", ERR_PTR(ret));
> +		goto err_rq;
> +	}
> +
> +	if (i915_request_completed(rq)) {
> +		gt_err(gt, "Spinner died waiting for fast request error!\n");
> +		ret = -EIO;
> +		goto err_rq;
> +	}
> +
> +	if (gt->uc.guc.fast_response_selftest != 2) {
> +		gt_err(gt, "Unexpected fast response count: %d\n",
> +		       gt->uc.guc.fast_response_selftest);
> +		goto err_rq;
> +	}
> +
> +	igt_spinner_end(&spin);
> +	spinning = false;
> +
> +	ret = intel_selftest_wait_for_rq(rq);
> +	if (ret) {
> +		gt_err(gt, "Request failed to complete: %pe\n", ERR_PTR(ret));
> +		goto err_rq;
> +	}
> +
> +err_rq:
> +	i915_request_put(rq);
> +
> +err_spin:
> +	if (spinning)
> +		igt_spinner_end(&spin);
> +	igt_spinner_fini(&spin);
> +
> +err_pm:
> +	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
> +	return ret;
> +}
> +
>   int intel_guc_live_selftests(struct drm_i915_private *i915)
>   {
>   	static const struct i915_subtest tests[] = {
>   		SUBTEST(intel_guc_scrub_ctbs),
>   		SUBTEST(intel_guc_steal_guc_ids),
> +		SUBTEST(intel_guc_fast_request),
>   	};
>   	struct intel_gt *gt = to_gt(i915);
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 2b6dfe62c8f2a..e22c12ce245ad 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -297,6 +297,10 @@  struct intel_guc {
 	 * @number_guc_id_stolen: The number of guc_ids that have been stolen
 	 */
 	int number_guc_id_stolen;
+	/**
+	 * @fast_response_selftest: Backdoor to CT handler for fast response selftest
+	 */
+	u32 fast_response_selftest;
 #endif
 };
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index 89e314b3756bb..ed6ce73ef3b07 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -1076,6 +1076,15 @@  static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r
 		found = true;
 		break;
 	}
+
+#ifdef CONFIG_DRM_I915_SELFTEST
+	if (!found && ct_to_guc(ct)->fast_response_selftest) {
+		CT_DEBUG(ct, "Assuming unsolicited response due to FAST_REQUEST selftest\n");
+		ct_to_guc(ct)->fast_response_selftest++;
+		found = true;
+	}
+#endif
+
 	if (!found) {
 		CT_ERROR(ct, "Unsolicited response message: len %u, data %#x (fence %u, last %u)\n",
 			 len, hxg[0], fence, ct->requests.last_fence);
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
index bfb72143566f6..c900aac85adba 100644
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
@@ -286,11 +286,126 @@  static int intel_guc_steal_guc_ids(void *arg)
 	return ret;
 }
 
+/*
+ * Queue an asynchronous context schedule H2G that names a context id the
+ * GuC should not recognise. The expected outcome is that the GuC answers
+ * with a GUC_RESULT_INVALID_CONTEXT error response.
+ */
+static int bad_h2g(struct intel_guc *guc)
+{
+	/* dword 0: action opcode, dword 1: deliberately bogus context id */
+	u32 request[] = { INTEL_GUC_ACTION_SCHED_CONTEXT, 0x12345678 };
+	u32 len = ARRAY_SIZE(request);
+
+	return intel_guc_send_nb(guc, request, len, 0);
+}
+
+/*
+ * Start a spinner to show the GT is alive, send a bad asynchronous H2G and
+ * wait up to FAST_RESPONSE_TIMEOUT_MS for its error response to be counted.
+ * Returns 0 on success (or if there is no engine to test), otherwise an
+ * error code: setup failed, no/extra response seen, or the spinner died.
+ */
+#define FAST_RESPONSE_TIMEOUT_MS	1000
+static int intel_guc_fast_request(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_guc *guc = &gt->uc.guc;
+	struct intel_context *ce;
+	struct igt_spinner spin;
+	struct i915_request *rq;
+	intel_wakeref_t wakeref;
+	struct intel_engine_cs *engine = intel_selftest_find_any_engine(gt);
+	bool spinning = false;
+	int ret = 0;
+
+	if (!engine)
+		return 0;
+
+	wakeref = intel_runtime_pm_get(gt->uncore->rpm);
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce)) {
+		ret = PTR_ERR(ce);
+		gt_err(gt, "Failed to create context: %pe\n", ce);
+		goto err_pm;
+	}
+
+	ret = igt_spinner_init(&spin, engine->gt);
+	if (ret) {
+		gt_err(gt, "Failed to create spinner: %pe\n", ERR_PTR(ret));
+		intel_context_put(ce);	/* err_pm does not drop the context */
+		goto err_pm;
+	}
+	spinning = true;
+
+	rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
+	intel_context_put(ce);
+	if (IS_ERR(rq)) {
+		ret = PTR_ERR(rq);
+		gt_err(gt, "Failed to create spinner request: %pe\n", rq);
+		goto err_spin;
+	}
+
+	ret = request_add_spin(rq, &spin);
+	if (ret) {
+		gt_err(gt, "Failed to add Spinner request: %pe\n", ERR_PTR(ret));
+		goto err_rq;
+	}
+
+	/* Non-zero arms the CT handler backdoor; each unmatched response bumps it */
+	guc->fast_response_selftest = 1;
+	ret = bad_h2g(guc);
+	if (ret) {
+		gt_err(gt, "Failed to send H2G: %pe\n", ERR_PTR(ret));
+		goto err_rq;
+	}
+
+	ret = wait_for(guc->fast_response_selftest != 1 || i915_request_completed(rq),
+		       FAST_RESPONSE_TIMEOUT_MS);
+	if (ret) {
+		gt_err(gt, "Request wait failed: %pe\n", ERR_PTR(ret));
+		goto err_rq;
+	}
+
+	if (i915_request_completed(rq)) {
+		gt_err(gt, "Spinner died waiting for fast request error!\n");
+		ret = -EIO;
+		goto err_rq;
+	}
+
+	/* Exactly one response is expected; anything else must fail the test */
+	if (guc->fast_response_selftest != 2) {
+		gt_err(gt, "Unexpected fast response count: %u\n", guc->fast_response_selftest);
+		ret = -EINVAL;
+		goto err_rq;
+	}
+
+	igt_spinner_end(&spin);
+	spinning = false;
+
+	ret = intel_selftest_wait_for_rq(rq);
+	if (ret)
+		gt_err(gt, "Request failed to complete: %pe\n", ERR_PTR(ret));
+err_rq:
+	/* Disarm so later unsolicited responses are reported as errors again */
+	guc->fast_response_selftest = 0;
+	i915_request_put(rq);
+err_spin:
+	if (spinning)
+		igt_spinner_end(&spin);
+	igt_spinner_fini(&spin);
+err_pm:
+	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+	return ret;
+}
+
 int intel_guc_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(intel_guc_scrub_ctbs),
 		SUBTEST(intel_guc_steal_guc_ids),
+		SUBTEST(intel_guc_fast_request),
 	};
 	struct intel_gt *gt = to_gt(i915);