diff mbox

[RFC,2/3] drm/i915: Watchdog timeout: Ringbuffer command emission for gen8+

Message ID 20170223194421.28463-2-michel.thierry@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Michel Thierry Feb. 23, 2017, 7:44 p.m. UTC
Emit the required commands into the ring buffer for starting and
stopping the watchdog timer before/after batch buffer start during
batch buffer submission.

Signed-off-by: Tomas Elf <tomas.elf@intel.com>
Signed-off-by: Ian Lister <ian.lister@intel.com>
Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
Signed-off-by: Michel Thierry <michel.thierry@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 26 +++++++++++
 drivers/gpu/drm/i915/intel_lrc.c           | 74 ++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_ringbuffer.h    |  2 +
 3 files changed, 102 insertions(+)

Comments

Chris Wilson Feb. 23, 2017, 9:03 p.m. UTC | #1
On Thu, Feb 23, 2017 at 11:44:18AM -0800, Michel Thierry wrote:
> Emit the required commands into the ring buffer for starting and
> stopping the watchdog timer before/after batch buffer start during
> batch buffer submission.
> 
> Signed-off-by: Tomas Elf <tomas.elf@intel.com>
> Signed-off-by: Ian Lister <ian.lister@intel.com>
> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
> Signed-off-by: Michel Thierry <michel.thierry@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 26 +++++++++++
>  drivers/gpu/drm/i915/intel_lrc.c           | 74 ++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/intel_ringbuffer.h    |  2 +
>  3 files changed, 102 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 35d2cb979452..348d81c40e81 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1416,8 +1416,15 @@ execbuf_submit(struct i915_execbuffer_params *params,
>  	u64 exec_start, exec_len;
>  	int instp_mode;
>  	u32 instp_mask, *cs;
> +	bool watchdog_running = false;
>  	int ret;
>  
> +	/*
> +	 * NB: Place-holder until watchdog timeout is enabled through DRM
> +	 * interface
> +	 */
> +	bool enable_watchdog = false;
> +
>  	ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas);
>  	if (ret)
>  		return ret;
> @@ -1480,6 +1487,18 @@ execbuf_submit(struct i915_execbuffer_params *params,
>  			return ret;
>  	}
>  
> +	/* Start watchdog timer */
> +	if (enable_watchdog) {
> +		if (!params->engine->emit_start_watchdog)
> +			return -EINVAL;

In the future GEM API, this is checked much earlier!

> +
> +		ret = params->engine->emit_start_watchdog(params->request);
> +		if (ret)
> +			return ret;
> +
> +		watchdog_running = true;
> +	}
> +
>  	exec_len   = args->batch_len;
>  	exec_start = params->batch->node.start +
>  		     params->args_batch_start_offset;
> @@ -1493,6 +1512,13 @@ execbuf_submit(struct i915_execbuffer_params *params,
>  	if (ret)
>  		return ret;
>  
> +	/* Cancel watchdog timer */
> +	if (watchdog_running && params->engine->emit_stop_watchdog) {
> +		ret = params->engine->emit_stop_watchdog(params->request);
> +		if (ret)
> +			return ret;
> +	}

What happens *when* we hit the error path above? You need to reserve
space in the tail to disable the watchdog, or reserve space in the
bb_emit. Or will the watchdog firing randomly be fine?

>  	i915_gem_execbuffer_move_to_active(vmas, params->request);
>  
>  	return 0;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 8c9ebf0cebf7..b4939d9f338a 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1474,6 +1474,72 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
>  	return 0;
>  }
>  
> +static int gen8_emit_start_watchdog(struct drm_i915_gem_request *req)
> +{
> +	struct intel_engine_cs *engine = req->engine;
> +	struct i915_gem_context *ctx = req->ctx;
> +	u32 *cs;
> +
> +	cs = intel_ring_begin(req, 8);
> +	if (IS_ERR(cs))
> +		return PTR_ERR(cs);
> +
> +	/*
> +	 * watchdog register must never be programmed to zero. This would
> +	 * cause the watchdog counter to exceed and not allow the engine to
> +	 * go into IDLE state
> +	 */
> +	GEM_BUG_ON(ctx->watchdog_threshold == 0);
> +
> +	/* Set counter period */
> +	*cs++ = MI_LOAD_REGISTER_IMM(1);
> +	*cs++ = i915_mmio_reg_offset(RING_THRESH(engine->mmio_base));
> +	*cs++ = ctx->watchdog_threshold;
> +	*cs++ = MI_NOOP;

Why split into 2 instructions rather than a single LRI(2)? Why the NOOPs?
Where's my PID?

> +
> +	/* Start counter */
> +	*cs++ = MI_LOAD_REGISTER_IMM(1);
> +	*cs++ = i915_mmio_reg_offset(RING_CNTR(engine->mmio_base));
> +	*cs++ = GEN8_WATCHDOG_ENABLE;
> +	*cs++ = MI_NOOP;
> +	intel_ring_advance(req, cs);
> +
> +	return 0;
> +}
> +
> +static int gen8_emit_stop_watchdog(struct drm_i915_gem_request *req)
> +{
> +	struct intel_engine_cs *engine = req->engine;
> +	u32 *cs;
> +
> +	cs = intel_ring_begin(req, 4);
> +	if (IS_ERR(cs))
> +		return PTR_ERR(cs);
> +
> +	*cs++ = MI_LOAD_REGISTER_IMM(1);
> +	*cs++ = i915_mmio_reg_offset(RING_CNTR(engine->mmio_base));
> +
> +	switch (engine->id) {
> +	default:
> +		WARN(1, "%s does not support watchdog timeout!\n",
> +		     engine->name);
> +	/* default to render engine */

Just
GEM_BUG_ON(engine->id == BCS);
if (engine->id == RCS)
	*cs++ = GEN8_RCS_WATCHDOG_DISABLE;
else
	*cs++ = GEN8_XCS_WATCHDOG_DISABLE;
-Chris
Tvrtko Ursulin Feb. 24, 2017, 9:15 a.m. UTC | #2
On 23/02/2017 21:03, Chris Wilson wrote:
> On Thu, Feb 23, 2017 at 11:44:18AM -0800, Michel Thierry wrote:

[snip]

>> +static int gen8_emit_stop_watchdog(struct drm_i915_gem_request *req)
>> +{
>> +	struct intel_engine_cs *engine = req->engine;
>> +	u32 *cs;
>> +
>> +	cs = intel_ring_begin(req, 4);
>> +	if (IS_ERR(cs))
>> +		return PTR_ERR(cs);
>> +
>> +	*cs++ = MI_LOAD_REGISTER_IMM(1);
>> +	*cs++ = i915_mmio_reg_offset(RING_CNTR(engine->mmio_base));
>> +
>> +	switch (engine->id) {
>> +	default:
>> +		WARN(1, "%s does not support watchdog timeout!\n",
>> +		     engine->name);
>> +	/* default to render engine */
>
> Just
> GEM_BUG_ON(engine->id == BCS);
> if (engine->id == RCS)
> 	*cs++ = GEN8_RCS_WATCHDOG_DISABLE;
> else
> 	*cs++ = GEN8_XCS_WATCHDOG_DISABLE;

I'd go further and store it in the engine, e.g. "*cs++ = 
engine->watchdog_disable_cmd" or something. I think that's better than 
engines with an identity crisis. :)

Regards,

Tvrtko
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 35d2cb979452..348d81c40e81 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1416,8 +1416,15 @@  execbuf_submit(struct i915_execbuffer_params *params,
 	u64 exec_start, exec_len;
 	int instp_mode;
 	u32 instp_mask, *cs;
+	bool watchdog_running = false;
 	int ret;
 
+	/*
+	 * NB: Place-holder until watchdog timeout is enabled through DRM
+	 * interface
+	 */
+	bool enable_watchdog = false;
+
 	ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas);
 	if (ret)
 		return ret;
@@ -1480,6 +1487,18 @@  execbuf_submit(struct i915_execbuffer_params *params,
 			return ret;
 	}
 
+	/* Start watchdog timer */
+	if (enable_watchdog) {
+		if (!params->engine->emit_start_watchdog)
+			return -EINVAL;
+
+		ret = params->engine->emit_start_watchdog(params->request);
+		if (ret)
+			return ret;
+
+		watchdog_running = true;
+	}
+
 	exec_len   = args->batch_len;
 	exec_start = params->batch->node.start +
 		     params->args_batch_start_offset;
@@ -1493,6 +1512,13 @@  execbuf_submit(struct i915_execbuffer_params *params,
 	if (ret)
 		return ret;
 
+	/* Cancel watchdog timer */
+	if (watchdog_running && params->engine->emit_stop_watchdog) {
+		ret = params->engine->emit_stop_watchdog(params->request);
+		if (ret)
+			return ret;
+	}
+
 	i915_gem_execbuffer_move_to_active(vmas, params->request);
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 8c9ebf0cebf7..b4939d9f338a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1474,6 +1474,72 @@  static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
 	return 0;
 }
 
+static int gen8_emit_start_watchdog(struct drm_i915_gem_request *req)
+{
+	struct intel_engine_cs *engine = req->engine;
+	struct i915_gem_context *ctx = req->ctx;
+	u32 *cs;
+
+	cs = intel_ring_begin(req, 8);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	/*
+	 * watchdog register must never be programmed to zero. This would
+	 * cause the watchdog counter to exceed and not allow the engine to
+	 * go into IDLE state
+	 */
+	GEM_BUG_ON(ctx->watchdog_threshold == 0);
+
+	/* Set counter period */
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(RING_THRESH(engine->mmio_base));
+	*cs++ = ctx->watchdog_threshold;
+	*cs++ = MI_NOOP;
+
+	/* Start counter */
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(RING_CNTR(engine->mmio_base));
+	*cs++ = GEN8_WATCHDOG_ENABLE;
+	*cs++ = MI_NOOP;
+	intel_ring_advance(req, cs);
+
+	return 0;
+}
+
+static int gen8_emit_stop_watchdog(struct drm_i915_gem_request *req)
+{
+	struct intel_engine_cs *engine = req->engine;
+	u32 *cs;
+
+	cs = intel_ring_begin(req, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(RING_CNTR(engine->mmio_base));
+
+	switch (engine->id) {
+	default:
+		WARN(1, "%s does not support watchdog timeout!\n",
+		     engine->name);
+	/* default to render engine */
+	case RCS:
+		*cs++ = GEN8_RCS_WATCHDOG_DISABLE;
+		break;
+	case VCS:
+	case VCS2:
+	case VECS:
+		*cs++ = GEN8_XCS_WATCHDOG_DISABLE;
+		break;
+	}
+
+	*cs++ = MI_NOOP;
+	intel_ring_advance(req, cs);
+
+	return 0;
+}
+
 /*
  * Reserve space for 2 NOOPs at the end of each request to be
  * used as a workaround for not being allowed to do lite
@@ -1740,6 +1806,8 @@  int logical_render_ring_init(struct intel_engine_cs *engine)
 	engine->emit_flush = gen8_emit_flush_render;
 	engine->emit_breadcrumb = gen8_emit_breadcrumb_render;
 	engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_render_sz;
+	engine->emit_start_watchdog = gen8_emit_start_watchdog;
+	engine->emit_stop_watchdog = gen8_emit_stop_watchdog;
 
 	ret = intel_engine_create_scratch(engine, PAGE_SIZE);
 	if (ret)
@@ -1763,6 +1831,12 @@  int logical_xcs_ring_init(struct intel_engine_cs *engine)
 {
 	logical_ring_setup(engine);
 
+	/* BCS engine does not have a watchdog-expired irq */
+	if (engine->id != BCS) {
+		engine->emit_start_watchdog = gen8_emit_start_watchdog;
+		engine->emit_stop_watchdog = gen8_emit_stop_watchdog;
+	}
+
 	return logical_ring_init(engine);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 0f29e07a9581..5a9764708186 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -282,6 +282,8 @@  struct intel_engine_cs {
 
 	int		(*emit_flush)(struct drm_i915_gem_request *request,
 				      u32 mode);
+	int		(*emit_start_watchdog)(struct drm_i915_gem_request *req);
+	int		(*emit_stop_watchdog)(struct drm_i915_gem_request *req);
 #define EMIT_INVALIDATE	BIT(0)
 #define EMIT_FLUSH	BIT(1)
 #define EMIT_BARRIER	(EMIT_INVALIDATE | EMIT_FLUSH)