@@ -3019,15 +3019,11 @@ static void engine_set_wedged(struct intel_engine_cs *engine)
*/
if (i915.enable_execlists) {
- struct execlist_port *port = engine->execlist_port;
unsigned long flags;
- unsigned int n;
spin_lock_irqsave(&engine->timeline->lock, flags);
- for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++)
- i915_gem_request_put(port_request(&port[n]));
- memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
+ intel_lr_clear_execlist_ports(engine);
engine->execlist_queue = RB_ROOT;
engine->execlist_first = NULL;
@@ -534,21 +534,20 @@ static void i915_guc_submit(struct drm_i915_gem_request *rq)
unsigned int engine_id = engine->id;
struct intel_guc *guc = &rq->i915->guc;
struct i915_guc_client *client = guc->execbuf_client;
- unsigned long flags;
int b_ret;
/* WA to flush out the pending GMADR writes to ring buffer. */
if (i915_vma_is_map_and_fenceable(rq->ring->vma))
POSTING_READ_FW(GUC_STATUS);
- spin_lock_irqsave(&client->wq_lock, flags);
+ spin_lock(&client->wq_lock);
guc_wq_item_append(client, rq);
b_ret = guc_ring_doorbell(client);
client->submissions[engine_id] += 1;
- spin_unlock_irqrestore(&client->wq_lock, flags);
+ spin_unlock(&client->wq_lock);
}
static void nested_enable_signaling(struct drm_i915_gem_request *rq)
@@ -1189,9 +1188,6 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
guc_interrupts_capture(dev_priv);
for_each_engine(engine, dev_priv, id) {
- struct execlist_port *port = engine->execlist_port;
- int n;
-
/* The tasklet was initialised by execlists, and may be in
* a state of flux (across a reset) and so we just want to
* take over the callback without changing any other state
@@ -1199,13 +1195,6 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
*/
engine->irq_tasklet.func = i915_guc_irq_handler;
clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-
- for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
- if (!port_isset(&port[n]))
- break;
-
- i915_guc_submit(port_request(&port[n]));
- }
}
return 0;
@@ -291,6 +291,26 @@ uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
return ctx->engine[engine->id].lrc_desc;
}
+static inline struct execlist_port *
+execlists_last_port(struct intel_engine_cs *engine)
+{
+ return &engine->execlist_port[ARRAY_SIZE(engine->execlist_port) - 1];
+}
+
+void intel_lr_clear_execlist_ports(struct intel_engine_cs *engine)
+{
+ struct execlist_port *port = engine->execlist_port;
+ struct drm_i915_gem_request *rq;
+
+ while ((rq = port_request(port))) {
+ i915_gem_request_put(rq);
+ if (port == execlists_last_port(engine))
+ break;
+ port++;
+ }
+ memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
+}
+
static inline void
execlists_context_status_change(struct drm_i915_gem_request *rq,
unsigned long status)
@@ -952,6 +972,36 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request)
return 0;
}
+static void intel_lr_resubmit_requests(struct intel_engine_cs *engine)
+{
+ struct i915_priolist *p = &engine->default_priolist;
+ struct drm_i915_gem_request *rq, *rq_prev;
+ struct i915_priotree *pt;
+ bool first;
+ int last_prio;
+
+ lockdep_assert_held(&engine->timeline->lock);
+
+ last_prio = INT_MIN;
+
+ list_for_each_entry_safe_reverse(rq, rq_prev,
+ &engine->timeline->requests, link) {
+ if (i915_gem_request_completed(rq))
+ break;
+
+ pt = &rq->priotree;
+ if (pt->priority != last_prio)
+ p = priolist_lookup(engine, pt->priority,
+ &first);
+ __i915_gem_request_unsubmit(rq);
+ trace_i915_gem_request_out(rq);
+
+ /* lifo, since we're traversing timeline in reverse */
+ list_add(&pt->link, &p->requests);
+ last_prio = pt->priority;
+ }
+}
+
/*
* In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
* PIPE_CONTROL instruction. This is required for the flush to happen correctly
@@ -1220,9 +1270,6 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
static int gen8_init_common_ring(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
- struct execlist_port *port = engine->execlist_port;
- unsigned int n;
- bool submit;
int ret;
ret = intel_mocs_init_engine(engine);
@@ -1241,26 +1288,6 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name);
- /* After a GPU reset, we may have requests to replay */
- clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-
- submit = false;
- for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
- if (!port_isset(&port[n]))
- break;
-
- DRM_DEBUG_DRIVER("Restarting %s:%d from 0x%x\n",
- engine->name, n,
- port_request(&port[n])->global_seqno);
-
- /* Discard the current inflight count */
- port_set(&port[n], port_request(&port[n]));
- submit = true;
- }
-
- if (submit && !i915.enable_guc_submission)
- execlists_submit_ports(engine);
-
return 0;
}
@@ -1300,10 +1327,9 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine)
static void reset_common_ring(struct intel_engine_cs *engine,
struct drm_i915_gem_request *request)
{
- struct execlist_port *port = engine->execlist_port;
struct intel_context *ce;
- /* If the request was innocent, we leave the request in the ELSP
+ /* If the request was innocent, we leave the request intact
* and will try to replay it on restarting. The context image may
* have been corrupted by the reset, in which case we may have
* to service a new GPU hang, but more likely we can continue on
@@ -1313,42 +1339,45 @@ static void reset_common_ring(struct intel_engine_cs *engine,
* and have to at least restore the RING register in the context
* image back to the expected values to skip over the guilty request.
*/
- if (!request || request->fence.error != -EIO)
- return;
-
- /* We want a simple context + ring to execute the breadcrumb update.
- * We cannot rely on the context being intact across the GPU hang,
- * so clear it and rebuild just what we need for the breadcrumb.
- * All pending requests for this context will be zapped, and any
- * future request will be after userspace has had the opportunity
- * to recreate its own state.
- */
- ce = &request->ctx->engine[engine->id];
- execlists_init_reg_state(ce->lrc_reg_state,
- request->ctx, engine, ce->ring);
-
- /* Move the RING_HEAD onto the breadcrumb, past the hanging batch */
- ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
- i915_ggtt_offset(ce->ring->vma);
- ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix;
+ if (request && request->fence.error == -EIO) {
+ /* We want a simple context + ring to execute the breadcrumb
+ * update. We cannot rely on the context being intact across
+ * the GPU hang, so clear it and rebuild just what we need for
+ * the breadcrumb. All pending requests for this context will
+ * be zapped, and any future request will be after userspace
+ * has had the opportunity to recreate its own state.
+ */
+ ce = &request->ctx->engine[engine->id];
+ execlists_init_reg_state(ce->lrc_reg_state,
+ request->ctx, engine, ce->ring);
- request->ring->head = request->postfix;
- intel_ring_update_space(request->ring);
- /* Catch up with any missed context-switch interrupts */
- if (request->ctx != port_request(port)->ctx) {
- i915_gem_request_put(port_request(port));
- port[0] = port[1];
- memset(&port[1], 0, sizeof(port[1]));
+ /* Move the RING_HEAD onto the breadcrumb,
+ * past the hanging batch
+ */
+ ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
+ i915_ggtt_offset(ce->ring->vma);
+ ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix;
+
+ request->ring->head = request->postfix;
+ intel_ring_update_space(request->ring);
+
+ /* Reset WaIdleLiteRestore:bdw,skl as well */
+ request->tail =
+ intel_ring_wrap(request->ring,
+ request->wa_tail -
+ WA_TAIL_DWORDS * sizeof(u32));
+ assert_ring_tail_valid(request->ring, request->tail);
}
- GEM_BUG_ON(request->ctx != port_request(port)->ctx);
+ spin_lock_irq(&engine->timeline->lock);
+ intel_lr_resubmit_requests(engine);
+ spin_unlock_irq(&engine->timeline->lock);
- /* Reset WaIdleLiteRestore:bdw,skl as well */
- request->tail =
- intel_ring_wrap(request->ring,
- request->wa_tail - WA_TAIL_DWORDS*sizeof(u32));
- assert_ring_tail_valid(request->ring, request->tail);
+ intel_lr_clear_execlist_ports(engine);
+ clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+
+ tasklet_hi_schedule(&engine->irq_tasklet);
}
static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
@@ -83,6 +83,7 @@ uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
struct intel_engine_cs *engine);
/* Execlists */
+void intel_lr_clear_execlist_ports(struct intel_engine_cs *engine);
int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv,
int enable_execlists);
Now that we're able to unsubmit requests, we can take advantage of it
during reset. Rather than resubmitting the previous workload directly
to GuC/ELSP, we can simply move the requests back to the priority
queue and submit from the tasklet instead.

v2: Move the tasklet schedule out for legacy ringbuffer submission
v3: Handle allocation error in lookup rather than in caller (Chris)

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jeff McGee <jeff.mcgee@intel.com>
Cc: Michel Thierry <michel.thierry@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c            |   6 +-
 drivers/gpu/drm/i915/i915_guc_submission.c |  15 +--
 drivers/gpu/drm/i915/intel_lrc.c           | 141 +++++++++++++++++------------
 drivers/gpu/drm/i915/intel_lrc.h           |   1 +
 4 files changed, 89 insertions(+), 74 deletions(-)
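For illustration only, here is a minimal standalone C sketch of the idea the
patch implements in reset_common_ring()/intel_lr_resubmit_requests(): instead
of replaying the in-flight port contents straight to GuC/ELSP after a reset,
the ports are drained back onto a priority-ordered pending queue and a
deferred "tasklet" refills the ports from that queue. All types and helpers
below (struct request, struct engine, unsubmit(), reset_engine(), tasklet())
are hypothetical stand-ins, not the i915 structures, and the sorted insert is
a simplification of the driver's per-priority request lists.

#include <stdio.h>
#include <stddef.h>

/* Hypothetical stand-ins for i915 requests and execlist ports. */
struct request {
	int seqno;
	int priority;
	struct request *next;
};

struct engine {
	struct request *port[2];	/* in-flight slots, like execlist_port[] */
	struct request *queue;		/* pending requests, highest priority first */
};

/* Return an in-flight request to the pending queue in priority order. */
static void unsubmit(struct engine *e, struct request *rq)
{
	struct request **pos = &e->queue;

	while (*pos && (*pos)->priority >= rq->priority)
		pos = &(*pos)->next;
	rq->next = *pos;
	*pos = rq;
}

/* Reset path: move the ports back to the queue rather than replaying them. */
static void reset_engine(struct engine *e)
{
	for (int n = 0; n < 2; n++) {
		if (!e->port[n])
			break;
		unsubmit(e, e->port[n]);
		e->port[n] = NULL;
	}
}

/* Deferred submission: refill the ports from the head of the queue. */
static void tasklet(struct engine *e)
{
	for (int n = 0; n < 2 && e->queue; n++) {
		e->port[n] = e->queue;
		e->queue = e->queue->next;
		printf("resubmit seqno %d (prio %d) to port %d\n",
		       e->port[n]->seqno, e->port[n]->priority, n);
	}
}

int main(void)
{
	struct request a = { .seqno = 1, .priority = 0 };
	struct request b = { .seqno = 2, .priority = 1024 };
	struct engine e = { .port = { &a, &b } };

	reset_engine(&e);	/* after a GPU reset: unsubmit the in-flight work */
	tasklet(&e);		/* deferred resubmission picks it back up in order */
	return 0;
}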