@@ -1350,10 +1350,8 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
bool tasklet = false;
if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) {
- if (port_count(&execlists->port[0])) {
- __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
- tasklet = true;
- }
+ __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+ tasklet = true;
}
if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) {
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
@@ -208,8 +208,8 @@
/* Typical size of the average request (2 pipecontrols and a MI_BB) */
#define EXECLISTS_REQUEST_SIZE 64 /* bytes */
-
#define WA_TAIL_DWORDS 2
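+/* Expected hw_id of the kernel's preempt context (asserted in inject_preempt_context()) */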
+#define PREEMPT_ID 0x1
static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
struct intel_engine_cs *engine);
@@ -482,26 +482,39 @@ static void port_assign(struct execlist_port *port,
port_set(port, port_pack(i915_gem_request_get(rq), port_count(port)));
}
+static void inject_preempt_context(struct intel_engine_cs *engine)
+{
+ struct intel_context *ce =
+ &engine->i915->preempt_context->engine[engine->id];
+ u32 __iomem *elsp =
+ engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
+ unsigned int n;
+
+ GEM_BUG_ON(engine->i915->preempt_context->hw_id != PREEMPT_ID);
+
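+	/*
+	 * Write a pair of zero dwords (MI_NOOPs) into the preempt
+	 * context's ring and advance RING_TAIL past them, so each
+	 * injection hands the hardware fresh, trivially-idle work.
+	 */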
+ memset(ce->ring->vaddr + ce->ring->tail, 0, 8);
+ ce->ring->tail += 8;
+ ce->ring->tail &= (ce->ring->size - 1);
+ ce->lrc_reg_state[CTX_RING_TAIL+1] = ce->ring->tail;
+
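+	/*
+	 * Each ELSP port takes two writes (upper then lower dword of
+	 * the context descriptor): zero all but the last port, then
+	 * submit the preempt context as the only runnable entry.
+	 */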
+ for (n = execlists_num_ports(&engine->execlists); --n; ) {
+ writel(0, elsp);
+ writel(0, elsp);
+ }
+	writel(upper_32_bits(ce->lrc_desc), elsp);
+	writel(lower_32_bits(ce->lrc_desc), elsp);
+}
+
static void execlists_dequeue(struct intel_engine_cs *engine)
{
- struct drm_i915_gem_request *last;
struct intel_engine_execlists * const execlists = &engine->execlists;
struct execlist_port *port = execlists->port;
const struct execlist_port * const last_port =
&execlists->port[execlists->port_mask];
+ struct drm_i915_gem_request *last = port_request(port);
struct rb_node *rb;
bool submit = false;
- last = port_request(port);
- if (last)
- /* WaIdleLiteRestore:bdw,skl
- * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
- * as we resubmit the request. See gen8_emit_breadcrumb()
- * for where we prepare the padding after the end of the
- * request.
- */
- last->tail = last->wa_tail;
-
/* Hardware submission is through 2 ports. Conceptually each port
* has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
* static for a context, and unique to each, so we only execute
@@ -526,7 +539,44 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
spin_lock_irq(&engine->timeline->lock);
rb = execlists->first;
GEM_BUG_ON(rb_first(&execlists->queue) != rb);
- while (rb) {
+ if (!rb)
+ goto unlock;
+
+ if (last) {
+ /*
+ * Don't resubmit or switch until all outstanding
+ * preemptions (lite-restore) are seen. Then we
+ * know the next preemption status we see corresponds
+ * to this ELSP update.
+ */
+ if (port_count(&port[0]) > 1)
+ goto unlock;
+
+ if (rb_entry(rb, struct i915_priolist, node)->priority >
+ max(last->priotree.priority, 0)) {
+ /*
+ * Switch to our empty preempt context so
+ * the state of the GPU is known (idle).
+ */
+ inject_preempt_context(engine);
+ execlists->preempt = true;
+ goto unlock;
+ } else {
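+			/*
+			 * If the second port is already occupied,
+			 * the ELSP is full and we must wait for the
+			 * HW to make room before submitting more.
+			 */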
+ if (port_count(&port[1]))
+ goto unlock;
+
+ /* WaIdleLiteRestore:bdw,skl
+ * Apply the wa NOOPs to prevent
+ * ring:HEAD == req:TAIL as we resubmit the
+ * request. See gen8_emit_breadcrumb() for
+ * where we prepare the padding after the
+ * end of the request.
+ */
+ last->tail = last->wa_tail;
+ }
+ }
+
+ do {
struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
struct drm_i915_gem_request *rq, *rn;
@@ -589,11 +639,12 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
INIT_LIST_HEAD(&p->requests);
if (p->priority != I915_PRIORITY_NORMAL)
kmem_cache_free(engine->i915->priorities, p);
- }
+ } while (rb);
done:
execlists->first = rb;
if (submit)
port_assign(port, last);
+unlock:
spin_unlock_irq(&engine->timeline->lock);
if (submit)
@@ -670,13 +721,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
spin_unlock_irqrestore(&engine->timeline->lock, flags);
}
-static bool execlists_elsp_ready(const struct intel_engine_cs *engine)
-{
- const struct execlist_port *port = engine->execlists.port;
-
- return port_count(&port[0]) + port_count(&port[1]) < 2;
-}
-
/*
* Check the unread Context Status Buffers and manage the submission of new
* contexts to the ELSP accordingly.
@@ -685,7 +729,7 @@ static void intel_lrc_irq_handler(unsigned long data)
{
struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
struct intel_engine_execlists * const execlists = &engine->execlists;
- struct execlist_port *port = execlists->port;
+ struct execlist_port * const port = execlists->port;
struct drm_i915_private *dev_priv = engine->i915;
/* We can skip acquiring intel_runtime_pm_get() here as it was taken
@@ -770,6 +814,23 @@ static void intel_lrc_irq_handler(unsigned long data)
if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
continue;
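+			/*
+			 * An ACTIVE_IDLE event for the preempt context
+			 * means the preemption has completed: the
+			 * requests left in the ports were forced off
+			 * the GPU, so drop them and return the
+			 * incomplete ones to the execlists queue.
+			 */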
+ if (status & GEN8_CTX_STATUS_ACTIVE_IDLE &&
+ buf[2*head + 1] == PREEMPT_ID) {
+ execlist_cancel_port_requests(execlists);
+
+ spin_lock_irq(&engine->timeline->lock);
+ unwind_incomplete_requests(engine);
+ spin_unlock_irq(&engine->timeline->lock);
+
+ GEM_BUG_ON(!execlists->preempt);
+ execlists->preempt = false;
+ continue;
+ }
+
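+			/*
+			 * Skip the PREEMPTED events for the outgoing
+			 * contexts while our preemption is in flight;
+			 * only the preempt context's ACTIVE_IDLE above
+			 * marks completion.
+			 */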
+ if (status & GEN8_CTX_STATUS_PREEMPTED &&
+ execlists->preempt)
+ continue;
+
/* Check the context/desc id for this event matches */
GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
@@ -801,7 +862,7 @@ static void intel_lrc_irq_handler(unsigned long data)
}
}
- if (execlists_elsp_ready(engine))
+ if (!execlists->preempt)
execlists_dequeue(engine);
intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
@@ -814,7 +875,7 @@ static void insert_request(struct intel_engine_cs *engine,
struct i915_priolist *p = lookup_priolist(engine, pt, prio);
list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests);
- if (ptr_unmask_bits(p, 1) && execlists_elsp_ready(engine))
+ if (ptr_unmask_bits(p, 1))
tasklet_hi_schedule(&engine->execlists.irq_tasklet);
}
@@ -942,8 +1003,6 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
}
spin_unlock_irq(&engine->timeline->lock);
-
- /* XXX Do we need to preempt to make room for us and our deps? */
}
static struct intel_ring *
@@ -1139,6 +1198,8 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
i915_ggtt_offset(engine->scratch) +
2 * CACHELINE_BYTES);
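+	/* Re-enable MI arbitration (preemption) after the workaround batch */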
+ *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
/* Pad to end of cacheline */
while ((unsigned long)batch % CACHELINE_BYTES)
*batch++ = MI_NOOP;
@@ -1154,6 +1215,8 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
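+	/* Disable MI arbitration while the workaround batch executes */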
+ *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
batch = gen8_emit_flush_coherentl3_wa(engine, batch);
@@ -1199,6 +1262,8 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
*batch++ = 0;
}
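+	/* Workarounds applied; re-enable MI arbitration (preemption) */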
+ *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
/* Pad to end of cacheline */
while ((unsigned long)batch % CACHELINE_BYTES)
*batch++ = MI_NOOP;
@@ -1352,6 +1417,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift);
clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
execlists->csb_head = -1;
+ execlists->preempt = false;
/* After a GPU reset, we may have requests to replay */
if (!i915_modparams.enable_guc_submission && execlists->first)
@@ -1647,7 +1713,8 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
*/
static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *cs)
{
- *cs++ = MI_NOOP;
+ /* Ensure there's always at least one preemption point per-request. */
+ *cs++ = MI_ARB_CHECK;
*cs++ = MI_NOOP;
request->wa_tail = intel_ring_offset(request, cs);
}
@@ -1668,7 +1735,6 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs)
gen8_emit_wa_tail(request, cs);
}
-
static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS;
static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request,
@@ -1696,7 +1762,6 @@ static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request,
gen8_emit_wa_tail(request, cs);
}
-
static const int gen8_emit_breadcrumb_render_sz = 8 + WA_TAIL_DWORDS;
static int gen8_init_rcs_context(struct drm_i915_gem_request *req)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -238,6 +238,8 @@ struct intel_engine_execlists {
#define EXECLIST_MAX_PORTS 2
} port[EXECLIST_MAX_PORTS];
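+	/**
+	 * @preempt: are we currently handling a preempting context switch?
+	 */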
+ bool preempt;
+
/**
* @port_mask: number of execlist ports - 1
*/
--
2.14.1