@@ -70,6 +70,10 @@ struct intel_context {
u32 *lrc_reg_state;
u64 lrc_desc;
+
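+ /* gen12: offset of the per-context wa bb from the start of the context image */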
+ u32 ctx_bb_offset;
+
u32 tag; /* cookie passed to HW to track this context on submission */
/* Time on GPU as tracked by the hw. */
@@ -138,7 +138,7 @@
*/
#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
/* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */
-#define MI_LRI_CS_MMIO (1<<19)
+#define MI_LRI_LRM_CS_MMIO REG_BIT(19)
#define MI_LRI_FORCE_POSTED (1<<12)
#define MI_LOAD_REGISTER_IMM_MAX_REGS (126)
#define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1)
@@ -156,6 +156,7 @@
#define MI_LOAD_REGISTER_MEM MI_INSTR(0x29, 1)
#define MI_LOAD_REGISTER_MEM_GEN8 MI_INSTR(0x29, 2)
#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1)
+#define MI_LRR_SOURCE_CS_MMIO REG_BIT(18)
#define MI_BATCH_BUFFER MI_INSTR(0x30, 1)
#define MI_BATCH_NON_SECURE (1)
/* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */
@@ -234,7 +234,7 @@ static void execlists_init_reg_state(u32 *reg_state,
const struct intel_ring *ring,
bool close);
static void
-__execlists_update_reg_state(const struct intel_context *ce,
+__execlists_update_reg_state(struct intel_context *ce,
const struct intel_engine_cs *engine,
u32 head);
@@ -314,6 +314,28 @@ lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
}
}
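+/*
+ * Point the context image at an indirect ctx wa bb: the ptr register holds
+ * the bb ggtt address with its size in cachelines in the low bits, and the
+ * offset register holds the engine's default indirect ctx offset.
+ */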
+static void
+lrc_ring_setup_indirect_ctx(const struct intel_context *ce,
+ u32 ctx_bb_ggtt_addr,
+ u32 size)
+{
+ u32 * const regs = ce->lrc_reg_state;
+
+ GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
+ GEM_BUG_ON(lrc_ring_indirect_ptr(ce->engine) == -1);
+ regs[lrc_ring_indirect_ptr(ce->engine) + 1] =
+ ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);
+
+ GEM_BUG_ON(lrc_ring_indirect_offset(ce->engine) == -1);
+ regs[lrc_ring_indirect_offset(ce->engine) + 1] =
+ /* Use the engine's default indirect ctx offset */
+ lrc_ring_indirect_offset_default(ce->engine) << 6;
+}
+
static u32 intel_context_get_runtime(const struct intel_context *ce)
{
/*
@@ -613,7 +635,7 @@ static void set_offsets(u32 *regs,
if (flags & POSTED)
*regs |= MI_LRI_FORCE_POSTED;
if (INTEL_GEN(engine->i915) >= 11)
- *regs |= MI_LRI_CS_MMIO;
+ *regs |= MI_LRI_LRM_CS_MMIO;
regs++;
GEM_BUG_ON(!count);
@@ -3187,8 +3209,114 @@ static void execlists_context_unpin(struct intel_context *ce)
i915_gem_object_unpin_map(ce->state->obj);
}
+static u32 *
+gen12_emit_timestamp_wa(struct intel_context *ce, u32 *cs)
+{
+ const u32 lrc_offset = i915_ggtt_offset(ce->state) +
+ LRC_STATE_OFFSET;
+ const u32 scratch_reg = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
+
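+ /* Preload the saved CTX_TIMESTAMP from the context image into the GPR0 scratch */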
+ *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
+ MI_SRM_LRM_GLOBAL_GTT | MI_LRI_LRM_CS_MMIO;
+ *cs++ = scratch_reg;
+ *cs++ = lrc_offset + CTX_TIMESTAMP * sizeof(u32);
+ *cs++ = 0;
+
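+ /*
+ * Move the scratch value into the live RING_CTX_TIMESTAMP with a fast
+ * reg-to-reg write; the LRR is issued twice as part of the workaround.
+ */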
+ *cs++ = MI_LOAD_REGISTER_REG |
+ MI_LRR_SOURCE_CS_MMIO | MI_LRI_LRM_CS_MMIO;
+ *cs++ = scratch_reg;
+ *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
+
+ *cs++ = MI_LOAD_REGISTER_REG |
+ MI_LRR_SOURCE_CS_MMIO | MI_LRI_LRM_CS_MMIO;
+ *cs++ = scratch_reg;
+ *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
+
+ return cs;
+}
+
+static u32 *
+gen12_emit_restore_scratch(struct intel_context *ce, u32 *cs)
+{
+ const u32 lrc_offset = i915_ggtt_offset(ce->state) +
+ LRC_STATE_OFFSET;
+ const u32 scratch_reg = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
+
+ GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);
+
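+ /* Reload GPR0 from its save slot in the context image, undoing the clobber */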
+ *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
+ MI_SRM_LRM_GLOBAL_GTT | MI_LRI_LRM_CS_MMIO;
+ *cs++ = scratch_reg;
+ *cs++ = lrc_offset + (lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
+ *cs++ = 0;
+
+ return cs;
+}
+
+static u32 *
+gen12_emit_indirect_ctx_xcs(struct intel_context *ce, u32 *cs)
+{
+ cs = gen12_emit_timestamp_wa(ce, cs);
+ cs = gen12_emit_restore_scratch(ce, cs);
+
+ return cs;
+}
+
+static u32 *context_indirect_bb(struct intel_context *ce)
+{
+ void *ptr;
+
+ GEM_BUG_ON(!ce->ctx_bb_offset);
+
+ ptr = ce->lrc_reg_state;
+ ptr -= LRC_STATE_OFFSET; /* back to start of context image */
+ ptr += ce->ctx_bb_offset;
+
+ return ptr;
+}
+
+static u32 *
+execlists_emit_indirect_ctx_bb(struct intel_context *ce,
+ u32 *(*emit)(struct intel_context *, u32 *))
+{
+ u32 *cs = context_indirect_bb(ce);
+ const u32 * const batch_start = cs;
+
+ cs = emit(ce, cs);
+
+ GEM_DEBUG_BUG_ON(cs - batch_start >
+ I915_GTT_PAGE_SIZE / sizeof(*cs));
+
+ return cs;
+}
+
+static void
+setup_indirect_ctx_bb(struct intel_context *ce,
+ u32 *(*emit)(struct intel_context *, u32 *))
+{
+ const u32 * const start = context_indirect_bb(ce);
+ u32 *cs;
+
+ cs = execlists_emit_indirect_ctx_bb(ce, emit);
+
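+ /* Pad with NOPs to a cacheline, as the INDIRECT_CTX size field is in cachelines */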
+ while ((unsigned long)cs % CACHELINE_BYTES)
+ *cs++ = MI_NOOP;
+
+ lrc_ring_setup_indirect_ctx(ce,
+ i915_ggtt_offset(ce->state) +
+ ce->ctx_bb_offset,
+ (cs - start) * sizeof(*cs));
+}
+
static void
-__execlists_update_reg_state(const struct intel_context *ce,
+__execlists_update_reg_state(struct intel_context *ce,
const struct intel_engine_cs *engine,
u32 head)
{
@@ -3209,6 +3337,12 @@ __execlists_update_reg_state(const struct intel_context *ce,
intel_sseu_make_rpcs(engine->i915, &ce->sseu);
i915_oa_init_reg_state(ce, engine);
}
+
+ if (ce->ctx_bb_offset) {
+ /* Mutually exclusive with the global indirect bb */
+ GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
+ setup_indirect_ctx_bb(ce, gen12_emit_indirect_ctx_xcs);
+ }
}
@@ -4758,30 +4892,24 @@
regs[CTX_TIMESTAMP] = 0;
}
-static void init_wa_bb_reg_state(u32 * const regs,
- const struct intel_engine_cs *engine)
+static void init_wa_bb_reg_state(const struct intel_context *ce)
{
- const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
+ const struct i915_ctx_workarounds * const wa_ctx = &ce->engine->wa_ctx;
if (wa_ctx->per_ctx.size) {
const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
- GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
- regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
+ GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(ce->engine) == -1);
+ ce->lrc_reg_state[lrc_ring_wa_bb_per_ctx(ce->engine) + 1] =
(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
}
if (wa_ctx->indirect_ctx.size) {
- const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
+ const u32 bb_offset = i915_ggtt_offset(wa_ctx->vma) +
+ wa_ctx->indirect_ctx.offset;
+ const u32 bb_size = wa_ctx->indirect_ctx.size;
- GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
- regs[lrc_ring_indirect_ptr(engine) + 1] =
- (ggtt_offset + wa_ctx->indirect_ctx.offset) |
- (wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
-
- GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
- regs[lrc_ring_indirect_offset(engine) + 1] =
- lrc_ring_indirect_offset_default(engine) << 6;
+ lrc_ring_setup_indirect_ctx(ce, bb_offset, bb_size);
}
}
@@ -4830,7 +4958,7 @@ static void execlists_init_reg_state(u32 *regs,
init_common_reg_state(regs, engine, ring, inhibit);
init_ppgtt_reg_state(regs, vm_alias(ce->vm));
- init_wa_bb_reg_state(regs, engine);
+ init_wa_bb_reg_state(ce);
__reset_stop_ring(regs, engine);
}
@@ -4903,6 +5031,12 @@ static int __execlists_context_alloc(struct intel_context *ce,
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
context_size += I915_GTT_PAGE_SIZE; /* for redzone */
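+ /* gen12: reserve a page after the context image for the per-context wa bb */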
+ if (INTEL_GEN(engine->i915) == 12) {
+ ce->ctx_bb_offset = context_size;
+ context_size += PAGE_SIZE;
+ }
+
ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
if (IS_ERR(ctx_obj))
return PTR_ERR(ctx_obj);