@@ -583,6 +583,43 @@ static u32 *gen12_emit_preempt_busywait(struct i915_request *rq, u32 *cs)
return cs;
}
+/* Wa_14014475959:dg2 */
+#define CCS_SEMAPHORE_PPHWSP_OFFSET 0x540
+static u32 ccs_semaphore_offset(struct i915_request *rq)
+{
+ return i915_ggtt_offset(rq->context->state) +
+ (LRC_PPHWSP_PN * PAGE_SIZE) + CCS_SEMAPHORE_PPHWSP_OFFSET;
+}
+
+/* Wa_14014475959:dg2 */
+static u32 *ccs_emit_wa_busywait(struct i915_request *rq, u32 *cs)
+{
+ int i;
+
+ *cs++ = MI_ATOMIC_INLINE | MI_ATOMIC_GLOBAL_GTT | MI_ATOMIC_CS_STALL |
+ MI_ATOMIC_MOVE;
+ *cs++ = ccs_semaphore_offset(rq);
+ *cs++ = 0;
+ *cs++ = 1;
+
+ /*
+ * When MI_ATOMIC_INLINE_DATA set this command must be 11 DW + (1 NOP)
+ * to align. 4 DWs above + 8 filler DWs here.
+ */
+ for (i = 0; i < 8; ++i)
+ *cs++ = 0;
+
+ *cs++ = MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_GLOBAL_GTT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_EQ_SDD;
+ *cs++ = 0;
+ *cs++ = ccs_semaphore_offset(rq);
+ *cs++ = 0;
+
+ return cs;
+}
+
static __always_inline u32*
gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs)
{
@@ -593,6 +630,10 @@ gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs)
!intel_uc_uses_guc_submission(&rq->engine->gt->uc))
cs = gen12_emit_preempt_busywait(rq, cs);
+ /* Wa_14014475959:dg2 */
+ if (intel_engine_uses_wa_hold_ccs_switchout(rq->engine))
+ cs = ccs_emit_wa_busywait(rq, cs);
+
rq->tail = intel_ring_offset(rq, cs);
assert_ring_tail_valid(rq->ring, rq->tail);
@@ -529,6 +529,7 @@ struct intel_engine_cs {
#define I915_ENGINE_HAS_RCS_REG_STATE BIT(9)
#define I915_ENGINE_HAS_EU_PRIORITY BIT(10)
#define I915_ENGINE_FIRST_RENDER_COMPUTE BIT(11)
+#define I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT BIT(12)
unsigned int flags;
/*
@@ -629,6 +630,13 @@ intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine)
return engine->flags & I915_ENGINE_HAS_RELATIVE_MMIO;
}
+/* Wa_14014475959:dg2 */
+static inline bool
+intel_engine_uses_wa_hold_ccs_switchout(struct intel_engine_cs *engine)
+{
+ return engine->flags & I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT;
+}
+
#define instdone_has_slice(dev_priv___, sseu___, slice___) \
((GRAPHICS_VER(dev_priv___) == 7 ? 1 : ((sseu___)->slice_mask)) & BIT(slice___))
@@ -134,6 +134,13 @@
#define MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */
#define MI_USE_GGTT (1 << 22) /* g4x+ */
#define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1)
+#define MI_ATOMIC MI_INSTR(0x2f, 1)
+#define MI_ATOMIC_INLINE (MI_INSTR(0x2f, 9) | MI_ATOMIC_INLINE_DATA)
+#define MI_ATOMIC_GLOBAL_GTT (1 << 22)
+#define MI_ATOMIC_INLINE_DATA (1 << 18)
+#define MI_ATOMIC_CS_STALL (1 << 17)
+#define MI_ATOMIC_MOVE (0x4 << 8)
+
/*
* Official intel docs are somewhat sloppy concerning MI_LOAD_REGISTER_IMM:
* - Always issue a MI_NOOP _before_ the MI_LOAD_REGISTER_IMM - otherwise hw
@@ -296,6 +296,10 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc)
if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0))
flags |= GUC_WA_GAM_CREDITS;
+ /* Wa_14014475959:dg2 */
+ if (IS_DG2(gt->i915))
+ flags |= GUC_WA_HOLD_CCS_SWITCHOUT;
+
/*
* Wa_14012197797:dg2_g10:a0,dg2_g11:a0
* Wa_22011391025:dg2_g10,dg2_g11,dg2_g12
@@ -102,6 +102,7 @@
#define GUC_WA_DUAL_QUEUE BIT(11)
#define GUC_WA_RCS_RESET_BEFORE_RC6 BIT(13)
#define GUC_WA_PRE_PARSER BIT(14)
+#define GUC_WA_HOLD_CCS_SWITCHOUT BIT(17)
#define GUC_WA_POLLCS BIT(18)
#define GUC_CTL_FEATURE 2
@@ -3897,6 +3897,10 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
engine->flags |= I915_ENGINE_HAS_PREEMPTION;
engine->flags |= I915_ENGINE_HAS_TIMESLICES;
+ /* Wa_14014475959:dg2 */
+ if (IS_DG2(engine->i915) && engine->class == COMPUTE_CLASS)
+ engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT;
+
/*
* TODO: GuC supports timeslicing and semaphores as well, but they're
* handled by the firmware so some minor tweaks are required before