@@ -983,35 +983,15 @@ void intel_gt_info_print(const struct intel_gt_info *info,
intel_sseu_dump(&info->sseu, p);
}
-struct reg_and_bit {
- union {
- i915_reg_t reg;
- i915_mcr_reg_t mcr_reg;
- };
- u32 bit;
+union inv_reg {
+ i915_reg_t reg;
+ i915_mcr_reg_t mcr_reg;
};
-static struct reg_and_bit
-get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
- const i915_reg_t *regs, const unsigned int num)
-{
- const unsigned int class = engine->class;
- struct reg_and_bit rb = { };
-
- if (drm_WARN_ON_ONCE(&engine->i915->drm,
- class >= num || !regs[class].reg))
- return rb;
-
- rb.reg = regs[class];
- if (gen8 && class == VIDEO_DECODE_CLASS)
- rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
- else
- rb.bit = engine->instance;
-
- rb.bit = BIT(rb.bit);
-
- return rb;
-}
+struct inv_reg_and_bit {
+ union inv_reg addr;
+ u32 bit;
+};
/*
* HW architecture suggest typical invalidation time at 40us,
@@ -1026,52 +1006,72 @@ get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
* but are now considered MCR registers. Since they exist within a GAM range,
* the primary instance of the register rolls up the status from each unit.
*/
-static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bit rb)
+static int wait_for_invalidate(struct intel_gt *gt, struct inv_reg_and_bit rb)
{
if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
- return intel_gt_mcr_wait_for_reg(gt, rb.mcr_reg, rb.bit, 0,
+ return intel_gt_mcr_wait_for_reg(gt, rb.addr.mcr_reg, rb.bit, 0,
TLB_INVAL_TIMEOUT_US,
TLB_INVAL_TIMEOUT_MS);
else
- return __intel_wait_for_register_fw(gt->uncore, rb.reg, rb.bit, 0,
+ return __intel_wait_for_register_fw(gt->uncore, rb.addr.reg,
+ rb.bit, 0,
TLB_INVAL_TIMEOUT_US,
TLB_INVAL_TIMEOUT_MS,
NULL);
}
+static void inv_reg_set_write_mask(struct inv_reg_and_bit *rb, u8 class, u8 ver)
+{
+ if (ver >= 12 && (class == VIDEO_DECODE_CLASS ||
+ class == VIDEO_ENHANCEMENT_CLASS ||
+ class == COMPUTE_CLASS))
+ rb->bit = _MASKED_BIT_ENABLE(rb->bit);
+}
+
+static void inv_reg_set_instance(struct inv_reg_and_bit *rb, u8 class,
+ u8 instance, u8 ver)
+{
+ if (ver < 11 && class == VIDEO_DECODE_CLASS) {
+ rb->addr.reg.reg += 4 * instance; /* GEN8_M2TCR */
+ rb->bit = 1;
+ } else {
+ rb->bit = BIT(instance);
+ }
+}
+
static void mmio_invalidate_full(struct intel_gt *gt)
{
- static const i915_reg_t gen8_regs[] = {
- [RENDER_CLASS] = GEN8_RTCR,
- [VIDEO_DECODE_CLASS] = GEN8_M1TCR, /* , GEN8_M2TCR */
- [VIDEO_ENHANCEMENT_CLASS] = GEN8_VTCR,
- [COPY_ENGINE_CLASS] = GEN8_BTCR,
+ static const union inv_reg gen8_regs[] = {
+ [RENDER_CLASS].reg = GEN8_RTCR,
+ [VIDEO_DECODE_CLASS].reg = GEN8_M1TCR, /* , GEN8_M2TCR */
+ [VIDEO_ENHANCEMENT_CLASS].reg = GEN8_VTCR,
+ [COPY_ENGINE_CLASS].reg = GEN8_BTCR,
};
- static const i915_reg_t gen12_regs[] = {
- [RENDER_CLASS] = GEN12_GFX_TLB_INV_CR,
- [VIDEO_DECODE_CLASS] = GEN12_VD_TLB_INV_CR,
- [VIDEO_ENHANCEMENT_CLASS] = GEN12_VE_TLB_INV_CR,
- [COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR,
- [COMPUTE_CLASS] = GEN12_COMPCTX_TLB_INV_CR,
+ static const union inv_reg gen12_regs[] = {
+ [RENDER_CLASS].reg = GEN12_GFX_TLB_INV_CR,
+ [VIDEO_DECODE_CLASS].reg = GEN12_VD_TLB_INV_CR,
+ [VIDEO_ENHANCEMENT_CLASS].reg = GEN12_VE_TLB_INV_CR,
+ [COPY_ENGINE_CLASS].reg = GEN12_BLT_TLB_INV_CR,
+ [COMPUTE_CLASS].reg = GEN12_COMPCTX_TLB_INV_CR,
};
- static const i915_mcr_reg_t xehp_regs[] = {
- [RENDER_CLASS] = XEHP_GFX_TLB_INV_CR,
- [VIDEO_DECODE_CLASS] = XEHP_VD_TLB_INV_CR,
- [VIDEO_ENHANCEMENT_CLASS] = XEHP_VE_TLB_INV_CR,
- [COPY_ENGINE_CLASS] = XEHP_BLT_TLB_INV_CR,
- [COMPUTE_CLASS] = XEHP_COMPCTX_TLB_INV_CR,
+ static const union inv_reg xehp_regs[] = {
+ [RENDER_CLASS].mcr_reg = XEHP_GFX_TLB_INV_CR,
+ [VIDEO_DECODE_CLASS].mcr_reg = XEHP_VD_TLB_INV_CR,
+ [VIDEO_ENHANCEMENT_CLASS].mcr_reg = XEHP_VE_TLB_INV_CR,
+ [COPY_ENGINE_CLASS].mcr_reg = XEHP_BLT_TLB_INV_CR,
+ [COMPUTE_CLASS].mcr_reg = XEHP_COMPCTX_TLB_INV_CR,
};
struct drm_i915_private *i915 = gt->i915;
struct intel_uncore *uncore = gt->uncore;
struct intel_engine_cs *engine;
intel_engine_mask_t awake, tmp;
+ const union inv_reg *regs;
enum intel_engine_id id;
- const i915_reg_t *regs;
unsigned int num = 0;
unsigned long flags;
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
- regs = NULL;
+ regs = xehp_regs;
num = ARRAY_SIZE(xehp_regs);
} else if (GRAPHICS_VER(i915) == 12) {
regs = gen12_regs;
@@ -1083,10 +1083,6 @@ static void mmio_invalidate_full(struct intel_gt *gt)
return;
}
- if (drm_WARN_ONCE(&i915->drm, !num,
- "Platform does not implement TLB invalidation!"))
- return;
-
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
intel_gt_mcr_lock(gt, &flags);
@@ -1094,33 +1090,22 @@ static void mmio_invalidate_full(struct intel_gt *gt)
awake = 0;
for_each_engine(engine, gt, id) {
- struct reg_and_bit rb;
+ struct inv_reg_and_bit rb;
if (!intel_engine_pm_is_awake(engine))
continue;
- if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
- u32 val = BIT(engine->instance);
-
- if (engine->class == VIDEO_DECODE_CLASS ||
- engine->class == VIDEO_ENHANCEMENT_CLASS ||
- engine->class == COMPUTE_CLASS)
- val = _MASKED_BIT_ENABLE(val);
- intel_gt_mcr_multicast_write_fw(gt,
- xehp_regs[engine->class],
- val);
- } else {
- rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
- if (!i915_mmio_reg_offset(rb.reg))
- continue;
-
- if (GRAPHICS_VER(i915) == 12 && (engine->class == VIDEO_DECODE_CLASS ||
- engine->class == VIDEO_ENHANCEMENT_CLASS ||
- engine->class == COMPUTE_CLASS))
- rb.bit = _MASKED_BIT_ENABLE(rb.bit);
-
- intel_uncore_write_fw(uncore, rb.reg, rb.bit);
- }
+ rb.addr = regs[engine->class];
+ if (!i915_mmio_reg_offset(rb.addr.reg))
+ continue;
+ inv_reg_set_instance(&rb, engine->class, engine->instance,
+ GRAPHICS_VER(i915));
+ inv_reg_set_write_mask(&rb, engine->class, GRAPHICS_VER(i915));
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
+ intel_gt_mcr_multicast_write_fw(gt, rb.addr.mcr_reg,
+ rb.bit);
+ else
+ intel_uncore_write_fw(uncore, rb.addr.reg, rb.bit);
awake |= engine->mask;
}
@@ -1139,15 +1124,10 @@ static void mmio_invalidate_full(struct intel_gt *gt)
intel_gt_mcr_unlock(gt, flags);
for_each_engine_masked(engine, gt, awake, tmp) {
- struct reg_and_bit rb;
-
- if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
- rb.mcr_reg = xehp_regs[engine->class];
- rb.bit = BIT(engine->instance);
- } else {
- rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
- }
+ struct inv_reg_and_bit rb = { .addr = regs[engine->class] };
+ inv_reg_set_instance(&rb, engine->class, engine->instance,
+ GRAPHICS_VER(i915));
if (wait_for_invalidate(gt, rb))
drm_err_ratelimited(&gt->i915->drm,
"%s TLB invalidation did not complete in %ums!\n",
After adding multicast and write mask support, the TLB invalidation code became slightly inconsistent and redundant. Signed-off-by: Andrzej Hajda <andrzej.hajda@intel.com> --- Hi, This is another cleanup attempt. Multicast makes things quite complicated: it means either a lot of ifs or redundancy. I guess we will end up with some common helpers to support cases where the same code should access MCR and traditional registers, depending on GPU version. I took some ideas from your proposal, but I've replaced get_reg_and_bit with two very simple helpers. Regards Andrzej --- drivers/gpu/drm/i915/gt/intel_gt.c | 148 +++++++++++++---------------- 1 file changed, 64 insertions(+), 84 deletions(-)