@@ -190,9 +190,10 @@ static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id)
gvt_dbg_core("invalidate TLB for ring %d\n", ring_id);
}
-static void load_mocs(struct intel_vgpu *vgpu, int ring_id)
+static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next,
+ int ring_id)
{
- struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
+ struct drm_i915_private *dev_priv;
i915_reg_t offset, l3_offset;
u32 regs[] = {
[RCS] = 0xc800,
@@ -203,54 +204,44 @@ static void load_mocs(struct intel_vgpu *vgpu, int ring_id)
};
int i;
+ dev_priv = pre ? pre->gvt->dev_priv : next->gvt->dev_priv;
if (WARN_ON(ring_id >= ARRAY_SIZE(regs)))
return;
offset.reg = regs[ring_id];
- for (i = 0; i < 64; i++) {
- gen9_render_mocs[ring_id][i] = I915_READ_FW(offset);
- I915_WRITE_FW(offset, vgpu_vreg(vgpu, offset));
- offset.reg += 4;
- }
-
- if (ring_id == RCS) {
- l3_offset.reg = 0xb020;
- for (i = 0; i < 32; i++) {
- gen9_render_mocs_L3[i] = I915_READ_FW(l3_offset);
- I915_WRITE_FW(l3_offset, vgpu_vreg(vgpu, l3_offset));
- l3_offset.reg += 4;
- }
- }
-}
-static void restore_mocs(struct intel_vgpu *vgpu, int ring_id)
-{
- struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
- i915_reg_t offset, l3_offset;
- u32 regs[] = {
- [RCS] = 0xc800,
- [VCS] = 0xc900,
- [VCS2] = 0xca00,
- [BCS] = 0xcc00,
- [VECS] = 0xcb00,
- };
- int i;
+ for (i = 0; i < 64; i++) {
+ if (pre)
+ vgpu_vreg(pre, offset) =
+ I915_READ_FW(offset);
+ else
+ gen9_render_mocs[ring_id][i] =
+ I915_READ_FW(offset);
- if (WARN_ON(ring_id >= ARRAY_SIZE(regs)))
- return;
+ if (next)
+ I915_WRITE_FW(offset, vgpu_vreg(next, offset));
+ else
+ I915_WRITE_FW(offset, gen9_render_mocs[ring_id][i]);
- offset.reg = regs[ring_id];
- for (i = 0; i < 64; i++) {
- vgpu_vreg(vgpu, offset) = I915_READ_FW(offset);
- I915_WRITE_FW(offset, gen9_render_mocs[ring_id][i]);
offset.reg += 4;
}
if (ring_id == RCS) {
l3_offset.reg = 0xb020;
for (i = 0; i < 32; i++) {
- vgpu_vreg(vgpu, l3_offset) = I915_READ_FW(l3_offset);
- I915_WRITE_FW(l3_offset, gen9_render_mocs_L3[i]);
+ if (pre)
+ vgpu_vreg(pre, l3_offset) =
+ I915_READ_FW(l3_offset);
+ else
+ gen9_render_mocs_L3[i] =
+ I915_READ_FW(l3_offset);
+ if (next)
+ I915_WRITE_FW(l3_offset,
+ vgpu_vreg(next, l3_offset));
+ else
+ I915_WRITE_FW(l3_offset,
+ gen9_render_mocs_L3[i]);
+
l3_offset.reg += 4;
}
}
@@ -258,78 +249,25 @@ static void restore_mocs(struct intel_vgpu *vgpu, int ring_id)
#define CTX_CONTEXT_CONTROL_VAL 0x03
-/* Switch ring mmio values (context) from host to a vgpu. */
-static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id)
-{
- struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
- struct intel_vgpu_submission *s = &vgpu->submission;
- u32 *reg_state = s->shadow_ctx->engine[ring_id].lrc_reg_state;
- u32 ctx_ctrl = reg_state[CTX_CONTEXT_CONTROL_VAL];
- u32 inhibit_mask =
- _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
- i915_reg_t last_reg = _MMIO(0);
- struct render_mmio *mmio;
- u32 v;
- int i, array_size;
-
- if (IS_SKYLAKE(vgpu->gvt->dev_priv)
- || IS_KABYLAKE(vgpu->gvt->dev_priv)) {
- mmio = gen9_render_mmio_list;
- array_size = ARRAY_SIZE(gen9_render_mmio_list);
- load_mocs(vgpu, ring_id);
- } else {
- mmio = gen8_render_mmio_list;
- array_size = ARRAY_SIZE(gen8_render_mmio_list);
- }
-
- for (i = 0; i < array_size; i++, mmio++) {
- if (mmio->ring_id != ring_id)
- continue;
-
- mmio->value = I915_READ_FW(mmio->reg);
-
- /*
- * if it is an inhibit context, load in_context mmio
- * into HW by mmio write. If it is not, skip this mmio
- * write.
- */
- if (mmio->in_context &&
- (ctx_ctrl & inhibit_mask) != inhibit_mask)
- continue;
-
- if (mmio->mask)
- v = vgpu_vreg(vgpu, mmio->reg) | (mmio->mask << 16);
- else
- v = vgpu_vreg(vgpu, mmio->reg);
-
- I915_WRITE_FW(mmio->reg, v);
- last_reg = mmio->reg;
-
- trace_render_mmio(vgpu->id, "load",
- i915_mmio_reg_offset(mmio->reg),
- mmio->value, v);
- }
-
- /* Make sure the swiched MMIOs has taken effect. */
- if (likely(INTEL_GVT_MMIO_OFFSET(last_reg)))
- I915_READ_FW(last_reg);
-
- handle_tlb_pending_event(vgpu, ring_id);
-}
-
-/* Switch ring mmio values (context) from vgpu to host. */
-static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id)
+/* Switch ring mmio values (context); a NULL pre/next means the host. */
+static void switch_mmio(struct intel_vgpu *pre, struct intel_vgpu *next,
+ int ring_id)
{
- struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
+ struct drm_i915_private *dev_priv;
struct render_mmio *mmio;
i915_reg_t last_reg = _MMIO(0);
- u32 v;
+ u32 old_v, new_v;
int i, array_size;
+ struct intel_vgpu_submission *s;
+ u32 *reg_state, ctx_ctrl;
+ u32 inhibit_mask =
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
+ dev_priv = pre ? pre->gvt->dev_priv : next->gvt->dev_priv;
if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
mmio = gen9_render_mmio_list;
array_size = ARRAY_SIZE(gen9_render_mmio_list);
- restore_mocs(vgpu, ring_id);
+ switch_mocs(pre, next, ring_id);
} else {
mmio = gen8_render_mmio_list;
array_size = ARRAY_SIZE(gen8_render_mmio_list);
@@ -338,29 +276,61 @@ static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id)
for (i = 0; i < array_size; i++, mmio++) {
if (mmio->ring_id != ring_id)
continue;
+		/* Save MMIO state of the outgoing vGPU, or of the host. */
+ if (pre) {
+ vgpu_vreg(pre, mmio->reg) = I915_READ_FW(mmio->reg);
+ if (mmio->mask)
+ vgpu_vreg(pre, mmio->reg) &=
+ ~(mmio->mask << 16);
+ old_v = vgpu_vreg(pre, mmio->reg);
+ } else {
+ mmio->value = I915_READ_FW(mmio->reg);
+ old_v = mmio->value;
+ }
- vgpu_vreg(vgpu, mmio->reg) = I915_READ_FW(mmio->reg);
-
- if (mmio->mask) {
- vgpu_vreg(vgpu, mmio->reg) &= ~(mmio->mask << 16);
- v = mmio->value | (mmio->mask << 16);
- } else
- v = mmio->value;
-
- if (mmio->in_context)
- continue;
+		/* Restore MMIO state of the incoming vGPU, or of the host. */
+ if (next) {
+ s = &next->submission;
+ reg_state =
+ s->shadow_ctx->engine[ring_id].lrc_reg_state;
+ ctx_ctrl = reg_state[CTX_CONTEXT_CONTROL_VAL];
+ /*
+ * if it is an inhibit context, load in_context mmio
+ * into HW by mmio write. If it is not, skip this mmio
+ * write.
+ */
+ if (mmio->in_context &&
+ (ctx_ctrl & inhibit_mask) != inhibit_mask)
+ continue;
+
+ if (mmio->mask)
+ new_v = vgpu_vreg(next, mmio->reg) |
+ (mmio->mask << 16);
+ else
+ new_v = vgpu_vreg(next, mmio->reg);
+ } else {
+ if (mmio->in_context)
+ continue;
+ if (mmio->mask)
+ new_v = mmio->value | (mmio->mask << 16);
+ else
+ new_v = mmio->value;
+ }
- I915_WRITE_FW(mmio->reg, v);
+ I915_WRITE_FW(mmio->reg, new_v);
last_reg = mmio->reg;
-
- trace_render_mmio(vgpu->id, "restore",
+ trace_render_mmio(pre ? pre->id : 0,
+ next ? next->id : 0, "switch",
i915_mmio_reg_offset(mmio->reg),
- mmio->value, v);
+ old_v, new_v);
}
 	/* Make sure the switched MMIOs have taken effect. */
if (likely(INTEL_GVT_MMIO_OFFSET(last_reg)))
I915_READ_FW(last_reg);
+
+ if (next)
+ handle_tlb_pending_event(next, ring_id);
}
/**
@@ -391,16 +361,6 @@ void intel_gvt_switch_mmio(struct intel_vgpu *pre,
 	 * handle forcewake manually.
*/
intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-
- /**
- * TODO: Optimize for vGPU to vGPU switch by merging
- * switch_mmio_to_host() and switch_mmio_to_vgpu().
- */
- if (pre)
- switch_mmio_to_host(pre, ring_id);
-
- if (next)
- switch_mmio_to_vgpu(next, ring_id);
-
+ switch_mmio(pre, next, ring_id);
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
}
@@ -330,13 +330,14 @@
);
TRACE_EVENT(render_mmio,
- TP_PROTO(int id, char *action, unsigned int reg,
+ TP_PROTO(int old_id, int new_id, char *action, unsigned int reg,
unsigned int old_val, unsigned int new_val),
- TP_ARGS(id, action, reg, new_val, old_val),
+	TP_ARGS(old_id, new_id, action, reg, old_val, new_val),
TP_STRUCT__entry(
- __field(int, id)
+ __field(int, old_id)
+ __field(int, new_id)
__array(char, buf, GVT_TEMP_STR_LEN)
__field(unsigned int, reg)
__field(unsigned int, old_val)
@@ -344,15 +345,17 @@
),
TP_fast_assign(
- __entry->id = id;
+ __entry->old_id = old_id;
+ __entry->new_id = new_id;
snprintf(__entry->buf, GVT_TEMP_STR_LEN, "%s", action);
__entry->reg = reg;
__entry->old_val = old_val;
__entry->new_val = new_val;
),
- TP_printk("VM%u %s reg %x, old %08x new %08x\n",
- __entry->id, __entry->buf, __entry->reg,
+ TP_printk("VM%u -> VM%u %s reg %x, old %08x new %08x\n",
+ __entry->old_id, __entry->new_id,
+ __entry->buf, __entry->reg,
__entry->old_val, __entry->new_val)
);
Today an MMIO switch between two vGPUs has to switch to the host first and
then to the target vGPU, which wastes one full round of MMIO save/restore.
MMIO reads/writes are usually time-consuming, and there are many MOCS
registers to save/restore during a vGPU switch. Combining
switch_mmio_to_host() and switch_mmio_to_vgpu() into a single switch_mmio()
removes that extra save/restore round, reducing CPU utilization and
improving performance when multiple VMs run heavy workloads.

Signed-off-by: Weinan Li <weinan.z.li@intel.com>
---
 drivers/gpu/drm/i915/gvt/render.c | 212 ++++++++++++++++----------------
 drivers/gpu/drm/i915/gvt/trace.h  |  15 +--
 2 files changed, 95 insertions(+), 132 deletions(-)
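For reviewers, a minimal self-contained sketch of the accounting behind the
claim above. This is not driver code: hw_read()/hw_write(), the *_saved
arrays, and NUM_REGS are illustrative stand-ins for
I915_READ_FW()/I915_WRITE_FW(), the vGPU vreg space / gen9_render_mocs save
area, and the 64 MOCS registers per ring. It only demonstrates why the
combined path halves the per-register MMIO traffic:

#include <stdio.h>

#define NUM_REGS 64

static unsigned int hw[NUM_REGS];          /* stands in for the MMIO registers */
static unsigned int host_saved[NUM_REGS];  /* host save area (gen9_render_mocs analogue) */
static unsigned int pre_saved[NUM_REGS];   /* outgoing vGPU's vreg analogue */
static unsigned int next_saved[NUM_REGS];  /* incoming vGPU's vreg analogue */
static unsigned long rw_ops;               /* counts simulated MMIO accesses */

static unsigned int hw_read(int reg)
{
	rw_ops++;
	return hw[reg];
}

static void hw_write(int reg, unsigned int val)
{
	rw_ops++;
	hw[reg] = val;
}

/* Old flow: pre -> host, then host -> next; 2 reads + 2 writes per reg. */
static void switch_via_host(void)
{
	int i;

	for (i = 0; i < NUM_REGS; i++) {        /* switch_mmio_to_host() */
		pre_saved[i] = hw_read(i);
		hw_write(i, host_saved[i]);
	}
	for (i = 0; i < NUM_REGS; i++) {        /* switch_mmio_to_vgpu() */
		host_saved[i] = hw_read(i);
		hw_write(i, next_saved[i]);
	}
}

/* New flow: pre -> next directly; 1 read + 1 write per reg. */
static void switch_direct(void)
{
	int i;

	for (i = 0; i < NUM_REGS; i++) {
		pre_saved[i] = hw_read(i);      /* save outgoing state */
		hw_write(i, next_saved[i]);     /* restore incoming state */
	}
}

int main(void)
{
	rw_ops = 0;
	switch_via_host();
	printf("old flow: %lu register accesses\n", rw_ops);  /* 256 */

	rw_ops = 0;
	switch_direct();
	printf("new flow: %lu register accesses\n", rw_ops);  /* 128 */
	return 0;
}

With 64 registers per ring, the old two-pass flow issues 256 simulated
accesses and the combined flow 128, which is the one save/restore round
the patch eliminates.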