@@ -563,6 +563,42 @@ mi_set_context(struct intel_engine_cs *ring,
return ret;
}
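+
+/* A switch can only be skipped when no L3 slice remaps are pending and the
+ * ring would stay on the same context with no page directory reload flagged
+ * for it in pd_reload_mask.
+ */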
+static inline bool should_skip_switch(struct intel_engine_cs *ring,
+ struct intel_context *from,
+ struct intel_context *to)
+{
+ struct drm_i915_private *dev_priv = ring->dev->dev_private;
+
+ if (to->remap_slice)
+ return false;
+
+ if (to->ppgtt) {
+ if (from == to && !test_bit(ring->id, &to->ppgtt->base.pd_reload_mask))
+ return true;
+ } else {
+ if (from == to && !test_bit(ring->id, &dev_priv->mm.aliasing_ppgtt->base.pd_reload_mask))
+ return true;
+ }
+
+ return false;
+}
+
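+/* Pre-GEN8 hardware and the non-render rings load PP_DCLV/PP_DIR_BASE through
+ * Load Register Immediate commands in the ring before submission, so the page
+ * directory load must be emitted before the context switch.
+ */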
+static bool
+needs_pd_load_pre(struct intel_engine_cs *ring, struct intel_context *to)
+{
+ struct drm_i915_private *dev_priv = ring->dev->dev_private;
+
+ return ((INTEL_INFO(ring->dev)->gen < 8) ||
+ (ring != &dev_priv->ring[RCS])) && to->ppgtt;
+}
+
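+/* On GEN8 the PPGTT page directories are reloaded after MI_SET_CONTEXT has
+ * been emitted rather than before it.
+ */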
+static bool
+needs_pd_load_post(struct intel_engine_cs *ring, struct intel_context *to)
+{
+ return IS_GEN8(ring->dev) && to->ppgtt;
+}
+
static int do_switch(struct intel_engine_cs *ring,
struct intel_context *to)
{
@@ -571,9 +607,6 @@ static int do_switch(struct intel_engine_cs *ring,
u32 hw_flags = 0;
bool uninitialized = false;
struct i915_vma *vma;
- bool needs_pd_load_pre = ((INTEL_INFO(ring->dev)->gen < 8) ||
- (ring != &dev_priv->ring[RCS])) && to->ppgtt;
- bool needs_pd_load_post = false;
int ret, i;
if (from != NULL && ring == &dev_priv->ring[RCS]) {
@@ -581,7 +614,7 @@ static int do_switch(struct intel_engine_cs *ring,
BUG_ON(!i915_gem_obj_is_pinned(from->legacy_hw_ctx.rcs_state));
}
- if (from == to && !to->remap_slice)
+ if (should_skip_switch(ring, from, to))
return 0;
/* Trying to pin first makes error handling easier. */
@@ -599,7 +632,7 @@ static int do_switch(struct intel_engine_cs *ring,
*/
from = ring->last_context;
- if (needs_pd_load_pre) {
+ if (needs_pd_load_pre(ring, to)) {
/* Older GENs and non render rings still want the load first,
* "PP_DCLV followed by PP_DIR_BASE register through Load
* Register Immediate commands in Ring Buffer before submitting
@@ -608,6 +641,12 @@ static int do_switch(struct intel_engine_cs *ring,
ret = to->ppgtt->switch_mm(to->ppgtt, ring);
if (ret)
goto unpin_out;
+
+ /* Doing a PD load always reloads the page dirs */
+ if (to->ppgtt)
+ clear_bit(ring->id, &to->ppgtt->base.pd_reload_mask);
+ else
+ clear_bit(ring->id, &dev_priv->mm.aliasing_ppgtt->base.pd_reload_mask);
}
if (ring != &dev_priv->ring[RCS]) {
@@ -644,16 +683,16 @@ static int do_switch(struct intel_engine_cs *ring,
* XXX: If we implemented page directory eviction code, this
* optimization needs to be removed.
*/
- if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to)) {
+ if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to))
hw_flags |= MI_RESTORE_INHIBIT;
- needs_pd_load_post = to->ppgtt && IS_GEN8(ring->dev);
- }
+ else if (to->ppgtt && test_and_clear_bit(ring->id, &to->ppgtt->base.pd_reload_mask))
+ hw_flags |= MI_FORCE_RESTORE;
ret = mi_set_context(ring, to, hw_flags);
if (ret)
goto unpin_out;
- if (needs_pd_load_post) {
+ if (needs_pd_load_post(ring, to)) {
ret = to->ppgtt->switch_mm(to->ppgtt, ring);
/* The hardware context switch is emitted, but we haven't
* actually changed the state - so it's probably safe to bail
@@ -726,16 +765,6 @@ unpin_out:
return ret;
}
-static inline bool should_skip_switch(struct intel_engine_cs *ring,
- struct intel_context *from,
- struct intel_context *to)
-{
- if (from == to && !to->remap_slice)
- return true;
-
- return false;
-}
-
/**
* i915_switch_context() - perform a GPU context switch.
* @ring: ring for which we'll execute the context switch
@@ -1199,6 +1199,13 @@ i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
if (ret)
goto error;
+ if (ctx->ppgtt)
+ WARN(ctx->ppgtt->base.pd_reload_mask & (1<<ring->id),
+ "%s didn't clear reload\n", ring->name);
+ else
+ WARN(dev_priv->mm.aliasing_ppgtt->base.pd_reload_mask &
+ (1<<ring->id), "%s didn't clear reload\n", ring->name);
+
instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
instp_mask = I915_EXEC_CONSTANTS_MASK;
switch (instp_mode) {
@@ -1446,6 +1453,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
if (ret)
goto err;
+ /* XXX: Reserve has possibly changed PDEs, which means we must do a
+ * context switch before we can coherently read some of the reserved
+ * VMAs. */
+
/* The objects are in their final locations, apply the relocations. */
if (need_relocs)
ret = i915_gem_execbuffer_relocate(eb);
@@ -1398,6 +1398,15 @@ i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv)
return ppgtt;
}
+/* PDE TLBs are a pain to invalidate pre GEN8: doing so requires a context
+ * reload. If we are switching between contexts with the same LRCA, we must
+ * also do a force restore.
+ */
+#define ppgtt_invalidate_tlbs(vm) do { \
+ /* XXX: ideally only flag rings whose current vm differs from (vm) */ \
+ (vm)->pd_reload_mask = INTEL_INFO((vm)->dev)->ring_mask; \
+} while (0)
+
void i915_ppgtt_release(struct kref *kref)
{
struct i915_hw_ppgtt *ppgtt =
@@ -1433,6 +1442,8 @@ ppgtt_bind_vma(struct i915_vma *vma,
vma->node.size);
if (ret)
return ret;
+
+ ppgtt_invalidate_tlbs(vma->vm);
}
vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
@@ -1446,9 +1457,11 @@ static void ppgtt_unbind_vma(struct i915_vma *vma)
vma->node.start,
vma->obj->base.size,
true);
- if (vma->vm->teardown_va_range)
+ if (vma->vm->teardown_va_range) {
vma->vm->teardown_va_range(vma->vm,
vma->node.start, vma->node.size);
+ ppgtt_invalidate_tlbs(vma->vm);
+ }
}
extern int intel_iommu_gfx_mapped;
@@ -226,6 +226,8 @@ struct i915_address_space {
struct page *page;
} scratch;
+ unsigned long pd_reload_mask;
+
/**
* List of objects currently involved in rendering.
*