@@ -636,9 +636,18 @@ mi_set_context(struct intel_ring_buffer *ring,
static inline bool should_skip_switch(struct intel_ring_buffer *ring,
struct i915_hw_context *from,
- struct i915_hw_context *to)
+ struct i915_hw_context *to,
+ u32 *flags)
{
- if (from == to && from->last_ring == ring && !to->remap_slice)
+ if (test_and_clear_bit(ring->id, &to->vm->pd_reload_mask)) {
+ *flags |= MI_FORCE_RESTORE;
+ return false;
+ }
+
+ if (to->remap_slice)
+ return false;
+
+ if (from == to && from->last_ring == ring)
return true;
return false;
@@ -658,7 +667,7 @@ static int do_switch(struct intel_ring_buffer *ring,
BUG_ON(!i915_gem_obj_is_pinned(from->obj));
}
- if (should_skip_switch(ring, from, to))
+ if (should_skip_switch(ring, from, to, &hw_flags))
return 0;
/* Trying to pin first makes error handling easier. */
@@ -1162,6 +1162,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
if (ret)
goto err;
+ /* XXX: Reserve has possibly change PDEs which means we must do a
+ * context switch before we can coherently read some of the reserved
+ * VMAs. */
+
/* The objects are in their final locations, apply the relocations. */
if (need_relocs)
ret = i915_gem_execbuffer_relocate(eb);
@@ -1263,6 +1267,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
goto err;
}
} else {
+ WARN_ON(vm->pd_reload_mask & (1<<ring->id));
ret = ring->dispatch_execbuffer(ring,
exec_start, exec_len,
flags);
@@ -1228,6 +1228,16 @@ int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
return 0;
}
+/* PDE TLBs are a pain invalidate pre GEN8. It requires a context reload. If we
+ * are switching between contexts with the same LRCA, we also must do a force
+ * restore.
+ */
+#define ppgtt_invalidate_tlbs(vm) do {\
+ if (INTEL_INFO(vm->dev)->gen < 8) { \
+ vm->pd_reload_mask = INTEL_INFO(vm->dev)->ring_mask; \
+ } \
+} while(0)
+
static int
ppgtt_bind_vma(struct i915_vma *vma,
enum i915_cache_level cache_level,
@@ -1242,10 +1252,13 @@ ppgtt_bind_vma(struct i915_vma *vma,
vma->node.size);
if (ret)
return ret;
+
+ ppgtt_invalidate_tlbs(vma->vm);
}
vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
cache_level);
+
return 0;
}
@@ -1255,9 +1268,11 @@ static void ppgtt_unbind_vma(struct i915_vma *vma)
vma->node.start,
vma->obj->base.size,
true);
- if (vma->vm->teardown_va_range)
+ if (vma->vm->teardown_va_range) {
vma->vm->teardown_va_range(vma->vm,
vma->node.start, vma->node.size);
+ ppgtt_invalidate_tlbs(vma->vm);
+ }
}
extern int intel_iommu_gfx_mapped;
@@ -280,6 +280,8 @@ struct i915_address_space {
struct page *page;
} scratch;
+ unsigned long pd_reload_mask;
+
/**
* List of objects currently involved in rendering.
*
The docs say you cannot change the PDEs of a currently running context. If you are changing the PDEs of the currently running context then. We never map new PDEs of a running context, and expect them to be present - so I think this is okay. (We can unmap, but this should also be okay since we only unmap unreferenced objects that the GPU shouldn't be tryingto va->pa xlate.) The MI_SET_CONTEXT command does have a flag to signal that even if the context is the same, force a reload. It's unclear exactly what this does, but I have a hunch it's the right thing to do. The logic assumes that we always emit a context switch after mapping new PDEs, and before we submit a batch. This is the case today, and has been the case since the inception of hardware contexts. A note in the comment let's the user know. NOTE: I have no evidence to suggest this is actually needed other than a few tidbits which lead me to believe there are some corner cases that will require it. I'm mostly depending on the reload of DCLV to invalidate the old TLBs. We can try to remove this patch and see what happens. Signed-off-by: Ben Widawsky <ben@bwidawsk.net> --- drivers/gpu/drm/i915/i915_gem_context.c | 15 ++++++++++++--- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 5 +++++ drivers/gpu/drm/i915/i915_gem_gtt.c | 17 ++++++++++++++++- drivers/gpu/drm/i915/i915_gem_gtt.h | 2 ++ 4 files changed, 35 insertions(+), 4 deletions(-)