@@ -101,6 +101,32 @@ static void do_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt)
struct drm_device *dev = ppgtt->base.dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct i915_address_space *vm = &ppgtt->base;
+ bool do_idle = false;
+ int ret;
+
+ /* If we get here while in reset, we need to let the reset handler run
+ * first, or else our VM teardown isn't going to go smoothly. There are
+ * a couple of options at this point, but letting the reset handler do
+ * its thing is the most desirable. The reset handler will take care of
+ * retiring the stuck requests.
+ */
+ if (i915_reset_in_progress(&dev_priv->gpu_error)) {
+ mutex_unlock(&dev->struct_mutex);
+#define EXIT_COND (!i915_reset_in_progress(&dev_priv->gpu_error) || \
+ i915_terminally_wedged(&dev_priv->gpu_error))
+ ret = wait_event_timeout(dev_priv->gpu_error.reset_queue,
+ EXIT_COND,
+ 10 * HZ);
+ if (!ret) {
+ /* it's unlikely idling will solve anything, but it
+ * shouldn't hurt to try. */
+ do_idle = true;
+ /* TODO: go down kicking and screaming harder */
+ }
+#undef EXIT_COND
+
+ mutex_lock(&dev->struct_mutex);
+ }
if (ppgtt == dev_priv->mm.aliasing_ppgtt ||
(list_empty(&vm->active_list) && list_empty(&vm->inactive_list))) {
@@ -117,14 +143,33 @@ static void do_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt)
if (!list_empty(&vm->active_list)) {
struct i915_vma *vma;
+ do_idle = true;
list_for_each_entry(vma, &vm->active_list, mm_list)
if (WARN_ON(list_empty(&vma->vma_link) ||
list_is_singular(&vma->vma_link)))
break;
- i915_gem_evict_vm(&ppgtt->base, true, true);
- } else {
+ } else
i915_gem_retire_requests(dev);
- i915_gem_evict_vm(&ppgtt->base, false, true);
+
+ /* We have a problem here where VM teardown cannot be interrupted, or
+ * else the ppgtt cleanup will fail. As an example, a precisely timed
+ * SIGKILL could lead to an OOPS, or worse. There are two options:
+ * 1. Make the eviction uninterruptible
+ * 2. Defer the eviction if it was interrupted.
+ *
+ * Option #1 is not the friendliest, but it's the easiest to implement,
+ * and least error prone.
+ * TODO: Implement option 2
+ */
+ ret = i915_gem_evict_vm(&ppgtt->base, do_idle, !do_idle);
+ if (ret == -ERESTARTSYS)
+ ret = i915_gem_evict_vm(&ppgtt->base, do_idle, false);
+ WARN_ON(ret);
+ WARN_ON(!list_empty(&vm->active_list));
+
+ /* This is going to blow up badly if the mm is unclean */
+ if (WARN_ON(!list_empty(&ppgtt->base.mm.head_node.node_list))) {
+ /* TODO: go down kicking and screaming harder++ */
}
ppgtt->base.cleanup(&ppgtt->base);