@@ -1848,6 +1848,7 @@ struct drm_i915_private {
/* hda/i915 audio component */
bool audio_component_registered;
+ bool contexts_ready;
uint32_t hw_context_size;
struct list_head context_list;
@@ -2637,6 +2638,7 @@ void i915_queue_hangcheck(struct drm_device *dev);
__printf(3, 4)
void i915_handle_error(struct drm_device *dev, bool wedged,
const char *fmt, ...);
+void i915_handle_guc_error(struct drm_device *dev, int err);
extern void intel_irq_init(struct drm_i915_private *dev_priv);
int intel_irq_install(struct drm_i915_private *dev_priv);
@@ -5121,9 +5121,15 @@ i915_gem_init_hw(struct drm_device *dev)
}
/* We can't enable contexts until all firmware is loaded */
- ret = intel_guc_ucode_load(dev, true);
+ ret = intel_guc_ucode_load(dev, false);
+ if (ret == -EAGAIN) {
+ ret = 0;
+ goto out; /* too early */
+ }
ret = i915_gem_init_hw_late(dev);
+ if (ret == 0)
+ dev_priv->contexts_ready = true;
out:
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
@@ -438,23 +438,65 @@ static int context_idr_cleanup(int id, void *p, void *data)
return 0;
}
+/* Complete any late initialisation; called on first open, struct_mutex held */
+static int i915_gem_context_first_open(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int ret;
+
+	/*
+	 * We can't enable contexts until all firmware is loaded. This
+	 * call shouldn't return -EAGAIN because we pass wait=true, but
+	 * it can still fail with code -EIO if the GuC doesn't respond,
+	 * or -ENOEXEC if the GuC firmware image is invalid.
+	 */
+	ret = intel_guc_ucode_load(dev, true);
+	WARN_ON(ret == -EAGAIN);
+
+	/*
+	 * If an error occurred and GuC submission has been requested, we can
+	 * attempt recovery by disabling GuC submission and reinitialising
+	 * the GPU and driver. We then fail this open() anyway, but the next
+	 * attempt will find that GuC submission is already disabled, and so
+	 * proceed to complete context initialisation in non-GuC mode instead.
+	 */
+	if (ret && i915.enable_guc_submission) {
+		i915_handle_guc_error(dev, ret);
+		return ret;
+	}
+
+	ret = i915_gem_init_hw_late(dev);
+	if (ret == 0)
+		dev_priv->contexts_ready = true;
+	return ret;
+}
+
int i915_gem_context_open(struct drm_device *dev, struct drm_file *file)
{
+	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct intel_context *ctx;
+	int ret = 0;
	idr_init(&file_priv->context_idr);
	mutex_lock(&dev->struct_mutex);
-	ctx = i915_gem_create_context(dev, file_priv);
+	/* First open since load/reset: complete deferred initialisation */
+	if (!dev_priv->contexts_ready)
+		ret = i915_gem_context_first_open(dev);
+
+	if (ret == 0) {
+		ctx = i915_gem_create_context(dev, file_priv);
+		if (IS_ERR(ctx))
+			ret = PTR_ERR(ctx);
+	}
+
	mutex_unlock(&dev->struct_mutex);
-	if (IS_ERR(ctx)) {
+	if (ret)
		idr_destroy(&file_priv->context_idr);
-		return PTR_ERR(ctx);
-	}
-	return 0;
+	return ret;
}
void i915_gem_context_close(struct drm_device *dev, struct drm_file *file)
@@ -2374,6 +2374,54 @@ void i915_handle_error(struct drm_device *dev, bool wedged,
i915_reset_and_wakeup(dev);
}
+/**
+ * i915_handle_guc_error - handle a GuC error
+ * @dev: drm device
+ * @err: negative error code from loading/initialising the GuC
+ *
+ * If the GuC can't be (re-)initialised, disable GuC submission and
+ * then reset and reinitialise the rest of the GPU, so that we can
+ * fall back to operating in ELSP mode. Don't bother capturing error
+ * state, because it probably isn't relevant here.
+ *
+ * Unlike i915_handle_error() above, this is called with the global
+ * struct_mutex held, so we need to release it after setting the
+ * reset-in-progress bit so that other threads can make progress,
+ * and reacquire it after the reset is complete.
+ */
+void i915_handle_guc_error(struct drm_device *dev, int err)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	DRM_ERROR("GuC failure %d, disabling GuC submission\n", err);
+	i915.enable_guc_submission = false;
+
+	i915_report_and_clear_eir(dev); /* clear any stale EIR state */
+
+	atomic_set_mask(I915_RESET_IN_PROGRESS_FLAG,
+			&dev_priv->gpu_error.reset_counter);
+	mutex_unlock(&dev->struct_mutex);
+
+	/*
+	 * Wakeup waiting processes so that the reset function
+	 * i915_reset_and_wakeup doesn't deadlock trying to grab
+	 * various locks. By bumping the reset counter first, the woken
+	 * processes will see a reset in progress and back off,
+	 * releasing their locks and then wait for the reset completion.
+	 * We must do this for _all_ gpu waiters that might hold locks
+	 * that the reset work needs to acquire.
+	 *
+	 * Note: The wake_up serves as the required memory barrier to
+	 * ensure that the waiters see the updated value of the reset
+	 * counter atomic_t.
+	 */
+	i915_error_wake_up(dev_priv, false);
+
+	i915_reset_and_wakeup(dev);
+
+	mutex_lock(&dev->struct_mutex);
+}
+
/* Called from drm generic code, passed 'crtc' which
* we use as a pipe index
*/