@@ -1877,6 +1877,19 @@ void i915_reset(struct drm_i915_private *dev_priv)
goto finish;
}
+/**
+ * i915_reset_engine - reset GPU engine to recover from a hang
+ * @engine: engine to reset
+ *
+ * Reset a specific GPU engine. Useful if a hang is detected.
+ * Returns zero on successful reset or otherwise an error code.
+ */
+int i915_reset_engine(struct intel_engine_cs *engine)
+{
+ /* FIXME: replace me with engine reset sequence */
+ return -ENODEV;
+}
+
static int i915_pm_suspend(struct device *kdev)
{
struct pci_dev *pdev = to_pci_dev(kdev);
@@ -705,6 +705,7 @@ struct intel_csr {
func(has_ddi); \
func(has_decoupled_mmio); \
func(has_dp_mst); \
+ func(has_reset_engine); \
func(has_fbc); \
func(has_fpga_dbg); \
func(has_full_ppgtt); \
@@ -1498,6 +1499,10 @@ struct i915_gpu_error {
* inspect the bit and do the reset directly, otherwise the worker
* waits for the struct_mutex.
*
+ * #I915_RESET_ENGINE_IN_PROGRESS - Since the driver doesn't need to
+ * acquire the struct_mutex to reset an engine, we need an explicit
+ * flag to prevent two concurrent reset-engine attempts.
+ *
* #I915_WEDGED - If reset fails and we can no longer use the GPU,
* we set the #I915_WEDGED bit. Prior to command submission, e.g.
* i915_gem_request_alloc(), this bit is checked and the sequence
@@ -1506,6 +1511,7 @@ struct i915_gpu_error {
unsigned long flags;
#define I915_RESET_BACKOFF 0
#define I915_RESET_HANDOFF 1
+#define I915_RESET_ENGINE_IN_PROGRESS 2
#define I915_WEDGED (BITS_PER_LONG - 1)
/**
@@ -2989,6 +2995,8 @@ extern void i915_driver_unload(struct drm_device *dev);
extern int intel_gpu_reset(struct drm_i915_private *dev_priv, u32 engine_mask);
extern bool intel_has_gpu_reset(struct drm_i915_private *dev_priv);
extern void i915_reset(struct drm_i915_private *dev_priv);
+extern int i915_reset_engine(struct intel_engine_cs *engine);
+extern bool intel_has_reset_engine(struct drm_i915_private *dev_priv);
extern int intel_guc_reset(struct drm_i915_private *dev_priv);
extern void intel_engine_init_hangcheck(struct intel_engine_cs *engine);
extern void intel_hangcheck_init(struct drm_i915_private *dev_priv);
@@ -2741,6 +2741,30 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
if (!engine_mask)
goto out;
+ /* try engine reset first, and continue if fails */
+ if (intel_has_reset_engine(dev_priv)) {
+ struct intel_engine_cs *engine;
+ unsigned int tmp;
+
+ /* protect against concurrent reset attempts */
+ if (test_and_set_bit(I915_RESET_ENGINE_IN_PROGRESS,
+ &dev_priv->gpu_error.flags))
+ goto out;
+
+ for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
+ if (i915_reset_engine(engine) == 0)
+ engine_mask &= ~intel_engine_flag(engine);
+ }
+
+ /* clear unconditionally, full reset won't care about it */
+ clear_bit(I915_RESET_ENGINE_IN_PROGRESS,
+ &dev_priv->gpu_error.flags);
+
+ if (!engine_mask)
+ goto out;
+ }
+
+ /* full reset needs the mutex, stop any other user trying to do so */
if (test_and_set_bit(I915_RESET_BACKOFF,
&dev_priv->gpu_error.flags))
goto out;
@@ -310,7 +310,8 @@ static const struct intel_device_info intel_haswell_info = {
BDW_COLORS, \
.has_logical_ring_contexts = 1, \
.has_full_48bit_ppgtt = 1, \
- .has_64bit_reloc = 1
+ .has_64bit_reloc = 1, \
+ .has_reset_engine = 1
static const struct intel_device_info intel_broadwell_info = {
BDW_FEATURES,
@@ -341,6 +342,7 @@ static const struct intel_device_info intel_cherryview_info = {
.has_gmch_display = 1,
.has_aliasing_ppgtt = 1,
.has_full_ppgtt = 1,
+ .has_reset_engine = 1,
.display_mmio_offset = VLV_DISPLAY_BASE,
GEN_CHV_PIPEOFFSETS,
CURSOR_OFFSETS,
@@ -389,6 +391,7 @@ static const struct intel_device_info intel_skylake_gt3_info = {
.has_aliasing_ppgtt = 1, \
.has_full_ppgtt = 1, \
.has_full_48bit_ppgtt = 1, \
+ .has_reset_engine = 1, \
GEN_DEFAULT_PIPEOFFSETS, \
IVB_CURSOR_OFFSETS, \
BDW_COLORS
@@ -1776,6 +1776,17 @@ bool intel_has_gpu_reset(struct drm_i915_private *dev_priv)
return intel_get_gpu_reset(dev_priv) != NULL;
}
+/*
+ * When GuC submission is enabled, GuC manages ELSP and can initiate the
+ * engine reset too. For now, fall back to full GPU reset if it is enabled.
+ */
+bool intel_has_reset_engine(struct drm_i915_private *dev_priv)
+{
+ return (dev_priv->info.has_reset_engine &&
+ !dev_priv->guc.execbuf_client &&
+ i915.reset >= 2);
+}
+
int intel_guc_reset(struct drm_i915_private *dev_priv)
{
int ret;