diff mbox

[2/4] drm/i915: Fix context/engine cleanup order

Message ID 1453405067-32890-3-git-send-email-david.s.gordon@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Dave Gordon Jan. 21, 2016, 7:37 p.m. UTC
From: Nick Hoath <nicholas.hoath@intel.com>

Swap the order of context & engine cleanup, so that contexts are cleaned
up first, and *then* engines. This is a more sensible order anyway, but
in particular has become necessary since the 'intel_ring_initialized()
must be simple and inline' patch, which now uses ring->dev as an
'initialised' flag, so it can now be NULL after engine teardown. This
in turn can cause a problem in the context code, which (used to) check
the ring->dev->struct_mutex -- causing a fault if ring->dev was NULL.

Also rename the cleanup function to reflect what it actually does
(cleanup engines, not a ringbuffer), and fix an annoying whitespace issue.

v2: Also make the fix in i915_load_modeset_init, not just in
    i915_driver_unload (Chris Wilson)
v3: Had extra stuff in it.
v4: Reverted extra stuff (so we're back to v2).
    Rebased and updated commentary above (Dave Gordon).

Signed-off-by: Nick Hoath <nicholas.hoath@intel.com>
Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v2)

Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Chris Wilson <chris@chris-wilson.co.uk>

---
 drivers/gpu/drm/i915/i915_dma.c |  4 ++--
 drivers/gpu/drm/i915/i915_drv.h |  2 +-
 drivers/gpu/drm/i915/i915_gem.c | 23 ++++++++++++-----------
 3 files changed, 15 insertions(+), 14 deletions(-)

Comments

Daniel Vetter Jan. 25, 2016, 6:09 p.m. UTC | #1
On Thu, Jan 21, 2016 at 07:37:45PM +0000, Dave Gordon wrote:
> From: Nick Hoath <nicholas.hoath@intel.com>
> 
> Swap the order of context & engine cleanup, so that contexts are cleaned
> up first, and *then* engines. This is a more sensible order anyway, but
> in particular has become necessary since the 'intel_ring_initialized()
> must be simple and inline' patch, which now uses ring->dev as an
> 'initialised' flag, so it can now be NULL after engine teardown. This
> in turn can cause a problem in the context code, which (used to) check
> the ring->dev->struct_mutex -- causing a fault if ring->dev was NULL.
> 
> Also rename the cleanup function to reflect what it actually does
> (cleanup engines, not a ringbuffer), and fix an annoying whitespace issue.
> 
> v2: Also make the fix in i915_load_modeset_init, not just in
>     i915_driver_unload (Chris Wilson)
> v3: Had extra stuff in it.
> v4: Reverted extra stuff (so we're back to v2).
>     Rebased and updated commentary above (Dave Gordon).
> 
> Signed-off-by: Nick Hoath <nicholas.hoath@intel.com>
> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v2)

Trusting that v2==v4 and applied it.
-Daniel

> 
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> 
> ---
>  drivers/gpu/drm/i915/i915_dma.c |  4 ++--
>  drivers/gpu/drm/i915/i915_drv.h |  2 +-
>  drivers/gpu/drm/i915/i915_gem.c | 23 ++++++++++++-----------
>  3 files changed, 15 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index d70d96f..4725e8d 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -451,8 +451,8 @@ static int i915_load_modeset_init(struct drm_device *dev)
>  
>  cleanup_gem:
>  	mutex_lock(&dev->struct_mutex);
> -	i915_gem_cleanup_ringbuffer(dev);
>  	i915_gem_context_fini(dev);
> +	i915_gem_cleanup_engines(dev);
>  	mutex_unlock(&dev->struct_mutex);
>  cleanup_irq:
>  	intel_guc_ucode_fini(dev);
> @@ -1196,8 +1196,8 @@ int i915_driver_unload(struct drm_device *dev)
>  
>  	intel_guc_ucode_fini(dev);
>  	mutex_lock(&dev->struct_mutex);
> -	i915_gem_cleanup_ringbuffer(dev);
>  	i915_gem_context_fini(dev);
> +	i915_gem_cleanup_engines(dev);
>  	mutex_unlock(&dev->struct_mutex);
>  	intel_fbc_cleanup_cfb(dev_priv);
>  	i915_gem_cleanup_stolen(dev);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 204661f..021e88a 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3012,7 +3012,7 @@ int i915_gem_init_rings(struct drm_device *dev);
>  int __must_check i915_gem_init_hw(struct drm_device *dev);
>  int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice);
>  void i915_gem_init_swizzling(struct drm_device *dev);
> -void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
> +void i915_gem_cleanup_engines(struct drm_device *dev);
>  int __must_check i915_gpu_idle(struct drm_device *dev);
>  int __must_check i915_gem_suspend(struct drm_device *dev);
>  void __i915_add_request(struct drm_i915_gem_request *req,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 371bbb2..799a53a 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4912,7 +4912,7 @@ int i915_gem_init_rings(struct drm_device *dev)
>  		req = i915_gem_request_alloc(ring, NULL);
>  		if (IS_ERR(req)) {
>  			ret = PTR_ERR(req);
> -			i915_gem_cleanup_ringbuffer(dev);
> +			i915_gem_cleanup_engines(dev);
>  			goto out;
>  		}
>  
> @@ -4925,7 +4925,7 @@ int i915_gem_init_rings(struct drm_device *dev)
>  		if (ret && ret != -EIO) {
>  			DRM_ERROR("PPGTT enable ring #%d failed %d\n", i, ret);
>  			i915_gem_request_cancel(req);
> -			i915_gem_cleanup_ringbuffer(dev);
> +			i915_gem_cleanup_engines(dev);
>  			goto out;
>  		}
>  
> @@ -4933,7 +4933,7 @@ int i915_gem_init_rings(struct drm_device *dev)
>  		if (ret && ret != -EIO) {
>  			DRM_ERROR("Context enable ring #%d failed %d\n", i, ret);
>  			i915_gem_request_cancel(req);
> -			i915_gem_cleanup_ringbuffer(dev);
> +			i915_gem_cleanup_engines(dev);
>  			goto out;
>  		}
>  
> @@ -5008,7 +5008,7 @@ int i915_gem_init(struct drm_device *dev)
>  }
>  
>  void
> -i915_gem_cleanup_ringbuffer(struct drm_device *dev)
> +i915_gem_cleanup_engines(struct drm_device *dev)
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct intel_engine_cs *ring;
> @@ -5017,13 +5017,14 @@ int i915_gem_init(struct drm_device *dev)
>  	for_each_ring(ring, dev_priv, i)
>  		dev_priv->gt.cleanup_ring(ring);
>  
> -    if (i915.enable_execlists)
> -            /*
> -             * Neither the BIOS, ourselves or any other kernel
> -             * expects the system to be in execlists mode on startup,
> -             * so we need to reset the GPU back to legacy mode.
> -             */
> -            intel_gpu_reset(dev);
> +	if (i915.enable_execlists) {
> +		/*
> +		 * Neither the BIOS, ourselves or any other kernel
> +		 * expects the system to be in execlists mode on startup,
> +		 * so we need to reset the GPU back to legacy mode.
> +		 */
> +		intel_gpu_reset(dev);
> +	}
>  }
>  
>  static void
> -- 
> 1.9.1
>
Tvrtko Ursulin Jan. 26, 2016, 10:45 a.m. UTC | #2
On 21/01/16 19:37, Dave Gordon wrote:
> From: Nick Hoath <nicholas.hoath@intel.com>
>
> Swap the order of context & engine cleanup, so that contexts are cleaned
> up first, and *then* engines. This is a more sensible order anyway, but
> in particular has become necessary since the 'intel_ring_initialized()
> must be simple and inline' patch, which now uses ring->dev as an
> 'initialised' flag, so it can now be NULL after engine teardown. This
> in turn can cause a problem in the context code, which (used to) check
> the ring->dev->struct_mutex -- causing a fault if ring->dev was NULL.
>
> Also rename the cleanup function to reflect what it actually does
> (cleanup engines, not a ringbuffer), and fix an annoying whitespace issue.
>
> v2: Also make the fix in i915_load_modeset_init, not just in
>      i915_driver_unload (Chris Wilson)
> v3: Had extra stuff in it.
> v4: Reverted extra stuff (so we're back to v2).
>      Rebased and updated commentary above (Dave Gordon).
>
> Signed-off-by: Nick Hoath <nicholas.hoath@intel.com>
> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v2)
>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>
> ---
>   drivers/gpu/drm/i915/i915_dma.c |  4 ++--
>   drivers/gpu/drm/i915/i915_drv.h |  2 +-
>   drivers/gpu/drm/i915/i915_gem.c | 23 ++++++++++++-----------
>   3 files changed, 15 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index d70d96f..4725e8d 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -451,8 +451,8 @@ static int i915_load_modeset_init(struct drm_device *dev)
>
>   cleanup_gem:
>   	mutex_lock(&dev->struct_mutex);
> -	i915_gem_cleanup_ringbuffer(dev);
>   	i915_gem_context_fini(dev);
> +	i915_gem_cleanup_engines(dev);
>   	mutex_unlock(&dev->struct_mutex);
>   cleanup_irq:
>   	intel_guc_ucode_fini(dev);
> @@ -1196,8 +1196,8 @@ int i915_driver_unload(struct drm_device *dev)
>
>   	intel_guc_ucode_fini(dev);
>   	mutex_lock(&dev->struct_mutex);
> -	i915_gem_cleanup_ringbuffer(dev);
>   	i915_gem_context_fini(dev);
> +	i915_gem_cleanup_engines(dev);

context_fini assumes it is the one who is dropping the last reference to 
the default/kernel context and NULLs the pointer.

Going forward this means there is this hidden dependency that engines 
must not hold the reference to the kernel context, at least not rely on 
dev_priv->kernel_context, or we would either leak the reference or null 
ptr deref.

I don't know, somehow feels a bit fragile. But a lot of things seem 
fragile around here.

If the main motivation is to ensure access to ring->dev, then ctx->i915 
could be used from the context cleanup code. For the ordering itself I 
am not sure what is more logical. Idle GPU -> shutdown engines -> 
cleanup contexts sounds OK to me since the engines are the ones which 
can hold references to contexts. So logically cleaning up context before 
engines feels strange. But I am sure an opposite argument could also be 
made?

Regards,

Tvrtko

>   	mutex_unlock(&dev->struct_mutex);
>   	intel_fbc_cleanup_cfb(dev_priv);
>   	i915_gem_cleanup_stolen(dev);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 204661f..021e88a 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3012,7 +3012,7 @@ int i915_gem_init_rings(struct drm_device *dev);
>   int __must_check i915_gem_init_hw(struct drm_device *dev);
>   int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice);
>   void i915_gem_init_swizzling(struct drm_device *dev);
> -void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
> +void i915_gem_cleanup_engines(struct drm_device *dev);
>   int __must_check i915_gpu_idle(struct drm_device *dev);
>   int __must_check i915_gem_suspend(struct drm_device *dev);
>   void __i915_add_request(struct drm_i915_gem_request *req,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 371bbb2..799a53a 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4912,7 +4912,7 @@ int i915_gem_init_rings(struct drm_device *dev)
>   		req = i915_gem_request_alloc(ring, NULL);
>   		if (IS_ERR(req)) {
>   			ret = PTR_ERR(req);
> -			i915_gem_cleanup_ringbuffer(dev);
> +			i915_gem_cleanup_engines(dev);
>   			goto out;
>   		}
>
> @@ -4925,7 +4925,7 @@ int i915_gem_init_rings(struct drm_device *dev)
>   		if (ret && ret != -EIO) {
>   			DRM_ERROR("PPGTT enable ring #%d failed %d\n", i, ret);
>   			i915_gem_request_cancel(req);
> -			i915_gem_cleanup_ringbuffer(dev);
> +			i915_gem_cleanup_engines(dev);
>   			goto out;
>   		}
>
> @@ -4933,7 +4933,7 @@ int i915_gem_init_rings(struct drm_device *dev)
>   		if (ret && ret != -EIO) {
>   			DRM_ERROR("Context enable ring #%d failed %d\n", i, ret);
>   			i915_gem_request_cancel(req);
> -			i915_gem_cleanup_ringbuffer(dev);
> +			i915_gem_cleanup_engines(dev);
>   			goto out;
>   		}
>
> @@ -5008,7 +5008,7 @@ int i915_gem_init(struct drm_device *dev)
>   }
>
>   void
> -i915_gem_cleanup_ringbuffer(struct drm_device *dev)
> +i915_gem_cleanup_engines(struct drm_device *dev)
>   {
>   	struct drm_i915_private *dev_priv = dev->dev_private;
>   	struct intel_engine_cs *ring;
> @@ -5017,13 +5017,14 @@ int i915_gem_init(struct drm_device *dev)
>   	for_each_ring(ring, dev_priv, i)
>   		dev_priv->gt.cleanup_ring(ring);
>
> -    if (i915.enable_execlists)
> -            /*
> -             * Neither the BIOS, ourselves or any other kernel
> -             * expects the system to be in execlists mode on startup,
> -             * so we need to reset the GPU back to legacy mode.
> -             */
> -            intel_gpu_reset(dev);
> +	if (i915.enable_execlists) {
> +		/*
> +		 * Neither the BIOS, ourselves or any other kernel
> +		 * expects the system to be in execlists mode on startup,
> +		 * so we need to reset the GPU back to legacy mode.
> +		 */
> +		intel_gpu_reset(dev);
> +	}
>   }
>
>   static void
>
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index d70d96f..4725e8d 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -451,8 +451,8 @@  static int i915_load_modeset_init(struct drm_device *dev)
 
 cleanup_gem:
 	mutex_lock(&dev->struct_mutex);
-	i915_gem_cleanup_ringbuffer(dev);
 	i915_gem_context_fini(dev);
+	i915_gem_cleanup_engines(dev);
 	mutex_unlock(&dev->struct_mutex);
 cleanup_irq:
 	intel_guc_ucode_fini(dev);
@@ -1196,8 +1196,8 @@  int i915_driver_unload(struct drm_device *dev)
 
 	intel_guc_ucode_fini(dev);
 	mutex_lock(&dev->struct_mutex);
-	i915_gem_cleanup_ringbuffer(dev);
 	i915_gem_context_fini(dev);
+	i915_gem_cleanup_engines(dev);
 	mutex_unlock(&dev->struct_mutex);
 	intel_fbc_cleanup_cfb(dev_priv);
 	i915_gem_cleanup_stolen(dev);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 204661f..021e88a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3012,7 +3012,7 @@  int i915_gem_init_rings(struct drm_device *dev);
 int __must_check i915_gem_init_hw(struct drm_device *dev);
 int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice);
 void i915_gem_init_swizzling(struct drm_device *dev);
-void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
+void i915_gem_cleanup_engines(struct drm_device *dev);
 int __must_check i915_gpu_idle(struct drm_device *dev);
 int __must_check i915_gem_suspend(struct drm_device *dev);
 void __i915_add_request(struct drm_i915_gem_request *req,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 371bbb2..799a53a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4912,7 +4912,7 @@  int i915_gem_init_rings(struct drm_device *dev)
 		req = i915_gem_request_alloc(ring, NULL);
 		if (IS_ERR(req)) {
 			ret = PTR_ERR(req);
-			i915_gem_cleanup_ringbuffer(dev);
+			i915_gem_cleanup_engines(dev);
 			goto out;
 		}
 
@@ -4925,7 +4925,7 @@  int i915_gem_init_rings(struct drm_device *dev)
 		if (ret && ret != -EIO) {
 			DRM_ERROR("PPGTT enable ring #%d failed %d\n", i, ret);
 			i915_gem_request_cancel(req);
-			i915_gem_cleanup_ringbuffer(dev);
+			i915_gem_cleanup_engines(dev);
 			goto out;
 		}
 
@@ -4933,7 +4933,7 @@  int i915_gem_init_rings(struct drm_device *dev)
 		if (ret && ret != -EIO) {
 			DRM_ERROR("Context enable ring #%d failed %d\n", i, ret);
 			i915_gem_request_cancel(req);
-			i915_gem_cleanup_ringbuffer(dev);
+			i915_gem_cleanup_engines(dev);
 			goto out;
 		}
 
@@ -5008,7 +5008,7 @@  int i915_gem_init(struct drm_device *dev)
 }
 
 void
-i915_gem_cleanup_ringbuffer(struct drm_device *dev)
+i915_gem_cleanup_engines(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_engine_cs *ring;
@@ -5017,13 +5017,14 @@  int i915_gem_init(struct drm_device *dev)
 	for_each_ring(ring, dev_priv, i)
 		dev_priv->gt.cleanup_ring(ring);
 
-    if (i915.enable_execlists)
-            /*
-             * Neither the BIOS, ourselves or any other kernel
-             * expects the system to be in execlists mode on startup,
-             * so we need to reset the GPU back to legacy mode.
-             */
-            intel_gpu_reset(dev);
+	if (i915.enable_execlists) {
+		/*
+		 * Neither the BIOS, ourselves or any other kernel
+		 * expects the system to be in execlists mode on startup,
+		 * so we need to reset the GPU back to legacy mode.
+		 */
+		intel_gpu_reset(dev);
+	}
 }
 
 static void