diff mbox

[v2] drm/i915: Replaced Blitter ring based flips with MMIO flips for VLV

Message ID 1394701297-7261-1-git-send-email-sourab.gupta@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

sourab.gupta@intel.com March 13, 2014, 9:01 a.m. UTC
From: Sourab Gupta <sourab.gupta@intel.com>

Using MMIO based flips on VLV for Media power well residency optimization.
The blitter ring is currently being used just for command streamer based
flip calls. For pure 3D workloads, with MMIO flips, there will be no use
of blitter ring and this will ensure the 100% residency for Media well.

v2: The MMIO flips now use the interrupt driven mechanism for issuing the
flips when target seqno is reached. (Incorporating Ville's idea)

Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>

Cc: Daniel Vetter <daniel@ffwl.ch>

Cc: Chris Wilson <chris@chris-wilson.co.uk>

Signed-off-by: Sourab Gupta <sourab.gupta@intel.com>

Signed-off-by: Akash Goel <akash.goel@intel.com>
---
 drivers/gpu/drm/i915/i915_dma.c      |   1 +
 drivers/gpu/drm/i915/i915_drv.h      |  13 ++++
 drivers/gpu/drm/i915/i915_irq.c      |   2 +
 drivers/gpu/drm/i915/intel_display.c | 125 +++++++++++++++++++++++++++++++++++
 4 files changed, 141 insertions(+)

Comments

Lespiau, Damien March 21, 2014, 6:15 p.m. UTC | #1
On Thu, Mar 13, 2014 at 02:31:37PM +0530, sourab.gupta@intel.com wrote:
> From: Sourab Gupta <sourab.gupta@intel.com>
> 
> Using MMIO based flips on VLV for Media power well residency optimization.
> The blitter ring is currently being used just for command streamer based
> flip calls. For pure 3D workloads, with MMIO flips, there will be no use
> of blitter ring and this will ensure the 100% residency for Media well.
> 
> v2: The MMIO flips now use the interrupt driven mechanism for issuing the
> flips when target seqno is reached. (Incorporating Ville's idea)
> 
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> 
> Cc: Daniel Vetter <daniel@ffwl.ch>
> 
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> 
> Signed-off-by: Sourab Gupta <sourab.gupta@intel.com>
> 
> Signed-off-by: Akash Goel <akash.goel@intel.com>

A light pass that doesn't actually look much at the correctness.
Submitting patches with obvious trivial issues creates reviewing
overhead. scripts/checkpatch.pl can catch a lot of small issues.

Also note that someone from your team can also review patches, it's an
interesting exercise and would help moving the work forward instead of
waiting.

> ---
>  drivers/gpu/drm/i915/i915_dma.c      |   1 +
>  drivers/gpu/drm/i915/i915_drv.h      |  13 ++++
>  drivers/gpu/drm/i915/i915_irq.c      |   2 +
>  drivers/gpu/drm/i915/intel_display.c | 125 +++++++++++++++++++++++++++++++++++
>  4 files changed, 141 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index e4d2b9f..d6ae334 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1566,6 +1566,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
>  	spin_lock_init(&dev_priv->backlight_lock);
>  	spin_lock_init(&dev_priv->uncore.lock);
>  	spin_lock_init(&dev_priv->mm.object_stat_lock);
> +	spin_lock_init(&dev_priv->flip_lock);

Let's try to be more descriptive: mmio_flip_lock. One should be able to
understand what a variable does from its name. This is not a generic
"flip lock" at this point.

>  	mutex_init(&dev_priv->dpio_lock);
>  	mutex_init(&dev_priv->modeset_restore_lock);
>  
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index a0d90ef..af35197 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1436,6 +1436,12 @@ struct intel_pipe_crc {
>  	wait_queue_head_t wq;
>  };
>  
> +struct i915_flip_data {
> +	struct drm_crtc *crtc;
> +	u32 seqno;
> +	u32 ring_id;
> +};
> +
>  typedef struct drm_i915_private {
>  	struct drm_device *dev;
>  	struct kmem_cache *slab;
> @@ -1643,6 +1649,11 @@ typedef struct drm_i915_private {
>  	struct i915_ums_state ums;
>  
>  	u32 suspend_count;
> +
> +	/* protects the flip_data */
> +	spinlock_t flip_lock;
> +
> +	struct i915_flip_data	flip_data[I915_MAX_PIPES];

If we need one of those per-pipe, why not put that structure on the
CRTC? Having to write a per-pipe array like this is a good hint that
this data belongs to the CRTC object.

We try to reserve the i915 prefix for GT stuff. It's also related to
mmio flip, so how about calling it intel_mmio_flip?

>  } drm_i915_private_t;
>  
>  static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
> @@ -2681,6 +2692,8 @@ int i915_reg_read_ioctl(struct drm_device *dev, void *data,
>  int i915_get_reset_stats_ioctl(struct drm_device *dev, void *data,
>  			       struct drm_file *file);
>  
> +void intel_notify_mmio_flip(struct drm_device *dev,
> +			struct intel_ring_buffer *ring);
>  /* overlay */
>  extern struct intel_overlay_error_state *intel_overlay_capture_error_state(struct drm_device *dev);
>  extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e,
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index be2713f..9b2007e 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1062,6 +1062,8 @@ static void ironlake_rps_change_irq_handler(struct drm_device *dev)
>  static void notify_ring(struct drm_device *dev,
>  			struct intel_ring_buffer *ring)
>  {
> +	intel_notify_mmio_flip(dev, ring);
> +
>  	if (ring->obj == NULL)
>  		return;
  
This looks like the wrong place to put it. It should be after that check,
and also after the trace point, so that someone tracing the events doesn't
see the mmio flip complete before the request_complete event.

> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 2bccc68..8bd2f57 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -8813,6 +8813,122 @@ err:
>  	return ret;
>  }
>  
> +static void intel_do_mmio_flip(struct drm_crtc *crtc)
> +{
> +	struct intel_crtc *intel_crtc;
> +
> +	intel_crtc = to_intel_crtc(crtc) ;
> +
> +	intel_mark_page_flip_active(intel_crtc);
> +	i9xx_update_plane(crtc, crtc->fb, 0, 0);

This function has changed name. Could you please rebase your patch
against -nightly?

Also let's try to be more generic here by calling the
->update_primary_plane() vfunc.

> +}
> +
> +static bool intel_postpone_flip(struct drm_i915_gem_object *obj)
> +{
> +	int ret;

Please add a blank line between the variable declaration and the code. You
have various whitespace issues you need to fix (use ./scripts/checkpatch.pl
to get the list).

> +	if(!obj->ring)
> +		return false;
> +
> +	if (i915_seqno_passed(obj->ring->get_seqno(obj->ring, false),
> +			      obj->last_write_seqno))
> +		return false;
> +
> +	if (obj->last_write_seqno == obj->ring->outstanding_lazy_seqno) {
> +		ret = i915_add_request(obj->ring, NULL);
> +		if(WARN_ON(ret))
> +			return false;
> +	}
> +
> +	if(WARN_ON(!obj->ring->irq_get(obj->ring)))
> +		return false;
> +
> +	return true;
> +}
> +
> +void intel_notify_mmio_flip(struct drm_device *dev,
> +			struct intel_ring_buffer *ring)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct intel_crtc *intel_crtc;
> +	struct i915_flip_data *flip_data;
> +	unsigned long irq_flags;
> +	u32 seqno, count;
> +
> +	BUG_ON(!ring);
> +
> +	seqno = ring->get_seqno(ring, false);
> +
> +	spin_lock_irqsave(&dev_priv->flip_lock, irq_flags);
> +
> +	for(count=0;count<I915_MAX_PIPES;count++) {

Use for_each_pipe(). I915_MAX_PIPES is the maximum number of pipes across
all platforms, which is not what you want when looping over the pipes that
actually exist on the device.

> +		flip_data =  &(dev_priv->flip_data[count]);
> +		intel_crtc = to_intel_crtc(flip_data->crtc);
> +		if ((flip_data->seqno != 0) &&
> +				(ring->id == flip_data->ring_id) &&
> +				( seqno >= flip_data->seqno ) ) {
> +			/*FIXME: Can move do_mmio_flip out of spinlock protection */

There's a FIXME here, what is its status?

> +			intel_do_mmio_flip(flip_data->crtc);
> +			flip_data->seqno = 0;
> +			ring->irq_put(ring);
> +		}
> +	}
> +	spin_unlock_irqrestore(&dev_priv->flip_lock, irq_flags);
> +}
> +
> +/* Using MMIO based flips starting from VLV, for Media power well
> + * residency optimization. The other alternative of having Render
> + * ring based flip calls is not being used, as the performance
> + * (FPS) of certain 3D Apps was getting severly affected.
> + */
> +static int intel_gen7_queue_mmio_flip(struct drm_device *dev,
> +			struct drm_crtc *crtc,
> +			struct drm_framebuffer *fb,
> +			struct drm_i915_gem_object *obj,
> +			uint32_t flags)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> +	struct i915_flip_data *flip_data = &(dev_priv->flip_data[intel_crtc->pipe]);

No need for the pair of () here (and elsewhere in the patch).

> +	unsigned long irq_flags;
> +	int ret;
> +
> +	ret = intel_pin_and_fence_fb_obj(dev, obj, obj->ring);
> +	if (ret)
> +		goto err;
> +
> +	switch (intel_crtc->plane) {
> +	case PLANE_A:
> +	case PLANE_B:
> +	case PLANE_C:
> +	break;
> +	default:
> +		WARN_ONCE(1, "unknown plane in flip command\n");
> +		ret = -ENODEV;
> +		goto err_unpin;
> +	}
> +
> +	if(!intel_postpone_flip(obj)) {
> +		intel_do_mmio_flip(crtc);
> +		return 0;
> +	}
> +
> +	spin_lock_irqsave(&dev_priv->flip_lock, irq_flags);
> +	flip_data->seqno = obj->last_write_seqno;
> +	flip_data->ring_id = obj->ring->id;
> +	spin_unlock_irqrestore(&dev_priv->flip_lock, irq_flags);
> +
> +	/* Double check to catch cases where irq fired before
> +	 * flip data was ready
> +	 */
> +	intel_notify_mmio_flip(dev, obj->ring);
> +	return 0;
> +
> +err_unpin:
> +	intel_unpin_fb_obj(obj);
> +err:
> +	return ret;
> +}
> +
>  static int intel_gen7_queue_flip(struct drm_device *dev,
>  				 struct drm_crtc *crtc,
>  				 struct drm_framebuffer *fb,
> @@ -10581,6 +10697,12 @@ static void intel_crtc_init(struct drm_device *dev, int pipe)
>  	dev_priv->pipe_to_crtc_mapping[intel_crtc->pipe] = &intel_crtc->base;
>  
>  	drm_crtc_helper_add(&intel_crtc->base, &intel_helper_funcs);
> +
> +	if (IS_VALLEYVIEW(dev)) {
> +			dev_priv->flip_data[pipe].crtc =
> +				dev_priv->pipe_to_crtc_mapping[pipe];
> +			dev_priv->flip_data[pipe].seqno = 0;
> +	}
>  }
>  
>  enum pipe intel_get_pipe_from_connector(struct intel_connector *connector)
> @@ -11103,6 +11225,9 @@ static void intel_init_display(struct drm_device *dev)
>  		dev_priv->display.queue_flip = intel_gen7_queue_flip;
>  		break;
>  	}
> +	if (IS_VALLEYVIEW(dev)) {
> +		dev_priv->display.queue_flip = intel_gen7_queue_mmio_flip;
> +	}
>  
>  	intel_panel_init_backlight_funcs(dev);
>  }
> -- 
> 1.8.5.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index e4d2b9f..d6ae334 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1566,6 +1566,7 @@  int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	spin_lock_init(&dev_priv->backlight_lock);
 	spin_lock_init(&dev_priv->uncore.lock);
 	spin_lock_init(&dev_priv->mm.object_stat_lock);
+	spin_lock_init(&dev_priv->flip_lock);
 	mutex_init(&dev_priv->dpio_lock);
 	mutex_init(&dev_priv->modeset_restore_lock);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a0d90ef..af35197 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1436,6 +1436,12 @@  struct intel_pipe_crc {
 	wait_queue_head_t wq;
 };
 
+struct i915_flip_data {
+	struct drm_crtc *crtc;
+	u32 seqno;
+	u32 ring_id;
+};
+
 typedef struct drm_i915_private {
 	struct drm_device *dev;
 	struct kmem_cache *slab;
@@ -1643,6 +1649,11 @@  typedef struct drm_i915_private {
 	struct i915_ums_state ums;
 
 	u32 suspend_count;
+
+	/* protects the flip_data */
+	spinlock_t flip_lock;
+
+	struct i915_flip_data	flip_data[I915_MAX_PIPES];
 } drm_i915_private_t;
 
 static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
@@ -2681,6 +2692,8 @@  int i915_reg_read_ioctl(struct drm_device *dev, void *data,
 int i915_get_reset_stats_ioctl(struct drm_device *dev, void *data,
 			       struct drm_file *file);
 
+void intel_notify_mmio_flip(struct drm_device *dev,
+			struct intel_ring_buffer *ring);
 /* overlay */
 extern struct intel_overlay_error_state *intel_overlay_capture_error_state(struct drm_device *dev);
 extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e,
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index be2713f..9b2007e 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1062,6 +1062,8 @@  static void ironlake_rps_change_irq_handler(struct drm_device *dev)
 static void notify_ring(struct drm_device *dev,
 			struct intel_ring_buffer *ring)
 {
+	intel_notify_mmio_flip(dev, ring);
+
 	if (ring->obj == NULL)
 		return;
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 2bccc68..8bd2f57 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -8813,6 +8813,122 @@  err:
 	return ret;
 }
 
+static void intel_do_mmio_flip(struct drm_crtc *crtc)
+{
+	struct intel_crtc *intel_crtc;
+
+	intel_crtc = to_intel_crtc(crtc) ;
+
+	intel_mark_page_flip_active(intel_crtc);
+	i9xx_update_plane(crtc, crtc->fb, 0, 0);
+}
+
+static bool intel_postpone_flip(struct drm_i915_gem_object *obj)
+{
+	int ret;
+	if(!obj->ring)
+		return false;
+
+	if (i915_seqno_passed(obj->ring->get_seqno(obj->ring, false),
+			      obj->last_write_seqno))
+		return false;
+
+	if (obj->last_write_seqno == obj->ring->outstanding_lazy_seqno) {
+		ret = i915_add_request(obj->ring, NULL);
+		if(WARN_ON(ret))
+			return false;
+	}
+
+	if(WARN_ON(!obj->ring->irq_get(obj->ring)))
+		return false;
+
+	return true;
+}
+
+void intel_notify_mmio_flip(struct drm_device *dev,
+			struct intel_ring_buffer *ring)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc;
+	struct i915_flip_data *flip_data;
+	unsigned long irq_flags;
+	u32 seqno, count;
+
+	BUG_ON(!ring);
+
+	seqno = ring->get_seqno(ring, false);
+
+	spin_lock_irqsave(&dev_priv->flip_lock, irq_flags);
+
+	for(count=0;count<I915_MAX_PIPES;count++) {
+		flip_data =  &(dev_priv->flip_data[count]);
+		intel_crtc = to_intel_crtc(flip_data->crtc);
+		if ((flip_data->seqno != 0) &&
+				(ring->id == flip_data->ring_id) &&
+				( seqno >= flip_data->seqno ) ) {
+			/*FIXME: Can move do_mmio_flip out of spinlock protection */
+			intel_do_mmio_flip(flip_data->crtc);
+			flip_data->seqno = 0;
+			ring->irq_put(ring);
+		}
+	}
+	spin_unlock_irqrestore(&dev_priv->flip_lock, irq_flags);
+}
+
+/* Using MMIO based flips starting from VLV, for Media power well
+ * residency optimization. The other alternative of having Render
+ * ring based flip calls is not being used, as the performance
+ * (FPS) of certain 3D Apps was getting severly affected.
+ */
+static int intel_gen7_queue_mmio_flip(struct drm_device *dev,
+			struct drm_crtc *crtc,
+			struct drm_framebuffer *fb,
+			struct drm_i915_gem_object *obj,
+			uint32_t flags)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct i915_flip_data *flip_data = &(dev_priv->flip_data[intel_crtc->pipe]);
+	unsigned long irq_flags;
+	int ret;
+
+	ret = intel_pin_and_fence_fb_obj(dev, obj, obj->ring);
+	if (ret)
+		goto err;
+
+	switch (intel_crtc->plane) {
+	case PLANE_A:
+	case PLANE_B:
+	case PLANE_C:
+	break;
+	default:
+		WARN_ONCE(1, "unknown plane in flip command\n");
+		ret = -ENODEV;
+		goto err_unpin;
+	}
+
+	if(!intel_postpone_flip(obj)) {
+		intel_do_mmio_flip(crtc);
+		return 0;
+	}
+
+	spin_lock_irqsave(&dev_priv->flip_lock, irq_flags);
+	flip_data->seqno = obj->last_write_seqno;
+	flip_data->ring_id = obj->ring->id;
+	spin_unlock_irqrestore(&dev_priv->flip_lock, irq_flags);
+
+	/* Double check to catch cases where irq fired before
+	 * flip data was ready
+	 */
+	intel_notify_mmio_flip(dev, obj->ring);
+	return 0;
+
+err_unpin:
+	intel_unpin_fb_obj(obj);
+err:
+	return ret;
+}
+
 static int intel_gen7_queue_flip(struct drm_device *dev,
 				 struct drm_crtc *crtc,
 				 struct drm_framebuffer *fb,
@@ -10581,6 +10697,12 @@  static void intel_crtc_init(struct drm_device *dev, int pipe)
 	dev_priv->pipe_to_crtc_mapping[intel_crtc->pipe] = &intel_crtc->base;
 
 	drm_crtc_helper_add(&intel_crtc->base, &intel_helper_funcs);
+
+	if (IS_VALLEYVIEW(dev)) {
+			dev_priv->flip_data[pipe].crtc =
+				dev_priv->pipe_to_crtc_mapping[pipe];
+			dev_priv->flip_data[pipe].seqno = 0;
+	}
 }
 
 enum pipe intel_get_pipe_from_connector(struct intel_connector *connector)
@@ -11103,6 +11225,9 @@  static void intel_init_display(struct drm_device *dev)
 		dev_priv->display.queue_flip = intel_gen7_queue_flip;
 		break;
 	}
+	if (IS_VALLEYVIEW(dev)) {
+		dev_priv->display.queue_flip = intel_gen7_queue_mmio_flip;
+	}
 
 	intel_panel_init_backlight_funcs(dev);
 }