diff mbox

[v6,1/3] drm/i915: Replaced Blitter ring based flips with MMIO flips

Message ID 1400582988-28602-2-git-send-email-sourab.gupta@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

sourab.gupta@intel.com May 20, 2014, 10:49 a.m. UTC
From: Sourab Gupta <sourab.gupta@intel.com>

Using MMIO based flips on Gen5+ for Media power well residency optimization.
The blitter ring is currently being used just for command streamer based
flip calls. For pure 3D workloads, with MMIO flips, there will be no use
of blitter ring and this will ensure the 100% residency for Media well.

v2: The MMIO flips now use the interrupt driven mechanism for issuing the
flips when target seqno is reached. (Incorporating Ville's idea)

v3: Rebasing on latest code. Code restructuring after incorporating
Damien's comments

v4: Addressing Ville's review comments
    -general cleanup
    -updating only base addr instead of calling update_primary_plane
    -extending patch for gen5+ platforms

v5: Addressed Ville's review comments
    -Making mmio flip vs cs flip selection based on module parameter
    -Adding check for DRIVER_MODESET feature in notify_ring before calling
     notify mmio flip.
    -Other changes mostly in function arguments

v6: -Having a seperate function to check condition for using mmio flips (Ville)
    -propogating error code from i915_gem_check_olr (Ville)

Signed-off-by: Sourab Gupta <sourab.gupta@intel.com>
Signed-off-by: Akash Goel <akash.goel@intel.com>
---
 drivers/gpu/drm/i915/i915_dma.c      |   1 +
 drivers/gpu/drm/i915/i915_drv.h      |   6 ++
 drivers/gpu/drm/i915/i915_gem.c      |   2 +-
 drivers/gpu/drm/i915/i915_irq.c      |   3 +
 drivers/gpu/drm/i915/i915_params.c   |   4 ++
 drivers/gpu/drm/i915/intel_display.c | 123 +++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_drv.h     |   6 ++
 7 files changed, 144 insertions(+), 1 deletion(-)

Comments

Chris Wilson May 20, 2014, 11:59 a.m. UTC | #1
On Tue, May 20, 2014 at 04:19:46PM +0530, sourab.gupta@intel.com wrote:
> +int i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno);

Be strict and add __must_check

> +static bool intel_use_mmio_flip(struct drm_device *dev)
> +{
> +	/* If module parameter is disabled, use CS flips.
> +	 * Otherwise, use MMIO flips starting from Gen5.
> +	 * This is not being used for older platforms because of
> +	 * non-availability of flip done interrupt.
> +	 */

What? Where is the dependence on flip-done?

> +	if (i915.use_mmio_flip == 0)
> +		return false;
> +
> +	if (INTEL_INFO(dev)->gen >= 5)
> +		return true;
> +	else
> +		return false;

You have not justified the change in default settings for existing hw.
Your argument is based on media power wells which does not support the
general change. It would seem that we may want to mix mmio / CS flips
depending on workload based on your vague statements.

I quite fancy a tristate here for force-CS flips, force-MMIO flips, at
driver discretion. Then enabling it on an architecture as a seperate
patch with justification - it is then easier to do each architecture on
a case-by-case basis and revert if need be.

> +static int intel_postpone_flip(struct drm_i915_gem_object *obj)
> +{
> +	int ret;

if (WARN_ON(crtc->mmio_flip_data.seqno)) return -EBUSY;

You need a tiling check here as you do not update dspcntr. Or fix
mmio_done.

> +	if (!obj->ring)
> +		return 0;
> +
> +	if (i915_seqno_passed(obj->ring->get_seqno(obj->ring, true),
> +				obj->last_write_seqno))
> +		return 0;
> +
> +	if (ret = i915_gem_check_olr(obj->ring, obj->last_write_seqno))
> +		return ret;

Please don't anger gcc.

> +
> +	if (WARN_ON(!obj->ring->irq_get(obj->ring)))
> +		return 0;
> +
> +	return 1;
> +}

> @@ -11377,6 +11497,9 @@ static void intel_init_display(struct drm_device *dev)
>  		break;
>  	}
>  
> +	if(intel_use_mmio_flip(dev))

Please don't anger checkpatch.

> +		dev_priv->display.queue_flip = intel_queue_mmio_flip;
> +
>  	intel_panel_init_backlight_funcs(dev);
>  }
>  
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 32a74e1..08d65a4 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -351,6 +351,11 @@ struct intel_pipe_wm {
>  	bool sprites_scaled;
>  };
>  
> +struct intel_mmio_flip {
> +	u32 seqno;
> +	u32 ring_id;
> +};
> +
>  struct intel_crtc {
>  	struct drm_crtc base;
>  	enum pipe pipe;
> @@ -403,6 +408,7 @@ struct intel_crtc {
>  	} wm;
>  
>  	wait_queue_head_t vbl_wait;
> +	struct intel_mmio_flip mmio_flip_data;

Does _data add anything meaningful here to the description of mmio_flip?
Just mmio_flip will suffice, as pending_mmio_flip is overkill but would
make a useful comment.
-Chris
sourab.gupta@intel.com May 20, 2014, 6:01 p.m. UTC | #2
On Tue, 2014-05-20 at 11:59 +0000, Chris Wilson wrote:
> On Tue, May 20, 2014 at 04:19:46PM +0530, sourab.gupta@intel.com wrote:
> > +int i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno);
> 
> Be strict and add __must_check
> 
We'll add this.

> > +static bool intel_use_mmio_flip(struct drm_device *dev)
> > +{
> > +	/* If module parameter is disabled, use CS flips.
> > +	 * Otherwise, use MMIO flips starting from Gen5.
> > +	 * This is not being used for older platforms because of
> > +	 * non-availability of flip done interrupt.
> > +	 */
> 
> What? Where is the dependence on flip-done?

Hi Chris,
From an earlier mail by Ville, 
"It should work on gen5+ since all of those have a flip done interrupt.
For older platforms we use some clever tricks involving the flip_pending
status bits and vblank irqs. That code won't work for mmio flips. We'd
need to add another way to complete the flips based. That would involve
using the frame counter to make it accurate. To avoid races there we'd
definitely need to use the vblank evade mechanism to make sure we sample
the frame counter within the same frame as when we write the registers.
Also gen2 has the extra complication that it lacks a hardware frame
counter."
So, we had put the Gen5+ check here.

> 
> > +	if (i915.use_mmio_flip == 0)
> > +		return false;
> > +
> > +	if (INTEL_INFO(dev)->gen >= 5)
> > +		return true;
> > +	else
> > +		return false;
> 
> You have not justified the change in default settings for existing hw.
> Your argument is based on media power wells which does not support the
> general change. It would seem that we may want to mix mmio / CS flips
> depending on workload based on your vague statements.
> 

> I quite fancy a tristate here for force-CS flips, force-MMIO flips, at
> driver discretion. Then enabling it on an architecture as a seperate
> patch with justification - it is then easier to do each architecture on
> a case-by-case basis and revert if need be.
> 
We agree that the using mmio flips gives better residency in cases where
render and blitter engines reside in different power wells. This is
helpful in case of pure 3D workloads on valleyview. We have enabled it
in the second patch of series for valleyview.
This patch has put forth 2 states - 0 for force-CS flips and 1 for
force-MMIO flips. The second patch in this series enables it for
Valleyview architecture.

> > +static int intel_postpone_flip(struct drm_i915_gem_object *obj)
> > +{
> > +	int ret;
> 
> if (WARN_ON(crtc->mmio_flip_data.seqno)) return -EBUSY;
> 
> You need a tiling check here as you do not update dspcntr. Or fix
> mmio_done.
> 
We were not updating dspcntr here because atomicity concerns. We could
add tiling update also if its ok in that regard.
Ville, what's your opinion here.
> > +	if (!obj->ring)
> > +		return 0;
> > +
> > +	if (i915_seqno_passed(obj->ring->get_seqno(obj->ring, true),
> > +				obj->last_write_seqno))
> > +		return 0;
> > +
> > +	if (ret = i915_gem_check_olr(obj->ring, obj->last_write_seqno))
> > +		return ret;
> 
> Please don't anger gcc.
> 
> > +
> > +	if (WARN_ON(!obj->ring->irq_get(obj->ring)))
> > +		return 0;
> > +
> > +	return 1;
> > +}
> 
> > @@ -11377,6 +11497,9 @@ static void intel_init_display(struct drm_device *dev)
> >  		break;
> >  	}
> >  
> > +	if(intel_use_mmio_flip(dev))
> 
> Please don't anger checkpatch.
> 
> > +		dev_priv->display.queue_flip = intel_queue_mmio_flip;
> > +
> >  	intel_panel_init_backlight_funcs(dev);
> >  }
> >  
> > diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> > index 32a74e1..08d65a4 100644
> > --- a/drivers/gpu/drm/i915/intel_drv.h
> > +++ b/drivers/gpu/drm/i915/intel_drv.h
> > @@ -351,6 +351,11 @@ struct intel_pipe_wm {
> >  	bool sprites_scaled;
> >  };
> >  
> > +struct intel_mmio_flip {
> > +	u32 seqno;
> > +	u32 ring_id;
> > +};
> > +
> >  struct intel_crtc {
> >  	struct drm_crtc base;
> >  	enum pipe pipe;
> > @@ -403,6 +408,7 @@ struct intel_crtc {
> >  	} wm;
> >  
> >  	wait_queue_head_t vbl_wait;
> > +	struct intel_mmio_flip mmio_flip_data;
> 
> Does _data add anything meaningful here to the description of mmio_flip?
> Just mmio_flip will suffice, as pending_mmio_flip is overkill but would
> make a useful comment.
> -Chris
sourab.gupta@intel.com May 22, 2014, 2:36 p.m. UTC | #3
From: Sourab Gupta <sourab.gupta@intel.com>

This patch series replaces Blitter ring based flips with MMIO based flips.
This is useful for Media power well residency optimization. These may be
enabled on architectures where Render and Blitter engines reside in different
power wells.
The blitter ring is currently being used just for command streamer based
flip calls. For pure 3D workloads, with MMIO flips, there will be no use
of blitter ring and this will ensure the 100% residency for Media well.

Sourab Gupta (3):
  drm/i915: Replaced Blitter ring based flips with MMIO flips
  drm/i915: Default to mmio flips on VLV
  drm/i915: Fix mmio page flip vs mmio set base race

 drivers/gpu/drm/i915/i915_dma.c      |   1 +
 drivers/gpu/drm/i915/i915_drv.h      |   6 ++
 drivers/gpu/drm/i915/i915_gem.c      |   2 +-
 drivers/gpu/drm/i915/i915_irq.c      |   3 +
 drivers/gpu/drm/i915/i915_params.c   |   5 ++
 drivers/gpu/drm/i915/intel_display.c | 135 +++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_drv.h     |   6 ++
 7 files changed, 157 insertions(+), 1 deletion(-)
sourab.gupta@intel.com May 26, 2014, 8:51 a.m. UTC | #4
On Thu, 2014-05-22 at 14:36 +0000, Gupta, Sourab wrote:
> From: Sourab Gupta <sourab.gupta@intel.com>
> 
> This patch series replaces Blitter ring based flips with MMIO based flips.
> This is useful for Media power well residency optimization. These may be
> enabled on architectures where Render and Blitter engines reside in different
> power wells.
> The blitter ring is currently being used just for command streamer based
> flip calls. For pure 3D workloads, with MMIO flips, there will be no use
> of blitter ring and this will ensure the 100% residency for Media well.
> 
> Sourab Gupta (3):
>   drm/i915: Replaced Blitter ring based flips with MMIO flips
>   drm/i915: Default to mmio flips on VLV
>   drm/i915: Fix mmio page flip vs mmio set base race
> 
>  drivers/gpu/drm/i915/i915_dma.c      |   1 +
>  drivers/gpu/drm/i915/i915_drv.h      |   6 ++
>  drivers/gpu/drm/i915/i915_gem.c      |   2 +-
>  drivers/gpu/drm/i915/i915_irq.c      |   3 +
>  drivers/gpu/drm/i915/i915_params.c   |   5 ++
>  drivers/gpu/drm/i915/intel_display.c | 135 +++++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/intel_drv.h     |   6 ++
>  7 files changed, 157 insertions(+), 1 deletion(-)
> 
Hi Ville/Chris,
I have tried to address your comments in these series of patches. Can
you please review them.
Thanks,
Sourab
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 46f1dec..672c28f 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1570,6 +1570,7 @@  int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	spin_lock_init(&dev_priv->backlight_lock);
 	spin_lock_init(&dev_priv->uncore.lock);
 	spin_lock_init(&dev_priv->mm.object_stat_lock);
+	spin_lock_init(&dev_priv->mmio_flip_lock);
 	dev_priv->ring_index = 0;
 	mutex_init(&dev_priv->dpio_lock);
 	mutex_init(&dev_priv->modeset_restore_lock);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4006dfe..9f1d042 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1543,6 +1543,8 @@  struct drm_i915_private {
 	struct i915_ums_state ums;
 	/* the indicator for dispatch video commands on two BSD rings */
 	int ring_index;
+	/* protects the mmio flip data */
+	spinlock_t mmio_flip_lock;
 };
 
 static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
@@ -2019,6 +2021,7 @@  struct i915_params {
 	bool reset;
 	bool disable_display;
 	bool disable_vtd_wa;
+	bool use_mmio_flip;
 };
 extern struct i915_params i915 __read_mostly;
 
@@ -2209,6 +2212,7 @@  bool i915_gem_retire_requests(struct drm_device *dev);
 void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring);
 int __must_check i915_gem_check_wedge(struct i915_gpu_error *error,
 				      bool interruptible);
+int i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno);
 static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
 {
 	return unlikely(atomic_read(&error->reset_counter)
@@ -2580,6 +2584,8 @@  int i915_reg_read_ioctl(struct drm_device *dev, void *data,
 int i915_get_reset_stats_ioctl(struct drm_device *dev, void *data,
 			       struct drm_file *file);
 
+void intel_notify_mmio_flip(struct intel_ring_buffer *ring);
+
 /* overlay */
 extern struct intel_overlay_error_state *intel_overlay_capture_error_state(struct drm_device *dev);
 extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fa5b5ab..5b4e953 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -975,7 +975,7 @@  i915_gem_check_wedge(struct i915_gpu_error *error,
  * Compare seqno against outstanding lazy request. Emit a request if they are
  * equal.
  */
-static int
+int
 i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
 {
 	int ret;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index b10fbde..31e98e2 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1084,6 +1084,9 @@  static void notify_ring(struct drm_device *dev,
 
 	trace_i915_gem_request_complete(ring);
 
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		intel_notify_mmio_flip(ring);
+
 	wake_up_all(&ring->irq_queue);
 	i915_queue_hangcheck(dev);
 }
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index d05a2af..e0d44df 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -48,6 +48,7 @@  struct i915_params i915 __read_mostly = {
 	.disable_display = 0,
 	.enable_cmd_parser = 1,
 	.disable_vtd_wa = 0,
+	.use_mmio_flip = 0,
 };
 
 module_param_named(modeset, i915.modeset, int, 0400);
@@ -156,3 +157,6 @@  MODULE_PARM_DESC(disable_vtd_wa, "Disable all VT-d workarounds (default: false)"
 module_param_named(enable_cmd_parser, i915.enable_cmd_parser, int, 0600);
 MODULE_PARM_DESC(enable_cmd_parser,
 		 "Enable command parsing (1=enabled [default], 0=disabled)");
+
+module_param_named(use_mmio_flip, i915.use_mmio_flip, bool, 0600);
+MODULE_PARM_DESC(use_mmio_flip, "use MMIO flips (default: false)");
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 0f8f9bc..d8bc30b 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -9037,6 +9037,126 @@  err:
 	return ret;
 }
 
+static bool intel_use_mmio_flip(struct drm_device *dev)
+{
+	/* If module parameter is disabled, use CS flips.
+	 * Otherwise, use MMIO flips starting from Gen5.
+	 * This is not being used for older platforms because of
+	 * non-availability of flip done interrupt.
+	 */
+	if (i915.use_mmio_flip == 0)
+		return false;
+
+	if (INTEL_INFO(dev)->gen >= 5)
+		return true;
+	else
+		return false;
+}
+
+static void intel_do_mmio_flip(struct intel_crtc *intel_crtc)
+{
+	struct drm_i915_private *dev_priv =
+		intel_crtc->base.dev->dev_private;
+	struct intel_framebuffer *intel_fb =
+		to_intel_framebuffer(intel_crtc->base.primary->fb);
+	struct drm_i915_gem_object *obj = intel_fb->obj;
+
+	intel_mark_page_flip_active(intel_crtc);
+
+	I915_WRITE(DSPSURF(intel_crtc->plane), i915_gem_obj_ggtt_offset(obj) +
+			intel_crtc->dspaddr_offset);
+	POSTING_READ(DSPSURF(intel_crtc->plane));
+}
+
+static int intel_postpone_flip(struct drm_i915_gem_object *obj)
+{
+	int ret;
+
+	if (!obj->ring)
+		return 0;
+
+	if (i915_seqno_passed(obj->ring->get_seqno(obj->ring, true),
+				obj->last_write_seqno))
+		return 0;
+
+	if (ret = i915_gem_check_olr(obj->ring, obj->last_write_seqno))
+		return ret;
+
+	if (WARN_ON(!obj->ring->irq_get(obj->ring)))
+		return 0;
+
+	return 1;
+}
+
+void intel_notify_mmio_flip(struct intel_ring_buffer *ring)
+{
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	struct intel_crtc *intel_crtc;
+	unsigned long irq_flags;
+	u32 seqno;
+
+	seqno = ring->get_seqno(ring, false);
+
+	spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags);
+	for_each_intel_crtc(ring->dev, intel_crtc) {
+		struct intel_mmio_flip *mmio_flip_data;
+
+		mmio_flip_data = &intel_crtc->mmio_flip_data;
+
+		if (mmio_flip_data->seqno == 0)
+			continue;
+		if (ring->id != mmio_flip_data->ring_id)
+			continue;
+
+		if (i915_seqno_passed(seqno, mmio_flip_data->seqno)) {
+			intel_do_mmio_flip(intel_crtc);
+			mmio_flip_data->seqno = 0;
+			ring->irq_put(ring);
+		}
+	}
+	spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags);
+}
+
+static int intel_queue_mmio_flip(struct drm_device *dev,
+			struct drm_crtc *crtc,
+			struct drm_framebuffer *fb,
+			struct drm_i915_gem_object *obj,
+			uint32_t flags)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	unsigned long irq_flags;
+	int ret;
+
+	ret = intel_pin_and_fence_fb_obj(dev, obj, obj->ring);
+	if (ret)
+		goto err;
+
+	ret = intel_postpone_flip(obj);
+	if (ret < 0) {
+		goto err_unpin;
+	} else if (ret == 0) {
+		intel_do_mmio_flip(intel_crtc);
+		return 0;
+	}
+
+	spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags);
+	intel_crtc->mmio_flip_data.seqno = obj->last_write_seqno;
+	intel_crtc->mmio_flip_data.ring_id = obj->ring->id;
+	spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags);
+
+	/* Double check to catch cases where irq fired before
+	 * mmio flip data was ready
+	 */
+	intel_notify_mmio_flip(obj->ring);
+	return 0;
+
+err_unpin:
+	intel_unpin_fb_obj(obj);
+err:
+	return ret;
+}
+
 static int intel_gen7_queue_flip(struct drm_device *dev,
 				 struct drm_crtc *crtc,
 				 struct drm_framebuffer *fb,
@@ -11377,6 +11497,9 @@  static void intel_init_display(struct drm_device *dev)
 		break;
 	}
 
+	if(intel_use_mmio_flip(dev))
+		dev_priv->display.queue_flip = intel_queue_mmio_flip;
+
 	intel_panel_init_backlight_funcs(dev);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 32a74e1..08d65a4 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -351,6 +351,11 @@  struct intel_pipe_wm {
 	bool sprites_scaled;
 };
 
+struct intel_mmio_flip {
+	u32 seqno;
+	u32 ring_id;
+};
+
 struct intel_crtc {
 	struct drm_crtc base;
 	enum pipe pipe;
@@ -403,6 +408,7 @@  struct intel_crtc {
 	} wm;
 
 	wait_queue_head_t vbl_wait;
+	struct intel_mmio_flip mmio_flip_data;
 };
 
 struct intel_plane_wm_parameters {