diff mbox

[v7,1/3] drm/i915: Replaced Blitter ring based flips with MMIO flips

Message ID 1400769393-15403-2-git-send-email-sourab.gupta@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

sourab.gupta@intel.com May 22, 2014, 2:36 p.m. UTC
From: Sourab Gupta <sourab.gupta@intel.com>

Using MMIO based flips on Gen5+. The MMIO flips are useful for the Media power
well residency optimization. These maybe enabled on architectures where
Render and Blitter engines reside in different power wells.
The blitter ring is currently being used just for command streamer based
flip calls. For pure 3D workloads in such cases, with MMIO flips, there will
be no use of blitter ring and this will ensure the 100% residency for Media well.

v2: The MMIO flips now use the interrupt driven mechanism for issuing the
flips when target seqno is reached. (Incorporating Ville's idea)

v3: Rebasing on latest code. Code restructuring after incorporating
Damien's comments

v4: Addressing Ville's review comments
    -general cleanup
    -updating only base addr instead of calling update_primary_plane
    -extending patch for gen5+ platforms

v5: Addressed Ville's review comments
    -Making mmio flip vs cs flip selection based on module parameter
    -Adding check for DRIVER_MODESET feature in notify_ring before calling
     notify mmio flip.
    -Other changes mostly in function arguments

v6: -Having a seperate function to check condition for using mmio flips (Ville)
    -propogating error code from i915_gem_check_olr (Ville)

v7: -Adding __must_check with i915_gem_check_olr (Chris)
    -Renaming mmio_flip_data to mmio_flip (Chris)
    -Rebasing on latest nightly

Signed-off-by: Sourab Gupta <sourab.gupta@intel.com>
Signed-off-by: Akash Goel <akash.goel@intel.com>
---
 drivers/gpu/drm/i915/i915_dma.c      |   1 +
 drivers/gpu/drm/i915/i915_drv.h      |   7 ++
 drivers/gpu/drm/i915/i915_gem.c      |   2 +-
 drivers/gpu/drm/i915/i915_irq.c      |   3 +
 drivers/gpu/drm/i915/i915_params.c   |   4 ++
 drivers/gpu/drm/i915/intel_display.c | 133 +++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_drv.h     |   6 ++
 7 files changed, 155 insertions(+), 1 deletion(-)

Comments

Ville Syrjala May 27, 2014, 12:52 p.m. UTC | #1
On Thu, May 22, 2014 at 08:06:31PM +0530, sourab.gupta@intel.com wrote:
> From: Sourab Gupta <sourab.gupta@intel.com>
> 
> Using MMIO based flips on Gen5+. The MMIO flips are useful for the Media power
> well residency optimization. These maybe enabled on architectures where
> Render and Blitter engines reside in different power wells.
> The blitter ring is currently being used just for command streamer based
> flip calls. For pure 3D workloads in such cases, with MMIO flips, there will
> be no use of blitter ring and this will ensure the 100% residency for Media well.
> 
> v2: The MMIO flips now use the interrupt driven mechanism for issuing the
> flips when target seqno is reached. (Incorporating Ville's idea)
> 
> v3: Rebasing on latest code. Code restructuring after incorporating
> Damien's comments
> 
> v4: Addressing Ville's review comments
>     -general cleanup
>     -updating only base addr instead of calling update_primary_plane
>     -extending patch for gen5+ platforms
> 
> v5: Addressed Ville's review comments
>     -Making mmio flip vs cs flip selection based on module parameter
>     -Adding check for DRIVER_MODESET feature in notify_ring before calling
>      notify mmio flip.
>     -Other changes mostly in function arguments
> 
> v6: -Having a seperate function to check condition for using mmio flips (Ville)
>     -propogating error code from i915_gem_check_olr (Ville)
> 
> v7: -Adding __must_check with i915_gem_check_olr (Chris)
>     -Renaming mmio_flip_data to mmio_flip (Chris)
>     -Rebasing on latest nightly
> 
> Signed-off-by: Sourab Gupta <sourab.gupta@intel.com>
> Signed-off-by: Akash Goel <akash.goel@intel.com>

Seems the mmio vs cs flip race patches landed finally, so this needs a
rebase to kill the pin stuff from intel_queue_mmio_flip(), and that
also means you can squash patch 3/3 with this one. Also needs a
s/ring_buffer/engine_cs/ since that stuff also got changed.

Oh and Chris was right about the tiling thing. I totally forgot about
that when I said you shouldn't use .update_primary_plane(). I think
the easiest solution would be to stick the new tiling mode into the
mmio flip data, and just update DSPCNTR with a RMW in the flip notify
function. IIIRC we already have test for tiling change during page
flip in igt.

> ---
>  drivers/gpu/drm/i915/i915_dma.c      |   1 +
>  drivers/gpu/drm/i915/i915_drv.h      |   7 ++
>  drivers/gpu/drm/i915/i915_gem.c      |   2 +-
>  drivers/gpu/drm/i915/i915_irq.c      |   3 +
>  drivers/gpu/drm/i915/i915_params.c   |   4 ++
>  drivers/gpu/drm/i915/intel_display.c | 133 +++++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/intel_drv.h     |   6 ++
>  7 files changed, 155 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 20df7c72..266c9a6 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1571,6 +1571,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
>  	spin_lock_init(&dev_priv->backlight_lock);
>  	spin_lock_init(&dev_priv->uncore.lock);
>  	spin_lock_init(&dev_priv->mm.object_stat_lock);
> +	spin_lock_init(&dev_priv->mmio_flip_lock);
>  	mutex_init(&dev_priv->dpio_lock);
>  	mutex_init(&dev_priv->modeset_restore_lock);
>  
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 13495a4..ced6e58 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1366,6 +1366,9 @@ struct drm_i915_private {
>  	/* protects the irq masks */
>  	spinlock_t irq_lock;
>  
> +	/* protects the mmio flip data */
> +	spinlock_t mmio_flip_lock;
> +
>  	bool display_irqs_enabled;
>  
>  	/* To control wakeup latency, e.g. for irq-driven dp aux transfers. */
> @@ -2036,6 +2039,7 @@ struct i915_params {
>  	bool reset;
>  	bool disable_display;
>  	bool disable_vtd_wa;
> +	bool use_mmio_flip;
>  };
>  extern struct i915_params i915 __read_mostly;
>  
> @@ -2230,6 +2234,7 @@ bool i915_gem_retire_requests(struct drm_device *dev);
>  void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring);
>  int __must_check i915_gem_check_wedge(struct i915_gpu_error *error,
>  				      bool interruptible);
> +int __must_check i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno);
>  static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
>  {
>  	return unlikely(atomic_read(&error->reset_counter)
> @@ -2598,6 +2603,8 @@ int i915_reg_read_ioctl(struct drm_device *dev, void *data,
>  int i915_get_reset_stats_ioctl(struct drm_device *dev, void *data,
>  			       struct drm_file *file);
>  
> +void intel_notify_mmio_flip(struct intel_ring_buffer *ring);
> +
>  /* overlay */
>  extern struct intel_overlay_error_state *intel_overlay_capture_error_state(struct drm_device *dev);
>  extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index f2713b9..bc6fe4e 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1096,7 +1096,7 @@ i915_gem_check_wedge(struct i915_gpu_error *error,
>   * Compare seqno against outstanding lazy request. Emit a request if they are
>   * equal.
>   */
> -static int
> +__must_check int
>  i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
>  {
>  	int ret;
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 2043276..f244b23 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1155,6 +1155,9 @@ static void notify_ring(struct drm_device *dev,
>  
>  	trace_i915_gem_request_complete(ring);
>  
> +	if (drm_core_check_feature(dev, DRIVER_MODESET))
> +		intel_notify_mmio_flip(ring);
> +
>  	wake_up_all(&ring->irq_queue);
>  	i915_queue_hangcheck(dev);
>  }
> diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
> index d05a2af..e0d44df 100644
> --- a/drivers/gpu/drm/i915/i915_params.c
> +++ b/drivers/gpu/drm/i915/i915_params.c
> @@ -48,6 +48,7 @@ struct i915_params i915 __read_mostly = {
>  	.disable_display = 0,
>  	.enable_cmd_parser = 1,
>  	.disable_vtd_wa = 0,
> +	.use_mmio_flip = 0,
>  };
>  
>  module_param_named(modeset, i915.modeset, int, 0400);
> @@ -156,3 +157,6 @@ MODULE_PARM_DESC(disable_vtd_wa, "Disable all VT-d workarounds (default: false)"
>  module_param_named(enable_cmd_parser, i915.enable_cmd_parser, int, 0600);
>  MODULE_PARM_DESC(enable_cmd_parser,
>  		 "Enable command parsing (1=enabled [default], 0=disabled)");
> +
> +module_param_named(use_mmio_flip, i915.use_mmio_flip, bool, 0600);
> +MODULE_PARM_DESC(use_mmio_flip, "use MMIO flips (default: false)");
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 19b92c1..a29552d 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -9179,6 +9179,136 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
>  	return 0;
>  }
>  
> +static bool intel_use_mmio_flip(struct drm_device *dev)
> +{
> +	/* If module parameter is disabled, use CS flips.
> +	 * Otherwise, use MMIO flips starting from Gen5.
> +	 * This is not being used for older platforms, because
> +	 * non-availability of flip done interrupt forces us to use
> +	 * CS flips. Older platforms derive flip done using some clever
> +	 * tricks involving the flip_pending status bits and vblank irqs.
> +	 * So using MMIO flips there would disrupt this mechanism.
> +	 */
> +
> +	if (i915.use_mmio_flip == 0)
> +		return false;
> +
> +	if (INTEL_INFO(dev)->gen >= 5)
> +		return true;
> +	else
> +		return false;
> +}
> +
> +static void intel_do_mmio_flip(struct intel_crtc *intel_crtc)
> +{
> +	struct drm_i915_private *dev_priv =
> +		intel_crtc->base.dev->dev_private;
> +	struct intel_framebuffer *intel_fb =
> +		to_intel_framebuffer(intel_crtc->base.primary->fb);
> +	struct drm_i915_gem_object *obj = intel_fb->obj;
> +
> +	intel_mark_page_flip_active(intel_crtc);
> +
> +	I915_WRITE(DSPSURF(intel_crtc->plane), i915_gem_obj_ggtt_offset(obj) +
> +			intel_crtc->dspaddr_offset);
> +	POSTING_READ(DSPSURF(intel_crtc->plane));
> +}
> +
> +static int intel_postpone_flip(struct drm_i915_gem_object *obj)
> +{
> +	int ret;
> +
> +	if (!obj->ring)
> +		return 0;
> +
> +	if (i915_seqno_passed(obj->ring->get_seqno(obj->ring, true),
> +				obj->last_write_seqno))
> +		return 0;
> +
> +	ret = i915_gem_check_olr(obj->ring, obj->last_write_seqno);
> +	if (ret)
> +		return ret;
> +
> +	if (WARN_ON(!obj->ring->irq_get(obj->ring)))
> +		return 0;
> +
> +	return 1;
> +}
> +
> +void intel_notify_mmio_flip(struct intel_ring_buffer *ring)
> +{
> +	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> +	struct intel_crtc *intel_crtc;
> +	unsigned long irq_flags;
> +	u32 seqno;
> +
> +	seqno = ring->get_seqno(ring, false);
> +
> +	spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags);
> +	for_each_intel_crtc(ring->dev, intel_crtc) {
> +		struct intel_mmio_flip *mmio_flip;
> +
> +		mmio_flip = &intel_crtc->mmio_flip;
> +
> +		if (mmio_flip->seqno == 0)
> +			continue;
> +		if (ring->id != mmio_flip->ring_id)
> +			continue;
> +
> +		if (i915_seqno_passed(seqno, mmio_flip->seqno)) {
> +			intel_do_mmio_flip(intel_crtc);
> +			mmio_flip->seqno = 0;
> +			ring->irq_put(ring);
> +		}
> +	}
> +	spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags);
> +}
> +
> +static int intel_queue_mmio_flip(struct drm_device *dev,
> +			struct drm_crtc *crtc,
> +			struct drm_framebuffer *fb,
> +			struct drm_i915_gem_object *obj,
> +			uint32_t flags)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> +	unsigned long irq_flags;
> +	int ret;
> +
> +	ret = intel_pin_and_fence_fb_obj(dev, obj, obj->ring);
> +	if (ret)
> +		goto err;
> +
> +	if (WARN_ON(intel_crtc->mmio_flip.seqno)) {
> +		ret = -EBUSY;
> +		goto err_unpin;
> +	}
> +
> +	ret = intel_postpone_flip(obj);
> +	if (ret < 0) {
> +		goto err_unpin;
> +	} else if (ret == 0) {
> +		intel_do_mmio_flip(intel_crtc);
> +		return 0;
> +	}
> +
> +	spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags);
> +	intel_crtc->mmio_flip.seqno = obj->last_write_seqno;
> +	intel_crtc->mmio_flip.ring_id = obj->ring->id;
> +	spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags);
> +
> +	/* Double check to catch cases where irq fired before
> +	 * mmio flip data was ready
> +	 */
> +	intel_notify_mmio_flip(obj->ring);
> +	return 0;
> +
> +err_unpin:
> +	intel_unpin_fb_obj(obj);
> +err:
> +	return ret;
> +}
> +
>  static int intel_default_queue_flip(struct drm_device *dev,
>  				    struct drm_crtc *crtc,
>  				    struct drm_framebuffer *fb,
> @@ -11470,6 +11600,9 @@ static void intel_init_display(struct drm_device *dev)
>  		break;
>  	}
>  
> +	if (intel_use_mmio_flip(dev))
> +		dev_priv->display.queue_flip = intel_queue_mmio_flip;
> +
>  	intel_panel_init_backlight_funcs(dev);
>  }
>  
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 287b89e..5a4f60c 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -358,6 +358,11 @@ struct intel_pipe_wm {
>  	bool sprites_scaled;
>  };
>  
> +struct intel_mmio_flip {
> +	u32 seqno;
> +	u32 ring_id;
> +};
> +
>  struct intel_crtc {
>  	struct drm_crtc base;
>  	enum pipe pipe;
> @@ -409,6 +414,7 @@ struct intel_crtc {
>  	} wm;
>  
>  	wait_queue_head_t vbl_wait;
> +	struct intel_mmio_flip mmio_flip;
>  };
>  
>  struct intel_plane_wm_parameters {
> -- 
> 1.8.5.1
Daniel Vetter May 27, 2014, 1:09 p.m. UTC | #2
On Tue, May 27, 2014 at 03:52:55PM +0300, Ville Syrjälä wrote:
> On Thu, May 22, 2014 at 08:06:31PM +0530, sourab.gupta@intel.com wrote:
> > From: Sourab Gupta <sourab.gupta@intel.com>
> > 
> > Using MMIO based flips on Gen5+. The MMIO flips are useful for the Media power
> > well residency optimization. These maybe enabled on architectures where
> > Render and Blitter engines reside in different power wells.
> > The blitter ring is currently being used just for command streamer based
> > flip calls. For pure 3D workloads in such cases, with MMIO flips, there will
> > be no use of blitter ring and this will ensure the 100% residency for Media well.
> > 
> > v2: The MMIO flips now use the interrupt driven mechanism for issuing the
> > flips when target seqno is reached. (Incorporating Ville's idea)
> > 
> > v3: Rebasing on latest code. Code restructuring after incorporating
> > Damien's comments
> > 
> > v4: Addressing Ville's review comments
> >     -general cleanup
> >     -updating only base addr instead of calling update_primary_plane
> >     -extending patch for gen5+ platforms
> > 
> > v5: Addressed Ville's review comments
> >     -Making mmio flip vs cs flip selection based on module parameter
> >     -Adding check for DRIVER_MODESET feature in notify_ring before calling
> >      notify mmio flip.
> >     -Other changes mostly in function arguments
> > 
> > v6: -Having a seperate function to check condition for using mmio flips (Ville)
> >     -propogating error code from i915_gem_check_olr (Ville)
> > 
> > v7: -Adding __must_check with i915_gem_check_olr (Chris)
> >     -Renaming mmio_flip_data to mmio_flip (Chris)
> >     -Rebasing on latest nightly
> > 
> > Signed-off-by: Sourab Gupta <sourab.gupta@intel.com>
> > Signed-off-by: Akash Goel <akash.goel@intel.com>
> 
> Seems the mmio vs cs flip race patches landed finally, so this needs a
> rebase to kill the pin stuff from intel_queue_mmio_flip(), and that
> also means you can squash patch 3/3 with this one. Also needs a
> s/ring_buffer/engine_cs/ since that stuff also got changed.
> 
> Oh and Chris was right about the tiling thing. I totally forgot about
> that when I said you shouldn't use .update_primary_plane(). I think
> the easiest solution would be to stick the new tiling mode into the
> mmio flip data, and just update DSPCNTR with a RMW in the flip notify
> function. IIIRC we already have test for tiling change during page
> flip in igt.

Yeah, we have a test. But it hangs on a lot of platforms :( It's
kms_flip_tiling.

-Daniel
> 
> > ---
> >  drivers/gpu/drm/i915/i915_dma.c      |   1 +
> >  drivers/gpu/drm/i915/i915_drv.h      |   7 ++
> >  drivers/gpu/drm/i915/i915_gem.c      |   2 +-
> >  drivers/gpu/drm/i915/i915_irq.c      |   3 +
> >  drivers/gpu/drm/i915/i915_params.c   |   4 ++
> >  drivers/gpu/drm/i915/intel_display.c | 133 +++++++++++++++++++++++++++++++++++
> >  drivers/gpu/drm/i915/intel_drv.h     |   6 ++
> >  7 files changed, 155 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> > index 20df7c72..266c9a6 100644
> > --- a/drivers/gpu/drm/i915/i915_dma.c
> > +++ b/drivers/gpu/drm/i915/i915_dma.c
> > @@ -1571,6 +1571,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
> >  	spin_lock_init(&dev_priv->backlight_lock);
> >  	spin_lock_init(&dev_priv->uncore.lock);
> >  	spin_lock_init(&dev_priv->mm.object_stat_lock);
> > +	spin_lock_init(&dev_priv->mmio_flip_lock);
> >  	mutex_init(&dev_priv->dpio_lock);
> >  	mutex_init(&dev_priv->modeset_restore_lock);
> >  
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 13495a4..ced6e58 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -1366,6 +1366,9 @@ struct drm_i915_private {
> >  	/* protects the irq masks */
> >  	spinlock_t irq_lock;
> >  
> > +	/* protects the mmio flip data */
> > +	spinlock_t mmio_flip_lock;
> > +
> >  	bool display_irqs_enabled;
> >  
> >  	/* To control wakeup latency, e.g. for irq-driven dp aux transfers. */
> > @@ -2036,6 +2039,7 @@ struct i915_params {
> >  	bool reset;
> >  	bool disable_display;
> >  	bool disable_vtd_wa;
> > +	bool use_mmio_flip;
> >  };
> >  extern struct i915_params i915 __read_mostly;
> >  
> > @@ -2230,6 +2234,7 @@ bool i915_gem_retire_requests(struct drm_device *dev);
> >  void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring);
> >  int __must_check i915_gem_check_wedge(struct i915_gpu_error *error,
> >  				      bool interruptible);
> > +int __must_check i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno);
> >  static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
> >  {
> >  	return unlikely(atomic_read(&error->reset_counter)
> > @@ -2598,6 +2603,8 @@ int i915_reg_read_ioctl(struct drm_device *dev, void *data,
> >  int i915_get_reset_stats_ioctl(struct drm_device *dev, void *data,
> >  			       struct drm_file *file);
> >  
> > +void intel_notify_mmio_flip(struct intel_ring_buffer *ring);
> > +
> >  /* overlay */
> >  extern struct intel_overlay_error_state *intel_overlay_capture_error_state(struct drm_device *dev);
> >  extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e,
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index f2713b9..bc6fe4e 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -1096,7 +1096,7 @@ i915_gem_check_wedge(struct i915_gpu_error *error,
> >   * Compare seqno against outstanding lazy request. Emit a request if they are
> >   * equal.
> >   */
> > -static int
> > +__must_check int
> >  i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
> >  {
> >  	int ret;
> > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> > index 2043276..f244b23 100644
> > --- a/drivers/gpu/drm/i915/i915_irq.c
> > +++ b/drivers/gpu/drm/i915/i915_irq.c
> > @@ -1155,6 +1155,9 @@ static void notify_ring(struct drm_device *dev,
> >  
> >  	trace_i915_gem_request_complete(ring);
> >  
> > +	if (drm_core_check_feature(dev, DRIVER_MODESET))
> > +		intel_notify_mmio_flip(ring);
> > +
> >  	wake_up_all(&ring->irq_queue);
> >  	i915_queue_hangcheck(dev);
> >  }
> > diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
> > index d05a2af..e0d44df 100644
> > --- a/drivers/gpu/drm/i915/i915_params.c
> > +++ b/drivers/gpu/drm/i915/i915_params.c
> > @@ -48,6 +48,7 @@ struct i915_params i915 __read_mostly = {
> >  	.disable_display = 0,
> >  	.enable_cmd_parser = 1,
> >  	.disable_vtd_wa = 0,
> > +	.use_mmio_flip = 0,
> >  };
> >  
> >  module_param_named(modeset, i915.modeset, int, 0400);
> > @@ -156,3 +157,6 @@ MODULE_PARM_DESC(disable_vtd_wa, "Disable all VT-d workarounds (default: false)"
> >  module_param_named(enable_cmd_parser, i915.enable_cmd_parser, int, 0600);
> >  MODULE_PARM_DESC(enable_cmd_parser,
> >  		 "Enable command parsing (1=enabled [default], 0=disabled)");
> > +
> > +module_param_named(use_mmio_flip, i915.use_mmio_flip, bool, 0600);
> > +MODULE_PARM_DESC(use_mmio_flip, "use MMIO flips (default: false)");
> > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> > index 19b92c1..a29552d 100644
> > --- a/drivers/gpu/drm/i915/intel_display.c
> > +++ b/drivers/gpu/drm/i915/intel_display.c
> > @@ -9179,6 +9179,136 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
> >  	return 0;
> >  }
> >  
> > +static bool intel_use_mmio_flip(struct drm_device *dev)
> > +{
> > +	/* If module parameter is disabled, use CS flips.
> > +	 * Otherwise, use MMIO flips starting from Gen5.
> > +	 * This is not being used for older platforms, because
> > +	 * non-availability of flip done interrupt forces us to use
> > +	 * CS flips. Older platforms derive flip done using some clever
> > +	 * tricks involving the flip_pending status bits and vblank irqs.
> > +	 * So using MMIO flips there would disrupt this mechanism.
> > +	 */
> > +
> > +	if (i915.use_mmio_flip == 0)
> > +		return false;
> > +
> > +	if (INTEL_INFO(dev)->gen >= 5)
> > +		return true;
> > +	else
> > +		return false;
> > +}
> > +
> > +static void intel_do_mmio_flip(struct intel_crtc *intel_crtc)
> > +{
> > +	struct drm_i915_private *dev_priv =
> > +		intel_crtc->base.dev->dev_private;
> > +	struct intel_framebuffer *intel_fb =
> > +		to_intel_framebuffer(intel_crtc->base.primary->fb);
> > +	struct drm_i915_gem_object *obj = intel_fb->obj;
> > +
> > +	intel_mark_page_flip_active(intel_crtc);
> > +
> > +	I915_WRITE(DSPSURF(intel_crtc->plane), i915_gem_obj_ggtt_offset(obj) +
> > +			intel_crtc->dspaddr_offset);
> > +	POSTING_READ(DSPSURF(intel_crtc->plane));
> > +}
> > +
> > +static int intel_postpone_flip(struct drm_i915_gem_object *obj)
> > +{
> > +	int ret;
> > +
> > +	if (!obj->ring)
> > +		return 0;
> > +
> > +	if (i915_seqno_passed(obj->ring->get_seqno(obj->ring, true),
> > +				obj->last_write_seqno))
> > +		return 0;
> > +
> > +	ret = i915_gem_check_olr(obj->ring, obj->last_write_seqno);
> > +	if (ret)
> > +		return ret;
> > +
> > +	if (WARN_ON(!obj->ring->irq_get(obj->ring)))
> > +		return 0;
> > +
> > +	return 1;
> > +}
> > +
> > +void intel_notify_mmio_flip(struct intel_ring_buffer *ring)
> > +{
> > +	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> > +	struct intel_crtc *intel_crtc;
> > +	unsigned long irq_flags;
> > +	u32 seqno;
> > +
> > +	seqno = ring->get_seqno(ring, false);
> > +
> > +	spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags);
> > +	for_each_intel_crtc(ring->dev, intel_crtc) {
> > +		struct intel_mmio_flip *mmio_flip;
> > +
> > +		mmio_flip = &intel_crtc->mmio_flip;
> > +
> > +		if (mmio_flip->seqno == 0)
> > +			continue;
> > +		if (ring->id != mmio_flip->ring_id)
> > +			continue;
> > +
> > +		if (i915_seqno_passed(seqno, mmio_flip->seqno)) {
> > +			intel_do_mmio_flip(intel_crtc);
> > +			mmio_flip->seqno = 0;
> > +			ring->irq_put(ring);
> > +		}
> > +	}
> > +	spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags);
> > +}
> > +
> > +static int intel_queue_mmio_flip(struct drm_device *dev,
> > +			struct drm_crtc *crtc,
> > +			struct drm_framebuffer *fb,
> > +			struct drm_i915_gem_object *obj,
> > +			uint32_t flags)
> > +{
> > +	struct drm_i915_private *dev_priv = dev->dev_private;
> > +	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> > +	unsigned long irq_flags;
> > +	int ret;
> > +
> > +	ret = intel_pin_and_fence_fb_obj(dev, obj, obj->ring);
> > +	if (ret)
> > +		goto err;
> > +
> > +	if (WARN_ON(intel_crtc->mmio_flip.seqno)) {
> > +		ret = -EBUSY;
> > +		goto err_unpin;
> > +	}
> > +
> > +	ret = intel_postpone_flip(obj);
> > +	if (ret < 0) {
> > +		goto err_unpin;
> > +	} else if (ret == 0) {
> > +		intel_do_mmio_flip(intel_crtc);
> > +		return 0;
> > +	}
> > +
> > +	spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags);
> > +	intel_crtc->mmio_flip.seqno = obj->last_write_seqno;
> > +	intel_crtc->mmio_flip.ring_id = obj->ring->id;
> > +	spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags);
> > +
> > +	/* Double check to catch cases where irq fired before
> > +	 * mmio flip data was ready
> > +	 */
> > +	intel_notify_mmio_flip(obj->ring);
> > +	return 0;
> > +
> > +err_unpin:
> > +	intel_unpin_fb_obj(obj);
> > +err:
> > +	return ret;
> > +}
> > +
> >  static int intel_default_queue_flip(struct drm_device *dev,
> >  				    struct drm_crtc *crtc,
> >  				    struct drm_framebuffer *fb,
> > @@ -11470,6 +11600,9 @@ static void intel_init_display(struct drm_device *dev)
> >  		break;
> >  	}
> >  
> > +	if (intel_use_mmio_flip(dev))
> > +		dev_priv->display.queue_flip = intel_queue_mmio_flip;
> > +
> >  	intel_panel_init_backlight_funcs(dev);
> >  }
> >  
> > diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> > index 287b89e..5a4f60c 100644
> > --- a/drivers/gpu/drm/i915/intel_drv.h
> > +++ b/drivers/gpu/drm/i915/intel_drv.h
> > @@ -358,6 +358,11 @@ struct intel_pipe_wm {
> >  	bool sprites_scaled;
> >  };
> >  
> > +struct intel_mmio_flip {
> > +	u32 seqno;
> > +	u32 ring_id;
> > +};
> > +
> >  struct intel_crtc {
> >  	struct drm_crtc base;
> >  	enum pipe pipe;
> > @@ -409,6 +414,7 @@ struct intel_crtc {
> >  	} wm;
> >  
> >  	wait_queue_head_t vbl_wait;
> > +	struct intel_mmio_flip mmio_flip;
> >  };
> >  
> >  struct intel_plane_wm_parameters {
> > -- 
> > 1.8.5.1
> 
> -- 
> Ville Syrjälä
> Intel OTC
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
sourab.gupta@intel.com May 28, 2014, 7:12 a.m. UTC | #3
From: Sourab Gupta <sourab.gupta@intel.com>

This patch series replaces Blitter ring based flips with MMIO based flips.
This is useful for Media power well residency optimization. These may be
enabled on architectures where Render and Blitter engines reside in different
power wells.
The blitter ring is currently being used just for command streamer based
flip calls. For pure 3D workloads, with MMIO flips, there will be no use
of blitter ring and this will ensure the 100% residency for Media well.

Sourab Gupta (2):
  drm/i915: Replaced Blitter ring based flips with MMIO flips
  drm/i915: Default to mmio flips on VLV

 drivers/gpu/drm/i915/i915_dma.c      |   1 +
 drivers/gpu/drm/i915/i915_drv.h      |   8 ++
 drivers/gpu/drm/i915/i915_gem.c      |   2 +-
 drivers/gpu/drm/i915/i915_irq.c      |   3 +
 drivers/gpu/drm/i915/i915_params.c   |   5 ++
 drivers/gpu/drm/i915/intel_display.c | 152 +++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_drv.h     |   6 ++
 7 files changed, 176 insertions(+), 1 deletion(-)
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 20df7c72..266c9a6 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1571,6 +1571,7 @@  int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	spin_lock_init(&dev_priv->backlight_lock);
 	spin_lock_init(&dev_priv->uncore.lock);
 	spin_lock_init(&dev_priv->mm.object_stat_lock);
+	spin_lock_init(&dev_priv->mmio_flip_lock);
 	mutex_init(&dev_priv->dpio_lock);
 	mutex_init(&dev_priv->modeset_restore_lock);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 13495a4..ced6e58 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1366,6 +1366,9 @@  struct drm_i915_private {
 	/* protects the irq masks */
 	spinlock_t irq_lock;
 
+	/* protects the mmio flip data */
+	spinlock_t mmio_flip_lock;
+
 	bool display_irqs_enabled;
 
 	/* To control wakeup latency, e.g. for irq-driven dp aux transfers. */
@@ -2036,6 +2039,7 @@  struct i915_params {
 	bool reset;
 	bool disable_display;
 	bool disable_vtd_wa;
+	bool use_mmio_flip;
 };
 extern struct i915_params i915 __read_mostly;
 
@@ -2230,6 +2234,7 @@  bool i915_gem_retire_requests(struct drm_device *dev);
 void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring);
 int __must_check i915_gem_check_wedge(struct i915_gpu_error *error,
 				      bool interruptible);
+int __must_check i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno);
 static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
 {
 	return unlikely(atomic_read(&error->reset_counter)
@@ -2598,6 +2603,8 @@  int i915_reg_read_ioctl(struct drm_device *dev, void *data,
 int i915_get_reset_stats_ioctl(struct drm_device *dev, void *data,
 			       struct drm_file *file);
 
+void intel_notify_mmio_flip(struct intel_ring_buffer *ring);
+
 /* overlay */
 extern struct intel_overlay_error_state *intel_overlay_capture_error_state(struct drm_device *dev);
 extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f2713b9..bc6fe4e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1096,7 +1096,7 @@  i915_gem_check_wedge(struct i915_gpu_error *error,
  * Compare seqno against outstanding lazy request. Emit a request if they are
  * equal.
  */
-static int
+__must_check int
 i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
 {
 	int ret;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 2043276..f244b23 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1155,6 +1155,9 @@  static void notify_ring(struct drm_device *dev,
 
 	trace_i915_gem_request_complete(ring);
 
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		intel_notify_mmio_flip(ring);
+
 	wake_up_all(&ring->irq_queue);
 	i915_queue_hangcheck(dev);
 }
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index d05a2af..e0d44df 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -48,6 +48,7 @@  struct i915_params i915 __read_mostly = {
 	.disable_display = 0,
 	.enable_cmd_parser = 1,
 	.disable_vtd_wa = 0,
+	.use_mmio_flip = 0,
 };
 
 module_param_named(modeset, i915.modeset, int, 0400);
@@ -156,3 +157,6 @@  MODULE_PARM_DESC(disable_vtd_wa, "Disable all VT-d workarounds (default: false)"
 module_param_named(enable_cmd_parser, i915.enable_cmd_parser, int, 0600);
 MODULE_PARM_DESC(enable_cmd_parser,
 		 "Enable command parsing (1=enabled [default], 0=disabled)");
+
+module_param_named(use_mmio_flip, i915.use_mmio_flip, bool, 0600);
+MODULE_PARM_DESC(use_mmio_flip, "use MMIO flips (default: false)");
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 19b92c1..a29552d 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -9179,6 +9179,136 @@  static int intel_gen7_queue_flip(struct drm_device *dev,
 	return 0;
 }
 
+static bool intel_use_mmio_flip(struct drm_device *dev)
+{
+	/* If module parameter is disabled, use CS flips.
+	 * Otherwise, use MMIO flips starting from Gen5.
+	 * This is not being used for older platforms, because
+	 * non-availability of flip done interrupt forces us to use
+	 * CS flips. Older platforms derive flip done using some clever
+	 * tricks involving the flip_pending status bits and vblank irqs.
+	 * So using MMIO flips there would disrupt this mechanism.
+	 */
+
+	if (i915.use_mmio_flip == 0)
+		return false;
+
+	if (INTEL_INFO(dev)->gen >= 5)
+		return true;
+	else
+		return false;
+}
+
+static void intel_do_mmio_flip(struct intel_crtc *intel_crtc)
+{
+	struct drm_i915_private *dev_priv =
+		intel_crtc->base.dev->dev_private;
+	struct intel_framebuffer *intel_fb =
+		to_intel_framebuffer(intel_crtc->base.primary->fb);
+	struct drm_i915_gem_object *obj = intel_fb->obj;
+
+	intel_mark_page_flip_active(intel_crtc);
+
+	I915_WRITE(DSPSURF(intel_crtc->plane), i915_gem_obj_ggtt_offset(obj) +
+			intel_crtc->dspaddr_offset);
+	POSTING_READ(DSPSURF(intel_crtc->plane));
+}
+
+static int intel_postpone_flip(struct drm_i915_gem_object *obj)
+{
+	int ret;
+
+	if (!obj->ring)
+		return 0;
+
+	if (i915_seqno_passed(obj->ring->get_seqno(obj->ring, true),
+				obj->last_write_seqno))
+		return 0;
+
+	ret = i915_gem_check_olr(obj->ring, obj->last_write_seqno);
+	if (ret)
+		return ret;
+
+	if (WARN_ON(!obj->ring->irq_get(obj->ring)))
+		return 0;
+
+	return 1;
+}
+
+void intel_notify_mmio_flip(struct intel_ring_buffer *ring)
+{
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	struct intel_crtc *intel_crtc;
+	unsigned long irq_flags;
+	u32 seqno;
+
+	seqno = ring->get_seqno(ring, false);
+
+	spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags);
+	for_each_intel_crtc(ring->dev, intel_crtc) {
+		struct intel_mmio_flip *mmio_flip;
+
+		mmio_flip = &intel_crtc->mmio_flip;
+
+		if (mmio_flip->seqno == 0)
+			continue;
+		if (ring->id != mmio_flip->ring_id)
+			continue;
+
+		if (i915_seqno_passed(seqno, mmio_flip->seqno)) {
+			intel_do_mmio_flip(intel_crtc);
+			mmio_flip->seqno = 0;
+			ring->irq_put(ring);
+		}
+	}
+	spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags);
+}
+
+static int intel_queue_mmio_flip(struct drm_device *dev,
+			struct drm_crtc *crtc,
+			struct drm_framebuffer *fb,
+			struct drm_i915_gem_object *obj,
+			uint32_t flags)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	unsigned long irq_flags;
+	int ret;
+
+	ret = intel_pin_and_fence_fb_obj(dev, obj, obj->ring);
+	if (ret)
+		goto err;
+
+	if (WARN_ON(intel_crtc->mmio_flip.seqno)) {
+		ret = -EBUSY;
+		goto err_unpin;
+	}
+
+	ret = intel_postpone_flip(obj);
+	if (ret < 0) {
+		goto err_unpin;
+	} else if (ret == 0) {
+		intel_do_mmio_flip(intel_crtc);
+		return 0;
+	}
+
+	spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags);
+	intel_crtc->mmio_flip.seqno = obj->last_write_seqno;
+	intel_crtc->mmio_flip.ring_id = obj->ring->id;
+	spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags);
+
+	/* Double check to catch cases where irq fired before
+	 * mmio flip data was ready
+	 */
+	intel_notify_mmio_flip(obj->ring);
+	return 0;
+
+err_unpin:
+	intel_unpin_fb_obj(obj);
+err:
+	return ret;
+}
+
 static int intel_default_queue_flip(struct drm_device *dev,
 				    struct drm_crtc *crtc,
 				    struct drm_framebuffer *fb,
@@ -11470,6 +11600,9 @@  static void intel_init_display(struct drm_device *dev)
 		break;
 	}
 
+	if (intel_use_mmio_flip(dev))
+		dev_priv->display.queue_flip = intel_queue_mmio_flip;
+
 	intel_panel_init_backlight_funcs(dev);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 287b89e..5a4f60c 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -358,6 +358,11 @@  struct intel_pipe_wm {
 	bool sprites_scaled;
 };
 
+struct intel_mmio_flip {
+	u32 seqno;
+	u32 ring_id;
+};
+
 struct intel_crtc {
 	struct drm_crtc base;
 	enum pipe pipe;
@@ -409,6 +414,7 @@  struct intel_crtc {
 	} wm;
 
 	wait_queue_head_t vbl_wait;
+	struct intel_mmio_flip mmio_flip;
 };
 
 struct intel_plane_wm_parameters {