diff mbox

[v5] drm/i915: Replaced Blitter ring based flips with MMIO flips for VLV

Message ID 1400497138-20337-1-git-send-email-sourab.gupta@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

sourab.gupta@intel.com May 19, 2014, 10:58 a.m. UTC
From: Sourab Gupta <sourab.gupta@intel.com>

Using MMIO based flips on Gen5+ for Media power well residency optimization.
The blitter ring is currently being used just for command streamer based
flip calls. For pure 3D workloads, with MMIO flips, there will be no use
of blitter ring and this will ensure the 100% residency for Media well.

Can we address the condition for race between page flip mmio vs set base mmio
in a seperate patch or do we address it in this patch only? In which case, this
patch may be dependent on
http://lists.freedesktop.org/archives/intel-gfx/2014-April/043759.html

v2: The MMIO flips now use the interrupt driven mechanism for issuing the
flips when target seqno is reached. (Incorporating Ville's idea)

v3: Rebasing on latest code. Code restructuring after incorporating
Damien's comments

v4: Addressing Ville's review comments
    -general cleanup
    -updating only base addr instead of calling update_primary_plane
    -extending patch for gen5+ platforms

v5: Addressed Ville's review comments
    -Making mmio flip vs cs flip selection based on module parameter
    -Adding check for DRIVER_MODESET feature in notify_ring before calling
     notify mmio flip.
    -Other changes mostly in function arguments

Signed-off-by: Sourab Gupta <sourab.gupta@intel.com>
Signed-off-by: Akash Goel <akash.goel@intel.com>
---
 drivers/gpu/drm/i915/i915_dma.c      |   1 +
 drivers/gpu/drm/i915/i915_drv.h      |   6 ++
 drivers/gpu/drm/i915/i915_gem.c      |   2 +-
 drivers/gpu/drm/i915/i915_irq.c      |   3 +
 drivers/gpu/drm/i915/i915_params.c   |   4 ++
 drivers/gpu/drm/i915/intel_display.c | 113 +++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_drv.h     |   6 ++
 7 files changed, 134 insertions(+), 1 deletion(-)

Comments

Ville Syrjala May 19, 2014, 11:47 a.m. UTC | #1
On Mon, May 19, 2014 at 04:28:58PM +0530, sourab.gupta@intel.com wrote:
> From: Sourab Gupta <sourab.gupta@intel.com>
> 
> Using MMIO based flips on Gen5+ for Media power well residency optimization.
> The blitter ring is currently being used just for command streamer based
> flip calls. For pure 3D workloads, with MMIO flips, there will be no use
> of blitter ring and this will ensure the 100% residency for Media well.
> 
> Can we address the condition for race between page flip mmio vs set base mmio
> in a seperate patch or do we address it in this patch only? In which case, this
> patch may be dependent on
> http://lists.freedesktop.org/archives/intel-gfx/2014-April/043759.html
> 
> v2: The MMIO flips now use the interrupt driven mechanism for issuing the
> flips when target seqno is reached. (Incorporating Ville's idea)
> 
> v3: Rebasing on latest code. Code restructuring after incorporating
> Damien's comments
> 
> v4: Addressing Ville's review comments
>     -general cleanup
>     -updating only base addr instead of calling update_primary_plane
>     -extending patch for gen5+ platforms
> 
> v5: Addressed Ville's review comments
>     -Making mmio flip vs cs flip selection based on module parameter
>     -Adding check for DRIVER_MODESET feature in notify_ring before calling
>      notify mmio flip.
>     -Other changes mostly in function arguments
> 
> Signed-off-by: Sourab Gupta <sourab.gupta@intel.com>
> Signed-off-by: Akash Goel <akash.goel@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_dma.c      |   1 +
>  drivers/gpu/drm/i915/i915_drv.h      |   6 ++
>  drivers/gpu/drm/i915/i915_gem.c      |   2 +-
>  drivers/gpu/drm/i915/i915_irq.c      |   3 +
>  drivers/gpu/drm/i915/i915_params.c   |   4 ++
>  drivers/gpu/drm/i915/intel_display.c | 113 +++++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/intel_drv.h     |   6 ++
>  7 files changed, 134 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 46f1dec..672c28f 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1570,6 +1570,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
>  	spin_lock_init(&dev_priv->backlight_lock);
>  	spin_lock_init(&dev_priv->uncore.lock);
>  	spin_lock_init(&dev_priv->mm.object_stat_lock);
> +	spin_lock_init(&dev_priv->mmio_flip_lock);
>  	dev_priv->ring_index = 0;
>  	mutex_init(&dev_priv->dpio_lock);
>  	mutex_init(&dev_priv->modeset_restore_lock);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 4006dfe..9f1d042 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1543,6 +1543,8 @@ struct drm_i915_private {
>  	struct i915_ums_state ums;
>  	/* the indicator for dispatch video commands on two BSD rings */
>  	int ring_index;
> +	/* protects the mmio flip data */
> +	spinlock_t mmio_flip_lock;
>  };
>  
>  static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
> @@ -2019,6 +2021,7 @@ struct i915_params {
>  	bool reset;
>  	bool disable_display;
>  	bool disable_vtd_wa;
> +	bool use_mmio_flip;
>  };
>  extern struct i915_params i915 __read_mostly;
>  
> @@ -2209,6 +2212,7 @@ bool i915_gem_retire_requests(struct drm_device *dev);
>  void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring);
>  int __must_check i915_gem_check_wedge(struct i915_gpu_error *error,
>  				      bool interruptible);
> +int i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno);
>  static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
>  {
>  	return unlikely(atomic_read(&error->reset_counter)
> @@ -2580,6 +2584,8 @@ int i915_reg_read_ioctl(struct drm_device *dev, void *data,
>  int i915_get_reset_stats_ioctl(struct drm_device *dev, void *data,
>  			       struct drm_file *file);
>  
> +void intel_notify_mmio_flip(struct intel_ring_buffer *ring);
> +
>  /* overlay */
>  extern struct intel_overlay_error_state *intel_overlay_capture_error_state(struct drm_device *dev);
>  extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index fa5b5ab..5b4e953 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -975,7 +975,7 @@ i915_gem_check_wedge(struct i915_gpu_error *error,
>   * Compare seqno against outstanding lazy request. Emit a request if they are
>   * equal.
>   */
> -static int
> +int
>  i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
>  {
>  	int ret;
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index b10fbde..31e98e2 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1084,6 +1084,9 @@ static void notify_ring(struct drm_device *dev,
>  
>  	trace_i915_gem_request_complete(ring);
>  
> +	if (drm_core_check_feature(dev, DRIVER_MODESET))
> +		intel_notify_mmio_flip(ring);
> +

I'm not a fan of such checks.

After a bit of extra thought I got the idea of adding a per-ring
notify_list. So we could have something like this:

struct ring_notify {
	void (*notify)(struct ring_notify *notify);
	struct list_head list;
	u32 seqno;
};

intel_crtc {
	...
	struct ring_notify mmio_flip_notify;
	...
};

I'll probably want something like this for FBC as well, so I guess
we might as well add it from the start. I think you could do this
as two patches; first one adds the ring notify list, second one
implements the mmio flip on top.

>  	wake_up_all(&ring->irq_queue);
>  	i915_queue_hangcheck(dev);
>  }
> diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
> index d05a2af..e0d44df 100644
> --- a/drivers/gpu/drm/i915/i915_params.c
> +++ b/drivers/gpu/drm/i915/i915_params.c
> @@ -48,6 +48,7 @@ struct i915_params i915 __read_mostly = {
>  	.disable_display = 0,
>  	.enable_cmd_parser = 1,
>  	.disable_vtd_wa = 0,
> +	.use_mmio_flip = 0,
>  };
>  
>  module_param_named(modeset, i915.modeset, int, 0400);
> @@ -156,3 +157,6 @@ MODULE_PARM_DESC(disable_vtd_wa, "Disable all VT-d workarounds (default: false)"
>  module_param_named(enable_cmd_parser, i915.enable_cmd_parser, int, 0600);
>  MODULE_PARM_DESC(enable_cmd_parser,
>  		 "Enable command parsing (1=enabled [default], 0=disabled)");
> +
> +module_param_named(use_mmio_flip, i915.use_mmio_flip, bool, 0600);
> +MODULE_PARM_DESC(use_mmio_flip, "use MMIO flips (default: false)");

If we want to enable this by default on VLV, then this should be an int
where -1 would mean per-chip default (ie. enable on VLV, disable on
rest).

> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 0f8f9bc..6003068 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -9037,6 +9037,108 @@ err:
>  	return ret;
>  }
>  
> +static void intel_do_mmio_flip(struct intel_crtc *intel_crtc)
> +{
> +	struct drm_i915_private *dev_priv =
> +		intel_crtc->base.dev->dev_private;
> +	struct intel_framebuffer *intel_fb =
> +		to_intel_framebuffer(intel_crtc->base.primary->fb);
> +	struct drm_i915_gem_object *obj = intel_fb->obj;
> +
> +	intel_mark_page_flip_active(intel_crtc);
> +
> +	I915_WRITE(DSPSURF(intel_crtc->plane), i915_gem_obj_ggtt_offset(obj) +
> +			intel_crtc->dspaddr_offset);
> +	POSTING_READ(DSPSURF(intel_crtc->plane));
> +}
> +
> +static int intel_postpone_flip(struct drm_i915_gem_object *obj)
> +{
> +	if (!obj->ring)
> +		return 0;
> +
> +	if (i915_seqno_passed(obj->ring->get_seqno(obj->ring, true),
> +				obj->last_write_seqno))
> +		return 0;
> +
> +	if (i915_gem_check_olr(obj->ring, obj->last_write_seqno))
> +		return -1;

This should pass the actual error code to the caller.

> +
> +	if (WARN_ON(!obj->ring->irq_get(obj->ring)))
> +		return 0;
> +
> +	return 1;
> +}
> +
> +void intel_notify_mmio_flip(struct intel_ring_buffer *ring)
> +{
> +	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> +	struct intel_crtc *intel_crtc;
> +	unsigned long irq_flags;
> +	u32 seqno;
> +
> +	seqno = ring->get_seqno(ring, false);
> +
> +	spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags);
> +	for_each_intel_crtc(ring->dev, intel_crtc) {
> +		struct intel_mmio_flip *mmio_flip_data;
> +
> +		mmio_flip_data = &intel_crtc->mmio_flip_data;
> +
> +		if (mmio_flip_data->seqno == 0)
> +			continue;
> +		if (ring->id != mmio_flip_data->ring_id)
> +			continue;
> +
> +		if (i915_seqno_passed(seqno, mmio_flip_data->seqno)) {
> +			intel_do_mmio_flip(intel_crtc);
> +			mmio_flip_data->seqno = 0;
> +			ring->irq_put(ring);
> +		}
> +	}
> +	spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags);
> +}
> +
> +static int intel_queue_mmio_flip(struct drm_device *dev,
> +			struct drm_crtc *crtc,
> +			struct drm_framebuffer *fb,
> +			struct drm_i915_gem_object *obj,
> +			uint32_t flags)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> +	unsigned long irq_flags;
> +	int ret;
> +
> +	ret = intel_pin_and_fence_fb_obj(dev, obj, obj->ring);
> +	if (ret)
> +		goto err;
> +
> +	ret = intel_postpone_flip(obj);
> +	if (ret < 0) {
> +		goto err_unpin;
> +	} else if (ret == 0) {
> +		intel_do_mmio_flip(intel_crtc);
> +		return 0;
> +	}
> +
> +	spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags);
> +	intel_crtc->mmio_flip_data.seqno = obj->last_write_seqno;
> +	intel_crtc->mmio_flip_data.ring_id = obj->ring->id;
> +	spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags);
> +
> +	/* Double check to catch cases where irq fired before
> +	 * mmio flip data was ready
> +	 */
> +	intel_notify_mmio_flip(obj->ring);
> +	return 0;
> +
> +err_unpin:
> +	intel_unpin_fb_obj(obj);
> +err:
> +	return ret;
> +}
> +
>  static int intel_gen7_queue_flip(struct drm_device *dev,
>  				 struct drm_crtc *crtc,
>  				 struct drm_framebuffer *fb,
> @@ -11377,6 +11479,17 @@ static void intel_init_display(struct drm_device *dev)
>  		break;
>  	}
>  
> +	/* If module parameter is enabled, Use MMIO based flips starting
> +	 * from Gen5, for Media power well residency optimization. This is
> +	 * not currently being used for older platforms because of
> +	 * non-availability of flip done interrupt.
> +	 * The other alternative of having Render ring based flip calls is
> +	 * not being used, as the performance(FPS) of certain 3D Apps gets
> +	 * severly affected.

The comments here are rather VLV specific. They would be more
appropriate here if you actually enabled this by default on VLV.
Now they just seem a bit misplaced.

So I suggest you add another patch on top that enables the feature
by default on VLV and then add the comments there.

> +	 */
> +	if ((i915.use_mmio_flip != 0) && (INTEL_INFO(dev)->gen >= 5))

This has too many parens for my taste.

Also to reduce clutter it might be nice to move all these checks into
a small function eg. 'bool use_mmio_flip()'. Especially if you add the
"default to mmio flip on VLV" patch on top.

> +		dev_priv->display.queue_flip = intel_queue_mmio_flip;
> +
>  	intel_panel_init_backlight_funcs(dev);
>  }
>  
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 32a74e1..08d65a4 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -351,6 +351,11 @@ struct intel_pipe_wm {
>  	bool sprites_scaled;
>  };
>  
> +struct intel_mmio_flip {
> +	u32 seqno;
> +	u32 ring_id;
> +};
> +
>  struct intel_crtc {
>  	struct drm_crtc base;
>  	enum pipe pipe;
> @@ -403,6 +408,7 @@ struct intel_crtc {
>  	} wm;
>  
>  	wait_queue_head_t vbl_wait;
> +	struct intel_mmio_flip mmio_flip_data;
>  };
>  
>  struct intel_plane_wm_parameters {
> -- 
> 1.8.5.1
Daniel Vetter May 19, 2014, 12:29 p.m. UTC | #2
On Mon, May 19, 2014 at 02:47:20PM +0300, Ville Syrjälä wrote:
> On Mon, May 19, 2014 at 04:28:58PM +0530, sourab.gupta@intel.com wrote:
> > From: Sourab Gupta <sourab.gupta@intel.com>
> > 
> > Using MMIO based flips on Gen5+ for Media power well residency optimization.
> > The blitter ring is currently being used just for command streamer based
> > flip calls. For pure 3D workloads, with MMIO flips, there will be no use
> > of blitter ring and this will ensure the 100% residency for Media well.
> > 
> > Can we address the condition for race between page flip mmio vs set base mmio
> > in a seperate patch or do we address it in this patch only? In which case, this
> > patch may be dependent on
> > http://lists.freedesktop.org/archives/intel-gfx/2014-April/043759.html
> > 
> > v2: The MMIO flips now use the interrupt driven mechanism for issuing the
> > flips when target seqno is reached. (Incorporating Ville's idea)
> > 
> > v3: Rebasing on latest code. Code restructuring after incorporating
> > Damien's comments
> > 
> > v4: Addressing Ville's review comments
> >     -general cleanup
> >     -updating only base addr instead of calling update_primary_plane
> >     -extending patch for gen5+ platforms
> > 
> > v5: Addressed Ville's review comments
> >     -Making mmio flip vs cs flip selection based on module parameter
> >     -Adding check for DRIVER_MODESET feature in notify_ring before calling
> >      notify mmio flip.
> >     -Other changes mostly in function arguments
> > 
> > Signed-off-by: Sourab Gupta <sourab.gupta@intel.com>
> > Signed-off-by: Akash Goel <akash.goel@intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_dma.c      |   1 +
> >  drivers/gpu/drm/i915/i915_drv.h      |   6 ++
> >  drivers/gpu/drm/i915/i915_gem.c      |   2 +-
> >  drivers/gpu/drm/i915/i915_irq.c      |   3 +
> >  drivers/gpu/drm/i915/i915_params.c   |   4 ++
> >  drivers/gpu/drm/i915/intel_display.c | 113 +++++++++++++++++++++++++++++++++++
> >  drivers/gpu/drm/i915/intel_drv.h     |   6 ++
> >  7 files changed, 134 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> > index 46f1dec..672c28f 100644
> > --- a/drivers/gpu/drm/i915/i915_dma.c
> > +++ b/drivers/gpu/drm/i915/i915_dma.c
> > @@ -1570,6 +1570,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
> >  	spin_lock_init(&dev_priv->backlight_lock);
> >  	spin_lock_init(&dev_priv->uncore.lock);
> >  	spin_lock_init(&dev_priv->mm.object_stat_lock);
> > +	spin_lock_init(&dev_priv->mmio_flip_lock);
> >  	dev_priv->ring_index = 0;
> >  	mutex_init(&dev_priv->dpio_lock);
> >  	mutex_init(&dev_priv->modeset_restore_lock);
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 4006dfe..9f1d042 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -1543,6 +1543,8 @@ struct drm_i915_private {
> >  	struct i915_ums_state ums;
> >  	/* the indicator for dispatch video commands on two BSD rings */
> >  	int ring_index;
> > +	/* protects the mmio flip data */
> > +	spinlock_t mmio_flip_lock;
> >  };
> >  
> >  static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
> > @@ -2019,6 +2021,7 @@ struct i915_params {
> >  	bool reset;
> >  	bool disable_display;
> >  	bool disable_vtd_wa;
> > +	bool use_mmio_flip;
> >  };
> >  extern struct i915_params i915 __read_mostly;
> >  
> > @@ -2209,6 +2212,7 @@ bool i915_gem_retire_requests(struct drm_device *dev);
> >  void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring);
> >  int __must_check i915_gem_check_wedge(struct i915_gpu_error *error,
> >  				      bool interruptible);
> > +int i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno);
> >  static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
> >  {
> >  	return unlikely(atomic_read(&error->reset_counter)
> > @@ -2580,6 +2584,8 @@ int i915_reg_read_ioctl(struct drm_device *dev, void *data,
> >  int i915_get_reset_stats_ioctl(struct drm_device *dev, void *data,
> >  			       struct drm_file *file);
> >  
> > +void intel_notify_mmio_flip(struct intel_ring_buffer *ring);
> > +
> >  /* overlay */
> >  extern struct intel_overlay_error_state *intel_overlay_capture_error_state(struct drm_device *dev);
> >  extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e,
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index fa5b5ab..5b4e953 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -975,7 +975,7 @@ i915_gem_check_wedge(struct i915_gpu_error *error,
> >   * Compare seqno against outstanding lazy request. Emit a request if they are
> >   * equal.
> >   */
> > -static int
> > +int
> >  i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
> >  {
> >  	int ret;
> > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> > index b10fbde..31e98e2 100644
> > --- a/drivers/gpu/drm/i915/i915_irq.c
> > +++ b/drivers/gpu/drm/i915/i915_irq.c
> > @@ -1084,6 +1084,9 @@ static void notify_ring(struct drm_device *dev,
> >  
> >  	trace_i915_gem_request_complete(ring);
> >  
> > +	if (drm_core_check_feature(dev, DRIVER_MODESET))
> > +		intel_notify_mmio_flip(ring);
> > +
> 
> I'm not a fan of such checks.
> 
> After a bit of extra thought I got the idea of adding a per-ring
> notify_list. So we could have something like this:
> 
> struct ring_notify {
> 	void (*notify)(struct ring_notify *notify);
> 	struct list_head list;
> 	u32 seqno;
> };
> 
> intel_crtc {
> 	...
> 	struct ring_notify mmio_flip_notify;
> 	...
> };
> 
> I'll probably want something like this for FBC as well, so I guess
> we might as well add it from the start. I think you could do this
> as two patches; first one adds the ring notify list, second one
> implements the mmio flip on top.

This fells like massive overkill, at least for now. So imo the check is
ok. What I do wonder about is whether we really want to run this in
interrupt context. With atomic sprite updates we need to do the vblank
avoidance, and that really shouldn't happen from interrupt context.

So why can't we just latch a work item which uses all the established
seqno waiting stuff and so avoids all these issues (and a pile of
duplicated code)?
-Daniel
Ville Syrjala May 19, 2014, 1:06 p.m. UTC | #3
On Mon, May 19, 2014 at 02:29:09PM +0200, Daniel Vetter wrote:
> On Mon, May 19, 2014 at 02:47:20PM +0300, Ville Syrjälä wrote:
> > On Mon, May 19, 2014 at 04:28:58PM +0530, sourab.gupta@intel.com wrote:
> > > From: Sourab Gupta <sourab.gupta@intel.com>
> > > 
> > > Using MMIO based flips on Gen5+ for Media power well residency optimization.
> > > The blitter ring is currently being used just for command streamer based
> > > flip calls. For pure 3D workloads, with MMIO flips, there will be no use
> > > of blitter ring and this will ensure the 100% residency for Media well.
> > > 
> > > Can we address the condition for race between page flip mmio vs set base mmio
> > > in a seperate patch or do we address it in this patch only? In which case, this
> > > patch may be dependent on
> > > http://lists.freedesktop.org/archives/intel-gfx/2014-April/043759.html
> > > 
> > > v2: The MMIO flips now use the interrupt driven mechanism for issuing the
> > > flips when target seqno is reached. (Incorporating Ville's idea)
> > > 
> > > v3: Rebasing on latest code. Code restructuring after incorporating
> > > Damien's comments
> > > 
> > > v4: Addressing Ville's review comments
> > >     -general cleanup
> > >     -updating only base addr instead of calling update_primary_plane
> > >     -extending patch for gen5+ platforms
> > > 
> > > v5: Addressed Ville's review comments
> > >     -Making mmio flip vs cs flip selection based on module parameter
> > >     -Adding check for DRIVER_MODESET feature in notify_ring before calling
> > >      notify mmio flip.
> > >     -Other changes mostly in function arguments
> > > 
> > > Signed-off-by: Sourab Gupta <sourab.gupta@intel.com>
> > > Signed-off-by: Akash Goel <akash.goel@intel.com>
> > > ---
> > >  drivers/gpu/drm/i915/i915_dma.c      |   1 +
> > >  drivers/gpu/drm/i915/i915_drv.h      |   6 ++
> > >  drivers/gpu/drm/i915/i915_gem.c      |   2 +-
> > >  drivers/gpu/drm/i915/i915_irq.c      |   3 +
> > >  drivers/gpu/drm/i915/i915_params.c   |   4 ++
> > >  drivers/gpu/drm/i915/intel_display.c | 113 +++++++++++++++++++++++++++++++++++
> > >  drivers/gpu/drm/i915/intel_drv.h     |   6 ++
> > >  7 files changed, 134 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> > > index 46f1dec..672c28f 100644
> > > --- a/drivers/gpu/drm/i915/i915_dma.c
> > > +++ b/drivers/gpu/drm/i915/i915_dma.c
> > > @@ -1570,6 +1570,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
> > >  	spin_lock_init(&dev_priv->backlight_lock);
> > >  	spin_lock_init(&dev_priv->uncore.lock);
> > >  	spin_lock_init(&dev_priv->mm.object_stat_lock);
> > > +	spin_lock_init(&dev_priv->mmio_flip_lock);
> > >  	dev_priv->ring_index = 0;
> > >  	mutex_init(&dev_priv->dpio_lock);
> > >  	mutex_init(&dev_priv->modeset_restore_lock);
> > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > > index 4006dfe..9f1d042 100644
> > > --- a/drivers/gpu/drm/i915/i915_drv.h
> > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > @@ -1543,6 +1543,8 @@ struct drm_i915_private {
> > >  	struct i915_ums_state ums;
> > >  	/* the indicator for dispatch video commands on two BSD rings */
> > >  	int ring_index;
> > > +	/* protects the mmio flip data */
> > > +	spinlock_t mmio_flip_lock;
> > >  };
> > >  
> > >  static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
> > > @@ -2019,6 +2021,7 @@ struct i915_params {
> > >  	bool reset;
> > >  	bool disable_display;
> > >  	bool disable_vtd_wa;
> > > +	bool use_mmio_flip;
> > >  };
> > >  extern struct i915_params i915 __read_mostly;
> > >  
> > > @@ -2209,6 +2212,7 @@ bool i915_gem_retire_requests(struct drm_device *dev);
> > >  void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring);
> > >  int __must_check i915_gem_check_wedge(struct i915_gpu_error *error,
> > >  				      bool interruptible);
> > > +int i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno);
> > >  static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
> > >  {
> > >  	return unlikely(atomic_read(&error->reset_counter)
> > > @@ -2580,6 +2584,8 @@ int i915_reg_read_ioctl(struct drm_device *dev, void *data,
> > >  int i915_get_reset_stats_ioctl(struct drm_device *dev, void *data,
> > >  			       struct drm_file *file);
> > >  
> > > +void intel_notify_mmio_flip(struct intel_ring_buffer *ring);
> > > +
> > >  /* overlay */
> > >  extern struct intel_overlay_error_state *intel_overlay_capture_error_state(struct drm_device *dev);
> > >  extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e,
> > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > > index fa5b5ab..5b4e953 100644
> > > --- a/drivers/gpu/drm/i915/i915_gem.c
> > > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > > @@ -975,7 +975,7 @@ i915_gem_check_wedge(struct i915_gpu_error *error,
> > >   * Compare seqno against outstanding lazy request. Emit a request if they are
> > >   * equal.
> > >   */
> > > -static int
> > > +int
> > >  i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
> > >  {
> > >  	int ret;
> > > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> > > index b10fbde..31e98e2 100644
> > > --- a/drivers/gpu/drm/i915/i915_irq.c
> > > +++ b/drivers/gpu/drm/i915/i915_irq.c
> > > @@ -1084,6 +1084,9 @@ static void notify_ring(struct drm_device *dev,
> > >  
> > >  	trace_i915_gem_request_complete(ring);
> > >  
> > > +	if (drm_core_check_feature(dev, DRIVER_MODESET))
> > > +		intel_notify_mmio_flip(ring);
> > > +
> > 
> > I'm not a fan of such checks.
> > 
> > After a bit of extra thought I got the idea of adding a per-ring
> > notify_list. So we could have something like this:
> > 
> > struct ring_notify {
> > 	void (*notify)(struct ring_notify *notify);
> > 	struct list_head list;
> > 	u32 seqno;
> > };
> > 
> > intel_crtc {
> > 	...
> > 	struct ring_notify mmio_flip_notify;
> > 	...
> > };
> > 
> > I'll probably want something like this for FBC as well, so I guess
> > we might as well add it from the start. I think you could do this
> > as two patches; first one adds the ring notify list, second one
> > implements the mmio flip on top.
> 
> This fells like massive overkill, at least for now. So imo the check is
> ok.

OK. I guess we can then start with the simple check then.

> What I do wonder about is whether we really want to run this in
> interrupt context. With atomic sprite updates we need to do the vblank
> avoidance, and that really shouldn't happen from interrupt context.

That should be a fairly simple change to do at any time. But perhaps we 
want to do it from the start.

> 
> So why can't we just latch a work item which uses all the established
> seqno waiting stuff and so avoids all these issues

I hate blocking and waiting for stuff. It usually means all kinds of lock
dropping tricks, and extra hurdles if you want to cancel the operation.
My brain just can't deal with it. I much prefer a nice and simple event
based mechanism.

> (and a pile of  duplicated code)?

Which code is duplicated?
Daniel Vetter May 19, 2014, 1:41 p.m. UTC | #4
On Mon, May 19, 2014 at 3:06 PM, Ville Syrjälä
<ville.syrjala@linux.intel.com> wrote:
>> So why can't we just latch a work item which uses all the established
>> seqno waiting stuff and so avoids all these issues
>
> I hate blocking and waiting for stuff. It usually means all kinds of lock
> dropping tricks, and extra hurdles if you want to cancel the operation.
> My brain just can't deal with it. I much prefer a nice and simple event
> based mechanism.

Hm, I'm the other way round - I prefer stack based state machines over
continuation passing ...

>> (and a pile of  duplicated code)?
>
> Which code is duplicated?

Handling gpu hangs essentially. If we just latch a work item we can
reuse all the wait and notify logic we already have. And in the past
we've had divergent changes in this area.
-Daniel
sourab.gupta@intel.com May 20, 2014, 10:49 a.m. UTC | #5
From: Sourab Gupta <sourab.gupta@intel.com>

Using MMIO based flips for Media power well residency optimization.
The blitter ring is currently being used just for command streamer based
flip calls. For pure 3D workloads, with MMIO flips, there will be no use
of blitter ring and this will ensure the 100% residency for Media well.

Sourab Gupta (3):
  drm/i915: Replaced Blitter ring based flips with MMIO flips
  drm/i915: Default to mmio flips on VLV
  drm/i915: Fix mmio page flip vs mmio set base race

 drivers/gpu/drm/i915/i915_dma.c      |   1 +
 drivers/gpu/drm/i915/i915_drv.h      |   6 ++
 drivers/gpu/drm/i915/i915_gem.c      |   2 +-
 drivers/gpu/drm/i915/i915_irq.c      |   3 +
 drivers/gpu/drm/i915/i915_params.c   |   5 ++
 drivers/gpu/drm/i915/intel_display.c | 135 +++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_drv.h     |   6 ++
 7 files changed, 157 insertions(+), 1 deletion(-)
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 46f1dec..672c28f 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1570,6 +1570,7 @@  int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	spin_lock_init(&dev_priv->backlight_lock);
 	spin_lock_init(&dev_priv->uncore.lock);
 	spin_lock_init(&dev_priv->mm.object_stat_lock);
+	spin_lock_init(&dev_priv->mmio_flip_lock);
 	dev_priv->ring_index = 0;
 	mutex_init(&dev_priv->dpio_lock);
 	mutex_init(&dev_priv->modeset_restore_lock);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4006dfe..9f1d042 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1543,6 +1543,8 @@  struct drm_i915_private {
 	struct i915_ums_state ums;
 	/* the indicator for dispatch video commands on two BSD rings */
 	int ring_index;
+	/* protects the mmio flip data */
+	spinlock_t mmio_flip_lock;
 };
 
 static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
@@ -2019,6 +2021,7 @@  struct i915_params {
 	bool reset;
 	bool disable_display;
 	bool disable_vtd_wa;
+	bool use_mmio_flip;
 };
 extern struct i915_params i915 __read_mostly;
 
@@ -2209,6 +2212,7 @@  bool i915_gem_retire_requests(struct drm_device *dev);
 void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring);
 int __must_check i915_gem_check_wedge(struct i915_gpu_error *error,
 				      bool interruptible);
+int i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno);
 static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
 {
 	return unlikely(atomic_read(&error->reset_counter)
@@ -2580,6 +2584,8 @@  int i915_reg_read_ioctl(struct drm_device *dev, void *data,
 int i915_get_reset_stats_ioctl(struct drm_device *dev, void *data,
 			       struct drm_file *file);
 
+void intel_notify_mmio_flip(struct intel_ring_buffer *ring);
+
 /* overlay */
 extern struct intel_overlay_error_state *intel_overlay_capture_error_state(struct drm_device *dev);
 extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fa5b5ab..5b4e953 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -975,7 +975,7 @@  i915_gem_check_wedge(struct i915_gpu_error *error,
  * Compare seqno against outstanding lazy request. Emit a request if they are
  * equal.
  */
-static int
+int
 i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
 {
 	int ret;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index b10fbde..31e98e2 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1084,6 +1084,9 @@  static void notify_ring(struct drm_device *dev,
 
 	trace_i915_gem_request_complete(ring);
 
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		intel_notify_mmio_flip(ring);
+
 	wake_up_all(&ring->irq_queue);
 	i915_queue_hangcheck(dev);
 }
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index d05a2af..e0d44df 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -48,6 +48,7 @@  struct i915_params i915 __read_mostly = {
 	.disable_display = 0,
 	.enable_cmd_parser = 1,
 	.disable_vtd_wa = 0,
+	.use_mmio_flip = 0,
 };
 
 module_param_named(modeset, i915.modeset, int, 0400);
@@ -156,3 +157,6 @@  MODULE_PARM_DESC(disable_vtd_wa, "Disable all VT-d workarounds (default: false)"
 module_param_named(enable_cmd_parser, i915.enable_cmd_parser, int, 0600);
 MODULE_PARM_DESC(enable_cmd_parser,
 		 "Enable command parsing (1=enabled [default], 0=disabled)");
+
+module_param_named(use_mmio_flip, i915.use_mmio_flip, bool, 0600);
+MODULE_PARM_DESC(use_mmio_flip, "use MMIO flips (default: false)");
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 0f8f9bc..6003068 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -9037,6 +9037,108 @@  err:
 	return ret;
 }
 
+static void intel_do_mmio_flip(struct intel_crtc *intel_crtc)
+{
+	struct drm_i915_private *dev_priv =
+		intel_crtc->base.dev->dev_private;
+	struct intel_framebuffer *intel_fb =
+		to_intel_framebuffer(intel_crtc->base.primary->fb);
+	struct drm_i915_gem_object *obj = intel_fb->obj;
+
+	intel_mark_page_flip_active(intel_crtc);
+
+	I915_WRITE(DSPSURF(intel_crtc->plane), i915_gem_obj_ggtt_offset(obj) +
+			intel_crtc->dspaddr_offset);
+	POSTING_READ(DSPSURF(intel_crtc->plane));
+}
+
+static int intel_postpone_flip(struct drm_i915_gem_object *obj)
+{
+	if (!obj->ring)
+		return 0;
+
+	if (i915_seqno_passed(obj->ring->get_seqno(obj->ring, true),
+				obj->last_write_seqno))
+		return 0;
+
+	if (i915_gem_check_olr(obj->ring, obj->last_write_seqno))
+		return -1;
+
+	if (WARN_ON(!obj->ring->irq_get(obj->ring)))
+		return 0;
+
+	return 1;
+}
+
+void intel_notify_mmio_flip(struct intel_ring_buffer *ring)
+{
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	struct intel_crtc *intel_crtc;
+	unsigned long irq_flags;
+	u32 seqno;
+
+	seqno = ring->get_seqno(ring, false);
+
+	spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags);
+	for_each_intel_crtc(ring->dev, intel_crtc) {
+		struct intel_mmio_flip *mmio_flip_data;
+
+		mmio_flip_data = &intel_crtc->mmio_flip_data;
+
+		if (mmio_flip_data->seqno == 0)
+			continue;
+		if (ring->id != mmio_flip_data->ring_id)
+			continue;
+
+		if (i915_seqno_passed(seqno, mmio_flip_data->seqno)) {
+			intel_do_mmio_flip(intel_crtc);
+			mmio_flip_data->seqno = 0;
+			ring->irq_put(ring);
+		}
+	}
+	spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags);
+}
+
+static int intel_queue_mmio_flip(struct drm_device *dev,
+			struct drm_crtc *crtc,
+			struct drm_framebuffer *fb,
+			struct drm_i915_gem_object *obj,
+			uint32_t flags)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	unsigned long irq_flags;
+	int ret;
+
+	ret = intel_pin_and_fence_fb_obj(dev, obj, obj->ring);
+	if (ret)
+		goto err;
+
+	ret = intel_postpone_flip(obj);
+	if (ret < 0) {
+		goto err_unpin;
+	} else if (ret == 0) {
+		intel_do_mmio_flip(intel_crtc);
+		return 0;
+	}
+
+	spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags);
+	intel_crtc->mmio_flip_data.seqno = obj->last_write_seqno;
+	intel_crtc->mmio_flip_data.ring_id = obj->ring->id;
+	spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags);
+
+	/* Double check to catch cases where irq fired before
+	 * mmio flip data was ready
+	 */
+	intel_notify_mmio_flip(obj->ring);
+	return 0;
+
+err_unpin:
+	intel_unpin_fb_obj(obj);
+err:
+	return ret;
+}
+
 static int intel_gen7_queue_flip(struct drm_device *dev,
 				 struct drm_crtc *crtc,
 				 struct drm_framebuffer *fb,
@@ -11377,6 +11479,17 @@  static void intel_init_display(struct drm_device *dev)
 		break;
 	}
 
+	/* If module parameter is enabled, Use MMIO based flips starting
+	 * from Gen5, for Media power well residency optimization. This is
+	 * not currently being used for older platforms because of
+	 * non-availability of flip done interrupt.
+	 * The other alternative of having Render ring based flip calls is
+	 * not being used, as the performance(FPS) of certain 3D Apps gets
+	 * severly affected.
+	 */
+	if ((i915.use_mmio_flip != 0) && (INTEL_INFO(dev)->gen >= 5))
+		dev_priv->display.queue_flip = intel_queue_mmio_flip;
+
 	intel_panel_init_backlight_funcs(dev);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 32a74e1..08d65a4 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -351,6 +351,11 @@  struct intel_pipe_wm {
 	bool sprites_scaled;
 };
 
+struct intel_mmio_flip {
+	u32 seqno;
+	u32 ring_id;
+};
+
 struct intel_crtc {
 	struct drm_crtc base;
 	enum pipe pipe;
@@ -403,6 +408,7 @@  struct intel_crtc {
 	} wm;
 
 	wait_queue_head_t vbl_wait;
+	struct intel_mmio_flip mmio_flip_data;
 };
 
 struct intel_plane_wm_parameters {