diff mbox

[v3] drm/i915: Replaced Blitter ring based flips with MMIO flips for VLV

Message ID 1395565265-20714-1-git-send-email-sourab.gupta@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

sourab.gupta@intel.com March 23, 2014, 9:01 a.m. UTC
From: Sourab Gupta <sourab.gupta@intel.com>

Using MMIO based flips on VLV for Media power well residency optimization.
The blitter ring is currently used only for command streamer based
flip calls. For pure 3D workloads, with MMIO flips there will be no use
of the blitter ring, which ensures 100% residency for the Media well.

v2: The MMIO flips now use the interrupt driven mechanism for issuing the
flips when target seqno is reached. (Incorporating Ville's idea)

v3: Rebasing on latest code. Code restructuring after incorporating
Damien's comments

Signed-off-by: Sourab Gupta <sourab.gupta@intel.com>
Signed-off-by: Akash Goel <akash.goel@intel.com>
---
 drivers/gpu/drm/i915/i915_dma.c      |   1 +
 drivers/gpu/drm/i915/i915_drv.h      |   7 ++
 drivers/gpu/drm/i915/i915_irq.c      |   2 +
 drivers/gpu/drm/i915/intel_display.c | 124 +++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_drv.h     |   7 ++
 5 files changed, 141 insertions(+)

Comments

sourab.gupta@intel.com March 26, 2014, 7:49 a.m. UTC | #1
Hi Ville/Damien,
Can you please review the below patch(v3) for mmio flips.
Thanks,
Sourab

sourab.gupta@intel.com April 3, 2014, 8:40 a.m. UTC | #2
On Wed, 2014-03-26 at 13:20 +0530, sourab gupta wrote:
> Hi Ville/Damien,
> Can you please review the below patch(v3) for mmio flips.
> Thanks,
> Sourab

Hi Ville,
A gentle reminder to review the patch.

Thanks,
Sourab
sourab.gupta@intel.com April 7, 2014, 11:19 a.m. UTC | #3
On Thu, 2014-04-03 at 14:11 +0530, sourab gupta wrote:
> On Wed, 2014-03-26 at 13:20 +0530, sourab gupta wrote:
> > Hi Ville/Damien,
> > Can you please review the below patch(v3) for mmio flips.
> > Thanks,
> > Sourab
> 
> Hi Ville,
> A gentle reminder to review the patch.
> 
> Thanks,
> Sourab
> 

Gentle reminder to review the patch. (Been more than 2 weeks since last
version was sent.)

Thanks,
Sourab
Ville Syrjala May 9, 2014, 11:59 a.m. UTC | #4
On Sun, Mar 23, 2014 at 02:31:05PM +0530, sourab.gupta@intel.com wrote:
> From: Sourab Gupta <sourab.gupta@intel.com>
> 
> Using MMIO based flips on VLV for Media power well residency optimization.
> The blitter ring is currently used only for command streamer based
> flip calls. For pure 3D workloads, with MMIO flips there will be no use
> of the blitter ring, which ensures 100% residency for the Media well.

Sorry for dragging my feet with reviewing this. I'm hoping this is the
latest version...

> 
> v2: The MMIO flips now use the interrupt driven mechanism for issuing the
> flips when target seqno is reached. (Incorporating Ville's idea)
> 
> v3: Rebasing on latest code. Code restructuring after incorporating
> Damien's comments
> 
> Signed-off-by: Sourab Gupta <sourab.gupta@intel.com>
> Signed-off-by: Akash Goel <akash.goel@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_dma.c      |   1 +
>  drivers/gpu/drm/i915/i915_drv.h      |   7 ++
>  drivers/gpu/drm/i915/i915_irq.c      |   2 +
>  drivers/gpu/drm/i915/intel_display.c | 124 +++++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/intel_drv.h     |   7 ++
>  5 files changed, 141 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 4e0a26a..bca3c5a 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1569,6 +1569,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
>  	spin_lock_init(&dev_priv->backlight_lock);
>  	spin_lock_init(&dev_priv->uncore.lock);
>  	spin_lock_init(&dev_priv->mm.object_stat_lock);
> +	spin_lock_init(&dev_priv->mmio_flip_lock);
>  	mutex_init(&dev_priv->dpio_lock);
>  	mutex_init(&dev_priv->modeset_restore_lock);
>  
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 3f62be0..678d31d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1621,6 +1621,10 @@ typedef struct drm_i915_private {
>  	struct i915_dri1_state dri1;
>  	/* Old ums support infrastructure, same warning applies. */
>  	struct i915_ums_state ums;
> +
> +	/* protects the mmio flip data */
> +	spinlock_t mmio_flip_lock;
> +
>  } drm_i915_private_t;
>  
>  static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
> @@ -2657,6 +2661,9 @@ int i915_reg_read_ioctl(struct drm_device *dev, void *data,
>  int i915_get_reset_stats_ioctl(struct drm_device *dev, void *data,
>  			       struct drm_file *file);
>  
> +void intel_notify_mmio_flip(struct drm_device *dev,
> +			struct intel_ring_buffer *ring);
> +
>  /* overlay */
>  extern struct intel_overlay_error_state *intel_overlay_capture_error_state(struct drm_device *dev);
>  extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e,
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 4b4aeb3..ad26abe 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1080,6 +1080,8 @@ static void notify_ring(struct drm_device *dev,
>  
>  	trace_i915_gem_request_complete(ring);
>  
> +	intel_notify_mmio_flip(dev, ring);
> +
>  	wake_up_all(&ring->irq_queue);
>  	i915_queue_hangcheck(dev);
>  }
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 7e4ea8d..19004bf 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -8782,6 +8782,125 @@ err:
>  	return ret;
>  }
>  
> +static int intel_do_mmio_flip(struct drm_device *dev,
> +			struct drm_crtc *crtc)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct intel_crtc *intel_crtc;
> +
> +	intel_crtc = to_intel_crtc(crtc);

nit: could be part of intel_crtc declaration

> +
> +	intel_mark_page_flip_active(intel_crtc);
> +	return dev_priv->display.update_primary_plane(crtc, crtc->fb, 0, 0);

Needs to pass crtc->{x,y} instead of 0,0.

I was a bit worried crtc->fb might be changed already at this point, but
after thinking a bit it should be fine since the presense of unpin_work
will keep intel_crtc_page_flip() from frobbing with it and we always
call intel_crtc_wait_for_pending_flips() before set_base.

Just need to update to use crtc->primary->fb now.

I'm thinking we also have a small race here with a flip done interrupt
from a previous set_base. Probably we need to sort it out using the 
SURFLIVE and/or flip counter like I did for the mmio vs. cs flip
race. But I need to think on this a bit more. Perhaps you want to also
look at those patches a bit.
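
Something along these lines is what I have in mind (untested sketch,
assuming the crtc->primary->fb and crtc->{x,y} fields from the current
baseline):

static int intel_do_mmio_flip(struct drm_device *dev,
			      struct drm_crtc *crtc)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);

	intel_mark_page_flip_active(intel_crtc);

	/* flip to the current fb with the real pan offsets, not 0,0 */
	return dev_priv->display.update_primary_plane(crtc, crtc->primary->fb,
						      crtc->x, crtc->y);
}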

> +}
> +
> +static bool intel_postpone_flip(struct drm_i915_gem_object *obj)
> +{
> +	int ret;

nit: needs an empty line between declarations and code.

> +	if (!obj->ring)
> +		return false;
> +
> +	if (i915_seqno_passed(obj->ring->get_seqno(obj->ring, false),
> +			      obj->last_write_seqno))

Maybe this should just do a very light check using lazy_coherency?
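
I.e. just flip the lazy_coherency argument (untested):

	if (i915_seqno_passed(obj->ring->get_seqno(obj->ring, true),
			      obj->last_write_seqno))
		return false;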

> +		return false;
> +
> +	if (obj->last_write_seqno == obj->ring->outstanding_lazy_seqno) {
> +		ret = i915_add_request(obj->ring, NULL);
> +		if (WARN_ON(ret))
> +			return false;
> +	}

Looks like i915_gem_check_olr(), so maybe make it non-static and use it
here.
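
I.e. export it and do something like this (sketch, assuming
i915_gem_check_olr() keeps its current (ring, seqno) signature):

	ret = i915_gem_check_olr(obj->ring, obj->last_write_seqno);
	if (WARN_ON(ret))
		return false;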

> +
> +	if (WARN_ON(!obj->ring->irq_get(obj->ring)))
> +		return false;

Maybe do this before checking outstanding_lazy_seqno. Hmm, or
perhaps not. It won't actually eliminate the race entirely so you
still have to do the manual check later. I guess having it in this
order keeps the error paths simpler.

> +
> +	return true;
> +}
> +
> +void intel_notify_mmio_flip(struct drm_device *dev,
> +			struct intel_ring_buffer *ring)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct drm_crtc *crtc;
> +	struct intel_crtc *intel_crtc;
> +	struct intel_mmio_flip *mmio_flip_data;
> +	unsigned long irq_flags;
> +	u32 seqno;
> +	enum pipe pipe;
> +
> +	BUG_ON(!ring);

No need for the BUG. It'll explode on the next line anyway if
ring==NULL.

> +
> +	seqno = ring->get_seqno(ring, false);
> +
> +	spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags);
> +	for_each_pipe(pipe) {

list_for_each_entry() seems more appropriate since you don't use the
pipe variable for anything else besides looking up the crtc.

You could also move some of the variable declarations inside the loop
since they're not needed on the outside.

> +		crtc = dev_priv->pipe_to_crtc_mapping[pipe];
> +		intel_crtc = to_intel_crtc(crtc);
> +		mmio_flip_data = &intel_crtc->mmio_flip_data;
> +		if ((mmio_flip_data->seqno != 0) &&
> +				(ring->id == mmio_flip_data->ring_id) &&
> +				(seqno >= mmio_flip_data->seqno)) {

Weird indentation and a bit too many parens for my taste.

Should also use i915_seqno_passed() here.

Special casing seqno 0 this way seems safe enough since we apparently
skip seqno 0 always.
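
So with list_for_each_entry() and i915_seqno_passed() the loop could
look something like this (untested sketch):

	spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags);
	list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list,
			    base.head) {
		struct intel_mmio_flip *mmio_flip_data =
			&intel_crtc->mmio_flip_data;

		if (mmio_flip_data->seqno == 0 ||
		    ring->id != mmio_flip_data->ring_id ||
		    !i915_seqno_passed(seqno, mmio_flip_data->seqno))
			continue;

		intel_do_mmio_flip(dev, &intel_crtc->base);
		mmio_flip_data->seqno = 0;
		ring->irq_put(ring);
	}
	spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags);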

> +			intel_do_mmio_flip(dev, crtc);
> +			mmio_flip_data->seqno = 0;
> +			ring->irq_put(ring);
> +		}
> +	}
> +
> +	spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags);
> +}
> +
> +/* Using MMIO based flips starting from VLV, for Media power well
> + * residency optimization. The other alternative of having Render
> + * ring based flip calls is not being used, as the performance
> + * (FPS) of certain 3D Apps was getting severely affected.
> + */
> +static int intel_gen7_queue_mmio_flip(struct drm_device *dev,
> +			struct drm_crtc *crtc,
> +			struct drm_framebuffer *fb,
> +			struct drm_i915_gem_object *obj,
> +			uint32_t flags)

There's nothing gen7 specific here. So you could just rename the
function to eg. intel_queue_mmio_flip(). Maybe also move the
comment about VLV to where you set up the function pointer.
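
I.e. something like this at the setup site (sketch):

	if (IS_VALLEYVIEW(dev)) {
		/* Use MMIO flips on VLV for Media power well residency:
		 * blitter ring CS flips would keep the Media well awake,
		 * and render ring flips hurt 3D performance.
		 */
		dev_priv->display.queue_flip = intel_queue_mmio_flip;
	}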

> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> +	unsigned long irq_flags;
> +	int ret;
> +
> +	ret = intel_pin_and_fence_fb_obj(dev, obj, obj->ring);
> +	if (ret)
> +		goto err;
> +
> +	switch (intel_crtc->plane) {
> +	case PLANE_A:
> +	case PLANE_B:
> +	case PLANE_C:
> +	break;
> +	default:
> +		WARN_ONCE(1, "unknown plane in flip command\n");
> +		ret = -ENODEV;
> +		goto err_unpin;
> +	}

I think you can drop this plane check. We should hopefully catch such
bugs elsewhere already.

> +
> +	if (!intel_postpone_flip(obj)) {
> +		ret = intel_do_mmio_flip(dev, crtc);
> +		return ret;

Just 'return intel_do_mmio_flip(...'

Actually I think you can just make intel_do_mmio_flip() void since
.update_primary_plane() can't really fail. I think Daniel even has a
patch lined up to make it void as well.
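
I.e. (with a void intel_do_mmio_flip()):

	if (!intel_postpone_flip(obj)) {
		intel_do_mmio_flip(dev, crtc);
		return 0;
	}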

> +	}
> +
> +	spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags);
> +	intel_crtc->mmio_flip_data.seqno = obj->last_write_seqno;
> +	intel_crtc->mmio_flip_data.ring_id = obj->ring->id;
> +	spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags);
> +
> +	/* Double check to catch cases where irq fired before
> +	 * mmio flip data was ready
> +	 */
> +	intel_notify_mmio_flip(dev, obj->ring);
> +	return 0;
> +
> +err_unpin:
> +	intel_unpin_fb_obj(obj);
> +err:
> +	return ret;
> +}
> +
>  static int intel_gen7_queue_flip(struct drm_device *dev,
>  				 struct drm_crtc *crtc,
>  				 struct drm_framebuffer *fb,
> @@ -10577,6 +10696,8 @@ static void intel_crtc_init(struct drm_device *dev, int pipe)
>  		DRM_DEBUG_KMS("swapping pipes & planes for FBC\n");
>  		intel_crtc->plane = !pipe;
>  	}
> +	if (IS_VALLEYVIEW(dev))
> +			intel_crtc->mmio_flip_data.seqno = 0;

Not needed. It's kzalloced so everything starts zeroed.

>  
>  	BUG_ON(pipe >= ARRAY_SIZE(dev_priv->plane_to_crtc_mapping) ||
>  	       dev_priv->plane_to_crtc_mapping[intel_crtc->plane] != NULL);
> @@ -11107,6 +11228,9 @@ static void intel_init_display(struct drm_device *dev)
>  	}
>  
>  	intel_panel_init_backlight_funcs(dev);
> +
> +	if (IS_VALLEYVIEW(dev))
> +		dev_priv->display.queue_flip = intel_gen7_queue_mmio_flip;

Would look a bit cleaner to do this before
intel_panel_init_backlight_funcs(). That will keep the .queue_flip
assignments closer together.

I'm also thinking that maybe we want a module parameter to select
between CS vs. mmio flips. It could default to mmio flips on VLV
and CS flips on other platforms if it's generally accepted that mmio
flips are better on VLV. That can at least get us a bit more testing
coverage if people don't need a specific platform to try it out.
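
E.g. a hypothetical i915.use_mmio_flip knob (just a sketch, the name and
semantics are open; it would also need an 'int use_mmio_flip' field in
struct i915_params):

/* i915_params.c */
module_param_named(use_mmio_flip, i915.use_mmio_flip, int, 0600);
MODULE_PARM_DESC(use_mmio_flip,
	"use MMIO flips (-1=never, 0=platform default, 1=always)");

/* intel_display.c */
static bool intel_use_mmio_flip(struct drm_device *dev)
{
	if (i915.use_mmio_flip < 0)
		return false;
	else if (i915.use_mmio_flip > 0)
		return true;
	else
		return IS_VALLEYVIEW(dev);
}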

On the whole I think it looks fairly good, and it should be fairly easy
to extend it more for nuclear flips.

>  }
>  
>  /*
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index fa99104..f0b26a1 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -344,6 +344,11 @@ struct intel_pipe_wm {
>  	bool fbc_wm_enabled;
>  };
>  
> +struct intel_mmio_flip {
> +	u32 seqno;
> +	u32 ring_id;
> +};
> +
>  struct intel_crtc {
>  	struct drm_crtc base;
>  	enum pipe pipe;
> @@ -395,6 +400,8 @@ struct intel_crtc {
>  		/* watermarks currently being used  */
>  		struct intel_pipe_wm active;
>  	} wm;
> +
> +	struct intel_mmio_flip	mmio_flip_data;
>  };
>  
>  struct intel_plane_wm_parameters {
> -- 
> 1.8.5.1
Ville Syrjala May 9, 2014, 1:28 p.m. UTC | #5
On Fri, May 09, 2014 at 02:59:42PM +0300, Ville Syrjälä wrote:
> On Sun, Mar 23, 2014 at 02:31:05PM +0530, sourab.gupta@intel.com wrote:
> > From: Sourab Gupta <sourab.gupta@intel.com>
> > +static int intel_do_mmio_flip(struct drm_device *dev,
> > +			struct drm_crtc *crtc)
> > +{
> > +	struct drm_i915_private *dev_priv = dev->dev_private;
> > +	struct intel_crtc *intel_crtc;
> > +
> > +	intel_crtc = to_intel_crtc(crtc);
> 
> nit: could be part of intel_crtc declaration
> 
> > +
> > +	intel_mark_page_flip_active(intel_crtc);
> > +	return dev_priv->display.update_primary_plane(crtc, crtc->fb, 0, 0);
> 
> Needs to pass crtc->{x,y} instead of 0,0.
> 
> I was a bit worried crtc->fb might be changed already at this point, but
> after thinking a bit it should be fine since the presence of unpin_work
> will keep intel_crtc_page_flip() from frobbing with it and we always
> call intel_crtc_wait_for_pending_flips() before set_base.
> 
> Just need to update to use crtc->primary->fb now.
> 
> I'm thinking we also have a small race here with a flip done interrupt
> from a previous set_base. Probably we need to sort it out using the 
> SURFLIVE and/or flip counter like I did for the mmio vs. cs flip
> race. But I need to think on this a bit more. Perhaps you want to also
> look at those patches a bit.

Oh, another thing here is that update_primary_plane() isn't guaranteed
to be atomic. We could start to use the atomic pipe update mechanism here
already but then we need to schedule a work so that we can perform the
vblank evade trick. That's going to be needed for the nuclear flip
anyway.

Another option would be to only update the base address and leave the
offset registers alone just like the CS flip does.
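
The latter could look something like this inside intel_do_mmio_flip()
(untested sketch):

	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
	struct intel_framebuffer *intel_fb =
		to_intel_framebuffer(crtc->primary->fb);
	struct drm_i915_gem_object *obj = intel_fb->obj;

	intel_mark_page_flip_active(intel_crtc);

	/* just latch the new surface address, like the CS flip does */
	I915_WRITE(DSPSURF(intel_crtc->plane),
		   i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
	POSTING_READ(DSPSURF(intel_crtc->plane));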
Ville Syrjala May 9, 2014, 5:18 p.m. UTC | #6
On Fri, May 09, 2014 at 02:59:42PM +0300, Ville Syrjälä wrote:
> On Sun, Mar 23, 2014 at 02:31:05PM +0530, sourab.gupta@intel.com wrote:
> > +			intel_do_mmio_flip(dev, crtc);
> > +			mmio_flip_data->seqno = 0;
> > +			ring->irq_put(ring);
> > +		}
> > +	}
> > +
> > +	spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags);
> > +}
> > +
> > +/* Using MMIO based flips starting from VLV, for Media power well
> > + * residency optimization. The other alternative of having Render
> > + * ring based flip calls is not being used, as the performance
> > + * (FPS) of certain 3D Apps was getting severely affected.
> > + */
> > +static int intel_gen7_queue_mmio_flip(struct drm_device *dev,
> > +			struct drm_crtc *crtc,
> > +			struct drm_framebuffer *fb,
> > +			struct drm_i915_gem_object *obj,
> > +			uint32_t flags)
> 
> There's nothing gen7 specific here. So you could just rename the
> function to eg. intel_queue_mmio_flip(). Maybe also move the
> comment about VLV to where you set up the function pointer.

Actually this code isn't entirely gen agnostic. It should work on gen5+
since all of those have a flip done interrupt. For older platforms we
use some clever tricks involving the flip_pending status bits and vblank
irqs. That code won't work for mmio flips. We'd need to add another way
to complete the flips. That would involve using the frame counter
to make it accurate. To avoid races there we'd definitely need to use
the vblank evade mechanism to make sure we sample the frame counter
within the same frame as when we write the registers. Also gen2 has
the extra complication that it lacks a hardware frame counter.

So I think we can start off with limiting this to gen5+, and later we
can extend it to cover the older platforms since we anyway need to do
that work to get the nuclear flips working.

BTW I gave this code a whirl on my IVB and everything seems to work
fine.
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 4e0a26a..bca3c5a 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1569,6 +1569,7 @@  int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	spin_lock_init(&dev_priv->backlight_lock);
 	spin_lock_init(&dev_priv->uncore.lock);
 	spin_lock_init(&dev_priv->mm.object_stat_lock);
+	spin_lock_init(&dev_priv->mmio_flip_lock);
 	mutex_init(&dev_priv->dpio_lock);
 	mutex_init(&dev_priv->modeset_restore_lock);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3f62be0..678d31d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1621,6 +1621,10 @@  typedef struct drm_i915_private {
 	struct i915_dri1_state dri1;
 	/* Old ums support infrastructure, same warning applies. */
 	struct i915_ums_state ums;
+
+	/* protects the mmio flip data */
+	spinlock_t mmio_flip_lock;
+
 } drm_i915_private_t;
 
 static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
@@ -2657,6 +2661,9 @@  int i915_reg_read_ioctl(struct drm_device *dev, void *data,
 int i915_get_reset_stats_ioctl(struct drm_device *dev, void *data,
 			       struct drm_file *file);
 
+void intel_notify_mmio_flip(struct drm_device *dev,
+			struct intel_ring_buffer *ring);
+
 /* overlay */
 extern struct intel_overlay_error_state *intel_overlay_capture_error_state(struct drm_device *dev);
 extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e,
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 4b4aeb3..ad26abe 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1080,6 +1080,8 @@  static void notify_ring(struct drm_device *dev,
 
 	trace_i915_gem_request_complete(ring);
 
+	intel_notify_mmio_flip(dev, ring);
+
 	wake_up_all(&ring->irq_queue);
 	i915_queue_hangcheck(dev);
 }
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 7e4ea8d..19004bf 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -8782,6 +8782,125 @@  err:
 	return ret;
 }
 
+static int intel_do_mmio_flip(struct drm_device *dev,
+			struct drm_crtc *crtc)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc;
+
+	intel_crtc = to_intel_crtc(crtc);
+
+	intel_mark_page_flip_active(intel_crtc);
+	return dev_priv->display.update_primary_plane(crtc, crtc->fb, 0, 0);
+}
+
+static bool intel_postpone_flip(struct drm_i915_gem_object *obj)
+{
+	int ret;
+	if (!obj->ring)
+		return false;
+
+	if (i915_seqno_passed(obj->ring->get_seqno(obj->ring, false),
+			      obj->last_write_seqno))
+		return false;
+
+	if (obj->last_write_seqno == obj->ring->outstanding_lazy_seqno) {
+		ret = i915_add_request(obj->ring, NULL);
+		if (WARN_ON(ret))
+			return false;
+	}
+
+	if (WARN_ON(!obj->ring->irq_get(obj->ring)))
+		return false;
+
+	return true;
+}
+
+void intel_notify_mmio_flip(struct drm_device *dev,
+			struct intel_ring_buffer *ring)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_crtc *crtc;
+	struct intel_crtc *intel_crtc;
+	struct intel_mmio_flip *mmio_flip_data;
+	unsigned long irq_flags;
+	u32 seqno;
+	enum pipe pipe;
+
+	BUG_ON(!ring);
+
+	seqno = ring->get_seqno(ring, false);
+
+	spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags);
+	for_each_pipe(pipe) {
+		crtc = dev_priv->pipe_to_crtc_mapping[pipe];
+		intel_crtc = to_intel_crtc(crtc);
+		mmio_flip_data = &intel_crtc->mmio_flip_data;
+		if ((mmio_flip_data->seqno != 0) &&
+				(ring->id == mmio_flip_data->ring_id) &&
+				(seqno >= mmio_flip_data->seqno)) {
+			intel_do_mmio_flip(dev, crtc);
+			mmio_flip_data->seqno = 0;
+			ring->irq_put(ring);
+		}
+	}
+
+	spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags);
+}
+
+/* Using MMIO based flips starting from VLV, for Media power well
+ * residency optimization. The other alternative of having Render
+ * ring based flip calls is not being used, as the performance
+ * (FPS) of certain 3D Apps was getting severely affected.
+ */
+static int intel_gen7_queue_mmio_flip(struct drm_device *dev,
+			struct drm_crtc *crtc,
+			struct drm_framebuffer *fb,
+			struct drm_i915_gem_object *obj,
+			uint32_t flags)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	unsigned long irq_flags;
+	int ret;
+
+	ret = intel_pin_and_fence_fb_obj(dev, obj, obj->ring);
+	if (ret)
+		goto err;
+
+	switch (intel_crtc->plane) {
+	case PLANE_A:
+	case PLANE_B:
+	case PLANE_C:
+	break;
+	default:
+		WARN_ONCE(1, "unknown plane in flip command\n");
+		ret = -ENODEV;
+		goto err_unpin;
+	}
+
+	if (!intel_postpone_flip(obj)) {
+		ret = intel_do_mmio_flip(dev, crtc);
+		return ret;
+	}
+
+	spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags);
+	intel_crtc->mmio_flip_data.seqno = obj->last_write_seqno;
+	intel_crtc->mmio_flip_data.ring_id = obj->ring->id;
+	spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags);
+
+	/* Double check to catch cases where irq fired before
+	 * mmio flip data was ready
+	 */
+	intel_notify_mmio_flip(dev, obj->ring);
+	return 0;
+
+err_unpin:
+	intel_unpin_fb_obj(obj);
+err:
+	return ret;
+}
+
 static int intel_gen7_queue_flip(struct drm_device *dev,
 				 struct drm_crtc *crtc,
 				 struct drm_framebuffer *fb,
@@ -10577,6 +10696,8 @@  static void intel_crtc_init(struct drm_device *dev, int pipe)
 		DRM_DEBUG_KMS("swapping pipes & planes for FBC\n");
 		intel_crtc->plane = !pipe;
 	}
+	if (IS_VALLEYVIEW(dev))
+			intel_crtc->mmio_flip_data.seqno = 0;
 
 	BUG_ON(pipe >= ARRAY_SIZE(dev_priv->plane_to_crtc_mapping) ||
 	       dev_priv->plane_to_crtc_mapping[intel_crtc->plane] != NULL);
@@ -11107,6 +11228,9 @@  static void intel_init_display(struct drm_device *dev)
 	}
 
 	intel_panel_init_backlight_funcs(dev);
+
+	if (IS_VALLEYVIEW(dev))
+		dev_priv->display.queue_flip = intel_gen7_queue_mmio_flip;
 }
 
 /*
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index fa99104..f0b26a1 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -344,6 +344,11 @@  struct intel_pipe_wm {
 	bool fbc_wm_enabled;
 };
 
+struct intel_mmio_flip {
+	u32 seqno;
+	u32 ring_id;
+};
+
 struct intel_crtc {
 	struct drm_crtc base;
 	enum pipe pipe;
@@ -395,6 +400,8 @@  struct intel_crtc {
 		/* watermarks currently being used  */
 		struct intel_pipe_wm active;
 	} wm;
+
+	struct intel_mmio_flip	mmio_flip_data;
 };
 
 struct intel_plane_wm_parameters {