Message ID | 1395565265-20714-1-git-send-email-sourab.gupta@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Hi Ville/Damien, Can you please review the below patch(v3) for mmio flips. Thanks, Sourab On Sun, 2014-03-23 at 09:01 +0000, Gupta, Sourab wrote: > From: Sourab Gupta <sourab.gupta@intel.com> > > Using MMIO based flips on VLV for Media power well residency optimization. > The blitter ring is currently being used just for command streamer based > flip calls. For pure 3D workloads, with MMIO flips, there will be no use > of blitter ring and this will ensure the 100% residency for Media well. > > v2: The MMIO flips now use the interrupt driven mechanism for issuing the > flips when target seqno is reached. (Incorporating Ville's idea) > > v3: Rebasing on latest code. Code restructuring after incorporating > Damien's comments > > Signed-off-by: Sourab Gupta <sourab.gupta@intel.com> > Signed-off-by: Akash Goel <akash.goel@intel.com> > --- > drivers/gpu/drm/i915/i915_dma.c | 1 + > drivers/gpu/drm/i915/i915_drv.h | 7 ++ > drivers/gpu/drm/i915/i915_irq.c | 2 + > drivers/gpu/drm/i915/intel_display.c | 124 +++++++++++++++++++++++++++++++++++ > drivers/gpu/drm/i915/intel_drv.h | 7 ++ > 5 files changed, 141 insertions(+) > > diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c > index 4e0a26a..bca3c5a 100644 > --- a/drivers/gpu/drm/i915/i915_dma.c > +++ b/drivers/gpu/drm/i915/i915_dma.c > @@ -1569,6 +1569,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) > spin_lock_init(&dev_priv->backlight_lock); > spin_lock_init(&dev_priv->uncore.lock); > spin_lock_init(&dev_priv->mm.object_stat_lock); > + spin_lock_init(&dev_priv->mmio_flip_lock); > mutex_init(&dev_priv->dpio_lock); > mutex_init(&dev_priv->modeset_restore_lock); > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 3f62be0..678d31d 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1621,6 +1621,10 @@ typedef struct drm_i915_private { > struct i915_dri1_state dri1; > /* Old ums support infrastructure, same warning applies. */ > struct i915_ums_state ums; > + > + /* protects the mmio flip data */ > + spinlock_t mmio_flip_lock; > + > } drm_i915_private_t; > > static inline struct drm_i915_private *to_i915(const struct drm_device *dev) > @@ -2657,6 +2661,9 @@ int i915_reg_read_ioctl(struct drm_device *dev, void *data, > int i915_get_reset_stats_ioctl(struct drm_device *dev, void *data, > struct drm_file *file); > > +void intel_notify_mmio_flip(struct drm_device *dev, > + struct intel_ring_buffer *ring); > + > /* overlay */ > extern struct intel_overlay_error_state *intel_overlay_capture_error_state(struct drm_device *dev); > extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e, > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c > index 4b4aeb3..ad26abe 100644 > --- a/drivers/gpu/drm/i915/i915_irq.c > +++ b/drivers/gpu/drm/i915/i915_irq.c > @@ -1080,6 +1080,8 @@ static void notify_ring(struct drm_device *dev, > > trace_i915_gem_request_complete(ring); > > + intel_notify_mmio_flip(dev, ring); > + > wake_up_all(&ring->irq_queue); > i915_queue_hangcheck(dev); > } > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c > index 7e4ea8d..19004bf 100644 > --- a/drivers/gpu/drm/i915/intel_display.c > +++ b/drivers/gpu/drm/i915/intel_display.c > @@ -8782,6 +8782,125 @@ err: > return ret; > } > > +static int intel_do_mmio_flip(struct drm_device *dev, > + struct drm_crtc *crtc) > +{ > + struct drm_i915_private *dev_priv = dev->dev_private; > + struct intel_crtc *intel_crtc; > + > + intel_crtc = to_intel_crtc(crtc); > + > + intel_mark_page_flip_active(intel_crtc); > + return dev_priv->display.update_primary_plane(crtc, crtc->fb, 0, 0); > +} > + > +static bool intel_postpone_flip(struct drm_i915_gem_object *obj) > +{ > + int ret; > + if (!obj->ring) > + return false; > + > + if (i915_seqno_passed(obj->ring->get_seqno(obj->ring, false), > + obj->last_write_seqno)) > + return false; > + > + if (obj->last_write_seqno == obj->ring->outstanding_lazy_seqno) { > + ret = i915_add_request(obj->ring, NULL); > + if (WARN_ON(ret)) > + return false; > + } > + > + if (WARN_ON(!obj->ring->irq_get(obj->ring))) > + return false; > + > + return true; > +} > + > +void intel_notify_mmio_flip(struct drm_device *dev, > + struct intel_ring_buffer *ring) > +{ > + struct drm_i915_private *dev_priv = dev->dev_private; > + struct drm_crtc *crtc; > + struct intel_crtc *intel_crtc; > + struct intel_mmio_flip *mmio_flip_data; > + unsigned long irq_flags; > + u32 seqno; > + enum pipe pipe; > + > + BUG_ON(!ring); > + > + seqno = ring->get_seqno(ring, false); > + > + spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags); > + for_each_pipe(pipe) { > + crtc = dev_priv->pipe_to_crtc_mapping[pipe]; > + intel_crtc = to_intel_crtc(crtc); > + mmio_flip_data = &intel_crtc->mmio_flip_data; > + if ((mmio_flip_data->seqno != 0) && > + (ring->id == mmio_flip_data->ring_id) && > + (seqno >= mmio_flip_data->seqno)) { > + intel_do_mmio_flip(dev, crtc); > + mmio_flip_data->seqno = 0; > + ring->irq_put(ring); > + } > + } > + > + spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags); > +} > + > +/* Using MMIO based flips starting from VLV, for Media power well > + * residency optimization. The other alternative of having Render > + * ring based flip calls is not being used, as the performance > + * (FPS) of certain 3D Apps was getting severly affected. > + */ > +static int intel_gen7_queue_mmio_flip(struct drm_device *dev, > + struct drm_crtc *crtc, > + struct drm_framebuffer *fb, > + struct drm_i915_gem_object *obj, > + uint32_t flags) > +{ > + struct drm_i915_private *dev_priv = dev->dev_private; > + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); > + unsigned long irq_flags; > + int ret; > + > + ret = intel_pin_and_fence_fb_obj(dev, obj, obj->ring); > + if (ret) > + goto err; > + > + switch (intel_crtc->plane) { > + case PLANE_A: > + case PLANE_B: > + case PLANE_C: > + break; > + default: > + WARN_ONCE(1, "unknown plane in flip command\n"); > + ret = -ENODEV; > + goto err_unpin; > + } > + > + if (!intel_postpone_flip(obj)) { > + ret = intel_do_mmio_flip(dev, crtc); > + return ret; > + } > + > + spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags); > + intel_crtc->mmio_flip_data.seqno = obj->last_write_seqno; > + intel_crtc->mmio_flip_data.ring_id = obj->ring->id; > + spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags); > + > + /* Double check to catch cases where irq fired before > + * mmio flip data was ready > + */ > + intel_notify_mmio_flip(dev, obj->ring); > + return 0; > + > +err_unpin: > + intel_unpin_fb_obj(obj); > +err: > + return ret; > +} > + > static int intel_gen7_queue_flip(struct drm_device *dev, > struct drm_crtc *crtc, > struct drm_framebuffer *fb, > @@ -10577,6 +10696,8 @@ static void intel_crtc_init(struct drm_device *dev, int pipe) > DRM_DEBUG_KMS("swapping pipes & planes for FBC\n"); > intel_crtc->plane = !pipe; > } > + if (IS_VALLEYVIEW(dev)) > + intel_crtc->mmio_flip_data.seqno = 0; > > BUG_ON(pipe >= ARRAY_SIZE(dev_priv->plane_to_crtc_mapping) || > dev_priv->plane_to_crtc_mapping[intel_crtc->plane] != NULL); > @@ -11107,6 +11228,9 @@ static void intel_init_display(struct drm_device *dev) > } > > intel_panel_init_backlight_funcs(dev); > + > + if (IS_VALLEYVIEW(dev)) > + dev_priv->display.queue_flip = intel_gen7_queue_mmio_flip; > } > > /* > diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h > index fa99104..f0b26a1 100644 > --- a/drivers/gpu/drm/i915/intel_drv.h > +++ b/drivers/gpu/drm/i915/intel_drv.h > @@ -344,6 +344,11 @@ struct intel_pipe_wm { > bool fbc_wm_enabled; > }; > > +struct intel_mmio_flip { > + u32 seqno; > + u32 ring_id; > +}; > + > struct intel_crtc { > struct drm_crtc base; > enum pipe pipe; > @@ -395,6 +400,8 @@ struct intel_crtc { > /* watermarks currently being used */ > struct intel_pipe_wm active; > } wm; > + > + struct intel_mmio_flip mmio_flip_data; > }; > > struct intel_plane_wm_parameters {
On Wed, 2014-03-26 at 13:20 +0530, sourab gupta wrote: > Hi Ville/Damien, > Can you please review the below patch(v3) for mmio flips. > Thanks, > Sourab > > On Sun, 2014-03-23 at 09:01 +0000, Gupta, Sourab wrote: > > From: Sourab Gupta <sourab.gupta@intel.com> > > > > Using MMIO based flips on VLV for Media power well residency optimization. > > The blitter ring is currently being used just for command streamer based > > flip calls. For pure 3D workloads, with MMIO flips, there will be no use > > of blitter ring and this will ensure the 100% residency for Media well. > > > > v2: The MMIO flips now use the interrupt driven mechanism for issuing the > > flips when target seqno is reached. (Incorporating Ville's idea) > > > > v3: Rebasing on latest code. Code restructuring after incorporating > > Damien's comments > > > > Signed-off-by: Sourab Gupta <sourab.gupta@intel.com> > > Signed-off-by: Akash Goel <akash.goel@intel.com> > > --- > > drivers/gpu/drm/i915/i915_dma.c | 1 + > > drivers/gpu/drm/i915/i915_drv.h | 7 ++ > > drivers/gpu/drm/i915/i915_irq.c | 2 + > > drivers/gpu/drm/i915/intel_display.c | 124 +++++++++++++++++++++++++++++++++++ > > drivers/gpu/drm/i915/intel_drv.h | 7 ++ > > 5 files changed, 141 insertions(+) > > > > diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c > > index 4e0a26a..bca3c5a 100644 > > --- a/drivers/gpu/drm/i915/i915_dma.c > > +++ b/drivers/gpu/drm/i915/i915_dma.c > > @@ -1569,6 +1569,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) > > spin_lock_init(&dev_priv->backlight_lock); > > spin_lock_init(&dev_priv->uncore.lock); > > spin_lock_init(&dev_priv->mm.object_stat_lock); > > + spin_lock_init(&dev_priv->mmio_flip_lock); > > mutex_init(&dev_priv->dpio_lock); > > mutex_init(&dev_priv->modeset_restore_lock); > > > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > > index 3f62be0..678d31d 100644 > > --- a/drivers/gpu/drm/i915/i915_drv.h > > +++ b/drivers/gpu/drm/i915/i915_drv.h > > @@ -1621,6 +1621,10 @@ typedef struct drm_i915_private { > > struct i915_dri1_state dri1; > > /* Old ums support infrastructure, same warning applies. */ > > struct i915_ums_state ums; > > + > > + /* protects the mmio flip data */ > > + spinlock_t mmio_flip_lock; > > + > > } drm_i915_private_t; > > > > static inline struct drm_i915_private *to_i915(const struct drm_device *dev) > > @@ -2657,6 +2661,9 @@ int i915_reg_read_ioctl(struct drm_device *dev, void *data, > > int i915_get_reset_stats_ioctl(struct drm_device *dev, void *data, > > struct drm_file *file); > > > > +void intel_notify_mmio_flip(struct drm_device *dev, > > + struct intel_ring_buffer *ring); > > + > > /* overlay */ > > extern struct intel_overlay_error_state *intel_overlay_capture_error_state(struct drm_device *dev); > > extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e, > > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c > > index 4b4aeb3..ad26abe 100644 > > --- a/drivers/gpu/drm/i915/i915_irq.c > > +++ b/drivers/gpu/drm/i915/i915_irq.c > > @@ -1080,6 +1080,8 @@ static void notify_ring(struct drm_device *dev, > > > > trace_i915_gem_request_complete(ring); > > > > + intel_notify_mmio_flip(dev, ring); > > + > > wake_up_all(&ring->irq_queue); > > i915_queue_hangcheck(dev); > > } > > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c > > index 7e4ea8d..19004bf 100644 > > --- a/drivers/gpu/drm/i915/intel_display.c > > +++ b/drivers/gpu/drm/i915/intel_display.c > > @@ -8782,6 +8782,125 @@ err: > > return ret; > > } > > > > +static int intel_do_mmio_flip(struct drm_device *dev, > > + struct drm_crtc *crtc) > > +{ > > + struct drm_i915_private *dev_priv = dev->dev_private; > > + struct intel_crtc *intel_crtc; > > + > > + intel_crtc = to_intel_crtc(crtc); > > + > > + intel_mark_page_flip_active(intel_crtc); > > + return dev_priv->display.update_primary_plane(crtc, crtc->fb, 0, 0); > > +} > > + > > +static bool intel_postpone_flip(struct drm_i915_gem_object *obj) > > +{ > > + int ret; > > + if (!obj->ring) > > + return false; > > + > > + if (i915_seqno_passed(obj->ring->get_seqno(obj->ring, false), > > + obj->last_write_seqno)) > > + return false; > > + > > + if (obj->last_write_seqno == obj->ring->outstanding_lazy_seqno) { > > + ret = i915_add_request(obj->ring, NULL); > > + if (WARN_ON(ret)) > > + return false; > > + } > > + > > + if (WARN_ON(!obj->ring->irq_get(obj->ring))) > > + return false; > > + > > + return true; > > +} > > + > > +void intel_notify_mmio_flip(struct drm_device *dev, > > + struct intel_ring_buffer *ring) > > +{ > > + struct drm_i915_private *dev_priv = dev->dev_private; > > + struct drm_crtc *crtc; > > + struct intel_crtc *intel_crtc; > > + struct intel_mmio_flip *mmio_flip_data; > > + unsigned long irq_flags; > > + u32 seqno; > > + enum pipe pipe; > > + > > + BUG_ON(!ring); > > + > > + seqno = ring->get_seqno(ring, false); > > + > > + spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags); > > + for_each_pipe(pipe) { > > + crtc = dev_priv->pipe_to_crtc_mapping[pipe]; > > + intel_crtc = to_intel_crtc(crtc); > > + mmio_flip_data = &intel_crtc->mmio_flip_data; > > + if ((mmio_flip_data->seqno != 0) && > > + (ring->id == mmio_flip_data->ring_id) && > > + (seqno >= mmio_flip_data->seqno)) { > > + intel_do_mmio_flip(dev, crtc); > > + mmio_flip_data->seqno = 0; > > + ring->irq_put(ring); > > + } > > + } > > + > > + spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags); > > +} > > + > > +/* Using MMIO based flips starting from VLV, for Media power well > > + * residency optimization. The other alternative of having Render > > + * ring based flip calls is not being used, as the performance > > + * (FPS) of certain 3D Apps was getting severly affected. > > + */ > > +static int intel_gen7_queue_mmio_flip(struct drm_device *dev, > > + struct drm_crtc *crtc, > > + struct drm_framebuffer *fb, > > + struct drm_i915_gem_object *obj, > > + uint32_t flags) > > +{ > > + struct drm_i915_private *dev_priv = dev->dev_private; > > + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); > > + unsigned long irq_flags; > > + int ret; > > + > > + ret = intel_pin_and_fence_fb_obj(dev, obj, obj->ring); > > + if (ret) > > + goto err; > > + > > + switch (intel_crtc->plane) { > > + case PLANE_A: > > + case PLANE_B: > > + case PLANE_C: > > + break; > > + default: > > + WARN_ONCE(1, "unknown plane in flip command\n"); > > + ret = -ENODEV; > > + goto err_unpin; > > + } > > + > > + if (!intel_postpone_flip(obj)) { > > + ret = intel_do_mmio_flip(dev, crtc); > > + return ret; > > + } > > + > > + spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags); > > + intel_crtc->mmio_flip_data.seqno = obj->last_write_seqno; > > + intel_crtc->mmio_flip_data.ring_id = obj->ring->id; > > + spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags); > > + > > + /* Double check to catch cases where irq fired before > > + * mmio flip data was ready > > + */ > > + intel_notify_mmio_flip(dev, obj->ring); > > + return 0; > > + > > +err_unpin: > > + intel_unpin_fb_obj(obj); > > +err: > > + return ret; > > +} > > + > > static int intel_gen7_queue_flip(struct drm_device *dev, > > struct drm_crtc *crtc, > > struct drm_framebuffer *fb, > > @@ -10577,6 +10696,8 @@ static void intel_crtc_init(struct drm_device *dev, int pipe) > > DRM_DEBUG_KMS("swapping pipes & planes for FBC\n"); > > intel_crtc->plane = !pipe; > > } > > + if (IS_VALLEYVIEW(dev)) > > + intel_crtc->mmio_flip_data.seqno = 0; > > > > BUG_ON(pipe >= ARRAY_SIZE(dev_priv->plane_to_crtc_mapping) || > > dev_priv->plane_to_crtc_mapping[intel_crtc->plane] != NULL); > > @@ -11107,6 +11228,9 @@ static void intel_init_display(struct drm_device *dev) > > } > > > > intel_panel_init_backlight_funcs(dev); > > + > > + if (IS_VALLEYVIEW(dev)) > > + dev_priv->display.queue_flip = intel_gen7_queue_mmio_flip; > > } > > > > /* > > diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h > > index fa99104..f0b26a1 100644 > > --- a/drivers/gpu/drm/i915/intel_drv.h > > +++ b/drivers/gpu/drm/i915/intel_drv.h > > @@ -344,6 +344,11 @@ struct intel_pipe_wm { > > bool fbc_wm_enabled; > > }; > > > > +struct intel_mmio_flip { > > + u32 seqno; > > + u32 ring_id; > > +}; > > + > > struct intel_crtc { > > struct drm_crtc base; > > enum pipe pipe; > > @@ -395,6 +400,8 @@ struct intel_crtc { > > /* watermarks currently being used */ > > struct intel_pipe_wm active; > > } wm; > > + > > + struct intel_mmio_flip mmio_flip_data; > > }; > > > > struct intel_plane_wm_parameters { > Hi Ville, A gentle reminder to review the patch. Thanks, Sourab
On Thu, 2014-04-03 at 14:11 +0530, sourab gupta wrote: > On Wed, 2014-03-26 at 13:20 +0530, sourab gupta wrote: > > Hi Ville/Damien, > > Can you please review the below patch(v3) for mmio flips. > > Thanks, > > Sourab > > > > On Sun, 2014-03-23 at 09:01 +0000, Gupta, Sourab wrote: > > > From: Sourab Gupta <sourab.gupta@intel.com> > > > > > > Using MMIO based flips on VLV for Media power well residency optimization. > > > The blitter ring is currently being used just for command streamer based > > > flip calls. For pure 3D workloads, with MMIO flips, there will be no use > > > of blitter ring and this will ensure the 100% residency for Media well. > > > > > > v2: The MMIO flips now use the interrupt driven mechanism for issuing the > > > flips when target seqno is reached. (Incorporating Ville's idea) > > > > > > v3: Rebasing on latest code. Code restructuring after incorporating > > > Damien's comments > > > > > > Signed-off-by: Sourab Gupta <sourab.gupta@intel.com> > > > Signed-off-by: Akash Goel <akash.goel@intel.com> > > > --- > > > drivers/gpu/drm/i915/i915_dma.c | 1 + > > > drivers/gpu/drm/i915/i915_drv.h | 7 ++ > > > drivers/gpu/drm/i915/i915_irq.c | 2 + > > > drivers/gpu/drm/i915/intel_display.c | 124 +++++++++++++++++++++++++++++++++++ > > > drivers/gpu/drm/i915/intel_drv.h | 7 ++ > > > 5 files changed, 141 insertions(+) > > > > > > diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c > > > index 4e0a26a..bca3c5a 100644 > > > --- a/drivers/gpu/drm/i915/i915_dma.c > > > +++ b/drivers/gpu/drm/i915/i915_dma.c > > > @@ -1569,6 +1569,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) > > > spin_lock_init(&dev_priv->backlight_lock); > > > spin_lock_init(&dev_priv->uncore.lock); > > > spin_lock_init(&dev_priv->mm.object_stat_lock); > > > + spin_lock_init(&dev_priv->mmio_flip_lock); > > > mutex_init(&dev_priv->dpio_lock); > > > mutex_init(&dev_priv->modeset_restore_lock); > > > > > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > > > index 3f62be0..678d31d 100644 > > > --- a/drivers/gpu/drm/i915/i915_drv.h > > > +++ b/drivers/gpu/drm/i915/i915_drv.h > > > @@ -1621,6 +1621,10 @@ typedef struct drm_i915_private { > > > struct i915_dri1_state dri1; > > > /* Old ums support infrastructure, same warning applies. */ > > > struct i915_ums_state ums; > > > + > > > + /* protects the mmio flip data */ > > > + spinlock_t mmio_flip_lock; > > > + > > > } drm_i915_private_t; > > > > > > static inline struct drm_i915_private *to_i915(const struct drm_device *dev) > > > @@ -2657,6 +2661,9 @@ int i915_reg_read_ioctl(struct drm_device *dev, void *data, > > > int i915_get_reset_stats_ioctl(struct drm_device *dev, void *data, > > > struct drm_file *file); > > > > > > +void intel_notify_mmio_flip(struct drm_device *dev, > > > + struct intel_ring_buffer *ring); > > > + > > > /* overlay */ > > > extern struct intel_overlay_error_state *intel_overlay_capture_error_state(struct drm_device *dev); > > > extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e, > > > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c > > > index 4b4aeb3..ad26abe 100644 > > > --- a/drivers/gpu/drm/i915/i915_irq.c > > > +++ b/drivers/gpu/drm/i915/i915_irq.c > > > @@ -1080,6 +1080,8 @@ static void notify_ring(struct drm_device *dev, > > > > > > trace_i915_gem_request_complete(ring); > > > > > > + intel_notify_mmio_flip(dev, ring); > > > + > > > wake_up_all(&ring->irq_queue); > > > i915_queue_hangcheck(dev); > > > } > > > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c > > > index 7e4ea8d..19004bf 100644 > > > --- a/drivers/gpu/drm/i915/intel_display.c > > > +++ b/drivers/gpu/drm/i915/intel_display.c > > > @@ -8782,6 +8782,125 @@ err: > > > return ret; > > > } > > > > > > +static int intel_do_mmio_flip(struct drm_device *dev, > > > + struct drm_crtc *crtc) > > > +{ > > > + struct drm_i915_private *dev_priv = dev->dev_private; > > > + struct intel_crtc *intel_crtc; > > > + > > > + intel_crtc = to_intel_crtc(crtc); > > > + > > > + intel_mark_page_flip_active(intel_crtc); > > > + return dev_priv->display.update_primary_plane(crtc, crtc->fb, 0, 0); > > > +} > > > + > > > +static bool intel_postpone_flip(struct drm_i915_gem_object *obj) > > > +{ > > > + int ret; > > > + if (!obj->ring) > > > + return false; > > > + > > > + if (i915_seqno_passed(obj->ring->get_seqno(obj->ring, false), > > > + obj->last_write_seqno)) > > > + return false; > > > + > > > + if (obj->last_write_seqno == obj->ring->outstanding_lazy_seqno) { > > > + ret = i915_add_request(obj->ring, NULL); > > > + if (WARN_ON(ret)) > > > + return false; > > > + } > > > + > > > + if (WARN_ON(!obj->ring->irq_get(obj->ring))) > > > + return false; > > > + > > > + return true; > > > +} > > > + > > > +void intel_notify_mmio_flip(struct drm_device *dev, > > > + struct intel_ring_buffer *ring) > > > +{ > > > + struct drm_i915_private *dev_priv = dev->dev_private; > > > + struct drm_crtc *crtc; > > > + struct intel_crtc *intel_crtc; > > > + struct intel_mmio_flip *mmio_flip_data; > > > + unsigned long irq_flags; > > > + u32 seqno; > > > + enum pipe pipe; > > > + > > > + BUG_ON(!ring); > > > + > > > + seqno = ring->get_seqno(ring, false); > > > + > > > + spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags); > > > + for_each_pipe(pipe) { > > > + crtc = dev_priv->pipe_to_crtc_mapping[pipe]; > > > + intel_crtc = to_intel_crtc(crtc); > > > + mmio_flip_data = &intel_crtc->mmio_flip_data; > > > + if ((mmio_flip_data->seqno != 0) && > > > + (ring->id == mmio_flip_data->ring_id) && > > > + (seqno >= mmio_flip_data->seqno)) { > > > + intel_do_mmio_flip(dev, crtc); > > > + mmio_flip_data->seqno = 0; > > > + ring->irq_put(ring); > > > + } > > > + } > > > + > > > + spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags); > > > +} > > > + > > > +/* Using MMIO based flips starting from VLV, for Media power well > > > + * residency optimization. The other alternative of having Render > > > + * ring based flip calls is not being used, as the performance > > > + * (FPS) of certain 3D Apps was getting severly affected. > > > + */ > > > +static int intel_gen7_queue_mmio_flip(struct drm_device *dev, > > > + struct drm_crtc *crtc, > > > + struct drm_framebuffer *fb, > > > + struct drm_i915_gem_object *obj, > > > + uint32_t flags) > > > +{ > > > + struct drm_i915_private *dev_priv = dev->dev_private; > > > + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); > > > + unsigned long irq_flags; > > > + int ret; > > > + > > > + ret = intel_pin_and_fence_fb_obj(dev, obj, obj->ring); > > > + if (ret) > > > + goto err; > > > + > > > + switch (intel_crtc->plane) { > > > + case PLANE_A: > > > + case PLANE_B: > > > + case PLANE_C: > > > + break; > > > + default: > > > + WARN_ONCE(1, "unknown plane in flip command\n"); > > > + ret = -ENODEV; > > > + goto err_unpin; > > > + } > > > + > > > + if (!intel_postpone_flip(obj)) { > > > + ret = intel_do_mmio_flip(dev, crtc); > > > + return ret; > > > + } > > > + > > > + spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags); > > > + intel_crtc->mmio_flip_data.seqno = obj->last_write_seqno; > > > + intel_crtc->mmio_flip_data.ring_id = obj->ring->id; > > > + spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags); > > > + > > > + /* Double check to catch cases where irq fired before > > > + * mmio flip data was ready > > > + */ > > > + intel_notify_mmio_flip(dev, obj->ring); > > > + return 0; > > > + > > > +err_unpin: > > > + intel_unpin_fb_obj(obj); > > > +err: > > > + return ret; > > > +} > > > + > > > static int intel_gen7_queue_flip(struct drm_device *dev, > > > struct drm_crtc *crtc, > > > struct drm_framebuffer *fb, > > > @@ -10577,6 +10696,8 @@ static void intel_crtc_init(struct drm_device *dev, int pipe) > > > DRM_DEBUG_KMS("swapping pipes & planes for FBC\n"); > > > intel_crtc->plane = !pipe; > > > } > > > + if (IS_VALLEYVIEW(dev)) > > > + intel_crtc->mmio_flip_data.seqno = 0; > > > > > > BUG_ON(pipe >= ARRAY_SIZE(dev_priv->plane_to_crtc_mapping) || > > > dev_priv->plane_to_crtc_mapping[intel_crtc->plane] != NULL); > > > @@ -11107,6 +11228,9 @@ static void intel_init_display(struct drm_device *dev) > > > } > > > > > > intel_panel_init_backlight_funcs(dev); > > > + > > > + if (IS_VALLEYVIEW(dev)) > > > + dev_priv->display.queue_flip = intel_gen7_queue_mmio_flip; > > > } > > > > > > /* > > > diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h > > > index fa99104..f0b26a1 100644 > > > --- a/drivers/gpu/drm/i915/intel_drv.h > > > +++ b/drivers/gpu/drm/i915/intel_drv.h > > > @@ -344,6 +344,11 @@ struct intel_pipe_wm { > > > bool fbc_wm_enabled; > > > }; > > > > > > +struct intel_mmio_flip { > > > + u32 seqno; > > > + u32 ring_id; > > > +}; > > > + > > > struct intel_crtc { > > > struct drm_crtc base; > > > enum pipe pipe; > > > @@ -395,6 +400,8 @@ struct intel_crtc { > > > /* watermarks currently being used */ > > > struct intel_pipe_wm active; > > > } wm; > > > + > > > + struct intel_mmio_flip mmio_flip_data; > > > }; > > > > > > struct intel_plane_wm_parameters { > > > > Hi Ville, > A gentle reminder to review the patch. > > Thanks, > Sourab > Gentle reminder to review the patch. (Been more than 2 weeks since last version was sent.) Thanks, Sourab
On Sun, Mar 23, 2014 at 02:31:05PM +0530, sourab.gupta@intel.com wrote: > From: Sourab Gupta <sourab.gupta@intel.com> > > Using MMIO based flips on VLV for Media power well residency optimization. > The blitter ring is currently being used just for command streamer based > flip calls. For pure 3D workloads, with MMIO flips, there will be no use > of blitter ring and this will ensure the 100% residency for Media well. Sorry for dragging my feet with reviewing this. I'm hoping this is the latest version... > > v2: The MMIO flips now use the interrupt driven mechanism for issuing the > flips when target seqno is reached. (Incorporating Ville's idea) > > v3: Rebasing on latest code. Code restructuring after incorporating > Damien's comments > > Signed-off-by: Sourab Gupta <sourab.gupta@intel.com> > Signed-off-by: Akash Goel <akash.goel@intel.com> > --- > drivers/gpu/drm/i915/i915_dma.c | 1 + > drivers/gpu/drm/i915/i915_drv.h | 7 ++ > drivers/gpu/drm/i915/i915_irq.c | 2 + > drivers/gpu/drm/i915/intel_display.c | 124 +++++++++++++++++++++++++++++++++++ > drivers/gpu/drm/i915/intel_drv.h | 7 ++ > 5 files changed, 141 insertions(+) > > diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c > index 4e0a26a..bca3c5a 100644 > --- a/drivers/gpu/drm/i915/i915_dma.c > +++ b/drivers/gpu/drm/i915/i915_dma.c > @@ -1569,6 +1569,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) > spin_lock_init(&dev_priv->backlight_lock); > spin_lock_init(&dev_priv->uncore.lock); > spin_lock_init(&dev_priv->mm.object_stat_lock); > + spin_lock_init(&dev_priv->mmio_flip_lock); > mutex_init(&dev_priv->dpio_lock); > mutex_init(&dev_priv->modeset_restore_lock); > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 3f62be0..678d31d 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1621,6 +1621,10 @@ typedef struct drm_i915_private { > struct i915_dri1_state dri1; > /* Old ums support infrastructure, same warning applies. */ > struct i915_ums_state ums; > + > + /* protects the mmio flip data */ > + spinlock_t mmio_flip_lock; > + > } drm_i915_private_t; > > static inline struct drm_i915_private *to_i915(const struct drm_device *dev) > @@ -2657,6 +2661,9 @@ int i915_reg_read_ioctl(struct drm_device *dev, void *data, > int i915_get_reset_stats_ioctl(struct drm_device *dev, void *data, > struct drm_file *file); > > +void intel_notify_mmio_flip(struct drm_device *dev, > + struct intel_ring_buffer *ring); > + > /* overlay */ > extern struct intel_overlay_error_state *intel_overlay_capture_error_state(struct drm_device *dev); > extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e, > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c > index 4b4aeb3..ad26abe 100644 > --- a/drivers/gpu/drm/i915/i915_irq.c > +++ b/drivers/gpu/drm/i915/i915_irq.c > @@ -1080,6 +1080,8 @@ static void notify_ring(struct drm_device *dev, > > trace_i915_gem_request_complete(ring); > > + intel_notify_mmio_flip(dev, ring); > + > wake_up_all(&ring->irq_queue); > i915_queue_hangcheck(dev); > } > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c > index 7e4ea8d..19004bf 100644 > --- a/drivers/gpu/drm/i915/intel_display.c > +++ b/drivers/gpu/drm/i915/intel_display.c > @@ -8782,6 +8782,125 @@ err: > return ret; > } > > +static int intel_do_mmio_flip(struct drm_device *dev, > + struct drm_crtc *crtc) > +{ > + struct drm_i915_private *dev_priv = dev->dev_private; > + struct intel_crtc *intel_crtc; > + > + intel_crtc = to_intel_crtc(crtc); nit: could be part of intel_crtc declaration > + > + intel_mark_page_flip_active(intel_crtc); > + return dev_priv->display.update_primary_plane(crtc, crtc->fb, 0, 0); Needs to pass crtc->{x,y} instead of 0,0. I was a bit worried crtc->fb might be changed already at this point, but after thinking a bit it should be fine since the presense of unpin_work will keep intel_crtc_page_flip() from frobbing with it and we always call intel_crtc_wait_for_pending_flips() before set_base. Just need to update to use crtc->primary->fb now. I'm thinking we also have a small race here with a flip done interrupt from a previous set_base. Probably we need to sort it out using the SURFLIVE and/or flip counter like I did for the mmio vs. cs flip race. But I need to think on this a bit more. Perhaps you want to also look at those patches a bit. > +} > + > +static bool intel_postpone_flip(struct drm_i915_gem_object *obj) > +{ > + int ret; nit: needs an empty line between declarations and code. > + if (!obj->ring) > + return false; > + > + if (i915_seqno_passed(obj->ring->get_seqno(obj->ring, false), > + obj->last_write_seqno)) Maybe this should just do a very light check using lazy_coherency? > + return false; > + > + if (obj->last_write_seqno == obj->ring->outstanding_lazy_seqno) { > + ret = i915_add_request(obj->ring, NULL); > + if (WARN_ON(ret)) > + return false; > + } Looks like i915_gem_check_olr(), so maybe make it non-static and use here. > + > + if (WARN_ON(!obj->ring->irq_get(obj->ring))) > + return false; Maybe do this before the checking outstanding_lazy_seqno. Hmm, or perhaps not. It won't actaully eliminate the race entirely so you still have to do the manual check later. I guess having it in this order keeps the error paths simpler. > + > + return true; > +} > + > +void intel_notify_mmio_flip(struct drm_device *dev, > + struct intel_ring_buffer *ring) > +{ > + struct drm_i915_private *dev_priv = dev->dev_private; > + struct drm_crtc *crtc; > + struct intel_crtc *intel_crtc; > + struct intel_mmio_flip *mmio_flip_data; > + unsigned long irq_flags; > + u32 seqno; > + enum pipe pipe; > + > + BUG_ON(!ring); No need for the BUG. It'll explode on the next line anyway if ring==NULL. > + > + seqno = ring->get_seqno(ring, false); > +` > + spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags); > + for_each_pipe(pipe) { list_for_each_entry() seems more appropriate since you don't use the pipe variable for anything else besides looking up the crtc. You could also move some of the variable declarations inside the loop since they're not needed on the outside. > + crtc = dev_priv->pipe_to_crtc_mapping[pipe]; > + intel_crtc = to_intel_crtc(crtc); > + mmio_flip_data = &intel_crtc->mmio_flip_data; > + if ((mmio_flip_data->seqno != 0) && > + (ring->id == mmio_flip_data->ring_id) && > + (seqno >= mmio_flip_data->seqno)) { Weird indentation and a bit too many parens for my taste. Should also use i915_seqno_passed() here. Special casing seqno 0 this way seems safe enough since we apparently skip seqno 0 always. > + intel_do_mmio_flip(dev, crtc); > + mmio_flip_data->seqno = 0; > + ring->irq_put(ring); > + } > + } > + > + spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags); > +} > + > +/* Using MMIO based flips starting from VLV, for Media power well > + * residency optimization. The other alternative of having Render > + * ring based flip calls is not being used, as the performance > + * (FPS) of certain 3D Apps was getting severly affected. > + */ > +static int intel_gen7_queue_mmio_flip(struct drm_device *dev, > + struct drm_crtc *crtc, > + struct drm_framebuffer *fb, > + struct drm_i915_gem_object *obj, > + uint32_t flags) There's nothing gen7 specific here. So you could just rename the function to eg. intel_queue_mmio_flip(). Maybe also move the comment about VLV to where you set up the function pointer. > +{ > + struct drm_i915_private *dev_priv = dev->dev_private; > + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); > + unsigned long irq_flags; > + int ret; > + > + ret = intel_pin_and_fence_fb_obj(dev, obj, obj->ring); > + if (ret) > + goto err; > + > + switch (intel_crtc->plane) { > + case PLANE_A: > + case PLANE_B: > + case PLANE_C: > + break; > + default: > + WARN_ONCE(1, "unknown plane in flip command\n"); > + ret = -ENODEV; > + goto err_unpin; > + } I think you can drop this plane check. We should hopefully catch such bugs elsewhere already. > + > + if (!intel_postpone_flip(obj)) { > + ret = intel_do_mmio_flip(dev, crtc); > + return ret; Just 'return intel_do_mmio_flip(...' Actually I think you can just make intel_do_mmio_flip() void since .update_primary_plane() can't really fail. I think Daniel even has a patch lined up to make it void as well. > + } > + > + spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags); > + intel_crtc->mmio_flip_data.seqno = obj->last_write_seqno; > + intel_crtc->mmio_flip_data.ring_id = obj->ring->id; > + spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags); > + > + /* Double check to catch cases where irq fired before > + * mmio flip data was ready > + */ > + intel_notify_mmio_flip(dev, obj->ring); > + return 0; > + > +err_unpin: > + intel_unpin_fb_obj(obj); > +err: > + return ret; > +} > + > static int intel_gen7_queue_flip(struct drm_device *dev, > struct drm_crtc *crtc, > struct drm_framebuffer *fb, > @@ -10577,6 +10696,8 @@ static void intel_crtc_init(struct drm_device *dev, int pipe) > DRM_DEBUG_KMS("swapping pipes & planes for FBC\n"); > intel_crtc->plane = !pipe; > } > + if (IS_VALLEYVIEW(dev)) > + intel_crtc->mmio_flip_data.seqno = 0; Not needed. It's kzalloced so everything starts zeroed. > > BUG_ON(pipe >= ARRAY_SIZE(dev_priv->plane_to_crtc_mapping) || > dev_priv->plane_to_crtc_mapping[intel_crtc->plane] != NULL); > @@ -11107,6 +11228,9 @@ static void intel_init_display(struct drm_device *dev) > } > > intel_panel_init_backlight_funcs(dev); > + > + if (IS_VALLEYVIEW(dev)) > + dev_priv->display.queue_flip = intel_gen7_queue_mmio_flip; Would look a bit cleaner to do this before intel_panel_init_backlight_funcs(). That will keep the .queue_flip assignments closer together. I'm also thinking that maybe we want a module parameter to select between CS vs. mmio flips. It could default to mmio flips on VLV and CS flips on other platforms if it's generally accepted that mmio flips are better on VLV. That can at least get us a bit more testing coverage if people don't need a specific platform to try it out. On the whole I think it looks fairly good, and it should be fairly easy to extend it more for nuclear flips. > } > > /* > diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h > index fa99104..f0b26a1 100644 > --- a/drivers/gpu/drm/i915/intel_drv.h > +++ b/drivers/gpu/drm/i915/intel_drv.h > @@ -344,6 +344,11 @@ struct intel_pipe_wm { > bool fbc_wm_enabled; > }; > > +struct intel_mmio_flip { > + u32 seqno; > + u32 ring_id; > +}; > + > struct intel_crtc { > struct drm_crtc base; > enum pipe pipe; > @@ -395,6 +400,8 @@ struct intel_crtc { > /* watermarks currently being used */ > struct intel_pipe_wm active; > } wm; > + > + struct intel_mmio_flip mmio_flip_data; > }; > > struct intel_plane_wm_parameters { > -- > 1.8.5.1
On Fri, May 09, 2014 at 02:59:42PM +0300, Ville Syrjälä wrote: > On Sun, Mar 23, 2014 at 02:31:05PM +0530, sourab.gupta@intel.com wrote: > > From: Sourab Gupta <sourab.gupta@intel.com> > > > > Using MMIO based flips on VLV for Media power well residency optimization. > > The blitter ring is currently being used just for command streamer based > > flip calls. For pure 3D workloads, with MMIO flips, there will be no use > > of blitter ring and this will ensure the 100% residency for Media well. > > Sorry for dragging my feet with reviewing this. I'm hoping this is the > latest version... > > > > > v2: The MMIO flips now use the interrupt driven mechanism for issuing the > > flips when target seqno is reached. (Incorporating Ville's idea) > > > > v3: Rebasing on latest code. Code restructuring after incorporating > > Damien's comments > > > > Signed-off-by: Sourab Gupta <sourab.gupta@intel.com> > > Signed-off-by: Akash Goel <akash.goel@intel.com> > > --- > > drivers/gpu/drm/i915/i915_dma.c | 1 + > > drivers/gpu/drm/i915/i915_drv.h | 7 ++ > > drivers/gpu/drm/i915/i915_irq.c | 2 + > > drivers/gpu/drm/i915/intel_display.c | 124 +++++++++++++++++++++++++++++++++++ > > drivers/gpu/drm/i915/intel_drv.h | 7 ++ > > 5 files changed, 141 insertions(+) > > > > diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c > > index 4e0a26a..bca3c5a 100644 > > --- a/drivers/gpu/drm/i915/i915_dma.c > > +++ b/drivers/gpu/drm/i915/i915_dma.c > > @@ -1569,6 +1569,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) > > spin_lock_init(&dev_priv->backlight_lock); > > spin_lock_init(&dev_priv->uncore.lock); > > spin_lock_init(&dev_priv->mm.object_stat_lock); > > + spin_lock_init(&dev_priv->mmio_flip_lock); > > mutex_init(&dev_priv->dpio_lock); > > mutex_init(&dev_priv->modeset_restore_lock); > > > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > > index 3f62be0..678d31d 100644 > > --- a/drivers/gpu/drm/i915/i915_drv.h > > +++ b/drivers/gpu/drm/i915/i915_drv.h > > @@ -1621,6 +1621,10 @@ typedef struct drm_i915_private { > > struct i915_dri1_state dri1; > > /* Old ums support infrastructure, same warning applies. */ > > struct i915_ums_state ums; > > + > > + /* protects the mmio flip data */ > > + spinlock_t mmio_flip_lock; > > + > > } drm_i915_private_t; > > > > static inline struct drm_i915_private *to_i915(const struct drm_device *dev) > > @@ -2657,6 +2661,9 @@ int i915_reg_read_ioctl(struct drm_device *dev, void *data, > > int i915_get_reset_stats_ioctl(struct drm_device *dev, void *data, > > struct drm_file *file); > > > > +void intel_notify_mmio_flip(struct drm_device *dev, > > + struct intel_ring_buffer *ring); > > + > > /* overlay */ > > extern struct intel_overlay_error_state *intel_overlay_capture_error_state(struct drm_device *dev); > > extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e, > > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c > > index 4b4aeb3..ad26abe 100644 > > --- a/drivers/gpu/drm/i915/i915_irq.c > > +++ b/drivers/gpu/drm/i915/i915_irq.c > > @@ -1080,6 +1080,8 @@ static void notify_ring(struct drm_device *dev, > > > > trace_i915_gem_request_complete(ring); > > > > + intel_notify_mmio_flip(dev, ring); > > + > > wake_up_all(&ring->irq_queue); > > i915_queue_hangcheck(dev); > > } > > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c > > index 7e4ea8d..19004bf 100644 > > --- a/drivers/gpu/drm/i915/intel_display.c > > +++ b/drivers/gpu/drm/i915/intel_display.c > > @@ -8782,6 +8782,125 @@ err: > > return ret; > > } > > > > +static int intel_do_mmio_flip(struct drm_device *dev, > > + struct drm_crtc *crtc) > > +{ > > + struct drm_i915_private *dev_priv = dev->dev_private; > > + struct intel_crtc *intel_crtc; > > + > > + intel_crtc = to_intel_crtc(crtc); > > nit: could be part of intel_crtc declaration > > > + > > + intel_mark_page_flip_active(intel_crtc); > > + return dev_priv->display.update_primary_plane(crtc, crtc->fb, 0, 0); > > Needs to pass crtc->{x,y} instead of 0,0. > > I was a bit worried crtc->fb might be changed already at this point, but > after thinking a bit it should be fine since the presense of unpin_work > will keep intel_crtc_page_flip() from frobbing with it and we always > call intel_crtc_wait_for_pending_flips() before set_base. > > Just need to update to use crtc->primary->fb now. > > I'm thinking we also have a small race here with a flip done interrupt > from a previous set_base. Probably we need to sort it out using the > SURFLIVE and/or flip counter like I did for the mmio vs. cs flip > race. But I need to think on this a bit more. Perhaps you want to also > look at those patches a bit. Oh another thing here is that we update_primary_plane() isn't guaranteed to be atomic. We could start to use the atomic pipe update mechanism here already but then we need to schedule a work so that we can perform the vblank evade trick. That's going to be needed for the nuclear flip anyway. Another option would be to only update the base address and leave the offset registers alone just like the CS flip does.
On Fri, May 09, 2014 at 02:59:42PM +0300, Ville Syrjälä wrote: > On Sun, Mar 23, 2014 at 02:31:05PM +0530, sourab.gupta@intel.com wrote: > > + intel_do_mmio_flip(dev, crtc); > > + mmio_flip_data->seqno = 0; > > + ring->irq_put(ring); > > + } > > + } > > + > > + spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags); > > +} > > + > > +/* Using MMIO based flips starting from VLV, for Media power well > > + * residency optimization. The other alternative of having Render > > + * ring based flip calls is not being used, as the performance > > + * (FPS) of certain 3D Apps was getting severly affected. > > + */ > > +static int intel_gen7_queue_mmio_flip(struct drm_device *dev, > > + struct drm_crtc *crtc, > > + struct drm_framebuffer *fb, > > + struct drm_i915_gem_object *obj, > > + uint32_t flags) > > There's nothing gen7 specific here. So you could just rename the > function to eg. intel_queue_mmio_flip(). Maybe also move the > comment about VLV to where you set up the function pointer. Actually this code isn't entirely gen agnostic. It should work on gen5+ since all of those have a flip done interrupt. For older platforms we use some clever tricks involving the flip_pending status bits and vblank irqs. That code won't work for mmio flips. We'd need to add another way to complete the flips based. That would involve using the frame counter to make it accurate. To avoid races there we'd definitely need to use the vblank evade mechanism to make sure we sample the frame counter within the same frame as when we write the registers. Also gen2 has the extra complication that it lacks a hardware frame counter. So I think we can start off with limiting this to gen5+, and later we can extend it to cover the older platforms since we anyway need to do that work to get the nuclear flips working. BTW I gave this code a whirl on my IVB and everything seems to work fine.
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 4e0a26a..bca3c5a 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1569,6 +1569,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) spin_lock_init(&dev_priv->backlight_lock); spin_lock_init(&dev_priv->uncore.lock); spin_lock_init(&dev_priv->mm.object_stat_lock); + spin_lock_init(&dev_priv->mmio_flip_lock); mutex_init(&dev_priv->dpio_lock); mutex_init(&dev_priv->modeset_restore_lock); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3f62be0..678d31d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1621,6 +1621,10 @@ typedef struct drm_i915_private { struct i915_dri1_state dri1; /* Old ums support infrastructure, same warning applies. */ struct i915_ums_state ums; + + /* protects the mmio flip data */ + spinlock_t mmio_flip_lock; + } drm_i915_private_t; static inline struct drm_i915_private *to_i915(const struct drm_device *dev) @@ -2657,6 +2661,9 @@ int i915_reg_read_ioctl(struct drm_device *dev, void *data, int i915_get_reset_stats_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +void intel_notify_mmio_flip(struct drm_device *dev, + struct intel_ring_buffer *ring); + /* overlay */ extern struct intel_overlay_error_state *intel_overlay_capture_error_state(struct drm_device *dev); extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e, diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 4b4aeb3..ad26abe 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1080,6 +1080,8 @@ static void notify_ring(struct drm_device *dev, trace_i915_gem_request_complete(ring); + intel_notify_mmio_flip(dev, ring); + wake_up_all(&ring->irq_queue); i915_queue_hangcheck(dev); } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 7e4ea8d..19004bf 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -8782,6 +8782,125 @@ err: return ret; } +static int intel_do_mmio_flip(struct drm_device *dev, + struct drm_crtc *crtc) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_crtc *intel_crtc; + + intel_crtc = to_intel_crtc(crtc); + + intel_mark_page_flip_active(intel_crtc); + return dev_priv->display.update_primary_plane(crtc, crtc->fb, 0, 0); +} + +static bool intel_postpone_flip(struct drm_i915_gem_object *obj) +{ + int ret; + if (!obj->ring) + return false; + + if (i915_seqno_passed(obj->ring->get_seqno(obj->ring, false), + obj->last_write_seqno)) + return false; + + if (obj->last_write_seqno == obj->ring->outstanding_lazy_seqno) { + ret = i915_add_request(obj->ring, NULL); + if (WARN_ON(ret)) + return false; + } + + if (WARN_ON(!obj->ring->irq_get(obj->ring))) + return false; + + return true; +} + +void intel_notify_mmio_flip(struct drm_device *dev, + struct intel_ring_buffer *ring) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_crtc *crtc; + struct intel_crtc *intel_crtc; + struct intel_mmio_flip *mmio_flip_data; + unsigned long irq_flags; + u32 seqno; + enum pipe pipe; + + BUG_ON(!ring); + + seqno = ring->get_seqno(ring, false); + + spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags); + for_each_pipe(pipe) { + crtc = dev_priv->pipe_to_crtc_mapping[pipe]; + intel_crtc = to_intel_crtc(crtc); + mmio_flip_data = &intel_crtc->mmio_flip_data; + if ((mmio_flip_data->seqno != 0) && + (ring->id == mmio_flip_data->ring_id) && + (seqno >= mmio_flip_data->seqno)) { + intel_do_mmio_flip(dev, crtc); + mmio_flip_data->seqno = 0; + ring->irq_put(ring); + } + } + + spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags); +} + +/* Using MMIO based flips starting from VLV, for Media power well + * residency optimization. The other alternative of having Render + * ring based flip calls is not being used, as the performance + * (FPS) of certain 3D Apps was getting severly affected. + */ +static int intel_gen7_queue_mmio_flip(struct drm_device *dev, + struct drm_crtc *crtc, + struct drm_framebuffer *fb, + struct drm_i915_gem_object *obj, + uint32_t flags) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + unsigned long irq_flags; + int ret; + + ret = intel_pin_and_fence_fb_obj(dev, obj, obj->ring); + if (ret) + goto err; + + switch (intel_crtc->plane) { + case PLANE_A: + case PLANE_B: + case PLANE_C: + break; + default: + WARN_ONCE(1, "unknown plane in flip command\n"); + ret = -ENODEV; + goto err_unpin; + } + + if (!intel_postpone_flip(obj)) { + ret = intel_do_mmio_flip(dev, crtc); + return ret; + } + + spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags); + intel_crtc->mmio_flip_data.seqno = obj->last_write_seqno; + intel_crtc->mmio_flip_data.ring_id = obj->ring->id; + spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags); + + /* Double check to catch cases where irq fired before + * mmio flip data was ready + */ + intel_notify_mmio_flip(dev, obj->ring); + return 0; + +err_unpin: + intel_unpin_fb_obj(obj); +err: + return ret; +} + static int intel_gen7_queue_flip(struct drm_device *dev, struct drm_crtc *crtc, struct drm_framebuffer *fb, @@ -10577,6 +10696,8 @@ static void intel_crtc_init(struct drm_device *dev, int pipe) DRM_DEBUG_KMS("swapping pipes & planes for FBC\n"); intel_crtc->plane = !pipe; } + if (IS_VALLEYVIEW(dev)) + intel_crtc->mmio_flip_data.seqno = 0; BUG_ON(pipe >= ARRAY_SIZE(dev_priv->plane_to_crtc_mapping) || dev_priv->plane_to_crtc_mapping[intel_crtc->plane] != NULL); @@ -11107,6 +11228,9 @@ static void intel_init_display(struct drm_device *dev) } intel_panel_init_backlight_funcs(dev); + + if (IS_VALLEYVIEW(dev)) + dev_priv->display.queue_flip = intel_gen7_queue_mmio_flip; } /* diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index fa99104..f0b26a1 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -344,6 +344,11 @@ struct intel_pipe_wm { bool fbc_wm_enabled; }; +struct intel_mmio_flip { + u32 seqno; + u32 ring_id; +}; + struct intel_crtc { struct drm_crtc base; enum pipe pipe; @@ -395,6 +400,8 @@ struct intel_crtc { /* watermarks currently being used */ struct intel_pipe_wm active; } wm; + + struct intel_mmio_flip mmio_flip_data; }; struct intel_plane_wm_parameters {