Message ID | 1400956226-28053-15-git-send-email-robdclark@gmail.com (mailing list archive) |
---|---|
State | Accepted |
Headers | show |
On Sat, May 24, 2014 at 02:30:23PM -0400, Rob Clark wrote: > Signed-off-by: Rob Clark <robdclark@gmail.com> > --- > drivers/gpu/drm/msm/Makefile | 1 + > drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c | 57 ++++++------ > drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c | 6 ++ > drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h | 1 + > drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c | 5 -- > drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c | 56 ++++++------ > drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c | 6 ++ > drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h | 2 +- > drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c | 5 -- > drivers/gpu/drm/msm/msm_atomic.c | 141 ++++++++++++++++++++++++++++++ > drivers/gpu/drm/msm/msm_drv.c | 22 ++++- > drivers/gpu/drm/msm/msm_drv.h | 7 ++ > drivers/gpu/drm/msm/msm_gem.c | 24 +---- > drivers/gpu/drm/msm/msm_gem.h | 13 +++ > drivers/gpu/drm/msm/msm_kms.h | 1 + > 15 files changed, 253 insertions(+), 94 deletions(-) > create mode 100644 drivers/gpu/drm/msm/msm_atomic.c > > diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile > index 5e1e6b0..dd12f56 100644 > --- a/drivers/gpu/drm/msm/Makefile > +++ b/drivers/gpu/drm/msm/Makefile > @@ -27,6 +27,7 @@ msm-y := \ > mdp/mdp5/mdp5_kms.o \ > mdp/mdp5/mdp5_plane.o \ > mdp/mdp5/mdp5_smp.o \ > + msm_atomic.o \ > msm_drv.o \ > msm_fb.o \ > msm_gem.o \ > diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c > index d0d8befd..2fa6b75 100644 > --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c > +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c > @@ -52,7 +52,6 @@ struct mdp4_crtc { > > /* if there is a pending flip, these will be non-null: */ > struct drm_pending_vblank_event *event; > - struct msm_fence_cb pageflip_cb; > > #define PENDING_CURSOR 0x1 > #define PENDING_FLIP 0x2 > @@ -93,12 +92,16 @@ static void request_pending(struct drm_crtc *crtc, uint32_t pending) > mdp_irq_register(&get_kms(crtc)->base, &mdp4_crtc->vblank); > } > > -static void crtc_flush(struct drm_crtc *crtc) > +void mdp4_crtc_flush(struct drm_crtc *crtc) > { > + struct msm_drm_private *priv = crtc->dev->dev_private; > struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc); > struct mdp4_kms *mdp4_kms = get_kms(crtc); > uint32_t i, flush = 0; > > + if (priv->pending_crtcs & (1 << crtc->id)) > + return; > + > for (i = 0; i < ARRAY_SIZE(mdp4_crtc->planes); i++) { > struct drm_plane *plane = mdp4_crtc->planes[i]; > if (plane) { > @@ -142,7 +145,7 @@ static void update_scanout(struct drm_crtc *crtc, struct drm_framebuffer *fb) > * so that we can safely queue unref to current fb (ie. next > * vblank we know hw is done w/ previous scanout_fb). > */ > - crtc_flush(crtc); > + mdp4_crtc_flush(crtc); > > if (mdp4_crtc->scanout_fb) > drm_flip_work_queue(&mdp4_crtc->unref_fb_work, > @@ -177,21 +180,6 @@ static void complete_flip(struct drm_crtc *crtc, struct drm_file *file) > spin_unlock_irqrestore(&dev->event_lock, flags); > } > > -static void pageflip_cb(struct msm_fence_cb *cb) > -{ > - struct mdp4_crtc *mdp4_crtc = > - container_of(cb, struct mdp4_crtc, pageflip_cb); > - struct drm_crtc *crtc = &mdp4_crtc->base; > - struct drm_framebuffer *fb = crtc->primary->fb; > - > - if (!fb) > - return; > - > - drm_framebuffer_reference(fb); > - mdp4_plane_set_scanout(mdp4_crtc->plane, fb); > - update_scanout(crtc, fb); > -} > - > static void unref_fb_worker(struct drm_flip_work *work, void *val) > { > struct mdp4_crtc *mdp4_crtc = > @@ -406,7 +394,7 @@ static void mdp4_crtc_prepare(struct drm_crtc *crtc) > static void mdp4_crtc_commit(struct drm_crtc *crtc) > { > mdp4_crtc_dpms(crtc, DRM_MODE_DPMS_ON); > - crtc_flush(crtc); > + mdp4_crtc_flush(crtc); > /* drop the ref to mdp clk's that we got in prepare: */ > mdp4_disable(get_kms(crtc)); > } > @@ -448,23 +436,27 @@ static int mdp4_crtc_page_flip(struct drm_crtc *crtc, > { > struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc); > struct drm_device *dev = crtc->dev; > - struct drm_gem_object *obj; > unsigned long flags; > > + spin_lock_irqsave(&dev->event_lock, flags); > if (mdp4_crtc->event) { > dev_err(dev->dev, "already pending flip!\n"); > + spin_unlock_irqrestore(&dev->event_lock, flags); > return -EBUSY; > } > > - obj = msm_framebuffer_bo(new_fb, 0); > - > - spin_lock_irqsave(&dev->event_lock, flags); > mdp4_crtc->event = event; > spin_unlock_irqrestore(&dev->event_lock, flags); > > update_fb(crtc, new_fb); > > - return msm_gem_queue_inactive_cb(obj, &mdp4_crtc->pageflip_cb); > + > + /* grab extra ref for update_scanout() */ > + drm_framebuffer_reference(new_fb); > + mdp4_plane_set_scanout(mdp4_crtc->plane, new_fb); > + update_scanout(crtc, new_fb); > + > + return 0; > } > > static int mdp4_crtc_set_property(struct drm_crtc *crtc, > @@ -598,7 +590,7 @@ static int mdp4_crtc_cursor_move(struct drm_crtc *crtc, int x, int y) > mdp4_crtc->cursor.y = y; > spin_unlock_irqrestore(&mdp4_crtc->cursor.lock, flags); > > - crtc_flush(crtc); > + mdp4_crtc_flush(crtc); > request_pending(crtc, PENDING_CURSOR); > > return 0; > @@ -635,8 +627,13 @@ static void mdp4_crtc_vblank_irq(struct mdp_irq *irq, uint32_t irqstatus) > pending = atomic_xchg(&mdp4_crtc->pending, 0); > > if (pending & PENDING_FLIP) { > - complete_flip(crtc, NULL); > - drm_flip_work_commit(&mdp4_crtc->unref_fb_work, priv->wq); > + if (priv->pending_crtcs & (1 << crtc->id)) { > + /* our update hasn't been flushed yet: */ > + request_pending(crtc, PENDING_FLIP); > + } else { > + complete_flip(crtc, NULL); > + drm_flip_work_commit(&mdp4_crtc->unref_fb_work, priv->wq); > + } > } > > if (pending & PENDING_CURSOR) { > @@ -650,7 +647,7 @@ static void mdp4_crtc_err_irq(struct mdp_irq *irq, uint32_t irqstatus) > struct mdp4_crtc *mdp4_crtc = container_of(irq, struct mdp4_crtc, err); > struct drm_crtc *crtc = &mdp4_crtc->base; > DBG("%s: error: %08x", mdp4_crtc->name, irqstatus); > - crtc_flush(crtc); > + mdp4_crtc_flush(crtc); > } > > uint32_t mdp4_crtc_vblank(struct drm_crtc *crtc) > @@ -730,7 +727,7 @@ static void set_attach(struct drm_crtc *crtc, enum mdp4_pipe pipe_id, > mdp4_crtc->planes[pipe_id] = plane; > blend_setup(crtc); > if (mdp4_crtc->enabled && (plane != mdp4_crtc->plane)) > - crtc_flush(crtc); > + mdp4_crtc_flush(crtc); > } > > void mdp4_crtc_attach(struct drm_crtc *crtc, struct drm_plane *plane) > @@ -792,8 +789,6 @@ struct drm_crtc *mdp4_crtc_init(struct drm_device *dev, > ret = drm_flip_work_init(&mdp4_crtc->unref_cursor_work, 64, > "unref cursor", unref_cursor_worker); > > - INIT_FENCE_CB(&mdp4_crtc->pageflip_cb, pageflip_cb); > - > drm_crtc_init_with_planes(dev, crtc, plane, NULL, &mdp4_crtc_funcs); > drm_crtc_helper_add(crtc, &mdp4_crtc_helper_funcs); > > diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c > index 0bb4faa..af0bfb1 100644 > --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c > +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c > @@ -131,6 +131,11 @@ static long mdp4_round_pixclk(struct msm_kms *kms, unsigned long rate, > return mdp4_dtv_round_pixclk(encoder, rate); > } > > +static void mdp4_flush(struct msm_kms *kms, struct drm_crtc *crtc) > +{ > + mdp4_crtc_flush(crtc); > +} > + > static void mdp4_preclose(struct msm_kms *kms, struct drm_file *file) > { > struct mdp4_kms *mdp4_kms = to_mdp4_kms(to_mdp_kms(kms)); > @@ -162,6 +167,7 @@ static const struct mdp_kms_funcs kms_funcs = { > .disable_vblank = mdp4_disable_vblank, > .get_format = mdp_get_format, > .round_pixclk = mdp4_round_pixclk, > + .flush = mdp4_flush, > .preclose = mdp4_preclose, > .destroy = mdp4_destroy, > }, > diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h > index 715520c5..834454c 100644 > --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h > +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h > @@ -184,6 +184,7 @@ enum mdp4_pipe mdp4_plane_pipe(struct drm_plane *plane); > struct drm_plane *mdp4_plane_init(struct drm_device *dev, > enum mdp4_pipe pipe_id, bool private_plane); > > +void mdp4_crtc_flush(struct drm_crtc *crtc); > uint32_t mdp4_crtc_vblank(struct drm_crtc *crtc); > void mdp4_crtc_cancel_pending_flip(struct drm_crtc *crtc, struct drm_file *file); > void mdp4_crtc_set_config(struct drm_crtc *crtc, uint32_t config); > diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c > index 4c92985..f35848d 100644 > --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c > +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c > @@ -48,11 +48,6 @@ static int mdp4_plane_update(struct drm_plane *plane, > > mdp4_plane->enabled = true; > > - if (plane->fb) > - drm_framebuffer_unreference(plane->fb); > - > - drm_framebuffer_reference(fb); > - > return mdp4_plane_mode_set(plane, crtc, fb, > crtc_x, crtc_y, crtc_w, crtc_h, > src_x, src_y, src_w, src_h); > diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c > index 7f4ee99..0e74317 100644 > --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c > +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c > @@ -35,7 +35,6 @@ struct mdp5_crtc { > > /* if there is a pending flip, these will be non-null: */ > struct drm_pending_vblank_event *event; > - struct msm_fence_cb pageflip_cb; > > #define PENDING_CURSOR 0x1 > #define PENDING_FLIP 0x2 > @@ -73,13 +72,17 @@ static void request_pending(struct drm_crtc *crtc, uint32_t pending) > mdp_irq_register(&get_kms(crtc)->base, &mdp5_crtc->vblank); > } > > -static void crtc_flush(struct drm_crtc *crtc) > +void mdp5_crtc_flush(struct drm_crtc *crtc) > { > + struct msm_drm_private *priv = crtc->dev->dev_private; > struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc); > struct mdp5_kms *mdp5_kms = get_kms(crtc); > int id = mdp5_crtc->id; > uint32_t i, flush = 0; > > + if (priv->pending_crtcs & (1 << crtc->id)) > + return; > + > for (i = 0; i < ARRAY_SIZE(mdp5_crtc->planes); i++) { > struct drm_plane *plane = mdp5_crtc->planes[i]; > if (plane) { > @@ -124,7 +127,7 @@ static void update_scanout(struct drm_crtc *crtc, struct drm_framebuffer *fb) > * so that we can safely queue unref to current fb (ie. next > * vblank we know hw is done w/ previous scanout_fb). > */ > - crtc_flush(crtc); > + mdp5_crtc_flush(crtc); > > if (mdp5_crtc->scanout_fb) > drm_flip_work_queue(&mdp5_crtc->unref_fb_work, > @@ -165,21 +168,6 @@ static void complete_flip(struct drm_crtc *crtc, struct drm_file *file) > } > } > > -static void pageflip_cb(struct msm_fence_cb *cb) > -{ > - struct mdp5_crtc *mdp5_crtc = > - container_of(cb, struct mdp5_crtc, pageflip_cb); > - struct drm_crtc *crtc = &mdp5_crtc->base; > - struct drm_framebuffer *fb = mdp5_crtc->fb; > - > - if (!fb) > - return; > - > - drm_framebuffer_reference(fb); > - mdp5_plane_set_scanout(mdp5_crtc->plane, fb); > - update_scanout(crtc, fb); > -} > - > static void unref_fb_worker(struct drm_flip_work *work, void *val) > { > struct mdp5_crtc *mdp5_crtc = > @@ -324,7 +312,7 @@ static void mdp5_crtc_prepare(struct drm_crtc *crtc) > static void mdp5_crtc_commit(struct drm_crtc *crtc) > { > mdp5_crtc_dpms(crtc, DRM_MODE_DPMS_ON); > - crtc_flush(crtc); > + mdp5_crtc_flush(crtc); > /* drop the ref to mdp clk's that we got in prepare: */ > mdp5_disable(get_kms(crtc)); > } > @@ -366,23 +354,26 @@ static int mdp5_crtc_page_flip(struct drm_crtc *crtc, > { > struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc); > struct drm_device *dev = crtc->dev; > - struct drm_gem_object *obj; > unsigned long flags; > > + spin_lock_irqsave(&dev->event_lock, flags); > if (mdp5_crtc->event) { > dev_err(dev->dev, "already pending flip!\n"); > + spin_unlock_irqrestore(&dev->event_lock, flags); > return -EBUSY; > } > > - obj = msm_framebuffer_bo(new_fb, 0); > - > - spin_lock_irqsave(&dev->event_lock, flags); > mdp5_crtc->event = event; > spin_unlock_irqrestore(&dev->event_lock, flags); > > update_fb(crtc, new_fb); > > - return msm_gem_queue_inactive_cb(obj, &mdp5_crtc->pageflip_cb); > + /* grab extra ref for update_scanout() */ > + drm_framebuffer_reference(new_fb); > + mdp5_plane_set_scanout(mdp5_crtc->plane, new_fb); > + update_scanout(crtc, new_fb); > + > + return 0; > } > > static int mdp5_crtc_set_property(struct drm_crtc *crtc, > @@ -424,8 +415,13 @@ static void mdp5_crtc_vblank_irq(struct mdp_irq *irq, uint32_t irqstatus) > pending = atomic_xchg(&mdp5_crtc->pending, 0); > > if (pending & PENDING_FLIP) { > - complete_flip(crtc, NULL); > - drm_flip_work_commit(&mdp5_crtc->unref_fb_work, priv->wq); > + if (priv->pending_crtcs & (1 << crtc->id)) { > + /* our update hasn't been flushed yet: */ > + request_pending(crtc, PENDING_FLIP); > + } else { > + complete_flip(crtc, NULL); > + drm_flip_work_commit(&mdp5_crtc->unref_fb_work, priv->wq); > + } > } > } > > @@ -434,7 +430,7 @@ static void mdp5_crtc_err_irq(struct mdp_irq *irq, uint32_t irqstatus) > struct mdp5_crtc *mdp5_crtc = container_of(irq, struct mdp5_crtc, err); > struct drm_crtc *crtc = &mdp5_crtc->base; > DBG("%s: error: %08x", mdp5_crtc->name, irqstatus); > - crtc_flush(crtc); > + mdp5_crtc_flush(crtc); > } > > uint32_t mdp5_crtc_vblank(struct drm_crtc *crtc) > @@ -501,7 +497,7 @@ void mdp5_crtc_set_intf(struct drm_crtc *crtc, int intf, > MDP5_CTL_OP_MODE(MODE_NONE) | > MDP5_CTL_OP_INTF_NUM(intfnum[intf])); > > - crtc_flush(crtc); > + mdp5_crtc_flush(crtc); > } > > static void set_attach(struct drm_crtc *crtc, enum mdp5_pipe pipe_id, > @@ -517,7 +513,7 @@ static void set_attach(struct drm_crtc *crtc, enum mdp5_pipe pipe_id, > mdp5_crtc->planes[pipe_id] = plane; > blend_setup(crtc); > if (mdp5_crtc->enabled && (plane != mdp5_crtc->plane)) > - crtc_flush(crtc); > + mdp5_crtc_flush(crtc); > } > > void mdp5_crtc_attach(struct drm_crtc *crtc, struct drm_plane *plane) > @@ -563,8 +559,6 @@ struct drm_crtc *mdp5_crtc_init(struct drm_device *dev, > if (ret) > goto fail; > > - INIT_FENCE_CB(&mdp5_crtc->pageflip_cb, pageflip_cb); > - > drm_crtc_init_with_planes(dev, crtc, plane, NULL, &mdp5_crtc_funcs); > drm_crtc_helper_add(crtc, &mdp5_crtc_helper_funcs); > > diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c > index ee8446c..01c3d70 100644 > --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c > +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c > @@ -91,6 +91,11 @@ static long mdp5_round_pixclk(struct msm_kms *kms, unsigned long rate, > return rate; > } > > +static void mdp5_flush(struct msm_kms *kms, struct drm_crtc *crtc) > +{ > + mdp5_crtc_flush(crtc); > +} > + > static void mdp5_preclose(struct msm_kms *kms, struct drm_file *file) > { > struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms)); > @@ -118,6 +123,7 @@ static const struct mdp_kms_funcs kms_funcs = { > .disable_vblank = mdp5_disable_vblank, > .get_format = mdp_get_format, > .round_pixclk = mdp5_round_pixclk, > + .flush = mdp5_flush, > .preclose = mdp5_preclose, > .destroy = mdp5_destroy, > }, > diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h > index c8b1a25..18b031b 100644 > --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h > +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h > @@ -197,8 +197,8 @@ enum mdp5_pipe mdp5_plane_pipe(struct drm_plane *plane); > struct drm_plane *mdp5_plane_init(struct drm_device *dev, > enum mdp5_pipe pipe, bool private_plane); > > +void mdp5_crtc_flush(struct drm_crtc *crtc); > uint32_t mdp5_crtc_vblank(struct drm_crtc *crtc); > - > void mdp5_crtc_cancel_pending_flip(struct drm_crtc *crtc, struct drm_file *file); > void mdp5_crtc_set_intf(struct drm_crtc *crtc, int intf, > enum mdp5_intf intf_id); > diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c > index 53cc8c6..f1bf3c2 100644 > --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c > +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c > @@ -48,11 +48,6 @@ static int mdp5_plane_update(struct drm_plane *plane, > > mdp5_plane->enabled = true; > > - if (plane->fb) > - drm_framebuffer_unreference(plane->fb); > - > - drm_framebuffer_reference(fb); > - > return mdp5_plane_mode_set(plane, crtc, fb, > crtc_x, crtc_y, crtc_w, crtc_h, > src_x, src_y, src_w, src_h); > diff --git a/drivers/gpu/drm/msm/msm_atomic.c b/drivers/gpu/drm/msm/msm_atomic.c > new file mode 100644 > index 0000000..b231377 > --- /dev/null > +++ b/drivers/gpu/drm/msm/msm_atomic.c > @@ -0,0 +1,141 @@ > +/* > + * Copyright (C) 2013 Red Hat > + * Author: Rob Clark <robdclark@gmail.com> > + * > + * This program is free software; you can redistribute it and/or modify it > + * under the terms of the GNU General Public License version 2 as published by > + * the Free Software Foundation. > + * > + * This program is distributed in the hope that it will be useful, but WITHOUT > + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for > + * more details. > + * > + * You should have received a copy of the GNU General Public License along with > + * this program. If not, see <http://www.gnu.org/licenses/>. > + */ > + > +#include "msm_drv.h" > +#include "msm_kms.h" > +#include "msm_gem.h" > + > +struct msm_async_commit { > + struct drm_atomic_state *state; > + uint32_t fence; > + struct msm_fence_cb fence_cb; > +}; > + > +static void fence_cb(struct msm_fence_cb *cb); > +static int commit_sync(struct drm_device *dev, void *state, bool unlocked); > + > +static struct msm_async_commit *new_commit(struct drm_atomic_state *state) > +{ > + struct msm_async_commit *c = kzalloc(sizeof(*c), GFP_KERNEL); > + > + if (!c) > + return NULL; > + > + drm_atomic_state_reference(state); > + c->state = state; > + INIT_FENCE_CB(&c->fence_cb, fence_cb); > + > + return c; > +} > +static void free_commit(struct msm_async_commit *c) > +{ > + drm_atomic_state_unreference(c->state); > + kfree(c); > +} > + > +static void fence_cb(struct msm_fence_cb *cb) > +{ > + struct msm_async_commit *c = > + container_of(cb, struct msm_async_commit, fence_cb); > + commit_sync(c->state->dev, c->state, true); > + free_commit(c); > +} > + > +static void add_fb(struct msm_async_commit *c, struct drm_crtc *crtc, > + struct drm_framebuffer *fb) > +{ > + struct drm_gem_object *obj = msm_framebuffer_bo(fb, 0); > + c->fence = max(c->fence, msm_gem_fence(to_msm_bo(obj), MSM_PREP_READ)); > +} > + > +static int wait_fb(struct drm_crtc *crtc, struct drm_framebuffer *fb) > +{ > + // XXX TODO wait.. > + return 0; > +} > + > +#define pending_fb(state) ((state) && (state)->fb && (state)->new_fb) > + > +static int commit_sync(struct drm_device *dev, void *state, bool unlocked) > +{ > + struct drm_atomic_state *a = state; > + struct msm_drm_private *priv = dev->dev_private; > + struct msm_kms *kms = priv->kms; > + int ncrtcs = dev->mode_config.num_crtc; > + uint32_t pending_crtcs = 0; > + int i, ret; > + > + for (i = 0; i < ncrtcs; i++) > + if (a->crtcs[i]) > + pending_crtcs |= (1 << a->crtcs[i]->id); > + > + mutex_lock(&dev->struct_mutex); > + WARN_ON(priv->pending_crtcs & pending_crtcs); > + priv->pending_crtcs |= pending_crtcs; > + mutex_unlock(&dev->struct_mutex); > + > + if (unlocked) > + ret = drm_atomic_commit_unlocked(dev, state); > + else > + ret = drm_atomic_commit(dev, state); > + > + mutex_lock(&dev->struct_mutex); > + priv->pending_crtcs &= ~pending_crtcs; > + mutex_unlock(&dev->struct_mutex); > + > + if (ret) > + return ret; > + > + for (i = 0; i < ncrtcs; i++) > + if (a->crtcs[i]) > + kms->funcs->flush(kms, a->crtcs[i]); > + > + return 0; > +} > + > +int msm_atomic_commit(struct drm_device *dev, struct drm_atomic_state *state) > +{ > + struct drm_atomic_state *a = state; > + int nplanes = dev->mode_config.num_total_plane; > + int i; > + > + if (a->flags & DRM_MODE_ATOMIC_NONBLOCK) { > + /* non-block mode: defer commit until fb's are ready */ > + struct msm_async_commit *c = new_commit(state); > + > + if (!c) > + return -ENOMEM; > + > + for (i = 0; i < nplanes; i++) > + if (pending_fb(a->pstates[i])) > + add_fb(c, a->pstates[i]->crtc, a->pstates[i]->fb); > + > + return msm_queue_fence_cb(dev, &c->fence_cb, c->fence); > + } else { > + /* blocking mode: wait until fb's are ready */ > + int ret = 0; > + > + for (i = 0; i < nplanes && !ret; i++) > + if (pending_fb(a->pstates[i])) > + ret = wait_fb(a->pstates[i]->crtc, a->pstates[i]->fb); > + > + if (ret) > + return ret; > + > + return commit_sync(dev, state, false); > + } > +} Ok, I think I should have read your msm implementation a _lot_ earlier. Explains your desing choices neatly. Two observations: - A GO bit makes nuclear pageflips ridiculously easy to implement, presuming the hardware actually works. And it's the sane model, so imo a good one to wrap the atomic helpers around. But reality is often a lot more ugly, especially if you're employed by Intel. Which is why I'm harping so much on this helpers-vs-core interface issues ... We really need the full state transition in one piece to do anything useful. - msm doesn't have any resource sharing going on for modeset operations (which I mean lighting up crtcs to feed pixel streams to connectors). Which means the simplistic "loop over all crtcs and call the old ->setcrtc" approach actually works. The problem I see here is that if you have hardware with more elaborate needs (e.g. shared dplls), but otherwise sanity for plane updates (i.e. a GO bit) then your current atomic helpers will make it rather hard to mix this. So I think we should pimp the crtc helpers a bit to be atomic compliant (i.e. kill all outputs first before enabling anything new) and try to integrate this with the atomic helpers used for GO bit style updates. i915 has dpll sharing on ivb/hsw, but doesn't use the the crtc helpers anymore. But the radone eyefinity (or whatever the damn thing is called) I have lying around here fits the bill: It has 5 DP+ outputs but only 2 dplls. So you can drive e.g. 3 DP screens and then switch to 2 HDMI screens atomically and it should all pan out. But since your current helpers just loop over all crtcs without any regard to ordering constraints this will fall over if the 2 HDMI outputs get enabled before the 3 DP outputs get disabled all disabled. So with all that in mind I have 3 sanity checks for the atomic interfaces and helpers: 1. The atomic helpers should make enabling sane hw with a GO bit easy for nuclear pageflips. Benchmark would be sane hw like msm or omap. 2. It should cooperate with the crtc helpers such that hw with some shared resources (like dplls) works for atomic modesets. That pretty much means "disable everything (including crtc/connetor pipelines that only changed some parts) before enabling anything". Benchmark would be a platform with shared dplls. 3. The core->driver interface should be powerful enough to support insanity like i915, but no more. Which means all the code that's share (i.e. the set_prop code I've been harping all over the place about) should be done in the core, without any means for drivers to override. Currently the drm core also takes care of a bunch of things like basic locking and refcounting, and that's kinda the spirit for this. i915 is the obvious benchmark here. I think we can roll this out piecemeal (and it's probably better to do it that way), but I also think that until we've resolved the requirements of 2&3 we should try to minimize subsystem wide changes as much as possible by making them opt-in and the vfuncs optional. If you compare this approach with my review for Matt's universal plane patches it's exactly the same song. I hope this elaboration of my thinking clarifies all my review comments a bit and explains what I'm aiming for. Cheers, Daniel
On Mon, May 26, 2014 at 1:54 PM, Daniel Vetter <daniel@ffwll.ch> wrote: > On Sat, May 24, 2014 at 02:30:23PM -0400, Rob Clark wrote: >> Signed-off-by: Rob Clark <robdclark@gmail.com> >> --- >> drivers/gpu/drm/msm/Makefile | 1 + >> drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c | 57 ++++++------ >> drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c | 6 ++ >> drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h | 1 + >> drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c | 5 -- >> drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c | 56 ++++++------ >> drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c | 6 ++ >> drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h | 2 +- >> drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c | 5 -- >> drivers/gpu/drm/msm/msm_atomic.c | 141 ++++++++++++++++++++++++++++++ >> drivers/gpu/drm/msm/msm_drv.c | 22 ++++- >> drivers/gpu/drm/msm/msm_drv.h | 7 ++ >> drivers/gpu/drm/msm/msm_gem.c | 24 +---- >> drivers/gpu/drm/msm/msm_gem.h | 13 +++ >> drivers/gpu/drm/msm/msm_kms.h | 1 + >> 15 files changed, 253 insertions(+), 94 deletions(-) >> create mode 100644 drivers/gpu/drm/msm/msm_atomic.c >> >> diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile >> index 5e1e6b0..dd12f56 100644 >> --- a/drivers/gpu/drm/msm/Makefile >> +++ b/drivers/gpu/drm/msm/Makefile >> @@ -27,6 +27,7 @@ msm-y := \ >> mdp/mdp5/mdp5_kms.o \ >> mdp/mdp5/mdp5_plane.o \ >> mdp/mdp5/mdp5_smp.o \ >> + msm_atomic.o \ >> msm_drv.o \ >> msm_fb.o \ >> msm_gem.o \ >> diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c >> index d0d8befd..2fa6b75 100644 >> --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c >> +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c >> @@ -52,7 +52,6 @@ struct mdp4_crtc { >> >> /* if there is a pending flip, these will be non-null: */ >> struct drm_pending_vblank_event *event; >> - struct msm_fence_cb pageflip_cb; >> >> #define PENDING_CURSOR 0x1 >> #define PENDING_FLIP 0x2 >> @@ -93,12 +92,16 @@ static void request_pending(struct drm_crtc *crtc, uint32_t pending) >> mdp_irq_register(&get_kms(crtc)->base, &mdp4_crtc->vblank); >> } >> >> -static void crtc_flush(struct drm_crtc *crtc) >> +void mdp4_crtc_flush(struct drm_crtc *crtc) >> { >> + struct msm_drm_private *priv = crtc->dev->dev_private; >> struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc); >> struct mdp4_kms *mdp4_kms = get_kms(crtc); >> uint32_t i, flush = 0; >> >> + if (priv->pending_crtcs & (1 << crtc->id)) >> + return; >> + >> for (i = 0; i < ARRAY_SIZE(mdp4_crtc->planes); i++) { >> struct drm_plane *plane = mdp4_crtc->planes[i]; >> if (plane) { >> @@ -142,7 +145,7 @@ static void update_scanout(struct drm_crtc *crtc, struct drm_framebuffer *fb) >> * so that we can safely queue unref to current fb (ie. next >> * vblank we know hw is done w/ previous scanout_fb). >> */ >> - crtc_flush(crtc); >> + mdp4_crtc_flush(crtc); >> >> if (mdp4_crtc->scanout_fb) >> drm_flip_work_queue(&mdp4_crtc->unref_fb_work, >> @@ -177,21 +180,6 @@ static void complete_flip(struct drm_crtc *crtc, struct drm_file *file) >> spin_unlock_irqrestore(&dev->event_lock, flags); >> } >> >> -static void pageflip_cb(struct msm_fence_cb *cb) >> -{ >> - struct mdp4_crtc *mdp4_crtc = >> - container_of(cb, struct mdp4_crtc, pageflip_cb); >> - struct drm_crtc *crtc = &mdp4_crtc->base; >> - struct drm_framebuffer *fb = crtc->primary->fb; >> - >> - if (!fb) >> - return; >> - >> - drm_framebuffer_reference(fb); >> - mdp4_plane_set_scanout(mdp4_crtc->plane, fb); >> - update_scanout(crtc, fb); >> -} >> - >> static void unref_fb_worker(struct drm_flip_work *work, void *val) >> { >> struct mdp4_crtc *mdp4_crtc = >> @@ -406,7 +394,7 @@ static void mdp4_crtc_prepare(struct drm_crtc *crtc) >> static void mdp4_crtc_commit(struct drm_crtc *crtc) >> { >> mdp4_crtc_dpms(crtc, DRM_MODE_DPMS_ON); >> - crtc_flush(crtc); >> + mdp4_crtc_flush(crtc); >> /* drop the ref to mdp clk's that we got in prepare: */ >> mdp4_disable(get_kms(crtc)); >> } >> @@ -448,23 +436,27 @@ static int mdp4_crtc_page_flip(struct drm_crtc *crtc, >> { >> struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc); >> struct drm_device *dev = crtc->dev; >> - struct drm_gem_object *obj; >> unsigned long flags; >> >> + spin_lock_irqsave(&dev->event_lock, flags); >> if (mdp4_crtc->event) { >> dev_err(dev->dev, "already pending flip!\n"); >> + spin_unlock_irqrestore(&dev->event_lock, flags); >> return -EBUSY; >> } >> >> - obj = msm_framebuffer_bo(new_fb, 0); >> - >> - spin_lock_irqsave(&dev->event_lock, flags); >> mdp4_crtc->event = event; >> spin_unlock_irqrestore(&dev->event_lock, flags); >> >> update_fb(crtc, new_fb); >> >> - return msm_gem_queue_inactive_cb(obj, &mdp4_crtc->pageflip_cb); >> + >> + /* grab extra ref for update_scanout() */ >> + drm_framebuffer_reference(new_fb); >> + mdp4_plane_set_scanout(mdp4_crtc->plane, new_fb); >> + update_scanout(crtc, new_fb); >> + >> + return 0; >> } >> >> static int mdp4_crtc_set_property(struct drm_crtc *crtc, >> @@ -598,7 +590,7 @@ static int mdp4_crtc_cursor_move(struct drm_crtc *crtc, int x, int y) >> mdp4_crtc->cursor.y = y; >> spin_unlock_irqrestore(&mdp4_crtc->cursor.lock, flags); >> >> - crtc_flush(crtc); >> + mdp4_crtc_flush(crtc); >> request_pending(crtc, PENDING_CURSOR); >> >> return 0; >> @@ -635,8 +627,13 @@ static void mdp4_crtc_vblank_irq(struct mdp_irq *irq, uint32_t irqstatus) >> pending = atomic_xchg(&mdp4_crtc->pending, 0); >> >> if (pending & PENDING_FLIP) { >> - complete_flip(crtc, NULL); >> - drm_flip_work_commit(&mdp4_crtc->unref_fb_work, priv->wq); >> + if (priv->pending_crtcs & (1 << crtc->id)) { >> + /* our update hasn't been flushed yet: */ >> + request_pending(crtc, PENDING_FLIP); >> + } else { >> + complete_flip(crtc, NULL); >> + drm_flip_work_commit(&mdp4_crtc->unref_fb_work, priv->wq); >> + } >> } >> >> if (pending & PENDING_CURSOR) { >> @@ -650,7 +647,7 @@ static void mdp4_crtc_err_irq(struct mdp_irq *irq, uint32_t irqstatus) >> struct mdp4_crtc *mdp4_crtc = container_of(irq, struct mdp4_crtc, err); >> struct drm_crtc *crtc = &mdp4_crtc->base; >> DBG("%s: error: %08x", mdp4_crtc->name, irqstatus); >> - crtc_flush(crtc); >> + mdp4_crtc_flush(crtc); >> } >> >> uint32_t mdp4_crtc_vblank(struct drm_crtc *crtc) >> @@ -730,7 +727,7 @@ static void set_attach(struct drm_crtc *crtc, enum mdp4_pipe pipe_id, >> mdp4_crtc->planes[pipe_id] = plane; >> blend_setup(crtc); >> if (mdp4_crtc->enabled && (plane != mdp4_crtc->plane)) >> - crtc_flush(crtc); >> + mdp4_crtc_flush(crtc); >> } >> >> void mdp4_crtc_attach(struct drm_crtc *crtc, struct drm_plane *plane) >> @@ -792,8 +789,6 @@ struct drm_crtc *mdp4_crtc_init(struct drm_device *dev, >> ret = drm_flip_work_init(&mdp4_crtc->unref_cursor_work, 64, >> "unref cursor", unref_cursor_worker); >> >> - INIT_FENCE_CB(&mdp4_crtc->pageflip_cb, pageflip_cb); >> - >> drm_crtc_init_with_planes(dev, crtc, plane, NULL, &mdp4_crtc_funcs); >> drm_crtc_helper_add(crtc, &mdp4_crtc_helper_funcs); >> >> diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c >> index 0bb4faa..af0bfb1 100644 >> --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c >> +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c >> @@ -131,6 +131,11 @@ static long mdp4_round_pixclk(struct msm_kms *kms, unsigned long rate, >> return mdp4_dtv_round_pixclk(encoder, rate); >> } >> >> +static void mdp4_flush(struct msm_kms *kms, struct drm_crtc *crtc) >> +{ >> + mdp4_crtc_flush(crtc); >> +} >> + >> static void mdp4_preclose(struct msm_kms *kms, struct drm_file *file) >> { >> struct mdp4_kms *mdp4_kms = to_mdp4_kms(to_mdp_kms(kms)); >> @@ -162,6 +167,7 @@ static const struct mdp_kms_funcs kms_funcs = { >> .disable_vblank = mdp4_disable_vblank, >> .get_format = mdp_get_format, >> .round_pixclk = mdp4_round_pixclk, >> + .flush = mdp4_flush, >> .preclose = mdp4_preclose, >> .destroy = mdp4_destroy, >> }, >> diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h >> index 715520c5..834454c 100644 >> --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h >> +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h >> @@ -184,6 +184,7 @@ enum mdp4_pipe mdp4_plane_pipe(struct drm_plane *plane); >> struct drm_plane *mdp4_plane_init(struct drm_device *dev, >> enum mdp4_pipe pipe_id, bool private_plane); >> >> +void mdp4_crtc_flush(struct drm_crtc *crtc); >> uint32_t mdp4_crtc_vblank(struct drm_crtc *crtc); >> void mdp4_crtc_cancel_pending_flip(struct drm_crtc *crtc, struct drm_file *file); >> void mdp4_crtc_set_config(struct drm_crtc *crtc, uint32_t config); >> diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c >> index 4c92985..f35848d 100644 >> --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c >> +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c >> @@ -48,11 +48,6 @@ static int mdp4_plane_update(struct drm_plane *plane, >> >> mdp4_plane->enabled = true; >> >> - if (plane->fb) >> - drm_framebuffer_unreference(plane->fb); >> - >> - drm_framebuffer_reference(fb); >> - >> return mdp4_plane_mode_set(plane, crtc, fb, >> crtc_x, crtc_y, crtc_w, crtc_h, >> src_x, src_y, src_w, src_h); >> diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c >> index 7f4ee99..0e74317 100644 >> --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c >> +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c >> @@ -35,7 +35,6 @@ struct mdp5_crtc { >> >> /* if there is a pending flip, these will be non-null: */ >> struct drm_pending_vblank_event *event; >> - struct msm_fence_cb pageflip_cb; >> >> #define PENDING_CURSOR 0x1 >> #define PENDING_FLIP 0x2 >> @@ -73,13 +72,17 @@ static void request_pending(struct drm_crtc *crtc, uint32_t pending) >> mdp_irq_register(&get_kms(crtc)->base, &mdp5_crtc->vblank); >> } >> >> -static void crtc_flush(struct drm_crtc *crtc) >> +void mdp5_crtc_flush(struct drm_crtc *crtc) >> { >> + struct msm_drm_private *priv = crtc->dev->dev_private; >> struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc); >> struct mdp5_kms *mdp5_kms = get_kms(crtc); >> int id = mdp5_crtc->id; >> uint32_t i, flush = 0; >> >> + if (priv->pending_crtcs & (1 << crtc->id)) >> + return; >> + >> for (i = 0; i < ARRAY_SIZE(mdp5_crtc->planes); i++) { >> struct drm_plane *plane = mdp5_crtc->planes[i]; >> if (plane) { >> @@ -124,7 +127,7 @@ static void update_scanout(struct drm_crtc *crtc, struct drm_framebuffer *fb) >> * so that we can safely queue unref to current fb (ie. next >> * vblank we know hw is done w/ previous scanout_fb). >> */ >> - crtc_flush(crtc); >> + mdp5_crtc_flush(crtc); >> >> if (mdp5_crtc->scanout_fb) >> drm_flip_work_queue(&mdp5_crtc->unref_fb_work, >> @@ -165,21 +168,6 @@ static void complete_flip(struct drm_crtc *crtc, struct drm_file *file) >> } >> } >> >> -static void pageflip_cb(struct msm_fence_cb *cb) >> -{ >> - struct mdp5_crtc *mdp5_crtc = >> - container_of(cb, struct mdp5_crtc, pageflip_cb); >> - struct drm_crtc *crtc = &mdp5_crtc->base; >> - struct drm_framebuffer *fb = mdp5_crtc->fb; >> - >> - if (!fb) >> - return; >> - >> - drm_framebuffer_reference(fb); >> - mdp5_plane_set_scanout(mdp5_crtc->plane, fb); >> - update_scanout(crtc, fb); >> -} >> - >> static void unref_fb_worker(struct drm_flip_work *work, void *val) >> { >> struct mdp5_crtc *mdp5_crtc = >> @@ -324,7 +312,7 @@ static void mdp5_crtc_prepare(struct drm_crtc *crtc) >> static void mdp5_crtc_commit(struct drm_crtc *crtc) >> { >> mdp5_crtc_dpms(crtc, DRM_MODE_DPMS_ON); >> - crtc_flush(crtc); >> + mdp5_crtc_flush(crtc); >> /* drop the ref to mdp clk's that we got in prepare: */ >> mdp5_disable(get_kms(crtc)); >> } >> @@ -366,23 +354,26 @@ static int mdp5_crtc_page_flip(struct drm_crtc *crtc, >> { >> struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc); >> struct drm_device *dev = crtc->dev; >> - struct drm_gem_object *obj; >> unsigned long flags; >> >> + spin_lock_irqsave(&dev->event_lock, flags); >> if (mdp5_crtc->event) { >> dev_err(dev->dev, "already pending flip!\n"); >> + spin_unlock_irqrestore(&dev->event_lock, flags); >> return -EBUSY; >> } >> >> - obj = msm_framebuffer_bo(new_fb, 0); >> - >> - spin_lock_irqsave(&dev->event_lock, flags); >> mdp5_crtc->event = event; >> spin_unlock_irqrestore(&dev->event_lock, flags); >> >> update_fb(crtc, new_fb); >> >> - return msm_gem_queue_inactive_cb(obj, &mdp5_crtc->pageflip_cb); >> + /* grab extra ref for update_scanout() */ >> + drm_framebuffer_reference(new_fb); >> + mdp5_plane_set_scanout(mdp5_crtc->plane, new_fb); >> + update_scanout(crtc, new_fb); >> + >> + return 0; >> } >> >> static int mdp5_crtc_set_property(struct drm_crtc *crtc, >> @@ -424,8 +415,13 @@ static void mdp5_crtc_vblank_irq(struct mdp_irq *irq, uint32_t irqstatus) >> pending = atomic_xchg(&mdp5_crtc->pending, 0); >> >> if (pending & PENDING_FLIP) { >> - complete_flip(crtc, NULL); >> - drm_flip_work_commit(&mdp5_crtc->unref_fb_work, priv->wq); >> + if (priv->pending_crtcs & (1 << crtc->id)) { >> + /* our update hasn't been flushed yet: */ >> + request_pending(crtc, PENDING_FLIP); >> + } else { >> + complete_flip(crtc, NULL); >> + drm_flip_work_commit(&mdp5_crtc->unref_fb_work, priv->wq); >> + } >> } >> } >> >> @@ -434,7 +430,7 @@ static void mdp5_crtc_err_irq(struct mdp_irq *irq, uint32_t irqstatus) >> struct mdp5_crtc *mdp5_crtc = container_of(irq, struct mdp5_crtc, err); >> struct drm_crtc *crtc = &mdp5_crtc->base; >> DBG("%s: error: %08x", mdp5_crtc->name, irqstatus); >> - crtc_flush(crtc); >> + mdp5_crtc_flush(crtc); >> } >> >> uint32_t mdp5_crtc_vblank(struct drm_crtc *crtc) >> @@ -501,7 +497,7 @@ void mdp5_crtc_set_intf(struct drm_crtc *crtc, int intf, >> MDP5_CTL_OP_MODE(MODE_NONE) | >> MDP5_CTL_OP_INTF_NUM(intfnum[intf])); >> >> - crtc_flush(crtc); >> + mdp5_crtc_flush(crtc); >> } >> >> static void set_attach(struct drm_crtc *crtc, enum mdp5_pipe pipe_id, >> @@ -517,7 +513,7 @@ static void set_attach(struct drm_crtc *crtc, enum mdp5_pipe pipe_id, >> mdp5_crtc->planes[pipe_id] = plane; >> blend_setup(crtc); >> if (mdp5_crtc->enabled && (plane != mdp5_crtc->plane)) >> - crtc_flush(crtc); >> + mdp5_crtc_flush(crtc); >> } >> >> void mdp5_crtc_attach(struct drm_crtc *crtc, struct drm_plane *plane) >> @@ -563,8 +559,6 @@ struct drm_crtc *mdp5_crtc_init(struct drm_device *dev, >> if (ret) >> goto fail; >> >> - INIT_FENCE_CB(&mdp5_crtc->pageflip_cb, pageflip_cb); >> - >> drm_crtc_init_with_planes(dev, crtc, plane, NULL, &mdp5_crtc_funcs); >> drm_crtc_helper_add(crtc, &mdp5_crtc_helper_funcs); >> >> diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c >> index ee8446c..01c3d70 100644 >> --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c >> +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c >> @@ -91,6 +91,11 @@ static long mdp5_round_pixclk(struct msm_kms *kms, unsigned long rate, >> return rate; >> } >> >> +static void mdp5_flush(struct msm_kms *kms, struct drm_crtc *crtc) >> +{ >> + mdp5_crtc_flush(crtc); >> +} >> + >> static void mdp5_preclose(struct msm_kms *kms, struct drm_file *file) >> { >> struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms)); >> @@ -118,6 +123,7 @@ static const struct mdp_kms_funcs kms_funcs = { >> .disable_vblank = mdp5_disable_vblank, >> .get_format = mdp_get_format, >> .round_pixclk = mdp5_round_pixclk, >> + .flush = mdp5_flush, >> .preclose = mdp5_preclose, >> .destroy = mdp5_destroy, >> }, >> diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h >> index c8b1a25..18b031b 100644 >> --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h >> +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h >> @@ -197,8 +197,8 @@ enum mdp5_pipe mdp5_plane_pipe(struct drm_plane *plane); >> struct drm_plane *mdp5_plane_init(struct drm_device *dev, >> enum mdp5_pipe pipe, bool private_plane); >> >> +void mdp5_crtc_flush(struct drm_crtc *crtc); >> uint32_t mdp5_crtc_vblank(struct drm_crtc *crtc); >> - >> void mdp5_crtc_cancel_pending_flip(struct drm_crtc *crtc, struct drm_file *file); >> void mdp5_crtc_set_intf(struct drm_crtc *crtc, int intf, >> enum mdp5_intf intf_id); >> diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c >> index 53cc8c6..f1bf3c2 100644 >> --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c >> +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c >> @@ -48,11 +48,6 @@ static int mdp5_plane_update(struct drm_plane *plane, >> >> mdp5_plane->enabled = true; >> >> - if (plane->fb) >> - drm_framebuffer_unreference(plane->fb); >> - >> - drm_framebuffer_reference(fb); >> - >> return mdp5_plane_mode_set(plane, crtc, fb, >> crtc_x, crtc_y, crtc_w, crtc_h, >> src_x, src_y, src_w, src_h); >> diff --git a/drivers/gpu/drm/msm/msm_atomic.c b/drivers/gpu/drm/msm/msm_atomic.c >> new file mode 100644 >> index 0000000..b231377 >> --- /dev/null >> +++ b/drivers/gpu/drm/msm/msm_atomic.c >> @@ -0,0 +1,141 @@ >> +/* >> + * Copyright (C) 2013 Red Hat >> + * Author: Rob Clark <robdclark@gmail.com> >> + * >> + * This program is free software; you can redistribute it and/or modify it >> + * under the terms of the GNU General Public License version 2 as published by >> + * the Free Software Foundation. >> + * >> + * This program is distributed in the hope that it will be useful, but WITHOUT >> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or >> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for >> + * more details. >> + * >> + * You should have received a copy of the GNU General Public License along with >> + * this program. If not, see <http://www.gnu.org/licenses/>. >> + */ >> + >> +#include "msm_drv.h" >> +#include "msm_kms.h" >> +#include "msm_gem.h" >> + >> +struct msm_async_commit { >> + struct drm_atomic_state *state; >> + uint32_t fence; >> + struct msm_fence_cb fence_cb; >> +}; >> + >> +static void fence_cb(struct msm_fence_cb *cb); >> +static int commit_sync(struct drm_device *dev, void *state, bool unlocked); >> + >> +static struct msm_async_commit *new_commit(struct drm_atomic_state *state) >> +{ >> + struct msm_async_commit *c = kzalloc(sizeof(*c), GFP_KERNEL); >> + >> + if (!c) >> + return NULL; >> + >> + drm_atomic_state_reference(state); >> + c->state = state; >> + INIT_FENCE_CB(&c->fence_cb, fence_cb); >> + >> + return c; >> +} >> +static void free_commit(struct msm_async_commit *c) >> +{ >> + drm_atomic_state_unreference(c->state); >> + kfree(c); >> +} >> + >> +static void fence_cb(struct msm_fence_cb *cb) >> +{ >> + struct msm_async_commit *c = >> + container_of(cb, struct msm_async_commit, fence_cb); >> + commit_sync(c->state->dev, c->state, true); >> + free_commit(c); >> +} >> + >> +static void add_fb(struct msm_async_commit *c, struct drm_crtc *crtc, >> + struct drm_framebuffer *fb) >> +{ >> + struct drm_gem_object *obj = msm_framebuffer_bo(fb, 0); >> + c->fence = max(c->fence, msm_gem_fence(to_msm_bo(obj), MSM_PREP_READ)); >> +} >> + >> +static int wait_fb(struct drm_crtc *crtc, struct drm_framebuffer *fb) >> +{ >> + // XXX TODO wait.. >> + return 0; >> +} >> + >> +#define pending_fb(state) ((state) && (state)->fb && (state)->new_fb) >> + >> +static int commit_sync(struct drm_device *dev, void *state, bool unlocked) >> +{ >> + struct drm_atomic_state *a = state; >> + struct msm_drm_private *priv = dev->dev_private; >> + struct msm_kms *kms = priv->kms; >> + int ncrtcs = dev->mode_config.num_crtc; >> + uint32_t pending_crtcs = 0; >> + int i, ret; >> + >> + for (i = 0; i < ncrtcs; i++) >> + if (a->crtcs[i]) >> + pending_crtcs |= (1 << a->crtcs[i]->id); >> + >> + mutex_lock(&dev->struct_mutex); >> + WARN_ON(priv->pending_crtcs & pending_crtcs); >> + priv->pending_crtcs |= pending_crtcs; >> + mutex_unlock(&dev->struct_mutex); >> + >> + if (unlocked) >> + ret = drm_atomic_commit_unlocked(dev, state); >> + else >> + ret = drm_atomic_commit(dev, state); >> + >> + mutex_lock(&dev->struct_mutex); >> + priv->pending_crtcs &= ~pending_crtcs; >> + mutex_unlock(&dev->struct_mutex); >> + >> + if (ret) >> + return ret; >> + >> + for (i = 0; i < ncrtcs; i++) >> + if (a->crtcs[i]) >> + kms->funcs->flush(kms, a->crtcs[i]); >> + >> + return 0; >> +} >> + >> +int msm_atomic_commit(struct drm_device *dev, struct drm_atomic_state *state) >> +{ >> + struct drm_atomic_state *a = state; >> + int nplanes = dev->mode_config.num_total_plane; >> + int i; >> + >> + if (a->flags & DRM_MODE_ATOMIC_NONBLOCK) { >> + /* non-block mode: defer commit until fb's are ready */ >> + struct msm_async_commit *c = new_commit(state); >> + >> + if (!c) >> + return -ENOMEM; >> + >> + for (i = 0; i < nplanes; i++) >> + if (pending_fb(a->pstates[i])) >> + add_fb(c, a->pstates[i]->crtc, a->pstates[i]->fb); >> + >> + return msm_queue_fence_cb(dev, &c->fence_cb, c->fence); >> + } else { >> + /* blocking mode: wait until fb's are ready */ >> + int ret = 0; >> + >> + for (i = 0; i < nplanes && !ret; i++) >> + if (pending_fb(a->pstates[i])) >> + ret = wait_fb(a->pstates[i]->crtc, a->pstates[i]->fb); >> + >> + if (ret) >> + return ret; >> + >> + return commit_sync(dev, state, false); >> + } >> +} > > Ok, I think I should have read your msm implementation a _lot_ earlier. > Explains your desing choices neatly. > > Two observations: > > - A GO bit makes nuclear pageflips ridiculously easy to implement, > presuming the hardware actually works. And it's the sane model, so imo a > good one to wrap the atomic helpers around. > > But reality is often a lot more ugly, especially if you're employed by > Intel. Which is why I'm harping so much on this helpers-vs-core > interface issues ... We really need the full state transition in one > piece to do anything useful. > > - msm doesn't have any resource sharing going on for modeset operations > (which I mean lighting up crtcs to feed pixel streams to connectors). > Which means the simplistic "loop over all crtcs and call the old > ->setcrtc" approach actually works. we do actually have some shared resources on mdp5 generation (the "smp" blocks, basically a shared buffer which we need to allocate fifo space from for each plane).. I'm mostly ignoring this at the moment, because we don't support enough crtc's yet to run out of smp blocks. But hooking in at the current ->atomic_commit() would be enough for me, I think. Tbh, it is something that should be handled at the ->atomic_check() stage so I hadn't given much though to ->atomic_commit() stage. That all said, I've no problem with adding one more hook, after ->atomic_commit() and lock magic, before loop over planes/crtcs, so drivers that need to can replace the loops and do their own thing. Current state should be reasonably good for sane hw. I'm just waiting for patches for i915 to add whatever it needs. > The problem I see here is that if you have hardware with more elaborate > needs (e.g. shared dplls), but otherwise sanity for plane updates (i.e. > a GO bit) then your current atomic helpers will make it rather hard to > mix this. So I think we should pimp the crtc helpers a bit to be atomic > compliant (i.e. kill all outputs first before enabling anything new) and > try to integrate this with the atomic helpers used for GO bit style > updates. Not really, I don't think. You can still do whatever shared resource setup in ->atomic_commit(). For drivers which want to defer commit (ie. doing commit after fb's ready from cpu, rather than from gpu), it would probably be more convenient to split up atomic_commit() so drivers have a post lock hook. I think it is just a few line patch, and I think that it probably isn't really needed until i915 is ready. I'm pretty sure we can change this to do what i915 needs, while at the same time keeping it simple for 'GO bit' drivers. The bit about crtc helpers, I'll have to think about. I'm not sure that we want to require that 'atomic' (modeset or pageflip) completely *replaces* current state. So disabling unrelated crtcs doesn't seem like the right thing. But we have some time to decide about the semantics of an atomic modeset before we expose to userspace. > i915 has dpll sharing on ivb/hsw, but doesn't use the the crtc helpers > anymore. But the radone eyefinity (or whatever the damn thing is called) > I have lying around here fits the bill: It has 5 DP+ outputs but only 2 > dplls. So you can drive e.g. 3 DP screens and then switch to 2 HDMI > screens atomically and it should all pan out. > > But since your current helpers just loop over all crtcs without any > regard to ordering constraints this will fall over if the 2 HDMI outputs > get enabled before the 3 DP outputs get disabled all disabled. the driver should have rejected the config before it gets to commit stage, if it were an impossible config. > So with all that in mind I have 3 sanity checks for the atomic interfaces > and helpers: > > 1. The atomic helpers should make enabling sane hw with a GO bit easy for > nuclear pageflips. Benchmark would be sane hw like msm or omap. > > 2. It should cooperate with the crtc helpers such that hw with some shared > resources (like dplls) works for atomic modesets. That pretty much means > "disable everything (including crtc/connetor pipelines that only changed > some parts) before enabling anything". Benchmark would be a platform with > shared dplls. btw, not something I'd thought about before, but shared dplls seem common enough, that it is something core could help with. (Assuming they are all related to pixel clk and not some derived thing that core wouldn't know about.) I think we do need to decide what partial state updates me in the context of modeset or pageflip. I kinda think the right thing is different for modeset vs pageflip. Maybe we want to define an atomic flag to mean "disable/discard everything else".. at any rate, we only need to sort that before exposing the API to userspace. > 3. The core->driver interface should be powerful enough to support > insanity like i915, but no more. Which means all the code that's share > (i.e. the set_prop code I've been harping all over the place about) should > be done in the core, without any means for drivers to override. Currently > the drm core also takes care of a bunch of things like basic locking and > refcounting, and that's kinda the spirit for this. i915 is the obvious > benchmark here. The more I think about it, the more I think we should leave set_prop as it is (although possibly with drm_atomic_state -> drm_{plane,crtc,etc}_state). In the past, before primary plane, I really needed this. And I expect having a convenient way to 'sniff' changing properties as they go by will be useful in some cases. > I think we can roll this out piecemeal (and it's probably better to do it > that way), but I also think that until we've resolved the requirements of > 2&3 we should try to minimize subsystem wide changes as much as possible > by making them opt-in and the vfuncs optional. the new mandatory vfuncs don't amount to too much churn.. not as much as the extra param for crtc lock/unlock fxns. Maybe if I was planning on keeping this alive on a branch for so long in the beginning I would have arranged a few things slightly different, but meh. A very early iteration of atomic preserved the old pageflip, setcrtc, setplane, etc paths for drivers not implementing atomic fxns. But that was at least a bit ugly. I like the current approach since the code paths are very much the same for atomic and non-atomic. BR, -R > If you compare this approach with my review for Matt's universal plane > patches it's exactly the same song. > > I hope this elaboration of my thinking clarifies all my review comments a > bit and explains what I'm aiming for. > > Cheers, Daniel > -- > Daniel Vetter > Software Engineer, Intel Corporation > +41 (0) 79 365 57 48 - http://blog.ffwll.ch
On Tue, May 27, 2014 at 11:58:33AM -0400, Rob Clark wrote: > On Mon, May 26, 2014 at 1:54 PM, Daniel Vetter <daniel@ffwll.ch> wrote: > > Ok, I think I should have read your msm implementation a _lot_ earlier. > > Explains your desing choices neatly. > > > > Two observations: > > > > - A GO bit makes nuclear pageflips ridiculously easy to implement, > > presuming the hardware actually works. And it's the sane model, so imo a > > good one to wrap the atomic helpers around. > > > > But reality is often a lot more ugly, especially if you're employed by > > Intel. Which is why I'm harping so much on this helpers-vs-core > > interface issues ... We really need the full state transition in one > > piece to do anything useful. > > > > - msm doesn't have any resource sharing going on for modeset operations > > (which I mean lighting up crtcs to feed pixel streams to connectors). > > Which means the simplistic "loop over all crtcs and call the old > > ->setcrtc" approach actually works. > > we do actually have some shared resources on mdp5 generation (the > "smp" blocks, basically a shared buffer which we need to allocate fifo > space from for each plane).. > > I'm mostly ignoring this at the moment, because we don't support > enough crtc's yet to run out of smp blocks. But hooking in at the > current ->atomic_commit() would be enough for me, I think. Tbh, it is > something that should be handled at the ->atomic_check() stage so I > hadn't given much though to ->atomic_commit() stage. > > That all said, I've no problem with adding one more hook, after > ->atomic_commit() and lock magic, before loop over planes/crtcs, so > drivers that need to can replace the loops and do their own thing. > Current state should be reasonably good for sane hw. I'm just waiting > for patches for i915 to add whatever it needs. I don't think that will be enough for you. Example, slightly hypothetical: - Configuration A: crtc 0 displays a small screen, crtc 1 a giant screen. Together they just barely fit into your fifo space. - Configuration B: crtc 0 drives a giant screen, crtc 1 a tiny one. Presume different outputs here so that no implicit output stealing happens upon mode switching. Atomic switch should work, but don't since if you just loop over crtcs you have the intermediate stage where you try to drive 2 giant screens, which will run out of fifo resources. And I think it's really common to have such implicit resource sharing, maybe except for the most beefy desktop gpu which simply can't run out of memory bw with today's screen. So afaics you really need to push a bit of smarts into the crtc helpers to make atomic possible, which then leaves you with interaction issues between the atomic stuff for GO bit capable hw for plane updates and the modeset ordering hell. > > The problem I see here is that if you have hardware with more elaborate > > needs (e.g. shared dplls), but otherwise sanity for plane updates (i.e. > > a GO bit) then your current atomic helpers will make it rather hard to > > mix this. So I think we should pimp the crtc helpers a bit to be atomic > > compliant (i.e. kill all outputs first before enabling anything new) and > > try to integrate this with the atomic helpers used for GO bit style > > updates. > > Not really, I don't think. You can still do whatever shared resource > setup in ->atomic_commit(). For drivers which want to defer commit > (ie. doing commit after fb's ready from cpu, rather than from gpu), it > would probably be more convenient to split up atomic_commit() so > drivers have a post lock hook. I think it is just a few line patch, > and I think that it probably isn't really needed until i915 is ready. > I'm pretty sure we can change this to do what i915 needs, while at the > same time keeping it simple for 'GO bit' drivers. > > The bit about crtc helpers, I'll have to think about. I'm not sure > that we want to require that 'atomic' (modeset or pageflip) completely > *replaces* current state. So disabling unrelated crtcs doesn't seem > like the right thing. But we have some time to decide about the > semantics of an atomic modeset before we expose to userspace. I'm not talking about replacing unrelated crtcs. It's also not about underspecified state or about enabling/changing global resources. It is _only_ about ordering of the various operations: If both the current and the desired new configuration are at the limit of what the hw can do you can't switch to the new configuration by looping over all crtcs. The fact that this doesn't work is the entire point of atomic modesets. And if we have helpers which aren't cut out for the task at hand for any hw where we need to have atomic modesets to make such changes possible without userspace going nuts, then the helpers are imo simply not useful > > i915 has dpll sharing on ivb/hsw, but doesn't use the the crtc helpers > > anymore. But the radone eyefinity (or whatever the damn thing is called) > > I have lying around here fits the bill: It has 5 DP+ outputs but only 2 > > dplls. So you can drive e.g. 3 DP screens and then switch to 2 HDMI > > screens atomically and it should all pan out. > > > > But since your current helpers just loop over all crtcs without any > > regard to ordering constraints this will fall over if the 2 HDMI outputs > > get enabled before the 3 DP outputs get disabled all disabled. > > the driver should have rejected the config before it gets to commit > stage, if it were an impossible config. The configuration _is_ possible. We simply have to be somewhat careful with transitioning to it, since not _all_ intermediate states are possible. Your current helpers presume that's the case, which afaik isn't the case on any hw where we have global limits. For modesets. Nuclear pageflips are a completely different thing, as long as your hw has a GO bit. > > So with all that in mind I have 3 sanity checks for the atomic interfaces > > and helpers: > > > > 1. The atomic helpers should make enabling sane hw with a GO bit easy for > > nuclear pageflips. Benchmark would be sane hw like msm or omap. > > > > 2. It should cooperate with the crtc helpers such that hw with some shared > > resources (like dplls) works for atomic modesets. That pretty much means > > "disable everything (including crtc/connetor pipelines that only changed > > some parts) before enabling anything". Benchmark would be a platform with > > shared dplls. > > btw, not something I'd thought about before, but shared dplls seem > common enough, that it is something core could help with. (Assuming > they are all related to pixel clk and not some derived thing that core > wouldn't know about.) Yup, that's what I'm trying to say ;-) But it was just an example, I believe that atm your helpers don't help for any shared modeset resources at all. > I think we do need to decide what partial state updates me in the > context of modeset or pageflip. I kinda think the right thing is > different for modeset vs pageflip. Maybe we want to define an atomic > flag to mean "disable/discard everything else".. at any rate, we only > need to sort that before exposing the API to userspace. Yeah, I still strongly support this split in the api itself. For i915 my plan is to have separate configuration structures for modeset state and pageflip/plane config state. When we do an atomic modeset we compute both (maybe with some shortcut if we know that the pipe config didn't change at all). Then comes the big switch: - If we have the same pipe config, we simply to a vblank synce nuclear flip to the new config. - If pipe configs change it will be much more elaborate: 1. Do a vblank synced nuclear flip to black on all pipes that need to go off (whether they get disabled or reconfigured doesn't matter for now). 2. Disable pipes. 3. Commit new state on the sw side. 4. Enable all pipes with the new config, but only scanning out black. 5. Do a vblank-synced nuclear flip to the new config. This would also be the one that would signale the drm events that the atomic update completed. For fastboot we might need some hacks to fuse this all together, e.g for some panel fitter changes we don't need to disable the pipe completely. But that's the advanced stuff really. I think modelling the crtc helpers after this model could work. But that means that the crtc helpers and the nuclear flip atomic helpers for GO bit capable hw need to be rather tightly integrated, while still allowing drivers to override the nuclear flip parts. > > 3. The core->driver interface should be powerful enough to support > > insanity like i915, but no more. Which means all the code that's share > > (i.e. the set_prop code I've been harping all over the place about) should > > be done in the core, without any means for drivers to override. Currently > > the drm core also takes care of a bunch of things like basic locking and > > refcounting, and that's kinda the spirit for this. i915 is the obvious > > benchmark here. > > The more I think about it, the more I think we should leave set_prop > as it is (although possibly with drm_atomic_state -> > drm_{plane,crtc,etc}_state). > > In the past, before primary plane, I really needed this. And I expect > having a convenient way to 'sniff' changing properties as they go by > will be useful in some cases. I actually really like the addition of the state object to ->set_prop. At least for i915 (which already has a fair pile of additional properties) that looks like the perfect way to prep the stage. But at least for the transition we should keep the impact minimal. So no manadatory callbacks and don't enforce the usage of the state object until the drm core is fully converted to always follow a set_prop with a ->commit. Since currently we have internal mode_set calls in all i915 set_prop functions and we need to convert them all. But we can't do that until all the core stuff uses the atomic interface for all legacy ioctl ops. > > I think we can roll this out piecemeal (and it's probably better to do it > > that way), but I also think that until we've resolved the requirements of > > 2&3 we should try to minimize subsystem wide changes as much as possible > > by making them opt-in and the vfuncs optional. > > the new mandatory vfuncs don't amount to too much churn.. not as much > as the extra param for crtc lock/unlock fxns. Maybe if I was planning > on keeping this alive on a branch for so long in the beginning I would > have arranged a few things slightly different, but meh. > > A very early iteration of atomic preserved the old pageflip, setcrtc, > setplane, etc paths for drivers not implementing atomic fxns. But > that was at least a bit ugly. I like the current approach since the > code paths are very much the same for atomic and non-atomic. I'm not advertising for keeping the old driver interfaces, that would be much worse. I'm only saying that we'll likely change this a few times, and we will transition fairly slowly anyway. Reducing the overall churn and especially reducing the amount of code we might need to fix up if we change our minds seems like a good approach to me. -Daniel
On Tue, May 27, 2014 at 1:50 PM, Daniel Vetter <daniel@ffwll.ch> wrote: > On Tue, May 27, 2014 at 11:58:33AM -0400, Rob Clark wrote: >> On Mon, May 26, 2014 at 1:54 PM, Daniel Vetter <daniel@ffwll.ch> wrote: >> > Ok, I think I should have read your msm implementation a _lot_ earlier. >> > Explains your desing choices neatly. >> > >> > Two observations: >> > >> > - A GO bit makes nuclear pageflips ridiculously easy to implement, >> > presuming the hardware actually works. And it's the sane model, so imo a >> > good one to wrap the atomic helpers around. >> > >> > But reality is often a lot more ugly, especially if you're employed by >> > Intel. Which is why I'm harping so much on this helpers-vs-core >> > interface issues ... We really need the full state transition in one >> > piece to do anything useful. >> > >> > - msm doesn't have any resource sharing going on for modeset operations >> > (which I mean lighting up crtcs to feed pixel streams to connectors). >> > Which means the simplistic "loop over all crtcs and call the old >> > ->setcrtc" approach actually works. >> >> we do actually have some shared resources on mdp5 generation (the >> "smp" blocks, basically a shared buffer which we need to allocate fifo >> space from for each plane).. >> >> I'm mostly ignoring this at the moment, because we don't support >> enough crtc's yet to run out of smp blocks. But hooking in at the >> current ->atomic_commit() would be enough for me, I think. Tbh, it is >> something that should be handled at the ->atomic_check() stage so I >> hadn't given much though to ->atomic_commit() stage. >> >> That all said, I've no problem with adding one more hook, after >> ->atomic_commit() and lock magic, before loop over planes/crtcs, so >> drivers that need to can replace the loops and do their own thing. >> Current state should be reasonably good for sane hw. I'm just waiting >> for patches for i915 to add whatever it needs. > > I don't think that will be enough for you. Example, slightly hypothetical: > > - Configuration A: crtc 0 displays a small screen, crtc 1 a giant screen. > Together they just barely fit into your fifo space. > > - Configuration B: crtc 0 drives a giant screen, crtc 1 a tiny one. > Presume different outputs here so that no implicit output stealing > happens upon mode switching. > > Atomic switch should work, but don't since if you just loop over crtcs you > have the intermediate stage where you try to drive 2 giant screens, which > will run out of fifo resources. And I think it's really common to have > such implicit resource sharing, maybe except for the most beefy desktop > gpu which simply can't run out of memory bw with today's screen. Well, this situation is a bit problematic in other similar cases.. moving plane between crtc's which needs to wait on a vblank is another vaguely similar case. I was kinda thinking of punting on that one (ie. -EBUSY and userspace tries again on next frame). Maybe for modeset that doesn't work out so well, since frame N+1 you'll still be at configuration A and have the same problem. Would be kinda nice if helpers could order things according to what decreases resource requirements vs what increases. Although we could probably get a lot of mileage out of just making the 'atomic loop over things helper' apply config for crtcs/planes which will be disabled first, before the ones which will be enabled at the end of the commit. Hmm. Either way, if you have to replace 'atomic loop over things helper' with your own i915 specific thing, it shouldn't make much difference to you. And I don't really think we need to solve all the worlds problems in the first version. But seems like there could be some value in making helpers a bit more aware of shared resource constraints. > So afaics you really need to push a bit of smarts into the crtc helpers to > make atomic possible, which then leaves you with interaction issues > between the atomic stuff for GO bit capable hw for plane updates and the > modeset ordering hell. > >> > The problem I see here is that if you have hardware with more elaborate >> > needs (e.g. shared dplls), but otherwise sanity for plane updates (i.e. >> > a GO bit) then your current atomic helpers will make it rather hard to >> > mix this. So I think we should pimp the crtc helpers a bit to be atomic >> > compliant (i.e. kill all outputs first before enabling anything new) and >> > try to integrate this with the atomic helpers used for GO bit style >> > updates. >> >> Not really, I don't think. You can still do whatever shared resource >> setup in ->atomic_commit(). For drivers which want to defer commit >> (ie. doing commit after fb's ready from cpu, rather than from gpu), it >> would probably be more convenient to split up atomic_commit() so >> drivers have a post lock hook. I think it is just a few line patch, >> and I think that it probably isn't really needed until i915 is ready. >> I'm pretty sure we can change this to do what i915 needs, while at the >> same time keeping it simple for 'GO bit' drivers. >> >> The bit about crtc helpers, I'll have to think about. I'm not sure >> that we want to require that 'atomic' (modeset or pageflip) completely >> *replaces* current state. So disabling unrelated crtcs doesn't seem >> like the right thing. But we have some time to decide about the >> semantics of an atomic modeset before we expose to userspace. > > I'm not talking about replacing unrelated crtcs. It's also not about > underspecified state or about enabling/changing global resources. > > It is _only_ about ordering of the various operations: If both the > current and the desired new configuration are at the limit of what the hw > can do you can't switch to the new configuration by looping over all > crtcs. The fact that this doesn't work is the entire point of atomic > modesets. And if we have helpers which aren't cut out for the task at hand > for any hw where we need to have atomic modesets to make such changes > possible without userspace going nuts, then the helpers are imo simply not > useful > >> > i915 has dpll sharing on ivb/hsw, but doesn't use the the crtc helpers >> > anymore. But the radone eyefinity (or whatever the damn thing is called) >> > I have lying around here fits the bill: It has 5 DP+ outputs but only 2 >> > dplls. So you can drive e.g. 3 DP screens and then switch to 2 HDMI >> > screens atomically and it should all pan out. >> > >> > But since your current helpers just loop over all crtcs without any >> > regard to ordering constraints this will fall over if the 2 HDMI outputs >> > get enabled before the 3 DP outputs get disabled all disabled. >> >> the driver should have rejected the config before it gets to commit >> stage, if it were an impossible config. > > The configuration _is_ possible. We simply have to be somewhat careful > with transitioning to it, since not _all_ intermediate states are > possible. Your current helpers presume that's the case, which afaik isn't > the case on any hw where we have global limits. For modesets. > > Nuclear pageflips are a completely different thing, as long as your hw has > a GO bit. > >> > So with all that in mind I have 3 sanity checks for the atomic interfaces >> > and helpers: >> > >> > 1. The atomic helpers should make enabling sane hw with a GO bit easy for >> > nuclear pageflips. Benchmark would be sane hw like msm or omap. >> > >> > 2. It should cooperate with the crtc helpers such that hw with some shared >> > resources (like dplls) works for atomic modesets. That pretty much means >> > "disable everything (including crtc/connetor pipelines that only changed >> > some parts) before enabling anything". Benchmark would be a platform with >> > shared dplls. >> >> btw, not something I'd thought about before, but shared dplls seem >> common enough, that it is something core could help with. (Assuming >> they are all related to pixel clk and not some derived thing that core >> wouldn't know about.) > > Yup, that's what I'm trying to say ;-) But it was just an example, I > believe that atm your helpers don't help for any shared modeset resources > at all. no, not at all (other than the ww_mutex stuff which should be useful for shared resources and more fine grained locking within driver). It wasn't really a goal. But having some knowledge about shared resources seems like it could make core helpers more useful when I get closer to exploiting the limits of the hw I have.. I suspect i915 is just further down that path than the other drivers. >> I think we do need to decide what partial state updates me in the >> context of modeset or pageflip. I kinda think the right thing is >> different for modeset vs pageflip. Maybe we want to define an atomic >> flag to mean "disable/discard everything else".. at any rate, we only >> need to sort that before exposing the API to userspace. > > Yeah, I still strongly support this split in the api itself. For i915 my > plan is to have separate configuration structures for modeset state and > pageflip/plane config state. When we do an atomic modeset we compute both > (maybe with some shortcut if we know that the pipe config didn't change at > all). Then comes the big switch: > > - If we have the same pipe config, we simply to a vblank synce nuclear > flip to the new config. > > - If pipe configs change it will be much more elaborate: > > 1. Do a vblank synced nuclear flip to black on all pipes that need to go > off (whether they get disabled or reconfigured doesn't matter for now). > > 2. Disable pipes. > > 3. Commit new state on the sw side. > > 4. Enable all pipes with the new config, but only scanning out black. > > 5. Do a vblank-synced nuclear flip to the new config. This would also be > the one that would signale the drm events that the atomic update > completed. > > For fastboot we might need some hacks to fuse this all together, e.g for > some panel fitter changes we don't need to disable the pipe completely. > But that's the advanced stuff really. > > I think modelling the crtc helpers after this model could work. But that > means that the crtc helpers and the nuclear flip atomic helpers for GO > bit capable hw need to be rather tightly integrated, while still allowing > drivers to override the nuclear flip parts. > >> > 3. The core->driver interface should be powerful enough to support >> > insanity like i915, but no more. Which means all the code that's share >> > (i.e. the set_prop code I've been harping all over the place about) should >> > be done in the core, without any means for drivers to override. Currently >> > the drm core also takes care of a bunch of things like basic locking and >> > refcounting, and that's kinda the spirit for this. i915 is the obvious >> > benchmark here. >> >> The more I think about it, the more I think we should leave set_prop >> as it is (although possibly with drm_atomic_state -> >> drm_{plane,crtc,etc}_state). >> >> In the past, before primary plane, I really needed this. And I expect >> having a convenient way to 'sniff' changing properties as they go by >> will be useful in some cases. > > I actually really like the addition of the state object to ->set_prop. At > least for i915 (which already has a fair pile of additional properties) > that looks like the perfect way to prep the stage. > > But at least for the transition we should keep the impact minimal. So no > manadatory callbacks and don't enforce the usage of the state object until > the drm core is fully converted to always follow a set_prop with a > ->commit. Since currently we have internal mode_set calls in all i915 > set_prop functions and we need to convert them all. But we can't do that > until all the core stuff uses the atomic interface for all legacy ioctl > ops. > fwiw, at least all set_prop ioctl stuff from core follows up with a atomic_commit now. There are one or two more places doing mode_set un-atomically. And I'm not sure if you call set_prop anywhere internally from i915. BR, -R
On Tue, May 27, 2014 at 02:48:41PM -0400, Rob Clark wrote: > On Tue, May 27, 2014 at 1:50 PM, Daniel Vetter <daniel@ffwll.ch> wrote: > > On Tue, May 27, 2014 at 11:58:33AM -0400, Rob Clark wrote: > >> On Mon, May 26, 2014 at 1:54 PM, Daniel Vetter <daniel@ffwll.ch> wrote: > >> > Ok, I think I should have read your msm implementation a _lot_ earlier. > >> > Explains your desing choices neatly. > >> > > >> > Two observations: > >> > > >> > - A GO bit makes nuclear pageflips ridiculously easy to implement, > >> > presuming the hardware actually works. And it's the sane model, so imo a > >> > good one to wrap the atomic helpers around. > >> > > >> > But reality is often a lot more ugly, especially if you're employed by > >> > Intel. Which is why I'm harping so much on this helpers-vs-core > >> > interface issues ... We really need the full state transition in one > >> > piece to do anything useful. > >> > > >> > - msm doesn't have any resource sharing going on for modeset operations > >> > (which I mean lighting up crtcs to feed pixel streams to connectors). > >> > Which means the simplistic "loop over all crtcs and call the old > >> > ->setcrtc" approach actually works. > >> > >> we do actually have some shared resources on mdp5 generation (the > >> "smp" blocks, basically a shared buffer which we need to allocate fifo > >> space from for each plane).. > >> > >> I'm mostly ignoring this at the moment, because we don't support > >> enough crtc's yet to run out of smp blocks. But hooking in at the > >> current ->atomic_commit() would be enough for me, I think. Tbh, it is > >> something that should be handled at the ->atomic_check() stage so I > >> hadn't given much though to ->atomic_commit() stage. > >> > >> That all said, I've no problem with adding one more hook, after > >> ->atomic_commit() and lock magic, before loop over planes/crtcs, so > >> drivers that need to can replace the loops and do their own thing. > >> Current state should be reasonably good for sane hw. I'm just waiting > >> for patches for i915 to add whatever it needs. > > > > I don't think that will be enough for you. Example, slightly hypothetical: > > > > - Configuration A: crtc 0 displays a small screen, crtc 1 a giant screen. > > Together they just barely fit into your fifo space. > > > > - Configuration B: crtc 0 drives a giant screen, crtc 1 a tiny one. > > Presume different outputs here so that no implicit output stealing > > happens upon mode switching. > > > > Atomic switch should work, but don't since if you just loop over crtcs you > > have the intermediate stage where you try to drive 2 giant screens, which > > will run out of fifo resources. And I think it's really common to have > > such implicit resource sharing, maybe except for the most beefy desktop > > gpu which simply can't run out of memory bw with today's screen. > > Well, this situation is a bit problematic in other similar cases.. > moving plane between crtc's which needs to wait on a vblank is another > vaguely similar case. I was kinda thinking of punting on that one > (ie. -EBUSY and userspace tries again on next frame). Maybe for > modeset that doesn't work out so well, since frame N+1 you'll still be > at configuration A and have the same problem. > > Would be kinda nice if helpers could order things according to what > decreases resource requirements vs what increases. Although we could > probably get a lot of mileage out of just making the 'atomic loop over > things helper' apply config for crtcs/planes which will be disabled > first, before the ones which will be enabled at the end of the commit. > Hmm. Yeah, that one should go a long way, but only for modeset changes. If we do this for plane updates it will look _really_ bad ;-) So for the "move plane from crtc to crtc" I think we need a separate step. Presuming we don't have any modeset operations we should be able to group all the plane updates per crtc. This ofc presumes a sane hw with GO bit. Then the helper could figure out which crtc it needs to nuclear flip first to be able to move the plane. At least with the current atomic ioctl proposal we can't reject this with -EBUSY since with a full modeset (which takes longer than one vblank anyway) it's possible. Otoh we _should_ reject it when userspace expects a vblank synced update. Which is another reason for why I think we really should have a flag somewhere for vblank synced atomic updates vs. atomic updates which take longer than one vblank and might even involved a few msec of blank screens for switching. > Either way, if you have to replace 'atomic loop over things helper' > with your own i915 specific thing, it shouldn't make much difference > to you. And I don't really think we need to solve all the worlds > problems in the first version. But seems like there could be some > value in making helpers a bit more aware of shared resource > constraints. This isn't about i915, but about all the drivers using crtc helpers. Correct ordering of the crtc helper hooks should pretty much solve this, without the need to track global resources (i.e. disable everything before we start enabling). At least for modeset-like atomic updates. i915 will roll their own, but not because atomic updates aren't possible with the crtc helpers, but because the crtc helpers are inadequate for our hw. For modeset updates atomic or not doesn't factor in here. And imo if we can make the crtc helpers work, we should do that. Otherwise there won't be a whole lot of use behind the atomic modeset updates imo. > > So afaics you really need to push a bit of smarts into the crtc helpers to > > make atomic possible, which then leaves you with interaction issues > > between the atomic stuff for GO bit capable hw for plane updates and the > > modeset ordering hell. > > > >> > The problem I see here is that if you have hardware with more elaborate > >> > needs (e.g. shared dplls), but otherwise sanity for plane updates (i.e. > >> > a GO bit) then your current atomic helpers will make it rather hard to > >> > mix this. So I think we should pimp the crtc helpers a bit to be atomic > >> > compliant (i.e. kill all outputs first before enabling anything new) and > >> > try to integrate this with the atomic helpers used for GO bit style > >> > updates. > >> > >> Not really, I don't think. You can still do whatever shared resource > >> setup in ->atomic_commit(). For drivers which want to defer commit > >> (ie. doing commit after fb's ready from cpu, rather than from gpu), it > >> would probably be more convenient to split up atomic_commit() so > >> drivers have a post lock hook. I think it is just a few line patch, > >> and I think that it probably isn't really needed until i915 is ready. > >> I'm pretty sure we can change this to do what i915 needs, while at the > >> same time keeping it simple for 'GO bit' drivers. > >> > >> The bit about crtc helpers, I'll have to think about. I'm not sure > >> that we want to require that 'atomic' (modeset or pageflip) completely > >> *replaces* current state. So disabling unrelated crtcs doesn't seem > >> like the right thing. But we have some time to decide about the > >> semantics of an atomic modeset before we expose to userspace. > > > > I'm not talking about replacing unrelated crtcs. It's also not about > > underspecified state or about enabling/changing global resources. > > > > It is _only_ about ordering of the various operations: If both the > > current and the desired new configuration are at the limit of what the hw > > can do you can't switch to the new configuration by looping over all > > crtcs. The fact that this doesn't work is the entire point of atomic > > modesets. And if we have helpers which aren't cut out for the task at hand > > for any hw where we need to have atomic modesets to make such changes > > possible without userspace going nuts, then the helpers are imo simply not > > useful > > > >> > i915 has dpll sharing on ivb/hsw, but doesn't use the the crtc helpers > >> > anymore. But the radone eyefinity (or whatever the damn thing is called) > >> > I have lying around here fits the bill: It has 5 DP+ outputs but only 2 > >> > dplls. So you can drive e.g. 3 DP screens and then switch to 2 HDMI > >> > screens atomically and it should all pan out. > >> > > >> > But since your current helpers just loop over all crtcs without any > >> > regard to ordering constraints this will fall over if the 2 HDMI outputs > >> > get enabled before the 3 DP outputs get disabled all disabled. > >> > >> the driver should have rejected the config before it gets to commit > >> stage, if it were an impossible config. > > > > The configuration _is_ possible. We simply have to be somewhat careful > > with transitioning to it, since not _all_ intermediate states are > > possible. Your current helpers presume that's the case, which afaik isn't > > the case on any hw where we have global limits. For modesets. > > > > Nuclear pageflips are a completely different thing, as long as your hw has > > a GO bit. > > > >> > So with all that in mind I have 3 sanity checks for the atomic interfaces > >> > and helpers: > >> > > >> > 1. The atomic helpers should make enabling sane hw with a GO bit easy for > >> > nuclear pageflips. Benchmark would be sane hw like msm or omap. > >> > > >> > 2. It should cooperate with the crtc helpers such that hw with some shared > >> > resources (like dplls) works for atomic modesets. That pretty much means > >> > "disable everything (including crtc/connetor pipelines that only changed > >> > some parts) before enabling anything". Benchmark would be a platform with > >> > shared dplls. > >> > >> btw, not something I'd thought about before, but shared dplls seem > >> common enough, that it is something core could help with. (Assuming > >> they are all related to pixel clk and not some derived thing that core > >> wouldn't know about.) > > > > Yup, that's what I'm trying to say ;-) But it was just an example, I > > believe that atm your helpers don't help for any shared modeset resources > > at all. > > no, not at all (other than the ww_mutex stuff which should be useful > for shared resources and more fine grained locking within driver). It > wasn't really a goal. > > But having some knowledge about shared resources seems like it could > make core helpers more useful when I get closer to exploiting the > limits of the hw I have.. I suspect i915 is just further down that > path than the other drivers. I'm repeating myself, but simply ordering updates correctly should already solve it. At least if the driver provides checks to make sure the new config doesn't go over limits (e.g. by counting plls or required fifo space). If we don't have that, the helpers are imo not sufficiently validated as generally useful. And I have seen _way_ too much single use code in the drm core from the old ums/dri1 days. > >> I think we do need to decide what partial state updates me in the > >> context of modeset or pageflip. I kinda think the right thing is > >> different for modeset vs pageflip. Maybe we want to define an atomic > >> flag to mean "disable/discard everything else".. at any rate, we only > >> need to sort that before exposing the API to userspace. > > > > Yeah, I still strongly support this split in the api itself. For i915 my > > plan is to have separate configuration structures for modeset state and > > pageflip/plane config state. When we do an atomic modeset we compute both > > (maybe with some shortcut if we know that the pipe config didn't change at > > all). Then comes the big switch: > > > > - If we have the same pipe config, we simply to a vblank synce nuclear > > flip to the new config. > > > > - If pipe configs change it will be much more elaborate: > > > > 1. Do a vblank synced nuclear flip to black on all pipes that need to go > > off (whether they get disabled or reconfigured doesn't matter for now). > > > > 2. Disable pipes. > > > > 3. Commit new state on the sw side. > > > > 4. Enable all pipes with the new config, but only scanning out black. > > > > 5. Do a vblank-synced nuclear flip to the new config. This would also be > > the one that would signale the drm events that the atomic update > > completed. > > > > For fastboot we might need some hacks to fuse this all together, e.g for > > some panel fitter changes we don't need to disable the pipe completely. > > But that's the advanced stuff really. > > > > I think modelling the crtc helpers after this model could work. But that > > means that the crtc helpers and the nuclear flip atomic helpers for GO > > bit capable hw need to be rather tightly integrated, while still allowing > > drivers to override the nuclear flip parts. > > > >> > 3. The core->driver interface should be powerful enough to support > >> > insanity like i915, but no more. Which means all the code that's share > >> > (i.e. the set_prop code I've been harping all over the place about) should > >> > be done in the core, without any means for drivers to override. Currently > >> > the drm core also takes care of a bunch of things like basic locking and > >> > refcounting, and that's kinda the spirit for this. i915 is the obvious > >> > benchmark here. > >> > >> The more I think about it, the more I think we should leave set_prop > >> as it is (although possibly with drm_atomic_state -> > >> drm_{plane,crtc,etc}_state). > >> > >> In the past, before primary plane, I really needed this. And I expect > >> having a convenient way to 'sniff' changing properties as they go by > >> will be useful in some cases. > > > > I actually really like the addition of the state object to ->set_prop. At > > least for i915 (which already has a fair pile of additional properties) > > that looks like the perfect way to prep the stage. > > > > But at least for the transition we should keep the impact minimal. So no > > manadatory callbacks and don't enforce the usage of the state object until > > the drm core is fully converted to always follow a set_prop with a > > ->commit. Since currently we have internal mode_set calls in all i915 > > set_prop functions and we need to convert them all. But we can't do that > > until all the core stuff uses the atomic interface for all legacy ioctl > > ops. > > > > fwiw, at least all set_prop ioctl stuff from core follows up with a > atomic_commit now. There are one or two more places doing mode_set > un-atomically. And I'm not sure if you call set_prop anywhere > internally from i915. We do modesets internally, but as long as those aren't externally visible (which they shouldn't be if we grab locks before checking the state) it should be all fine. Also from my pov the ->set_prop stuff is just interface to construct the in-kernel representation of the desired config. The real magic happens in the check/commit hooks (which is the same level i915-internal modeset changes happens at). I think one excellent use-case we get for free (almost) without the ioctl would be fbcon. It very much wants to do an atomic update, so converting that over to the atomic interface would be good imo. -Daniel
On Tue, May 27, 2014 at 3:26 PM, Daniel Vetter <daniel@ffwll.ch> wrote: > On Tue, May 27, 2014 at 02:48:41PM -0400, Rob Clark wrote: >> On Tue, May 27, 2014 at 1:50 PM, Daniel Vetter <daniel@ffwll.ch> wrote: >> > On Tue, May 27, 2014 at 11:58:33AM -0400, Rob Clark wrote: >> >> On Mon, May 26, 2014 at 1:54 PM, Daniel Vetter <daniel@ffwll.ch> wrote: >> >> > Ok, I think I should have read your msm implementation a _lot_ earlier. >> >> > Explains your desing choices neatly. >> >> > >> >> > Two observations: >> >> > >> >> > - A GO bit makes nuclear pageflips ridiculously easy to implement, >> >> > presuming the hardware actually works. And it's the sane model, so imo a >> >> > good one to wrap the atomic helpers around. >> >> > >> >> > But reality is often a lot more ugly, especially if you're employed by >> >> > Intel. Which is why I'm harping so much on this helpers-vs-core >> >> > interface issues ... We really need the full state transition in one >> >> > piece to do anything useful. >> >> > >> >> > - msm doesn't have any resource sharing going on for modeset operations >> >> > (which I mean lighting up crtcs to feed pixel streams to connectors). >> >> > Which means the simplistic "loop over all crtcs and call the old >> >> > ->setcrtc" approach actually works. >> >> >> >> we do actually have some shared resources on mdp5 generation (the >> >> "smp" blocks, basically a shared buffer which we need to allocate fifo >> >> space from for each plane).. >> >> >> >> I'm mostly ignoring this at the moment, because we don't support >> >> enough crtc's yet to run out of smp blocks. But hooking in at the >> >> current ->atomic_commit() would be enough for me, I think. Tbh, it is >> >> something that should be handled at the ->atomic_check() stage so I >> >> hadn't given much though to ->atomic_commit() stage. >> >> >> >> That all said, I've no problem with adding one more hook, after >> >> ->atomic_commit() and lock magic, before loop over planes/crtcs, so >> >> drivers that need to can replace the loops and do their own thing. >> >> Current state should be reasonably good for sane hw. I'm just waiting >> >> for patches for i915 to add whatever it needs. >> > >> > I don't think that will be enough for you. Example, slightly hypothetical: >> > >> > - Configuration A: crtc 0 displays a small screen, crtc 1 a giant screen. >> > Together they just barely fit into your fifo space. >> > >> > - Configuration B: crtc 0 drives a giant screen, crtc 1 a tiny one. >> > Presume different outputs here so that no implicit output stealing >> > happens upon mode switching. >> > >> > Atomic switch should work, but don't since if you just loop over crtcs you >> > have the intermediate stage where you try to drive 2 giant screens, which >> > will run out of fifo resources. And I think it's really common to have >> > such implicit resource sharing, maybe except for the most beefy desktop >> > gpu which simply can't run out of memory bw with today's screen. >> >> Well, this situation is a bit problematic in other similar cases.. >> moving plane between crtc's which needs to wait on a vblank is another >> vaguely similar case. I was kinda thinking of punting on that one >> (ie. -EBUSY and userspace tries again on next frame). Maybe for >> modeset that doesn't work out so well, since frame N+1 you'll still be >> at configuration A and have the same problem. >> >> Would be kinda nice if helpers could order things according to what >> decreases resource requirements vs what increases. Although we could >> probably get a lot of mileage out of just making the 'atomic loop over >> things helper' apply config for crtcs/planes which will be disabled >> first, before the ones which will be enabled at the end of the commit. >> Hmm. > > Yeah, that one should go a long way, but only for modeset changes. If we > do this for plane updates it will look _really_ bad ;-) > > So for the "move plane from crtc to crtc" I think we need a separate step. > Presuming we don't have any modeset operations we should be able to group > all the plane updates per crtc. This ofc presumes a sane hw with GO bit. > Then the helper could figure out which crtc it needs to nuclear flip first > to be able to move the plane. > > At least with the current atomic ioctl proposal we can't reject this with > -EBUSY since with a full modeset (which takes longer than one vblank > anyway) it's possible. Otoh we _should_ reject it when userspace expects a > vblank synced update. > > Which is another reason for why I think we really should have a flag > somewhere for vblank synced atomic updates vs. atomic updates which take > longer than one vblank and might even involved a few msec of blank screens > for switching. Well, there was the NONBLOCK atomic flag.. I'm not entirely sure if we should hang so much off of that one flag. >> Either way, if you have to replace 'atomic loop over things helper' >> with your own i915 specific thing, it shouldn't make much difference >> to you. And I don't really think we need to solve all the worlds >> problems in the first version. But seems like there could be some >> value in making helpers a bit more aware of shared resource >> constraints. > > This isn't about i915, but about all the drivers using crtc helpers. > Correct ordering of the crtc helper hooks should pretty much solve this, > without the need to track global resources (i.e. disable everything before > we start enabling). At least for modeset-like atomic updates. > > i915 will roll their own, but not because atomic updates aren't possible > with the crtc helpers, but because the crtc helpers are inadequate for our > hw. For modeset updates atomic or not doesn't factor in here. > > And imo if we can make the crtc helpers work, we should do that. Otherwise > there won't be a whole lot of use behind the atomic modeset updates imo. > >> > So afaics you really need to push a bit of smarts into the crtc helpers to >> > make atomic possible, which then leaves you with interaction issues >> > between the atomic stuff for GO bit capable hw for plane updates and the >> > modeset ordering hell. >> > >> >> > The problem I see here is that if you have hardware with more elaborate >> >> > needs (e.g. shared dplls), but otherwise sanity for plane updates (i.e. >> >> > a GO bit) then your current atomic helpers will make it rather hard to >> >> > mix this. So I think we should pimp the crtc helpers a bit to be atomic >> >> > compliant (i.e. kill all outputs first before enabling anything new) and >> >> > try to integrate this with the atomic helpers used for GO bit style >> >> > updates. >> >> >> >> Not really, I don't think. You can still do whatever shared resource >> >> setup in ->atomic_commit(). For drivers which want to defer commit >> >> (ie. doing commit after fb's ready from cpu, rather than from gpu), it >> >> would probably be more convenient to split up atomic_commit() so >> >> drivers have a post lock hook. I think it is just a few line patch, >> >> and I think that it probably isn't really needed until i915 is ready. >> >> I'm pretty sure we can change this to do what i915 needs, while at the >> >> same time keeping it simple for 'GO bit' drivers. >> >> >> >> The bit about crtc helpers, I'll have to think about. I'm not sure >> >> that we want to require that 'atomic' (modeset or pageflip) completely >> >> *replaces* current state. So disabling unrelated crtcs doesn't seem >> >> like the right thing. But we have some time to decide about the >> >> semantics of an atomic modeset before we expose to userspace. >> > >> > I'm not talking about replacing unrelated crtcs. It's also not about >> > underspecified state or about enabling/changing global resources. >> > >> > It is _only_ about ordering of the various operations: If both the >> > current and the desired new configuration are at the limit of what the hw >> > can do you can't switch to the new configuration by looping over all >> > crtcs. The fact that this doesn't work is the entire point of atomic >> > modesets. And if we have helpers which aren't cut out for the task at hand >> > for any hw where we need to have atomic modesets to make such changes >> > possible without userspace going nuts, then the helpers are imo simply not >> > useful >> > >> >> > i915 has dpll sharing on ivb/hsw, but doesn't use the the crtc helpers >> >> > anymore. But the radone eyefinity (or whatever the damn thing is called) >> >> > I have lying around here fits the bill: It has 5 DP+ outputs but only 2 >> >> > dplls. So you can drive e.g. 3 DP screens and then switch to 2 HDMI >> >> > screens atomically and it should all pan out. >> >> > >> >> > But since your current helpers just loop over all crtcs without any >> >> > regard to ordering constraints this will fall over if the 2 HDMI outputs >> >> > get enabled before the 3 DP outputs get disabled all disabled. >> >> >> >> the driver should have rejected the config before it gets to commit >> >> stage, if it were an impossible config. >> > >> > The configuration _is_ possible. We simply have to be somewhat careful >> > with transitioning to it, since not _all_ intermediate states are >> > possible. Your current helpers presume that's the case, which afaik isn't >> > the case on any hw where we have global limits. For modesets. >> > >> > Nuclear pageflips are a completely different thing, as long as your hw has >> > a GO bit. >> > >> >> > So with all that in mind I have 3 sanity checks for the atomic interfaces >> >> > and helpers: >> >> > >> >> > 1. The atomic helpers should make enabling sane hw with a GO bit easy for >> >> > nuclear pageflips. Benchmark would be sane hw like msm or omap. >> >> > >> >> > 2. It should cooperate with the crtc helpers such that hw with some shared >> >> > resources (like dplls) works for atomic modesets. That pretty much means >> >> > "disable everything (including crtc/connetor pipelines that only changed >> >> > some parts) before enabling anything". Benchmark would be a platform with >> >> > shared dplls. >> >> >> >> btw, not something I'd thought about before, but shared dplls seem >> >> common enough, that it is something core could help with. (Assuming >> >> they are all related to pixel clk and not some derived thing that core >> >> wouldn't know about.) >> > >> > Yup, that's what I'm trying to say ;-) But it was just an example, I >> > believe that atm your helpers don't help for any shared modeset resources >> > at all. >> >> no, not at all (other than the ww_mutex stuff which should be useful >> for shared resources and more fine grained locking within driver). It >> wasn't really a goal. >> >> But having some knowledge about shared resources seems like it could >> make core helpers more useful when I get closer to exploiting the >> limits of the hw I have.. I suspect i915 is just further down that >> path than the other drivers. > > I'm repeating myself, but simply ordering updates correctly should already > solve it. At least if the driver provides checks to make sure the new > config doesn't go over limits (e.g. by counting plls or required fifo > space). If we don't have that, the helpers are imo not sufficiently > validated as generally useful. And I have seen _way_ too much single use > code in the drm core from the old ums/dri1 days. > >> >> I think we do need to decide what partial state updates me in the >> >> context of modeset or pageflip. I kinda think the right thing is >> >> different for modeset vs pageflip. Maybe we want to define an atomic >> >> flag to mean "disable/discard everything else".. at any rate, we only >> >> need to sort that before exposing the API to userspace. >> > >> > Yeah, I still strongly support this split in the api itself. For i915 my >> > plan is to have separate configuration structures for modeset state and >> > pageflip/plane config state. When we do an atomic modeset we compute both >> > (maybe with some shortcut if we know that the pipe config didn't change at >> > all). Then comes the big switch: >> > >> > - If we have the same pipe config, we simply to a vblank synce nuclear >> > flip to the new config. >> > >> > - If pipe configs change it will be much more elaborate: >> > >> > 1. Do a vblank synced nuclear flip to black on all pipes that need to go >> > off (whether they get disabled or reconfigured doesn't matter for now). >> > >> > 2. Disable pipes. >> > >> > 3. Commit new state on the sw side. >> > >> > 4. Enable all pipes with the new config, but only scanning out black. >> > >> > 5. Do a vblank-synced nuclear flip to the new config. This would also be >> > the one that would signale the drm events that the atomic update >> > completed. >> > >> > For fastboot we might need some hacks to fuse this all together, e.g for >> > some panel fitter changes we don't need to disable the pipe completely. >> > But that's the advanced stuff really. >> > >> > I think modelling the crtc helpers after this model could work. But that >> > means that the crtc helpers and the nuclear flip atomic helpers for GO >> > bit capable hw need to be rather tightly integrated, while still allowing >> > drivers to override the nuclear flip parts. >> > >> >> > 3. The core->driver interface should be powerful enough to support >> >> > insanity like i915, but no more. Which means all the code that's share >> >> > (i.e. the set_prop code I've been harping all over the place about) should >> >> > be done in the core, without any means for drivers to override. Currently >> >> > the drm core also takes care of a bunch of things like basic locking and >> >> > refcounting, and that's kinda the spirit for this. i915 is the obvious >> >> > benchmark here. >> >> >> >> The more I think about it, the more I think we should leave set_prop >> >> as it is (although possibly with drm_atomic_state -> >> >> drm_{plane,crtc,etc}_state). >> >> >> >> In the past, before primary plane, I really needed this. And I expect >> >> having a convenient way to 'sniff' changing properties as they go by >> >> will be useful in some cases. >> > >> > I actually really like the addition of the state object to ->set_prop. At >> > least for i915 (which already has a fair pile of additional properties) >> > that looks like the perfect way to prep the stage. >> > >> > But at least for the transition we should keep the impact minimal. So no >> > manadatory callbacks and don't enforce the usage of the state object until >> > the drm core is fully converted to always follow a set_prop with a >> > ->commit. Since currently we have internal mode_set calls in all i915 >> > set_prop functions and we need to convert them all. But we can't do that >> > until all the core stuff uses the atomic interface for all legacy ioctl >> > ops. >> > >> >> fwiw, at least all set_prop ioctl stuff from core follows up with a >> atomic_commit now. There are one or two more places doing mode_set >> un-atomically. And I'm not sure if you call set_prop anywhere >> internally from i915. > > We do modesets internally, but as long as those aren't externally visible > (which they shouldn't be if we grab locks before checking the state) it > should be all fine. Also from my pov the ->set_prop stuff is just > interface to construct the in-kernel representation of the desired config. > The real magic happens in the check/commit hooks (which is the same level > i915-internal modeset changes happens at). > > I think one excellent use-case we get for free (almost) without the ioctl > would be fbcon. It very much wants to do an atomic update, so converting > that over to the atomic interface would be good imo. Yes, iirc the remaining non-atomic paths are fbcon related. In principle it should be a simple matter to increment the refcnt on fbcon state object and re-apply it. Although at the moment we keep track of *how* to apply the state (ie. page_flip vs set_config, etc) as the state object is built up.. which isn't very conducive to re-committing an existing state object. Which is part of the reason I wanted to deprecate the various existing ->page_flip/->update_plane/->set_config/etc and introduce per object ->commit()'s. (Which could either be called by helpers, or called internally by driver or completely ignored by driver) I've been a bit reluctant so far to do too much additional refactoring on top of atomic, since I'm about at the limit of what I have time to repeatedly rebase each kernel version. This is why I'm a bit anxious to start merging some of atomic, even if it doesn't do absolutely everything yet. BR, -R > -Daniel > -- > Daniel Vetter > Software Engineer, Intel Corporation > +41 (0) 79 365 57 48 - http://blog.ffwll.ch
On Tue, May 27, 2014 at 04:06:28PM -0400, Rob Clark wrote: > On Tue, May 27, 2014 at 3:26 PM, Daniel Vetter <daniel@ffwll.ch> wrote: [snip] > Well, there was the NONBLOCK atomic flag.. I'm not entirely sure if we > should hang so much off of that one flag. Yeah, a separate VBLANK_SYNCED might be useful. Apparently people also want non-blocking modesets. [snip] > > I think one excellent use-case we get for free (almost) without the ioctl > > would be fbcon. It very much wants to do an atomic update, so converting > > that over to the atomic interface would be good imo. > > Yes, iirc the remaining non-atomic paths are fbcon related. In > principle it should be a simple matter to increment the refcnt on > fbcon state object and re-apply it. Although at the moment we keep > track of *how* to apply the state (ie. page_flip vs set_config, etc) > as the state object is built up.. which isn't very conducive to > re-committing an existing state object. Which is part of the reason I > wanted to deprecate the various existing > ->page_flip/->update_plane/->set_config/etc and introduce per object > ->commit()'s. (Which could either be called by helpers, or called > internally by driver or completely ignored by driver) Yeah, I think the approach in here with a few helpers to bend atomic ->commit to the old hooks (somewhat-ish) is good. And with the crtc helpers we should be able to move most drivers away from the old hooks quickly. The exception will be pageflips/cursors, but that requires a lot of driver-specific work, and probably first a full conversion to universal planes (which atm don't support everything even for the primary plane due to the arbitrary restriction to rgbx8888). > I've been a bit reluctant so far to do too much additional refactoring > on top of atomic, since I'm about at the limit of what I have time to > repeatedly rebase each kernel version. This is why I'm a bit anxious > to start merging some of atomic, even if it doesn't do absolutely > everything yet. I understand that, which is why I suggested a bunch of things to split out already so we can get them in. On top of that I think with the split-up mode_config.mutex like I've just proposed in an RFC we have a clear path for the locking issues, too. So could go ahead an merge the w/w conversion, too. That leaves the set_prop refactoring which is still under discussion. Those three pieces hopefully help a lot with reducing rebasing pain. On top of that I think we should look at cutting away functional pieces of the conversion, e.g. ignore planes at first and only look at atomic modeset. Or ignore atomic modesets and only look at plane updates, rejecting anything that changes connectors or crtcs. By cutting out slices we should be able to get patches into shape step-by-step. -Daniel
On Tue, May 27, 2014 at 6:09 PM, Daniel Vetter <daniel@ffwll.ch> wrote: > On Tue, May 27, 2014 at 04:06:28PM -0400, Rob Clark wrote: >> On Tue, May 27, 2014 at 3:26 PM, Daniel Vetter <daniel@ffwll.ch> wrote: > > [snip] > >> Well, there was the NONBLOCK atomic flag.. I'm not entirely sure if we >> should hang so much off of that one flag. > > Yeah, a separate VBLANK_SYNCED might be useful. Apparently people also > want non-blocking modesets. random-diverging-from-original-topic-thought.. seems like userspace just wants a deadline/timeout (hopefully in units of vblanks).. at least to the level of "I want this to happen on the next vblank (after rendering completes), or give me an error" vs "I want this to complete atomically even if it takes a few extra vblanks for things to sort out".. I guess that amounts to what you mean by VBLANK_SYNCED flag, but VBLANKED_SYNCED might not be a good name.. at least for some hw all you can do is vblank sync'd.. BR, -R
On Tue, May 27, 2014 at 6:09 PM, Daniel Vetter <daniel@ffwll.ch> wrote: > On Tue, May 27, 2014 at 04:06:28PM -0400, Rob Clark wrote: >> On Tue, May 27, 2014 at 3:26 PM, Daniel Vetter <daniel@ffwll.ch> wrote: > > [snip] > >> Well, there was the NONBLOCK atomic flag.. I'm not entirely sure if we >> should hang so much off of that one flag. > > Yeah, a separate VBLANK_SYNCED might be useful. Apparently people also > want non-blocking modesets. > > [snip] > >> > I think one excellent use-case we get for free (almost) without the ioctl >> > would be fbcon. It very much wants to do an atomic update, so converting >> > that over to the atomic interface would be good imo. >> >> Yes, iirc the remaining non-atomic paths are fbcon related. In >> principle it should be a simple matter to increment the refcnt on >> fbcon state object and re-apply it. Although at the moment we keep >> track of *how* to apply the state (ie. page_flip vs set_config, etc) >> as the state object is built up.. which isn't very conducive to >> re-committing an existing state object. Which is part of the reason I >> wanted to deprecate the various existing >> ->page_flip/->update_plane/->set_config/etc and introduce per object >> ->commit()'s. (Which could either be called by helpers, or called >> internally by driver or completely ignored by driver) > > Yeah, I think the approach in here with a few helpers to bend atomic > ->commit to the old hooks (somewhat-ish) is good. And with the crtc > helpers we should be able to move most drivers away from the old hooks > quickly. The exception will be pageflips/cursors, but that requires a lot > of driver-specific work, and probably first a full conversion to universal > planes (which atm don't support everything even for the primary plane due > to the arbitrary restriction to rgbx8888). btw, I guess/hope most SoC drivers (ie. the ones that want to do crazy things with overlays) would be moving rapidly away from using primary plane helpers. I think native primary planes fits better most of the non-trivial mobile display controller blocks.. >> I've been a bit reluctant so far to do too much additional refactoring >> on top of atomic, since I'm about at the limit of what I have time to >> repeatedly rebase each kernel version. This is why I'm a bit anxious >> to start merging some of atomic, even if it doesn't do absolutely >> everything yet. > > I understand that, which is why I suggested a bunch of things to split out > already so we can get them in. > > On top of that I think with the split-up mode_config.mutex like I've just > proposed in an RFC we have a clear path for the locking issues, too. So > could go ahead an merge the w/w conversion, too. I suppose now that I split modeset acquire ctx out into it's own thing (originally it was inline w/ drm_atomic_state), I could reshuffle the patches and move ww_mutex stuff below atomic.. will take a bit of patch surgery since some parts of that patch would have to move to later patches, but I can sort it out > That leaves the set_prop refactoring which is still under discussion. > Those three pieces hopefully help a lot with reducing rebasing pain. > > On top of that I think we should look at cutting away functional pieces of > the conversion, e.g. ignore planes at first and only look at atomic > modeset. Or ignore atomic modesets and only look at plane updates, > rejecting anything that changes connectors or crtcs. By cutting out slices > we should be able to get patches into shape step-by-step. hmm, well, not sure how much value it is splitting up like that. I suppose the only-plane approach would sidestep/punt some issues. But once locking and set_prop are in place / agreed, it isn't that much more from a churn perspective. tbh, I think the worse problems are when we actually expose atomic ioctl to userspace. Right now we are rather restricted in the updates triggered via atomic in that there are fixed well defined entry points to atomic. Ie. only pageflip() sets _NONBLOCK flag, and it does not set mode property, so you won't ever see a _NONBLOCK modeset. BR, -R > -Daniel > -- > Daniel Vetter > Software Engineer, Intel Corporation > +41 (0) 79 365 57 48 - http://blog.ffwll.ch
On Tue, May 27, 2014 at 07:32:46PM -0400, Rob Clark wrote: > On Tue, May 27, 2014 at 6:09 PM, Daniel Vetter <daniel@ffwll.ch> wrote: > > On Tue, May 27, 2014 at 04:06:28PM -0400, Rob Clark wrote: > >> On Tue, May 27, 2014 at 3:26 PM, Daniel Vetter <daniel@ffwll.ch> wrote: > > > > [snip] > > > >> Well, there was the NONBLOCK atomic flag.. I'm not entirely sure if we > >> should hang so much off of that one flag. > > > > Yeah, a separate VBLANK_SYNCED might be useful. Apparently people also > > want non-blocking modesets. > > random-diverging-from-original-topic-thought.. seems like userspace > just wants a deadline/timeout (hopefully in units of vblanks).. at > least to the level of "I want this to happen on the next vblank (after > rendering completes), or give me an error" vs "I want this to complete > atomically even if it takes a few extra vblanks for things to sort > out".. > > I guess that amounts to what you mean by VBLANK_SYNCED flag, but > VBLANKED_SYNCED might not be a good name.. at least for some hw all > you can do is vblank sync'd.. Hm, we might as well go full monty and allow userspace to request a specific vblank counter. Would be useful for e.g. queuing up a bunch of video frames, which some hw can do. But that would then require cancelling of existing flips, so I guess for now a simple VBLANK_SYNCED flag which emulates pageflip behaviour should be good enough. That would also be useful if userspace attempts an atomic update on drivers which only support atomic modeset (through the crtc helpers), but can't do vblank synced updates. Then they could easily reject those. After all current drivers also often lack a pageflip implementation ... -Daniel
On Tue, May 27, 2014 at 07:47:42PM -0400, Rob Clark wrote: > On Tue, May 27, 2014 at 6:09 PM, Daniel Vetter <daniel@ffwll.ch> wrote: > > On Tue, May 27, 2014 at 04:06:28PM -0400, Rob Clark wrote: > >> On Tue, May 27, 2014 at 3:26 PM, Daniel Vetter <daniel@ffwll.ch> wrote: > > > > [snip] > > > >> Well, there was the NONBLOCK atomic flag.. I'm not entirely sure if we > >> should hang so much off of that one flag. > > > > Yeah, a separate VBLANK_SYNCED might be useful. Apparently people also > > want non-blocking modesets. > > > > [snip] > > > >> > I think one excellent use-case we get for free (almost) without the ioctl > >> > would be fbcon. It very much wants to do an atomic update, so converting > >> > that over to the atomic interface would be good imo. > >> > >> Yes, iirc the remaining non-atomic paths are fbcon related. In > >> principle it should be a simple matter to increment the refcnt on > >> fbcon state object and re-apply it. Although at the moment we keep > >> track of *how* to apply the state (ie. page_flip vs set_config, etc) > >> as the state object is built up.. which isn't very conducive to > >> re-committing an existing state object. Which is part of the reason I > >> wanted to deprecate the various existing > >> ->page_flip/->update_plane/->set_config/etc and introduce per object > >> ->commit()'s. (Which could either be called by helpers, or called > >> internally by driver or completely ignored by driver) > > > > Yeah, I think the approach in here with a few helpers to bend atomic > > ->commit to the old hooks (somewhat-ish) is good. And with the crtc > > helpers we should be able to move most drivers away from the old hooks > > quickly. The exception will be pageflips/cursors, but that requires a lot > > of driver-specific work, and probably first a full conversion to universal > > planes (which atm don't support everything even for the primary plane due > > to the arbitrary restriction to rgbx8888). > > btw, I guess/hope most SoC drivers (ie. the ones that want to do crazy > things with overlays) would be moving rapidly away from using primary > plane helpers. I think native primary planes fits better most of the > non-trivial mobile display controller blocks.. Fully agreed. See my comment about the VBLANK_SYNCED flag, I think without a driver implementation we should simply aggressively reject such updates. Same with disabling the primary plane, at least until we've reworked the interfaces a bit. > >> I've been a bit reluctant so far to do too much additional refactoring > >> on top of atomic, since I'm about at the limit of what I have time to > >> repeatedly rebase each kernel version. This is why I'm a bit anxious > >> to start merging some of atomic, even if it doesn't do absolutely > >> everything yet. > > > > I understand that, which is why I suggested a bunch of things to split out > > already so we can get them in. > > > > On top of that I think with the split-up mode_config.mutex like I've just > > proposed in an RFC we have a clear path for the locking issues, too. So > > could go ahead an merge the w/w conversion, too. > > I suppose now that I split modeset acquire ctx out into it's own thing > (originally it was inline w/ drm_atomic_state), I could reshuffle the > patches and move ww_mutex stuff below atomic.. will take a bit of > patch surgery since some parts of that patch would have to move to > later patches, but I can sort it out below = earlier in the series or later? I'll assume earlier since that what git log shows ;-) As mentioned on irc I think we could also throw per-plane locks into the mix to really validate the locking side of this. One thing we didn't discuss that much in the last few emails is accessing the obj->state data and other stuff which is autodetect (e.g. whether the sink is audio capable or not). I still think we need one additional plain mutex to protect that. It would sit between the mode_config.mutex and all the ww mutexes in the locking hierarchy. But we can look at that once the mode_config.mutex mess is untangled and clear. > > That leaves the set_prop refactoring which is still under discussion. > > Those three pieces hopefully help a lot with reducing rebasing pain. > > > > On top of that I think we should look at cutting away functional pieces of > > the conversion, e.g. ignore planes at first and only look at atomic > > modeset. Or ignore atomic modesets and only look at plane updates, > > rejecting anything that changes connectors or crtcs. By cutting out slices > > we should be able to get patches into shape step-by-step. > > hmm, well, not sure how much value it is splitting up like that. I > suppose the only-plane approach would sidestep/punt some issues. But > once locking and set_prop are in place / agreed, it isn't that much > more from a churn perspective. Hm, could be that it's not worth it, was just an idea. > tbh, I think the worse problems are when we actually expose atomic > ioctl to userspace. Right now we are rather restricted in the updates > triggered via atomic in that there are fixed well defined entry points > to atomic. Ie. only pageflip() sets _NONBLOCK flag, and it does not > set mode property, so you won't ever see a _NONBLOCK modeset. I guess we could lift limits step-by-step, e.g. reject any NONBLOCK updates if they change crtc/connector properties. And reject any NONBLOCK which isn't also vsynced. Wrt earlier testing fbcon should give us basic in-kernel validation, since it really wants to kill all the planes and everything and put a new config into place in one go. Another good one would be suspend/resume. It needs vt-switchless resume enabled (which is a per-driver knob and only i915 has it for now, but it's easy to do). Then we could suspend/resume a full X session with overlays, cursors and all and see whether the atomic engine can cope and restore it all. Once that's proven the ioctl shouldn't be that bad really any more. For i915 I'll demand lots of tests for corner-cases, but the easiest to implement workload might be the vt restore in the ddx. If we can do that with atomic you can fire up a bunch of ridiculous (and conflicting) X configs with cursors and overlays and the vt-switch between them like mad. That should give us good testing I hope. -Daniel
On Wed, May 28, 2014 at 03:21:41PM +0200, Daniel Vetter wrote: > On Tue, May 27, 2014 at 07:32:46PM -0400, Rob Clark wrote: > > On Tue, May 27, 2014 at 6:09 PM, Daniel Vetter <daniel@ffwll.ch> wrote: > > > On Tue, May 27, 2014 at 04:06:28PM -0400, Rob Clark wrote: > > >> On Tue, May 27, 2014 at 3:26 PM, Daniel Vetter <daniel@ffwll.ch> wrote: > > > > > > [snip] > > > > > >> Well, there was the NONBLOCK atomic flag.. I'm not entirely sure if we > > >> should hang so much off of that one flag. > > > > > > Yeah, a separate VBLANK_SYNCED might be useful. Apparently people also > > > want non-blocking modesets. > > > > random-diverging-from-original-topic-thought.. seems like userspace > > just wants a deadline/timeout (hopefully in units of vblanks).. at > > least to the level of "I want this to happen on the next vblank (after > > rendering completes), or give me an error" vs "I want this to complete > > atomically even if it takes a few extra vblanks for things to sort > > out".. > > > > I guess that amounts to what you mean by VBLANK_SYNCED flag, but > > VBLANKED_SYNCED might not be a good name.. at least for some hw all > > you can do is vblank sync'd.. > > Hm, we might as well go full monty and allow userspace to request a > specific vblank counter. Would be useful for e.g. queuing up a bunch of > video frames, which some hw can do. But that would then require cancelling > of existing flips, The hard part there would be rolling back the user visible state. But if we were to disallow it for anything but simple page flips, we could rather easily keep the fb pointers around in the structure that tracks the progress of the operation. Otherwise cancelling should be trivial when using mmio instead of the cs. But we could certainly do the target vblank thing without allowing multiple queued flips initially, and without a cancel capability. However using vblank counter is a bit problematic with multiple crtcs. But I guess userspace can try to do the something_else->vbl conversion itself if it wants to use some other units. There's also the question whether we should allow a target vbl count for each crtc, or just one of them. We could make it so that you can specify the target vblank count only for a single crtc, and the rest of the crtcs will flip soon before or soon after. The reason for allowing the "soon before" case is because it'll make the implementation much simpler. We just have to perform all the register writes somewhere between 'target_vbl-1 - target_vbl' of the specified crtc. The order in which the flips actually happen then depends on the vblank period and phase of each crtc. If user space wants target counts for all crtcs, it could issue separate nuclear flips for each crtc, although that does raise the issue that we can't check the entire target state, so we can't guarantee that all of the nuclear flips will succeed. So that's a bit bad. So I guess we could allow a target vbl count for all the crtcs, just need to convey that information inside another array in the ioctl to avoid imposing a limit in the number of crtcs. But then there's the question what happens if only a subset of the involved crtcs have a target count. Return an error? Or just pick one of the crtcs that did get a target vblank and flip the rest at the same time? I guess the latter option is better to allow one crtc to act as the master clock for the whole thing, and the rest just hop along as best they can.
On Wed, May 28, 2014 at 05:14:21PM +0300, Ville Syrjälä wrote: > On Wed, May 28, 2014 at 03:21:41PM +0200, Daniel Vetter wrote: > > On Tue, May 27, 2014 at 07:32:46PM -0400, Rob Clark wrote: > > > On Tue, May 27, 2014 at 6:09 PM, Daniel Vetter <daniel@ffwll.ch> wrote: > > > > On Tue, May 27, 2014 at 04:06:28PM -0400, Rob Clark wrote: > > > >> On Tue, May 27, 2014 at 3:26 PM, Daniel Vetter <daniel@ffwll.ch> wrote: > > > > > > > > [snip] > > > > > > > >> Well, there was the NONBLOCK atomic flag.. I'm not entirely sure if we > > > >> should hang so much off of that one flag. > > > > > > > > Yeah, a separate VBLANK_SYNCED might be useful. Apparently people also > > > > want non-blocking modesets. > > > > > > random-diverging-from-original-topic-thought.. seems like userspace > > > just wants a deadline/timeout (hopefully in units of vblanks).. at > > > least to the level of "I want this to happen on the next vblank (after > > > rendering completes), or give me an error" vs "I want this to complete > > > atomically even if it takes a few extra vblanks for things to sort > > > out".. > > > > > > I guess that amounts to what you mean by VBLANK_SYNCED flag, but > > > VBLANKED_SYNCED might not be a good name.. at least for some hw all > > > you can do is vblank sync'd.. > > > > Hm, we might as well go full monty and allow userspace to request a > > specific vblank counter. Would be useful for e.g. queuing up a bunch of > > video frames, which some hw can do. But that would then require cancelling > > of existing flips, > > The hard part there would be rolling back the user visible state. But > if we were to disallow it for anything but simple page flips, we could > rather easily keep the fb pointers around in the structure that tracks > the progress of the operation. > > Otherwise cancelling should be trivial when using mmio instead of the > cs. > > But we could certainly do the target vblank thing without allowing > multiple queued flips initially, and without a cancel capability. > > However using vblank counter is a bit problematic with multiple crtcs. > But I guess userspace can try to do the something_else->vbl conversion > itself if it wants to use some other units. > > There's also the question whether we should allow a target vbl count for > each crtc, or just one of them. > > We could make it so that you can specify the target vblank count only > for a single crtc, and the rest of the crtcs will flip soon before or > soon after. The reason for allowing the "soon before" case is because > it'll make the implementation much simpler. We just have to perform > all the register writes somewhere between 'target_vbl-1 - target_vbl' > of the specified crtc. The order in which the flips actually happen > then depends on the vblank period and phase of each crtc. > > If user space wants target counts for all crtcs, it could issue separate > nuclear flips for each crtc, although that does raise the issue that we > can't check the entire target state, so we can't guarantee that all of > the nuclear flips will succeed. So that's a bit bad. > > So I guess we could allow a target vbl count for all the crtcs, just > need to convey that information inside another array in the ioctl to > avoid imposing a limit in the number of crtcs. But then there's the > question what happens if only a subset of the involved crtcs have a > target count. Return an error? Or just pick one of the crtcs that > did get a target vblank and flip the rest at the same time? I guess > the latter option is better to allow one crtc to act as the master > clock for the whole thing, and the rest just hop along as best they > can. I guess if userspace asks for a target count on more than one crtc we'd reject the request. That leaves a bit a window unfortunately where userspace partially submitted a request and then the kernel starts to tell it that it's too late. But then I don't think we can really avoid this situation without going completely nuts on the implementation side. So a best effort (maybe with the guarantee that if we're too late it wont happen so that userspace could try to render the next frame and display that for smoothness) approach should be good enough here. Anyway, let's not get too distracted with fancy ideas before we have the basics ;-) Cheers, Daniel
On Wed, May 28, 2014 at 9:21 AM, Daniel Vetter <daniel@ffwll.ch> wrote: > On Tue, May 27, 2014 at 07:32:46PM -0400, Rob Clark wrote: >> On Tue, May 27, 2014 at 6:09 PM, Daniel Vetter <daniel@ffwll.ch> wrote: >> > On Tue, May 27, 2014 at 04:06:28PM -0400, Rob Clark wrote: >> >> On Tue, May 27, 2014 at 3:26 PM, Daniel Vetter <daniel@ffwll.ch> wrote: >> > >> > [snip] >> > >> >> Well, there was the NONBLOCK atomic flag.. I'm not entirely sure if we >> >> should hang so much off of that one flag. >> > >> > Yeah, a separate VBLANK_SYNCED might be useful. Apparently people also >> > want non-blocking modesets. >> >> random-diverging-from-original-topic-thought.. seems like userspace >> just wants a deadline/timeout (hopefully in units of vblanks).. at >> least to the level of "I want this to happen on the next vblank (after >> rendering completes), or give me an error" vs "I want this to complete >> atomically even if it takes a few extra vblanks for things to sort >> out".. >> >> I guess that amounts to what you mean by VBLANK_SYNCED flag, but >> VBLANKED_SYNCED might not be a good name.. at least for some hw all >> you can do is vblank sync'd.. > > Hm, we might as well go full monty and allow userspace to request a > specific vblank counter. Would be useful for e.g. queuing up a bunch of > video frames, which some hw can do. But that would then require cancelling > of existing flips, so I guess for now a simple VBLANK_SYNCED flag which > emulates pageflip behaviour should be good enough. The full on vblank counter and queue stuff up could be interesting.. not entirely sure how we'd manage ->atomic_check() against a not yet applied base state. I suppose it is just a matter of copying the previous-state values for new state objects from the not-yet applied state, rather than {plane,crtc,etc}->state.. And since state objects are refcnt'd I guess forming a linked list out of 'em would not be hard. Probably we should not allow queuing in the beginning. But does kinda seem like some magic is possible to handle this. > That would also be useful if userspace attempts an atomic update on > drivers which only support atomic modeset (through the crtc helpers), but > can't do vblank synced updates. Then they could easily reject those. After > all current drivers also often lack a pageflip implementation ... yeah.. but I guess this mainly the server chips and old stuff? I wonder how many drivers support multiple crtcs but not pageflip? BR, -R > -Daniel > -- > Daniel Vetter > Software Engineer, Intel Corporation > +41 (0) 79 365 57 48 - http://blog.ffwll.ch
diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile index 5e1e6b0..dd12f56 100644 --- a/drivers/gpu/drm/msm/Makefile +++ b/drivers/gpu/drm/msm/Makefile @@ -27,6 +27,7 @@ msm-y := \ mdp/mdp5/mdp5_kms.o \ mdp/mdp5/mdp5_plane.o \ mdp/mdp5/mdp5_smp.o \ + msm_atomic.o \ msm_drv.o \ msm_fb.o \ msm_gem.o \ diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c index d0d8befd..2fa6b75 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c @@ -52,7 +52,6 @@ struct mdp4_crtc { /* if there is a pending flip, these will be non-null: */ struct drm_pending_vblank_event *event; - struct msm_fence_cb pageflip_cb; #define PENDING_CURSOR 0x1 #define PENDING_FLIP 0x2 @@ -93,12 +92,16 @@ static void request_pending(struct drm_crtc *crtc, uint32_t pending) mdp_irq_register(&get_kms(crtc)->base, &mdp4_crtc->vblank); } -static void crtc_flush(struct drm_crtc *crtc) +void mdp4_crtc_flush(struct drm_crtc *crtc) { + struct msm_drm_private *priv = crtc->dev->dev_private; struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc); struct mdp4_kms *mdp4_kms = get_kms(crtc); uint32_t i, flush = 0; + if (priv->pending_crtcs & (1 << crtc->id)) + return; + for (i = 0; i < ARRAY_SIZE(mdp4_crtc->planes); i++) { struct drm_plane *plane = mdp4_crtc->planes[i]; if (plane) { @@ -142,7 +145,7 @@ static void update_scanout(struct drm_crtc *crtc, struct drm_framebuffer *fb) * so that we can safely queue unref to current fb (ie. next * vblank we know hw is done w/ previous scanout_fb). */ - crtc_flush(crtc); + mdp4_crtc_flush(crtc); if (mdp4_crtc->scanout_fb) drm_flip_work_queue(&mdp4_crtc->unref_fb_work, @@ -177,21 +180,6 @@ static void complete_flip(struct drm_crtc *crtc, struct drm_file *file) spin_unlock_irqrestore(&dev->event_lock, flags); } -static void pageflip_cb(struct msm_fence_cb *cb) -{ - struct mdp4_crtc *mdp4_crtc = - container_of(cb, struct mdp4_crtc, pageflip_cb); - struct drm_crtc *crtc = &mdp4_crtc->base; - struct drm_framebuffer *fb = crtc->primary->fb; - - if (!fb) - return; - - drm_framebuffer_reference(fb); - mdp4_plane_set_scanout(mdp4_crtc->plane, fb); - update_scanout(crtc, fb); -} - static void unref_fb_worker(struct drm_flip_work *work, void *val) { struct mdp4_crtc *mdp4_crtc = @@ -406,7 +394,7 @@ static void mdp4_crtc_prepare(struct drm_crtc *crtc) static void mdp4_crtc_commit(struct drm_crtc *crtc) { mdp4_crtc_dpms(crtc, DRM_MODE_DPMS_ON); - crtc_flush(crtc); + mdp4_crtc_flush(crtc); /* drop the ref to mdp clk's that we got in prepare: */ mdp4_disable(get_kms(crtc)); } @@ -448,23 +436,27 @@ static int mdp4_crtc_page_flip(struct drm_crtc *crtc, { struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc); struct drm_device *dev = crtc->dev; - struct drm_gem_object *obj; unsigned long flags; + spin_lock_irqsave(&dev->event_lock, flags); if (mdp4_crtc->event) { dev_err(dev->dev, "already pending flip!\n"); + spin_unlock_irqrestore(&dev->event_lock, flags); return -EBUSY; } - obj = msm_framebuffer_bo(new_fb, 0); - - spin_lock_irqsave(&dev->event_lock, flags); mdp4_crtc->event = event; spin_unlock_irqrestore(&dev->event_lock, flags); update_fb(crtc, new_fb); - return msm_gem_queue_inactive_cb(obj, &mdp4_crtc->pageflip_cb); + + /* grab extra ref for update_scanout() */ + drm_framebuffer_reference(new_fb); + mdp4_plane_set_scanout(mdp4_crtc->plane, new_fb); + update_scanout(crtc, new_fb); + + return 0; } static int mdp4_crtc_set_property(struct drm_crtc *crtc, @@ -598,7 +590,7 @@ static int mdp4_crtc_cursor_move(struct drm_crtc *crtc, int x, int y) mdp4_crtc->cursor.y = y; spin_unlock_irqrestore(&mdp4_crtc->cursor.lock, flags); - crtc_flush(crtc); + mdp4_crtc_flush(crtc); request_pending(crtc, PENDING_CURSOR); return 0; @@ -635,8 +627,13 @@ static void mdp4_crtc_vblank_irq(struct mdp_irq *irq, uint32_t irqstatus) pending = atomic_xchg(&mdp4_crtc->pending, 0); if (pending & PENDING_FLIP) { - complete_flip(crtc, NULL); - drm_flip_work_commit(&mdp4_crtc->unref_fb_work, priv->wq); + if (priv->pending_crtcs & (1 << crtc->id)) { + /* our update hasn't been flushed yet: */ + request_pending(crtc, PENDING_FLIP); + } else { + complete_flip(crtc, NULL); + drm_flip_work_commit(&mdp4_crtc->unref_fb_work, priv->wq); + } } if (pending & PENDING_CURSOR) { @@ -650,7 +647,7 @@ static void mdp4_crtc_err_irq(struct mdp_irq *irq, uint32_t irqstatus) struct mdp4_crtc *mdp4_crtc = container_of(irq, struct mdp4_crtc, err); struct drm_crtc *crtc = &mdp4_crtc->base; DBG("%s: error: %08x", mdp4_crtc->name, irqstatus); - crtc_flush(crtc); + mdp4_crtc_flush(crtc); } uint32_t mdp4_crtc_vblank(struct drm_crtc *crtc) @@ -730,7 +727,7 @@ static void set_attach(struct drm_crtc *crtc, enum mdp4_pipe pipe_id, mdp4_crtc->planes[pipe_id] = plane; blend_setup(crtc); if (mdp4_crtc->enabled && (plane != mdp4_crtc->plane)) - crtc_flush(crtc); + mdp4_crtc_flush(crtc); } void mdp4_crtc_attach(struct drm_crtc *crtc, struct drm_plane *plane) @@ -792,8 +789,6 @@ struct drm_crtc *mdp4_crtc_init(struct drm_device *dev, ret = drm_flip_work_init(&mdp4_crtc->unref_cursor_work, 64, "unref cursor", unref_cursor_worker); - INIT_FENCE_CB(&mdp4_crtc->pageflip_cb, pageflip_cb); - drm_crtc_init_with_planes(dev, crtc, plane, NULL, &mdp4_crtc_funcs); drm_crtc_helper_add(crtc, &mdp4_crtc_helper_funcs); diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c index 0bb4faa..af0bfb1 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c @@ -131,6 +131,11 @@ static long mdp4_round_pixclk(struct msm_kms *kms, unsigned long rate, return mdp4_dtv_round_pixclk(encoder, rate); } +static void mdp4_flush(struct msm_kms *kms, struct drm_crtc *crtc) +{ + mdp4_crtc_flush(crtc); +} + static void mdp4_preclose(struct msm_kms *kms, struct drm_file *file) { struct mdp4_kms *mdp4_kms = to_mdp4_kms(to_mdp_kms(kms)); @@ -162,6 +167,7 @@ static const struct mdp_kms_funcs kms_funcs = { .disable_vblank = mdp4_disable_vblank, .get_format = mdp_get_format, .round_pixclk = mdp4_round_pixclk, + .flush = mdp4_flush, .preclose = mdp4_preclose, .destroy = mdp4_destroy, }, diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h index 715520c5..834454c 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h @@ -184,6 +184,7 @@ enum mdp4_pipe mdp4_plane_pipe(struct drm_plane *plane); struct drm_plane *mdp4_plane_init(struct drm_device *dev, enum mdp4_pipe pipe_id, bool private_plane); +void mdp4_crtc_flush(struct drm_crtc *crtc); uint32_t mdp4_crtc_vblank(struct drm_crtc *crtc); void mdp4_crtc_cancel_pending_flip(struct drm_crtc *crtc, struct drm_file *file); void mdp4_crtc_set_config(struct drm_crtc *crtc, uint32_t config); diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c index 4c92985..f35848d 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c @@ -48,11 +48,6 @@ static int mdp4_plane_update(struct drm_plane *plane, mdp4_plane->enabled = true; - if (plane->fb) - drm_framebuffer_unreference(plane->fb); - - drm_framebuffer_reference(fb); - return mdp4_plane_mode_set(plane, crtc, fb, crtc_x, crtc_y, crtc_w, crtc_h, src_x, src_y, src_w, src_h); diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c index 7f4ee99..0e74317 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c @@ -35,7 +35,6 @@ struct mdp5_crtc { /* if there is a pending flip, these will be non-null: */ struct drm_pending_vblank_event *event; - struct msm_fence_cb pageflip_cb; #define PENDING_CURSOR 0x1 #define PENDING_FLIP 0x2 @@ -73,13 +72,17 @@ static void request_pending(struct drm_crtc *crtc, uint32_t pending) mdp_irq_register(&get_kms(crtc)->base, &mdp5_crtc->vblank); } -static void crtc_flush(struct drm_crtc *crtc) +void mdp5_crtc_flush(struct drm_crtc *crtc) { + struct msm_drm_private *priv = crtc->dev->dev_private; struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc); struct mdp5_kms *mdp5_kms = get_kms(crtc); int id = mdp5_crtc->id; uint32_t i, flush = 0; + if (priv->pending_crtcs & (1 << crtc->id)) + return; + for (i = 0; i < ARRAY_SIZE(mdp5_crtc->planes); i++) { struct drm_plane *plane = mdp5_crtc->planes[i]; if (plane) { @@ -124,7 +127,7 @@ static void update_scanout(struct drm_crtc *crtc, struct drm_framebuffer *fb) * so that we can safely queue unref to current fb (ie. next * vblank we know hw is done w/ previous scanout_fb). */ - crtc_flush(crtc); + mdp5_crtc_flush(crtc); if (mdp5_crtc->scanout_fb) drm_flip_work_queue(&mdp5_crtc->unref_fb_work, @@ -165,21 +168,6 @@ static void complete_flip(struct drm_crtc *crtc, struct drm_file *file) } } -static void pageflip_cb(struct msm_fence_cb *cb) -{ - struct mdp5_crtc *mdp5_crtc = - container_of(cb, struct mdp5_crtc, pageflip_cb); - struct drm_crtc *crtc = &mdp5_crtc->base; - struct drm_framebuffer *fb = mdp5_crtc->fb; - - if (!fb) - return; - - drm_framebuffer_reference(fb); - mdp5_plane_set_scanout(mdp5_crtc->plane, fb); - update_scanout(crtc, fb); -} - static void unref_fb_worker(struct drm_flip_work *work, void *val) { struct mdp5_crtc *mdp5_crtc = @@ -324,7 +312,7 @@ static void mdp5_crtc_prepare(struct drm_crtc *crtc) static void mdp5_crtc_commit(struct drm_crtc *crtc) { mdp5_crtc_dpms(crtc, DRM_MODE_DPMS_ON); - crtc_flush(crtc); + mdp5_crtc_flush(crtc); /* drop the ref to mdp clk's that we got in prepare: */ mdp5_disable(get_kms(crtc)); } @@ -366,23 +354,26 @@ static int mdp5_crtc_page_flip(struct drm_crtc *crtc, { struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc); struct drm_device *dev = crtc->dev; - struct drm_gem_object *obj; unsigned long flags; + spin_lock_irqsave(&dev->event_lock, flags); if (mdp5_crtc->event) { dev_err(dev->dev, "already pending flip!\n"); + spin_unlock_irqrestore(&dev->event_lock, flags); return -EBUSY; } - obj = msm_framebuffer_bo(new_fb, 0); - - spin_lock_irqsave(&dev->event_lock, flags); mdp5_crtc->event = event; spin_unlock_irqrestore(&dev->event_lock, flags); update_fb(crtc, new_fb); - return msm_gem_queue_inactive_cb(obj, &mdp5_crtc->pageflip_cb); + /* grab extra ref for update_scanout() */ + drm_framebuffer_reference(new_fb); + mdp5_plane_set_scanout(mdp5_crtc->plane, new_fb); + update_scanout(crtc, new_fb); + + return 0; } static int mdp5_crtc_set_property(struct drm_crtc *crtc, @@ -424,8 +415,13 @@ static void mdp5_crtc_vblank_irq(struct mdp_irq *irq, uint32_t irqstatus) pending = atomic_xchg(&mdp5_crtc->pending, 0); if (pending & PENDING_FLIP) { - complete_flip(crtc, NULL); - drm_flip_work_commit(&mdp5_crtc->unref_fb_work, priv->wq); + if (priv->pending_crtcs & (1 << crtc->id)) { + /* our update hasn't been flushed yet: */ + request_pending(crtc, PENDING_FLIP); + } else { + complete_flip(crtc, NULL); + drm_flip_work_commit(&mdp5_crtc->unref_fb_work, priv->wq); + } } } @@ -434,7 +430,7 @@ static void mdp5_crtc_err_irq(struct mdp_irq *irq, uint32_t irqstatus) struct mdp5_crtc *mdp5_crtc = container_of(irq, struct mdp5_crtc, err); struct drm_crtc *crtc = &mdp5_crtc->base; DBG("%s: error: %08x", mdp5_crtc->name, irqstatus); - crtc_flush(crtc); + mdp5_crtc_flush(crtc); } uint32_t mdp5_crtc_vblank(struct drm_crtc *crtc) @@ -501,7 +497,7 @@ void mdp5_crtc_set_intf(struct drm_crtc *crtc, int intf, MDP5_CTL_OP_MODE(MODE_NONE) | MDP5_CTL_OP_INTF_NUM(intfnum[intf])); - crtc_flush(crtc); + mdp5_crtc_flush(crtc); } static void set_attach(struct drm_crtc *crtc, enum mdp5_pipe pipe_id, @@ -517,7 +513,7 @@ static void set_attach(struct drm_crtc *crtc, enum mdp5_pipe pipe_id, mdp5_crtc->planes[pipe_id] = plane; blend_setup(crtc); if (mdp5_crtc->enabled && (plane != mdp5_crtc->plane)) - crtc_flush(crtc); + mdp5_crtc_flush(crtc); } void mdp5_crtc_attach(struct drm_crtc *crtc, struct drm_plane *plane) @@ -563,8 +559,6 @@ struct drm_crtc *mdp5_crtc_init(struct drm_device *dev, if (ret) goto fail; - INIT_FENCE_CB(&mdp5_crtc->pageflip_cb, pageflip_cb); - drm_crtc_init_with_planes(dev, crtc, plane, NULL, &mdp5_crtc_funcs); drm_crtc_helper_add(crtc, &mdp5_crtc_helper_funcs); diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c index ee8446c..01c3d70 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c @@ -91,6 +91,11 @@ static long mdp5_round_pixclk(struct msm_kms *kms, unsigned long rate, return rate; } +static void mdp5_flush(struct msm_kms *kms, struct drm_crtc *crtc) +{ + mdp5_crtc_flush(crtc); +} + static void mdp5_preclose(struct msm_kms *kms, struct drm_file *file) { struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms)); @@ -118,6 +123,7 @@ static const struct mdp_kms_funcs kms_funcs = { .disable_vblank = mdp5_disable_vblank, .get_format = mdp_get_format, .round_pixclk = mdp5_round_pixclk, + .flush = mdp5_flush, .preclose = mdp5_preclose, .destroy = mdp5_destroy, }, diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h index c8b1a25..18b031b 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h @@ -197,8 +197,8 @@ enum mdp5_pipe mdp5_plane_pipe(struct drm_plane *plane); struct drm_plane *mdp5_plane_init(struct drm_device *dev, enum mdp5_pipe pipe, bool private_plane); +void mdp5_crtc_flush(struct drm_crtc *crtc); uint32_t mdp5_crtc_vblank(struct drm_crtc *crtc); - void mdp5_crtc_cancel_pending_flip(struct drm_crtc *crtc, struct drm_file *file); void mdp5_crtc_set_intf(struct drm_crtc *crtc, int intf, enum mdp5_intf intf_id); diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c index 53cc8c6..f1bf3c2 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c @@ -48,11 +48,6 @@ static int mdp5_plane_update(struct drm_plane *plane, mdp5_plane->enabled = true; - if (plane->fb) - drm_framebuffer_unreference(plane->fb); - - drm_framebuffer_reference(fb); - return mdp5_plane_mode_set(plane, crtc, fb, crtc_x, crtc_y, crtc_w, crtc_h, src_x, src_y, src_w, src_h); diff --git a/drivers/gpu/drm/msm/msm_atomic.c b/drivers/gpu/drm/msm/msm_atomic.c new file mode 100644 index 0000000..b231377 --- /dev/null +++ b/drivers/gpu/drm/msm/msm_atomic.c @@ -0,0 +1,141 @@ +/* + * Copyright (C) 2013 Red Hat + * Author: Rob Clark <robdclark@gmail.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "msm_drv.h" +#include "msm_kms.h" +#include "msm_gem.h" + +struct msm_async_commit { + struct drm_atomic_state *state; + uint32_t fence; + struct msm_fence_cb fence_cb; +}; + +static void fence_cb(struct msm_fence_cb *cb); +static int commit_sync(struct drm_device *dev, void *state, bool unlocked); + +static struct msm_async_commit *new_commit(struct drm_atomic_state *state) +{ + struct msm_async_commit *c = kzalloc(sizeof(*c), GFP_KERNEL); + + if (!c) + return NULL; + + drm_atomic_state_reference(state); + c->state = state; + INIT_FENCE_CB(&c->fence_cb, fence_cb); + + return c; +} +static void free_commit(struct msm_async_commit *c) +{ + drm_atomic_state_unreference(c->state); + kfree(c); +} + +static void fence_cb(struct msm_fence_cb *cb) +{ + struct msm_async_commit *c = + container_of(cb, struct msm_async_commit, fence_cb); + commit_sync(c->state->dev, c->state, true); + free_commit(c); +} + +static void add_fb(struct msm_async_commit *c, struct drm_crtc *crtc, + struct drm_framebuffer *fb) +{ + struct drm_gem_object *obj = msm_framebuffer_bo(fb, 0); + c->fence = max(c->fence, msm_gem_fence(to_msm_bo(obj), MSM_PREP_READ)); +} + +static int wait_fb(struct drm_crtc *crtc, struct drm_framebuffer *fb) +{ + // XXX TODO wait.. + return 0; +} + +#define pending_fb(state) ((state) && (state)->fb && (state)->new_fb) + +static int commit_sync(struct drm_device *dev, void *state, bool unlocked) +{ + struct drm_atomic_state *a = state; + struct msm_drm_private *priv = dev->dev_private; + struct msm_kms *kms = priv->kms; + int ncrtcs = dev->mode_config.num_crtc; + uint32_t pending_crtcs = 0; + int i, ret; + + for (i = 0; i < ncrtcs; i++) + if (a->crtcs[i]) + pending_crtcs |= (1 << a->crtcs[i]->id); + + mutex_lock(&dev->struct_mutex); + WARN_ON(priv->pending_crtcs & pending_crtcs); + priv->pending_crtcs |= pending_crtcs; + mutex_unlock(&dev->struct_mutex); + + if (unlocked) + ret = drm_atomic_commit_unlocked(dev, state); + else + ret = drm_atomic_commit(dev, state); + + mutex_lock(&dev->struct_mutex); + priv->pending_crtcs &= ~pending_crtcs; + mutex_unlock(&dev->struct_mutex); + + if (ret) + return ret; + + for (i = 0; i < ncrtcs; i++) + if (a->crtcs[i]) + kms->funcs->flush(kms, a->crtcs[i]); + + return 0; +} + +int msm_atomic_commit(struct drm_device *dev, struct drm_atomic_state *state) +{ + struct drm_atomic_state *a = state; + int nplanes = dev->mode_config.num_total_plane; + int i; + + if (a->flags & DRM_MODE_ATOMIC_NONBLOCK) { + /* non-block mode: defer commit until fb's are ready */ + struct msm_async_commit *c = new_commit(state); + + if (!c) + return -ENOMEM; + + for (i = 0; i < nplanes; i++) + if (pending_fb(a->pstates[i])) + add_fb(c, a->pstates[i]->crtc, a->pstates[i]->fb); + + return msm_queue_fence_cb(dev, &c->fence_cb, c->fence); + } else { + /* blocking mode: wait until fb's are ready */ + int ret = 0; + + for (i = 0; i < nplanes && !ret; i++) + if (pending_fb(a->pstates[i])) + ret = wait_fb(a->pstates[i]->crtc, a->pstates[i]->fb); + + if (ret) + return ret; + + return commit_sync(dev, state, false); + } +} diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index d24ca45..bcee218 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -600,6 +600,26 @@ int msm_wait_fence_interruptable(struct drm_device *dev, uint32_t fence, return ret; } +int msm_queue_fence_cb(struct drm_device *dev, + struct msm_fence_cb *cb, uint32_t fence) +{ + struct msm_drm_private *priv = dev->dev_private; + int ret = 0; + + mutex_lock(&dev->struct_mutex); + if (!list_empty(&cb->work.entry)) { + ret = -EINVAL; + } else if (fence > priv->completed_fence) { + cb->fence = fence; + list_add_tail(&cb->work.entry, &priv->fence_cbs); + } else { + queue_work(priv->wq, &cb->work); + } + mutex_unlock(&dev->struct_mutex); + + return ret; +} + /* called from workqueue */ void msm_update_fence(struct drm_device *dev, uint32_t fence) { @@ -815,7 +835,7 @@ static struct drm_driver msm_driver = { .atomic_begin = drm_atomic_begin, .atomic_set_event = drm_atomic_set_event, .atomic_check = drm_atomic_check, - .atomic_commit = drm_atomic_commit, + .atomic_commit = msm_atomic_commit, .atomic_end = drm_atomic_end, .atomic_funcs = &drm_atomic_funcs, #ifdef CONFIG_DEBUG_FS diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index afc990e..5e1e7da 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -110,6 +110,9 @@ struct msm_drm_private { unsigned int num_connectors; struct drm_connector *connectors[8]; + /* crtc's pending atomic update: */ + uint32_t pending_crtcs; + /* VRAM carveout, used when no IOMMU: */ struct { unsigned long size; @@ -143,11 +146,15 @@ int msm_register_mmu(struct drm_device *dev, struct msm_mmu *mmu); int msm_wait_fence_interruptable(struct drm_device *dev, uint32_t fence, struct timespec *timeout); +int msm_queue_fence_cb(struct drm_device *dev, + struct msm_fence_cb *cb, uint32_t fence); void msm_update_fence(struct drm_device *dev, uint32_t fence); int msm_ioctl_gem_submit(struct drm_device *dev, void *data, struct drm_file *file); +int msm_atomic_commit(struct drm_device *dev, struct drm_atomic_state *state); + int msm_gem_mmap(struct file *filp, struct vm_area_struct *vma); int msm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf); uint64_t msm_gem_mmap_offset(struct drm_gem_object *obj); diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index bb8026d..90cb138 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -392,23 +392,11 @@ void *msm_gem_vaddr(struct drm_gem_object *obj) int msm_gem_queue_inactive_cb(struct drm_gem_object *obj, struct msm_fence_cb *cb) { - struct drm_device *dev = obj->dev; - struct msm_drm_private *priv = dev->dev_private; struct msm_gem_object *msm_obj = to_msm_bo(obj); - int ret = 0; + uint32_t fence = msm_gem_fence(msm_obj, + MSM_PREP_READ | MSM_PREP_WRITE); - mutex_lock(&dev->struct_mutex); - if (!list_empty(&cb->work.entry)) { - ret = -EINVAL; - } else if (is_active(msm_obj)) { - cb->fence = max(msm_obj->read_fence, msm_obj->write_fence); - list_add_tail(&cb->work.entry, &priv->fence_cbs); - } else { - queue_work(priv->wq, &cb->work); - } - mutex_unlock(&dev->struct_mutex); - - return ret; + return msm_queue_fence_cb(obj->dev, cb, fence); } void msm_gem_move_to_active(struct drm_gem_object *obj, @@ -447,12 +435,8 @@ int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op, int ret = 0; if (is_active(msm_obj)) { - uint32_t fence = 0; + uint32_t fence = msm_gem_fence(msm_obj, op); - if (op & MSM_PREP_READ) - fence = msm_obj->write_fence; - if (op & MSM_PREP_WRITE) - fence = max(fence, msm_obj->read_fence); if (op & MSM_PREP_NOSYNC) timeout = NULL; diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index 3246bb4..41a60cb 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -70,6 +70,19 @@ static inline bool is_active(struct msm_gem_object *msm_obj) return msm_obj->gpu != NULL; } +static inline uint32_t msm_gem_fence(struct msm_gem_object *msm_obj, + uint32_t op) +{ + uint32_t fence = 0; + + if (op & MSM_PREP_READ) + fence = msm_obj->write_fence; + if (op & MSM_PREP_WRITE) + fence = max(fence, msm_obj->read_fence); + + return fence; +} + #define MAX_CMDS 4 /* Created per submit-ioctl, to track bo's and cmdstream bufs, etc, diff --git a/drivers/gpu/drm/msm/msm_kms.h b/drivers/gpu/drm/msm/msm_kms.h index 0643774..91ddffc 100644 --- a/drivers/gpu/drm/msm/msm_kms.h +++ b/drivers/gpu/drm/msm/msm_kms.h @@ -42,6 +42,7 @@ struct msm_kms_funcs { const struct msm_format *(*get_format)(struct msm_kms *kms, uint32_t format); long (*round_pixclk)(struct msm_kms *kms, unsigned long rate, struct drm_encoder *encoder); + void (*flush)(struct msm_kms *kms, struct drm_crtc *crtc); /* cleanup: */ void (*preclose)(struct msm_kms *kms, struct drm_file *file); void (*destroy)(struct msm_kms *kms);
Signed-off-by: Rob Clark <robdclark@gmail.com> --- drivers/gpu/drm/msm/Makefile | 1 + drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c | 57 ++++++------ drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c | 6 ++ drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h | 1 + drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c | 5 -- drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c | 56 ++++++------ drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c | 6 ++ drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h | 2 +- drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c | 5 -- drivers/gpu/drm/msm/msm_atomic.c | 141 ++++++++++++++++++++++++++++++ drivers/gpu/drm/msm/msm_drv.c | 22 ++++- drivers/gpu/drm/msm/msm_drv.h | 7 ++ drivers/gpu/drm/msm/msm_gem.c | 24 +---- drivers/gpu/drm/msm/msm_gem.h | 13 +++ drivers/gpu/drm/msm/msm_kms.h | 1 + 15 files changed, 253 insertions(+), 94 deletions(-) create mode 100644 drivers/gpu/drm/msm/msm_atomic.c