Message ID | 20240417054032.3145721-1-airlied@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | nouveau: rip out busy fence waits | expand |
On 17/4/24 15:40, Dave Airlie wrote: > External email: Use caution opening links or attachments > > > From: Dave Airlie <airlied@redhat.com> > > I'm pretty sure this optimisation is actually not a great idea, > and is racy with other things waiting for fences. > > Just nuke it, there should be no need to do fence waits in a > busy CPU loop. > > Signed-off-by: Dave Airlie <airlied@redhat.com> Reviewed-by: Ben Skeggs <bskeggs@nvidia.com> > --- > drivers/gpu/drm/nouveau/nouveau_bo.c | 2 +- > drivers/gpu/drm/nouveau/nouveau_chan.c | 2 +- > drivers/gpu/drm/nouveau/nouveau_dmem.c | 2 +- > drivers/gpu/drm/nouveau/nouveau_fence.c | 30 +------------------------ > drivers/gpu/drm/nouveau/nouveau_fence.h | 2 +- > drivers/gpu/drm/nouveau/nouveau_gem.c | 2 +- > 6 files changed, 6 insertions(+), 34 deletions(-) > > diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c > index 8a30f5a0525b..a4e8f625fce6 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_bo.c > +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c > @@ -902,7 +902,7 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, > * Without this the operation can timeout and we'll fallback to a > * software copy, which might take several minutes to finish. > */ > - nouveau_fence_wait(fence, false, false); > + nouveau_fence_wait(fence, false); > ret = ttm_bo_move_accel_cleanup(bo, &fence->base, evict, false, > new_reg); > nouveau_fence_unref(&fence); > diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c > index 7c97b2886807..66fca95c10c7 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_chan.c > +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c > @@ -72,7 +72,7 @@ nouveau_channel_idle(struct nouveau_channel *chan) > > ret = nouveau_fence_new(&fence, chan); > if (!ret) { > - ret = nouveau_fence_wait(fence, false, false); > + ret = nouveau_fence_wait(fence, false); > nouveau_fence_unref(&fence); > } > > diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c > index 12feecf71e75..033a09cd3c8f 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c > +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c > @@ -128,7 +128,7 @@ static void nouveau_dmem_page_free(struct page *page) > static void nouveau_dmem_fence_done(struct nouveau_fence **fence) > { > if (fence) { > - nouveau_fence_wait(*fence, true, false); > + nouveau_fence_wait(*fence, false); > nouveau_fence_unref(fence); > } else { > /* > diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c > index c3ea3cd933cd..8de941379324 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_fence.c > +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c > @@ -312,39 +312,11 @@ nouveau_fence_wait_legacy(struct dma_fence *f, bool intr, long wait) > return timeout - t; > } > > -static int > -nouveau_fence_wait_busy(struct nouveau_fence *fence, bool intr) > -{ > - int ret = 0; > - > - while (!nouveau_fence_done(fence)) { > - if (time_after_eq(jiffies, fence->timeout)) { > - ret = -EBUSY; > - break; > - } > - > - __set_current_state(intr ? > - TASK_INTERRUPTIBLE : > - TASK_UNINTERRUPTIBLE); > - > - if (intr && signal_pending(current)) { > - ret = -ERESTARTSYS; > - break; > - } > - } > - > - __set_current_state(TASK_RUNNING); > - return ret; > -} > - > int > -nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr) > +nouveau_fence_wait(struct nouveau_fence *fence, bool intr) > { > long ret; > > - if (!lazy) > - return nouveau_fence_wait_busy(fence, intr); > - > ret = dma_fence_wait_timeout(&fence->base, intr, 15 * HZ); > if (ret < 0) > return ret; > diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h > index bc13110bdfa4..88213014b675 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_fence.h > +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h > @@ -23,7 +23,7 @@ void nouveau_fence_unref(struct nouveau_fence **); > > int nouveau_fence_emit(struct nouveau_fence *); > bool nouveau_fence_done(struct nouveau_fence *); > -int nouveau_fence_wait(struct nouveau_fence *, bool lazy, bool intr); > +int nouveau_fence_wait(struct nouveau_fence *, bool intr); > int nouveau_fence_sync(struct nouveau_bo *, struct nouveau_channel *, bool exclusive, bool intr); > > struct nouveau_fence_chan { > diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c > index 49c2bcbef129..f715e381da69 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_gem.c > +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c > @@ -928,7 +928,7 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data, > } > > if (sync) { > - if (!(ret = nouveau_fence_wait(fence, false, false))) { > + if (!(ret = nouveau_fence_wait(fence, false))) { > if ((ret = dma_fence_get_status(&fence->base)) == 1) > ret = 0; > } > -- > 2.43.2 >
On 4/17/24 07:40, Dave Airlie wrote: > From: Dave Airlie <airlied@redhat.com> > > I'm pretty sure this optimisation is actually not a great idea, > and is racy with other things waiting for fences. Yes, I tried to use it in the past on scheduler tear down, to have an indicator whether all jobs had the chance to finish. However, it happened that using a CPU busy loop saw the fence as signaled, while an (event based) dma_fence was still seen as unsignaled. > > Just nuke it, there should be no need to do fence waits in a > busy CPU loop. > > Signed-off-by: Dave Airlie <airlied@redhat.com> Applied to drm-misc-next. > --- > drivers/gpu/drm/nouveau/nouveau_bo.c | 2 +- > drivers/gpu/drm/nouveau/nouveau_chan.c | 2 +- > drivers/gpu/drm/nouveau/nouveau_dmem.c | 2 +- > drivers/gpu/drm/nouveau/nouveau_fence.c | 30 +------------------------ > drivers/gpu/drm/nouveau/nouveau_fence.h | 2 +- > drivers/gpu/drm/nouveau/nouveau_gem.c | 2 +- > 6 files changed, 6 insertions(+), 34 deletions(-) > > diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c > index 8a30f5a0525b..a4e8f625fce6 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_bo.c > +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c > @@ -902,7 +902,7 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, > * Without this the operation can timeout and we'll fallback to a > * software copy, which might take several minutes to finish. > */ > - nouveau_fence_wait(fence, false, false); > + nouveau_fence_wait(fence, false); > ret = ttm_bo_move_accel_cleanup(bo, &fence->base, evict, false, > new_reg); > nouveau_fence_unref(&fence); > diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c > index 7c97b2886807..66fca95c10c7 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_chan.c > +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c > @@ -72,7 +72,7 @@ nouveau_channel_idle(struct nouveau_channel *chan) > > ret = nouveau_fence_new(&fence, chan); > if (!ret) { > - ret = nouveau_fence_wait(fence, false, false); > + ret = nouveau_fence_wait(fence, false); > nouveau_fence_unref(&fence); > } > > diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c > index 12feecf71e75..033a09cd3c8f 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c > +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c > @@ -128,7 +128,7 @@ static void nouveau_dmem_page_free(struct page *page) > static void nouveau_dmem_fence_done(struct nouveau_fence **fence) > { > if (fence) { > - nouveau_fence_wait(*fence, true, false); > + nouveau_fence_wait(*fence, false); > nouveau_fence_unref(fence); > } else { > /* > diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c > index c3ea3cd933cd..8de941379324 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_fence.c > +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c > @@ -312,39 +312,11 @@ nouveau_fence_wait_legacy(struct dma_fence *f, bool intr, long wait) > return timeout - t; > } > > -static int > -nouveau_fence_wait_busy(struct nouveau_fence *fence, bool intr) > -{ > - int ret = 0; > - > - while (!nouveau_fence_done(fence)) { > - if (time_after_eq(jiffies, fence->timeout)) { > - ret = -EBUSY; > - break; > - } > - > - __set_current_state(intr ? > - TASK_INTERRUPTIBLE : > - TASK_UNINTERRUPTIBLE); > - > - if (intr && signal_pending(current)) { > - ret = -ERESTARTSYS; > - break; > - } > - } > - > - __set_current_state(TASK_RUNNING); > - return ret; > -} > - > int > -nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr) > +nouveau_fence_wait(struct nouveau_fence *fence, bool intr) > { > long ret; > > - if (!lazy) > - return nouveau_fence_wait_busy(fence, intr); > - > ret = dma_fence_wait_timeout(&fence->base, intr, 15 * HZ); > if (ret < 0) > return ret; > diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h > index bc13110bdfa4..88213014b675 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_fence.h > +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h > @@ -23,7 +23,7 @@ void nouveau_fence_unref(struct nouveau_fence **); > > int nouveau_fence_emit(struct nouveau_fence *); > bool nouveau_fence_done(struct nouveau_fence *); > -int nouveau_fence_wait(struct nouveau_fence *, bool lazy, bool intr); > +int nouveau_fence_wait(struct nouveau_fence *, bool intr); > int nouveau_fence_sync(struct nouveau_bo *, struct nouveau_channel *, bool exclusive, bool intr); > > struct nouveau_fence_chan { > diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c > index 49c2bcbef129..f715e381da69 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_gem.c > +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c > @@ -928,7 +928,7 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data, > } > > if (sync) { > - if (!(ret = nouveau_fence_wait(fence, false, false))) { > + if (!(ret = nouveau_fence_wait(fence, false))) { > if ((ret = dma_fence_get_status(&fence->base)) == 1) > ret = 0; > }
On Mon, Jun 17, 2024 at 05:11:34PM +0200, Danilo Krummrich wrote: > On 4/17/24 07:40, Dave Airlie wrote: > > From: Dave Airlie <airlied@redhat.com> > > > > I'm pretty sure this optimisation is actually not a great idea, > > and is racy with other things waiting for fences. > > Yes, I tried to use it in the past on scheduler tear down, to have an > indicator whether all jobs had the chance to finish. > > However, it happened that using a CPU busy loop saw the fence as signaled, > while an (event based) dma_fence was still seen as unsignaled. If the busy loop goes through dma_fence_is_signalled this kind of stuff shouldn't happen. But if you open-code it, it can. -Sima > > > > > Just nuke it, there should be no need to do fence waits in a > > busy CPU loop. > > > > Signed-off-by: Dave Airlie <airlied@redhat.com> > > Applied to drm-misc-next. > > > --- > > drivers/gpu/drm/nouveau/nouveau_bo.c | 2 +- > > drivers/gpu/drm/nouveau/nouveau_chan.c | 2 +- > > drivers/gpu/drm/nouveau/nouveau_dmem.c | 2 +- > > drivers/gpu/drm/nouveau/nouveau_fence.c | 30 +------------------------ > > drivers/gpu/drm/nouveau/nouveau_fence.h | 2 +- > > drivers/gpu/drm/nouveau/nouveau_gem.c | 2 +- > > 6 files changed, 6 insertions(+), 34 deletions(-) > > > > diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c > > index 8a30f5a0525b..a4e8f625fce6 100644 > > --- a/drivers/gpu/drm/nouveau/nouveau_bo.c > > +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c > > @@ -902,7 +902,7 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, > > * Without this the operation can timeout and we'll fallback to a > > * software copy, which might take several minutes to finish. > > */ > > - nouveau_fence_wait(fence, false, false); > > + nouveau_fence_wait(fence, false); > > ret = ttm_bo_move_accel_cleanup(bo, &fence->base, evict, false, > > new_reg); > > nouveau_fence_unref(&fence); > > diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c > > index 7c97b2886807..66fca95c10c7 100644 > > --- a/drivers/gpu/drm/nouveau/nouveau_chan.c > > +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c > > @@ -72,7 +72,7 @@ nouveau_channel_idle(struct nouveau_channel *chan) > > ret = nouveau_fence_new(&fence, chan); > > if (!ret) { > > - ret = nouveau_fence_wait(fence, false, false); > > + ret = nouveau_fence_wait(fence, false); > > nouveau_fence_unref(&fence); > > } > > diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c > > index 12feecf71e75..033a09cd3c8f 100644 > > --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c > > +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c > > @@ -128,7 +128,7 @@ static void nouveau_dmem_page_free(struct page *page) > > static void nouveau_dmem_fence_done(struct nouveau_fence **fence) > > { > > if (fence) { > > - nouveau_fence_wait(*fence, true, false); > > + nouveau_fence_wait(*fence, false); > > nouveau_fence_unref(fence); > > } else { > > /* > > diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c > > index c3ea3cd933cd..8de941379324 100644 > > --- a/drivers/gpu/drm/nouveau/nouveau_fence.c > > +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c > > @@ -312,39 +312,11 @@ nouveau_fence_wait_legacy(struct dma_fence *f, bool intr, long wait) > > return timeout - t; > > } > > -static int > > -nouveau_fence_wait_busy(struct nouveau_fence *fence, bool intr) > > -{ > > - int ret = 0; > > - > > - while (!nouveau_fence_done(fence)) { > > - if (time_after_eq(jiffies, fence->timeout)) { > > - ret = -EBUSY; > > - break; > > - } > > - > > - __set_current_state(intr ? > > - TASK_INTERRUPTIBLE : > > - TASK_UNINTERRUPTIBLE); > > - > > - if (intr && signal_pending(current)) { > > - ret = -ERESTARTSYS; > > - break; > > - } > > - } > > - > > - __set_current_state(TASK_RUNNING); > > - return ret; > > -} > > - > > int > > -nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr) > > +nouveau_fence_wait(struct nouveau_fence *fence, bool intr) > > { > > long ret; > > - if (!lazy) > > - return nouveau_fence_wait_busy(fence, intr); > > - > > ret = dma_fence_wait_timeout(&fence->base, intr, 15 * HZ); > > if (ret < 0) > > return ret; > > diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h > > index bc13110bdfa4..88213014b675 100644 > > --- a/drivers/gpu/drm/nouveau/nouveau_fence.h > > +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h > > @@ -23,7 +23,7 @@ void nouveau_fence_unref(struct nouveau_fence **); > > int nouveau_fence_emit(struct nouveau_fence *); > > bool nouveau_fence_done(struct nouveau_fence *); > > -int nouveau_fence_wait(struct nouveau_fence *, bool lazy, bool intr); > > +int nouveau_fence_wait(struct nouveau_fence *, bool intr); > > int nouveau_fence_sync(struct nouveau_bo *, struct nouveau_channel *, bool exclusive, bool intr); > > struct nouveau_fence_chan { > > diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c > > index 49c2bcbef129..f715e381da69 100644 > > --- a/drivers/gpu/drm/nouveau/nouveau_gem.c > > +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c > > @@ -928,7 +928,7 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data, > > } > > if (sync) { > > - if (!(ret = nouveau_fence_wait(fence, false, false))) { > > + if (!(ret = nouveau_fence_wait(fence, false))) { > > if ((ret = dma_fence_get_status(&fence->base)) == 1) > > ret = 0; > > } >
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 8a30f5a0525b..a4e8f625fce6 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -902,7 +902,7 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, * Without this the operation can timeout and we'll fallback to a * software copy, which might take several minutes to finish. */ - nouveau_fence_wait(fence, false, false); + nouveau_fence_wait(fence, false); ret = ttm_bo_move_accel_cleanup(bo, &fence->base, evict, false, new_reg); nouveau_fence_unref(&fence); diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c index 7c97b2886807..66fca95c10c7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.c +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c @@ -72,7 +72,7 @@ nouveau_channel_idle(struct nouveau_channel *chan) ret = nouveau_fence_new(&fence, chan); if (!ret) { - ret = nouveau_fence_wait(fence, false, false); + ret = nouveau_fence_wait(fence, false); nouveau_fence_unref(&fence); } diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 12feecf71e75..033a09cd3c8f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -128,7 +128,7 @@ static void nouveau_dmem_page_free(struct page *page) static void nouveau_dmem_fence_done(struct nouveau_fence **fence) { if (fence) { - nouveau_fence_wait(*fence, true, false); + nouveau_fence_wait(*fence, false); nouveau_fence_unref(fence); } else { /* diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index c3ea3cd933cd..8de941379324 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -312,39 +312,11 @@ nouveau_fence_wait_legacy(struct dma_fence *f, bool intr, long wait) return timeout - t; } -static int -nouveau_fence_wait_busy(struct nouveau_fence *fence, bool intr) -{ - int ret = 0; - - while (!nouveau_fence_done(fence)) { - if (time_after_eq(jiffies, fence->timeout)) { - ret = -EBUSY; - break; - } - - __set_current_state(intr ? - TASK_INTERRUPTIBLE : - TASK_UNINTERRUPTIBLE); - - if (intr && signal_pending(current)) { - ret = -ERESTARTSYS; - break; - } - } - - __set_current_state(TASK_RUNNING); - return ret; -} - int -nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr) +nouveau_fence_wait(struct nouveau_fence *fence, bool intr) { long ret; - if (!lazy) - return nouveau_fence_wait_busy(fence, intr); - ret = dma_fence_wait_timeout(&fence->base, intr, 15 * HZ); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h index bc13110bdfa4..88213014b675 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.h +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h @@ -23,7 +23,7 @@ void nouveau_fence_unref(struct nouveau_fence **); int nouveau_fence_emit(struct nouveau_fence *); bool nouveau_fence_done(struct nouveau_fence *); -int nouveau_fence_wait(struct nouveau_fence *, bool lazy, bool intr); +int nouveau_fence_wait(struct nouveau_fence *, bool intr); int nouveau_fence_sync(struct nouveau_bo *, struct nouveau_channel *, bool exclusive, bool intr); struct nouveau_fence_chan { diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 49c2bcbef129..f715e381da69 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -928,7 +928,7 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data, } if (sync) { - if (!(ret = nouveau_fence_wait(fence, false, false))) { + if (!(ret = nouveau_fence_wait(fence, false))) { if ((ret = dma_fence_get_status(&fence->base)) == 1) ret = 0; }