From patchwork Thu Sep 4 11:43:19 2014 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Maarten Lankhorst X-Patchwork-Id: 4844761 Return-Path: X-Original-To: patchwork-dri-devel@patchwork.kernel.org Delivered-To: patchwork-parsemail@patchwork2.web.kernel.org Received: from mail.kernel.org (mail.kernel.org [198.145.19.201]) by patchwork2.web.kernel.org (Postfix) with ESMTP id A7FD0C0338 for ; Thu, 4 Sep 2014 11:43:27 +0000 (UTC) Received: from mail.kernel.org (localhost [127.0.0.1]) by mail.kernel.org (Postfix) with ESMTP id 9F11220265 for ; Thu, 4 Sep 2014 11:43:25 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) by mail.kernel.org (Postfix) with ESMTP id 57FD120220 for ; Thu, 4 Sep 2014 11:43:23 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 8A0656E72F; Thu, 4 Sep 2014 04:43:22 -0700 (PDT) X-Original-To: dri-devel@lists.freedesktop.org Delivered-To: dri-devel@lists.freedesktop.org Received: from youngberry.canonical.com (youngberry.canonical.com [91.189.89.112]) by gabe.freedesktop.org (Postfix) with ESMTP id 4FA206E690 for ; Thu, 4 Sep 2014 04:43:20 -0700 (PDT) Received: from 5ed49945.cm-7-5c.dynamic.ziggo.nl ([94.212.153.69] helo=[192.168.1.128]) by youngberry.canonical.com with esmtpsa (TLS1.0:DHE_RSA_AES_128_CBC_SHA1:16) (Exim 4.71) (envelope-from ) id 1XPVRf-0008Tp-Oe for dri-devel@lists.freedesktop.org; Thu, 04 Sep 2014 11:43:19 +0000 Message-ID: <54085057.6070904@canonical.com> Date: Thu, 04 Sep 2014 13:43:19 +0200 From: Maarten Lankhorst User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Thunderbird/31.0 MIME-Version: 1.0 To: "dri-devel@lists.freedesktop.org" Subject: [PATCH 7/7] drm/nouveau: allow asynchronous waiting using gart fences References: <54084F0E.9020500@canonical.com> In-Reply-To: <54084F0E.9020500@canonical.com> X-BeenThere: dri-devel@lists.freedesktop.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: Direct Rendering Infrastructure - Development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dri-devel-bounces@lists.freedesktop.org Sender: "dri-devel" X-Spam-Status: No, score=-5.8 required=5.0 tests=BAYES_00, RCVD_IN_DNSWL_MED, RP_MATCHES_RCVD, UNPARSEABLE_RELAY autolearn=unavailable version=3.3.1 X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on mail.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP This requires allocating a fence sooner to annotate any cross-dev fences, and making sure that enough memory is available before emitting the fence. The current seqno is written to the GART bo on completion, and a list of finished fences is kept to allow arbitrary depth. Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/nouveau/nouveau_bo.c | 28 ++-- drivers/gpu/drm/nouveau/nouveau_chan.c | 6 +- drivers/gpu/drm/nouveau/nouveau_display.c | 45 ++++--- drivers/gpu/drm/nouveau/nouveau_fence.c | 212 ++++++++++++++++++++++++++---- drivers/gpu/drm/nouveau/nouveau_fence.h | 29 ++-- drivers/gpu/drm/nouveau/nouveau_gem.c | 25 ++-- drivers/gpu/drm/nouveau/nv04_fence.c | 9 +- drivers/gpu/drm/nouveau/nv10_fence.c | 9 +- drivers/gpu/drm/nouveau/nv84_fence.c | 31 +++-- drivers/gpu/drm/nouveau/nvc0_fence.c | 4 +- 10 files changed, 305 insertions(+), 93 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index f89b4a7c93fe..24c941927926 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -970,21 +970,21 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr, } mutex_lock_nested(&cli->mutex, SINGLE_DEPTH_NESTING); - ret = nouveau_fence_sync(nouveau_bo(bo), chan, true); - if (ret == 0) { + ret = nouveau_fence_new(chan, &fence); + if (ret) + goto out; + + ret = nouveau_fence_sync(nouveau_bo(bo), fence, true); + if (ret == 0) ret = drm->ttm.move(chan, bo, &bo->mem, new_mem); - if (ret == 0) { - ret = nouveau_fence_new(chan, false, &fence); - if (ret == 0) { - ret = ttm_bo_move_accel_cleanup(bo, - &fence->base, - evict, - no_wait_gpu, - new_mem); - nouveau_fence_unref(&fence); - } - } - } + if (ret == 0) + ret = nouveau_fence_emit(fence); + if (ret == 0) + ret = ttm_bo_move_accel_cleanup(bo, &fence->base, evict, + no_wait_gpu, new_mem); + nouveau_fence_unref(&fence); + +out: mutex_unlock(&cli->mutex); return ret; } diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c index d639750379d6..1e5c76dfed3a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.c +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c @@ -46,9 +46,11 @@ nouveau_channel_idle(struct nouveau_channel *chan) struct nouveau_fence *fence = NULL; int ret; - ret = nouveau_fence_new(chan, false, &fence); + ret = nouveau_fence_new(chan, &fence); if (!ret) { - ret = nouveau_fence_wait(fence, false, false); + ret = nouveau_fence_emit(fence); + if (!ret) + ret = nouveau_fence_wait(fence, false, false); nouveau_fence_unref(&fence); } diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index a9ec525c0994..adbf870686aa 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -26,6 +26,7 @@ #include #include +#include #include @@ -36,7 +37,6 @@ #include "nouveau_gem.h" #include "nouveau_connector.h" #include "nv50_display.h" - #include "nouveau_fence.h" #include @@ -644,7 +644,7 @@ nouveau_page_flip_emit(struct nouveau_channel *chan, struct nouveau_bo *old_bo, struct nouveau_bo *new_bo, struct nouveau_page_flip_state *s, - struct nouveau_fence **pfence) + struct nouveau_fence *fence) { struct nouveau_fence_chan *fctx = chan->fence; struct nouveau_drm *drm = chan->drm; @@ -657,11 +657,6 @@ nouveau_page_flip_emit(struct nouveau_channel *chan, list_add_tail(&s->head, &fctx->flip); spin_unlock_irqrestore(&dev->event_lock, flags); - /* Synchronize with the old framebuffer */ - ret = nouveau_fence_sync(old_bo, chan, false); - if (ret) - goto fail; - /* Emit the pageflip */ ret = RING_SPACE(chan, 2); if (ret) @@ -674,7 +669,7 @@ nouveau_page_flip_emit(struct nouveau_channel *chan, OUT_RING (chan, 0x00000000); FIRE_RING (chan); - ret = nouveau_fence_new(chan, false, pfence); + ret = nouveau_fence_emit(fence); if (ret) goto fail; @@ -700,6 +695,12 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, struct nouveau_cli *cli; struct nouveau_fence *fence; int ret; + struct ttm_validate_buffer resv[2] = { + { .bo = &old_bo->bo }, + { .bo = &new_bo->bo }, + }; + struct ww_acquire_ctx ticket; + LIST_HEAD(res); chan = drm->channel; if (!chan) @@ -714,28 +715,31 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, ret = nouveau_bo_pin(new_bo, TTM_PL_FLAG_VRAM); if (ret) goto fail_free; + list_add(&resv[1].head, &res); } + list_add(&resv[0].head, &res); mutex_lock(&cli->mutex); - ret = ttm_bo_reserve(&new_bo->bo, true, false, false, NULL); + ret = nouveau_fence_new(chan, &fence); if (ret) goto fail_unpin; - /* synchronise rendering channel with the kernel's channel */ - ret = nouveau_fence_sync(new_bo, chan, false); - if (ret) { - ttm_bo_unreserve(&new_bo->bo); + ret = ttm_eu_reserve_buffers(&ticket, &res, true); + if (ret) goto fail_unpin; - } if (new_bo != old_bo) { - ttm_bo_unreserve(&new_bo->bo); - - ret = ttm_bo_reserve(&old_bo->bo, true, false, false, NULL); + /* synchronise rendering channel with the kernel's channel */ + ret = nouveau_fence_sync(new_bo, fence, false); if (ret) - goto fail_unpin; + goto fail_unreserve; } + /* Synchronize with the old framebuffer */ + ret = nouveau_fence_sync(old_bo, fence, false); + if (ret) + goto fail_unreserve; + /* Initialize a page flip struct */ *s = (struct nouveau_page_flip_state) { { }, event, nouveau_crtc(crtc)->index, @@ -772,7 +776,7 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, nouveau_bo_ref(new_bo, &dispnv04->image[head]); } - ret = nouveau_page_flip_emit(chan, old_bo, new_bo, s, &fence); + ret = nouveau_page_flip_emit(chan, old_bo, new_bo, s, fence); if (ret) goto fail_unreserve; mutex_unlock(&cli->mutex); @@ -781,7 +785,7 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, crtc->primary->fb = fb; nouveau_bo_fence(old_bo, fence, false); - ttm_bo_unreserve(&old_bo->bo); + ttm_eu_backoff_reservation(&ticket, &res); if (old_bo != new_bo) nouveau_bo_unpin(old_bo); nouveau_fence_unref(&fence); @@ -792,6 +796,7 @@ fail_unreserve: ttm_bo_unreserve(&old_bo->bo); fail_unpin: mutex_unlock(&cli->mutex); + nouveau_fence_unref(&fence); if (old_bo != new_bo) nouveau_bo_unpin(new_bo); fail_free: diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 574517a396fd..b1a1f0bfbe5a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -143,6 +143,8 @@ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha struct nouveau_fence_priv *priv = (void*)chan->drm->fence; int ret; + spin_lock_init(&fctx->trigger_lock); + INIT_LIST_HEAD(&fctx->triggers); INIT_LIST_HEAD(&fctx->flip); INIT_LIST_HEAD(&fctx->pending); spin_lock_init(&fctx->lock); @@ -218,33 +220,128 @@ err: func(data); } +static void nouveau_fence_cpu_triggered(struct nouveau_fence *fence) +{ + struct nouveau_channel *chan = fence->channel; + struct nouveau_fence_chan *fctx = chan->fence; + u32 any_seq = false, seq = ~0U; + + /* unblock fence, this function is called with irqs disabled */ + kfree(fence->waiters); + fence->waiters = NULL; + + spin_lock(&fctx->trigger_lock); + + /* + * signal all fences for which waiters == NULL until the + * first entry is found for which this is not true. + * + * This allows the wait >= seq op to work correctly on sysmem. + */ + while (!list_empty(&fctx->triggers)) { + struct nouveau_fence *chk = list_entry(fctx->triggers.next, + struct nouveau_fence, + trigger); + + if (chk->waiters) + break; + + any_seq = true; + seq = chk->base.seqno; + + list_del(&chk->trigger); + fence_put(&chk->base); + } + + if (any_seq) + fctx->signal_sysmem(chan, seq); + + spin_unlock(&fctx->trigger_lock); +} + +static void nouveau_fence_cpu_trigger(struct fence *other_fence, + struct fence_cb *fence_cb) +{ + struct nouveau_fence_cb *cb = (struct nouveau_fence_cb*)fence_cb; + struct nouveau_fence *fence = (struct nouveau_fence *)cb->fence; + +#ifdef CONFIG_FENCE_TRACE + int ret = atomic_dec_return(&fence->readers); + + if (ret) + FENCE_TRACE(&fence->base, "triggered from %u#%u, %i remaining\n", + ret, other_fence->context, other_fence->seqno); + else +#else + if (atomic_dec_and_test(&fence->readers)) +#endif + { + FENCE_TRACE(&fence->base, "triggered from %u#%u, starting work\n", + other_fence->context, other_fence->seqno); + + nouveau_fence_cpu_triggered(fence); + } +} + +static void +nouveau_fence_emit_waiters(struct nouveau_fence *fence, + struct nouveau_fence_chan *fctx) +{ + unsigned i, skipped = 0; + + atomic_set(&fence->readers, fence->num_waiters); + + /* add to triggers */ + fence_get(&fence->base); + spin_lock_irq(&fctx->trigger_lock); + list_add_tail(&fence->trigger, &fctx->triggers); + spin_unlock_irq(&fctx->trigger_lock); + + for (i = 0; i < fence->num_waiters; ++i) { + struct fence *other = fence->waiters[i].fence; + + if (other) { + fence->waiters[i].fence = &fence->base; + trace_fence_annotate_wait_on(&fence->base, other); + + FENCE_TRACE(&fence->base, "queued wait on %u#%u\n", + other->context, other->seqno); + + if (!fence_add_callback(other, &fence->waiters[i].base, + nouveau_fence_cpu_trigger)) + continue; + } + skipped++; + } + + if (skipped && atomic_sub_and_test(skipped, &fence->readers)) { + FENCE_TRACE(&fence->base, "No triggers, starting..\n"); + + nouveau_fence_cpu_triggered(fence); + } +} + int -nouveau_fence_emit(struct nouveau_fence *fence, struct nouveau_channel *chan) +nouveau_fence_emit(struct nouveau_fence *fence) { + struct nouveau_channel *chan = fence->channel; struct nouveau_fence_chan *fctx = chan->fence; - struct nouveau_fence_priv *priv = (void*)chan->drm->fence; int ret; - fence->channel = chan; + WARN(fence->head.next, "fence is emitted twice!\n"); fence->timeout = jiffies + (15 * HZ); - if (priv->uevent) - fence_init(&fence->base, &nouveau_fence_ops_uevent, - &fctx->lock, - priv->context_base + chan->chid, ++fctx->sequence); - else - fence_init(&fence->base, &nouveau_fence_ops_legacy, - &fctx->lock, - priv->context_base + chan->chid, ++fctx->sequence); - trace_fence_emit(&fence->base); - ret = fctx->emit(fence); + ret = fctx->emit(fence, false); if (!ret) { fence_get(&fence->base); spin_lock_irq(&fctx->lock); nouveau_fence_update(chan, fctx); list_add_tail(&fence->head, &fctx->pending); spin_unlock_irq(&fctx->lock); + + if (fence->num_waiters) + nouveau_fence_emit_waiters(fence, fctx); } return ret; @@ -345,9 +442,58 @@ nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr) return 0; } +static int nouveau_fence_reserve_waiter(struct nouveau_fence *fence) +{ + int max = 8; + struct nouveau_fence_cb *waiters; + + if (fence->num_waiters + 1 <= fence->max_waiters) + return 0; + + if (fence->max_waiters) + max = fence->max_waiters * 2; + + waiters = krealloc(fence->waiters, max * sizeof(*waiters), GFP_KERNEL); + if (!waiters) + return -ENOMEM; + fence->waiters = waiters; + fence->max_waiters = max; + return 0; +} + +static int nouveau_fence_add_fence_list(struct nouveau_fence *fence, + struct fence *victim) +{ + struct nouveau_fence_cb *empty = NULL; + unsigned i; + int ret; + + for (i = 0; i < fence->num_waiters; ++i) { + struct fence *other = fence->waiters[i].fence; + + if (!other) + empty = &fence->waiters[i]; + else if (other->context == victim->context) { + fence->waiters[i].fence = fence_later(other, victim); + return 0; + } + } + + if (!empty) { + ret = nouveau_fence_reserve_waiter(fence); + if (ret) + return ret; + empty = &fence->waiters[fence->num_waiters++]; + } + + empty->fence = victim; + return 0; +} + int -nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool exclusive) +nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_fence *nvfence, bool exclusive) { + struct nouveau_channel *chan = nvfence->channel; struct nouveau_fence_chan *fctx = chan->fence; struct fence *fence; struct reservation_object *resv = nvbo->bo.resv; @@ -371,6 +517,8 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e f = nouveau_local_fence(fence, chan->drm); if (f) prev = f->channel; + else if (fctx->signal_sysmem) + return nouveau_fence_add_fence_list(nvfence, fence); if (!prev || (prev != chan && (ret = fctx->sync(f, prev, chan)))) ret = fence_wait(fence, true); @@ -390,6 +538,11 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e f = nouveau_local_fence(fence, chan->drm); if (f) prev = f->channel; + else if (fctx->signal_sysmem) { + ret = nouveau_fence_add_fence_list(nvfence, fence); + if (ret) + break; + } if (!prev || (ret = fctx->sync(f, prev, chan))) ret = fence_wait(fence, true); @@ -404,15 +557,22 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e void nouveau_fence_unref(struct nouveau_fence **pfence) { - if (*pfence) - fence_put(&(*pfence)->base); + struct nouveau_fence *fence = *pfence; + + if (!fence) + return; + *pfence = NULL; + fence_put(&fence->base); } int -nouveau_fence_new(struct nouveau_channel *chan, bool sysmem, +nouveau_fence_new(struct nouveau_channel *chan, struct nouveau_fence **pfence) { + struct nouveau_fifo_chan *fifo = (void*)chan->object; + struct nouveau_fence_priv *priv = (void*)chan->drm->fence; + struct nouveau_fence_chan *fctx = chan->fence; struct nouveau_fence *fence; int ret = 0; @@ -423,11 +583,11 @@ nouveau_fence_new(struct nouveau_channel *chan, bool sysmem, if (!fence) return -ENOMEM; - fence->sysmem = sysmem; + fence->channel = chan; - ret = nouveau_fence_emit(fence, chan); - if (ret) - nouveau_fence_unref(&fence); + fence_init(&fence->base, priv->uevent ? &nouveau_fence_ops_uevent : + &nouveau_fence_ops_legacy, &fctx->lock, + priv->context_base + fifo->chid, ++fctx->sequence); *pfence = fence; return ret; @@ -486,13 +646,21 @@ static bool nouveau_fence_no_signaling(struct fence *f) return true; } +static void nouveau_fence_release(struct fence *f) +{ + struct nouveau_fence *fence = from_fence(f); + + kfree(fence->waiters); + fence_free(&fence->base); +} + static const struct fence_ops nouveau_fence_ops_legacy = { .get_driver_name = nouveau_fence_get_get_driver_name, .get_timeline_name = nouveau_fence_get_timeline_name, .enable_signaling = nouveau_fence_no_signaling, .signaled = nouveau_fence_is_signaled, .wait = nouveau_fence_wait_legacy, - .release = NULL + .release = nouveau_fence_release }; static bool nouveau_fence_enable_signaling(struct fence *f) diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h index 986c8135e564..f2a56c940a2c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.h +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h @@ -12,33 +12,41 @@ struct nouveau_fence { struct list_head head; - bool sysmem; - struct nouveau_channel *channel; unsigned long timeout; + + atomic_t readers; + struct list_head trigger; + struct nouveau_fence_cb { + struct fence_cb base; + struct fence *fence; + } *waiters; + int num_waiters, max_waiters; }; -int nouveau_fence_new(struct nouveau_channel *, bool sysmem, +int nouveau_fence_new(struct nouveau_channel *, struct nouveau_fence **); void nouveau_fence_unref(struct nouveau_fence **); -int nouveau_fence_emit(struct nouveau_fence *, struct nouveau_channel *); +int nouveau_fence_emit(struct nouveau_fence *); bool nouveau_fence_done(struct nouveau_fence *); void nouveau_fence_work(struct fence *, void (*)(void *), void *); int nouveau_fence_wait(struct nouveau_fence *, bool lazy, bool intr); -int nouveau_fence_sync(struct nouveau_bo *, struct nouveau_channel *, bool exclusive); +int nouveau_fence_sync(struct nouveau_bo *, struct nouveau_fence *fence, bool exclusive); struct nouveau_fence_chan { spinlock_t lock; struct list_head pending; struct list_head flip; - int (*emit)(struct nouveau_fence *); + spinlock_t trigger_lock; + struct list_head triggers; + + int (*emit)(struct nouveau_fence *, bool); int (*sync)(struct nouveau_fence *, struct nouveau_channel *, struct nouveau_channel *); u32 (*read)(struct nouveau_channel *); - int (*emit32)(struct nouveau_channel *, u64, u32); - int (*sync32)(struct nouveau_channel *, u64, u32); + void (*signal_sysmem)(struct nouveau_channel *, u32 seq); u32 sequence; u32 context; @@ -67,7 +75,7 @@ void nouveau_fence_context_del(struct nouveau_fence_chan *); int nv04_fence_create(struct nouveau_drm *); int nv04_fence_mthd(struct nouveau_channel *, u32, u32, u32); -int nv10_fence_emit(struct nouveau_fence *); +int nv10_fence_emit(struct nouveau_fence *, bool sysmem); int nv17_fence_sync(struct nouveau_fence *, struct nouveau_channel *, struct nouveau_channel *); u32 nv10_fence_read(struct nouveau_channel *); @@ -86,6 +94,9 @@ int nouveau_flip_complete(void *chan); struct nv84_fence_chan { struct nouveau_fence_chan base; + int (*emit32)(struct nouveau_channel *, u64, u32); + int (*sync32)(struct nouveau_channel *, u64, u32); + struct nouveau_vma vma; struct nouveau_vma vma_gart; struct nouveau_vma dispc_vma[4]; diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 1bc4eb33b60f..e6f11a60c453 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -433,7 +433,7 @@ retry: static int validate_list(struct nouveau_channel *chan, struct nouveau_cli *cli, struct list_head *list, struct drm_nouveau_gem_pushbuf_bo *pbbo, - uint64_t user_pbbo_ptr) + uint64_t user_pbbo_ptr, struct nouveau_fence *fence) { struct nouveau_drm *drm = chan->drm; struct drm_nouveau_gem_pushbuf_bo __user *upbbo = @@ -459,7 +459,7 @@ validate_list(struct nouveau_channel *chan, struct nouveau_cli *cli, return ret; } - ret = nouveau_fence_sync(nvbo, chan, !!b->write_domains); + ret = nouveau_fence_sync(nvbo, fence, !!b->write_domains); if (unlikely(ret)) { if (ret != -ERESTARTSYS) NV_PRINTK(error, cli, "fail post-validate sync\n"); @@ -496,7 +496,8 @@ nouveau_gem_pushbuf_validate(struct nouveau_channel *chan, struct drm_file *file_priv, struct drm_nouveau_gem_pushbuf_bo *pbbo, uint64_t user_buffers, int nr_buffers, - struct validate_op *op, int *apply_relocs) + struct validate_op *op, int *apply_relocs, + struct nouveau_fence *fence) { struct nouveau_cli *cli = nouveau_cli(file_priv); int ret; @@ -513,7 +514,7 @@ nouveau_gem_pushbuf_validate(struct nouveau_channel *chan, return ret; } - ret = validate_list(chan, cli, &op->list, pbbo, user_buffers); + ret = validate_list(chan, cli, &op->list, pbbo, user_buffers, fence); if (unlikely(ret < 0)) { if (ret != -ERESTARTSYS) NV_PRINTK(error, cli, "validating bo list\n"); @@ -707,9 +708,14 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data, } } + ret = nouveau_fence_new(chan, &fence); + if (ret) + goto out_prevalid; + /* Validate buffer list */ ret = nouveau_gem_pushbuf_validate(chan, file_priv, bo, req->buffers, - req->nr_buffers, &op, &do_reloc); + req->nr_buffers, &op, &do_reloc, + fence); if (ret) { if (ret != -ERESTARTSYS) NV_PRINTK(error, cli, "validate: %d\n", ret); @@ -793,18 +799,21 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data, } } - ret = nouveau_fence_new(chan, false, &fence); + ret = nouveau_fence_emit(fence); if (ret) { NV_PRINTK(error, cli, "error fencing pushbuf: %d\n", ret); WIND_RING(chan); goto out; } -out: validate_fini(&op, fence, bo); - nouveau_fence_unref(&fence); + +out: + if (ret) + validate_fini(&op, NULL, bo); out_prevalid: + nouveau_fence_unref(&fence); u_free(bo); u_free(push); diff --git a/drivers/gpu/drm/nouveau/nv04_fence.c b/drivers/gpu/drm/nouveau/nv04_fence.c index 4484131d826a..de4d69166a37 100644 --- a/drivers/gpu/drm/nouveau/nv04_fence.c +++ b/drivers/gpu/drm/nouveau/nv04_fence.c @@ -35,10 +35,15 @@ struct nv04_fence_priv { }; static int -nv04_fence_emit(struct nouveau_fence *fence) +nv04_fence_emit(struct nouveau_fence *fence, bool sysmem) { struct nouveau_channel *chan = fence->channel; - int ret = RING_SPACE(chan, 2); + int ret; + + if (sysmem) + return -ENODEV; + + ret = RING_SPACE(chan, 2); if (ret == 0) { BEGIN_NV04(chan, NvSubSw, 0x0150, 1); OUT_RING (chan, fence->base.seqno); diff --git a/drivers/gpu/drm/nouveau/nv10_fence.c b/drivers/gpu/drm/nouveau/nv10_fence.c index 737d066ffc60..1608b0acfe0b 100644 --- a/drivers/gpu/drm/nouveau/nv10_fence.c +++ b/drivers/gpu/drm/nouveau/nv10_fence.c @@ -27,10 +27,15 @@ #include "nv10_fence.h" int -nv10_fence_emit(struct nouveau_fence *fence) +nv10_fence_emit(struct nouveau_fence *fence, bool sysmem) { struct nouveau_channel *chan = fence->channel; - int ret = RING_SPACE(chan, 2); + int ret; + + if (sysmem) + return -ENODEV; + + ret = RING_SPACE(chan, 2); if (ret == 0) { BEGIN_NV04(chan, 0, NV10_SUBCHAN_REF_CNT, 1); OUT_RING (chan, fence->base.seqno); diff --git a/drivers/gpu/drm/nouveau/nv84_fence.c b/drivers/gpu/drm/nouveau/nv84_fence.c index 7b372a68aa4e..84fc0c3c5c9a 100644 --- a/drivers/gpu/drm/nouveau/nv84_fence.c +++ b/drivers/gpu/drm/nouveau/nv84_fence.c @@ -71,18 +71,18 @@ nv84_fence_sync32(struct nouveau_channel *chan, u64 virtual, u32 sequence) } static int -nv84_fence_emit(struct nouveau_fence *fence) +nv84_fence_emit(struct nouveau_fence *fence, bool sysmem) { struct nouveau_channel *chan = fence->channel; struct nv84_fence_chan *fctx = chan->fence; u64 addr = chan->chid * 16; - if (fence->sysmem) + if (sysmem) addr += fctx->vma_gart.offset; else addr += fctx->vma.offset; - return fctx->base.emit32(chan, addr, fence->base.seqno); + return fctx->emit32(chan, addr, fence->base.seqno); } static int @@ -92,12 +92,9 @@ nv84_fence_sync(struct nouveau_fence *fence, struct nv84_fence_chan *fctx = chan->fence; u64 addr = prev->chid * 16; - if (fence->sysmem) - addr += fctx->vma_gart.offset; - else - addr += fctx->vma.offset; + addr += fctx->vma.offset; - return fctx->base.sync32(chan, addr, fence->base.seqno); + return fctx->sync32(chan, addr, fence->base.seqno); } static u32 @@ -108,6 +105,15 @@ nv84_fence_read(struct nouveau_channel *chan) } static void +nv84_fence_signal_sysmem(struct nouveau_channel *chan, u32 seq) +{ + struct nouveau_fifo_chan *fifo = (void *)chan->object; + struct nv84_fence_priv *priv = chan->drm->fence; + + return nouveau_bo_wr32(priv->bo_gart, fifo->chid * 16/4, seq); +} + +static void nv84_fence_context_del(struct nouveau_channel *chan) { struct drm_device *dev = chan->drm->dev; @@ -140,12 +146,15 @@ nv84_fence_context_new(struct nouveau_channel *chan) return -ENOMEM; nouveau_fence_context_new(chan, &fctx->base); + fctx->base.emit = nv84_fence_emit; fctx->base.sync = nv84_fence_sync; fctx->base.read = nv84_fence_read; - fctx->base.emit32 = nv84_fence_emit32; - fctx->base.sync32 = nv84_fence_sync32; + fctx->base.signal_sysmem = nv84_fence_signal_sysmem; fctx->base.sequence = nv84_fence_read(chan); + nouveau_bo_wr32(priv->bo_gart, chan->chid * 16/4, fctx->base.sequence); + fctx->emit32 = nv84_fence_emit32; + fctx->sync32 = nv84_fence_sync32; ret = nouveau_bo_vma_add(priv->bo, cli->vm, &fctx->vma); if (ret == 0) { @@ -159,8 +168,6 @@ nv84_fence_context_new(struct nouveau_channel *chan) ret = nouveau_bo_vma_add(bo, cli->vm, &fctx->dispc_vma[i]); } - nouveau_bo_wr32(priv->bo, chan->chid * 16/4, 0x00000000); - if (ret) nv84_fence_context_del(chan); return ret; diff --git a/drivers/gpu/drm/nouveau/nvc0_fence.c b/drivers/gpu/drm/nouveau/nvc0_fence.c index becf19abda2d..612689a5e35a 100644 --- a/drivers/gpu/drm/nouveau/nvc0_fence.c +++ b/drivers/gpu/drm/nouveau/nvc0_fence.c @@ -66,8 +66,8 @@ nvc0_fence_context_new(struct nouveau_channel *chan) int ret = nv84_fence_context_new(chan); if (ret == 0) { struct nv84_fence_chan *fctx = chan->fence; - fctx->base.emit32 = nvc0_fence_emit32; - fctx->base.sync32 = nvc0_fence_sync32; + fctx->emit32 = nvc0_fence_emit32; + fctx->sync32 = nvc0_fence_sync32; } return ret; }