Message ID | 1302945465-32115-19-git-send-email-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Sat, Apr 16, 2011 at 10:17:42AM +0100, Chris Wilson wrote: > As we can make use of the ability to insert semaphores to serialise > accessing buffers between ring elsewhere, separate out the function from > the execbuffer code and make it generic. Perhaps add a small note somewhere that move_to_ring now does the right thing when to == NULL (falling back to wait_rendering). I've hunted around a bit for that ... Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
On Sat, 16 Apr 2011 15:54:56 +0200, Daniel Vetter <daniel@ffwll.ch> wrote: > On Sat, Apr 16, 2011 at 10:17:42AM +0100, Chris Wilson wrote: > > As we can make use of the ability to insert semaphores to serialise > > accessing buffers between ring elsewhere, separate out the function from > > the execbuffer code and make it generic. > > Perhaps add a small note somewhere that move_to_ring now does the right > thing when to == NULL (falling back to wait_rendering). I've hunted around > a bit for that ... /** * Serialise an object between rings: wait for it to complete on the first * ring, before it can be used on the next. * * If the object is staying on the same ring, this is a no-op. * * If the object is not currently on a ring, this is a no-op. * * If the object is moving off a ring (i.e. to == NULL), then we wait for * rendering to complete entirely. * * The interesting case is when we move between two different rings. On * pre-SandyBridge hw, we have no choice but to wait until rendering has * finished. SandyBridge, however introduces the GPU semaphore which we * can use to cause one ring to wait upon the signal of another - avoiding * the CPU stall. * * We assume that the caller has emitted all required flushes. */ -Chris
On Sat, Apr 16, 2011 at 03:18:46PM +0100, Chris Wilson wrote: > On Sat, 16 Apr 2011 15:54:56 +0200, Daniel Vetter <daniel@ffwll.ch> wrote: > > On Sat, Apr 16, 2011 at 10:17:42AM +0100, Chris Wilson wrote: > > > As we can make use of the ability to insert semaphores to serialise > > > accessing buffers between ring elsewhere, separate out the function from > > > the execbuffer code and make it generic. > > > > Perhaps add a small note somewhere that move_to_ring now does the right > > thing when to == NULL (falling back to wait_rendering). I've hunted around > > a bit for that ... > > /** > * Serialise an object between rings: wait for it to complete on the first > * ring, before it can be used on the next. > * > * If the object is staying on the same ring, this is a no-op. > * > * If the object is not currently on a ring, this is a no-op. > * > * If the object is moving off a ring (i.e. to == NULL), then we wait for > * rendering to complete entirely. > * > * The interesting case is when we move between two different rings. On > * pre-SandyBridge hw, we have no choice but to wait until rendering has > * finished. SandyBridge, however introduces the GPU semaphore which we > * can use to cause one ring to wait upon the signal of another - avoiding > * the CPU stall. > * > * We assume that the caller has emitted all required flushes. > */ Perfect! -Daniel
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 76e111c..30fbf3b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -940,6 +940,7 @@ enum intel_chip_family { #define HAS_BSD(dev) (INTEL_INFO(dev)->has_bsd_ring) #define HAS_BLT(dev) (INTEL_INFO(dev)->has_blt_ring) #define I915_NEED_GFX_HWS(dev) (INTEL_INFO(dev)->need_gfx_hws) +#define HAS_GPU_SEMAPHORES(dev) (INTEL_INFO(dev)->gen >= 6 && i915_semaphores) #define HAS_OVERLAY(dev) (INTEL_INFO(dev)->has_overlay) #define OVERLAY_NEEDS_PHYSICAL(dev) (INTEL_INFO(dev)->overlay_needs_physical) @@ -1198,6 +1199,10 @@ void i915_gem_detach_phys_object(struct drm_device *dev, void i915_gem_free_all_phys_object(struct drm_device *dev); void i915_gem_release(struct drm_device *dev, struct drm_file *file); +int __must_check +i915_gem_object_move_to_ring(struct drm_i915_gem_object *obj, + struct intel_ring_buffer *to); + uint32_t i915_gem_get_unfenced_gtt_alignment(struct drm_i915_gem_object *obj); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 58e77d6..801496a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1658,6 +1658,46 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) obj->pages = NULL; } +int +i915_gem_object_move_to_ring(struct drm_i915_gem_object *obj, + struct intel_ring_buffer *to) +{ + struct intel_ring_buffer *from = obj->ring; + u32 seqno; + int ret, idx; + + if (from == NULL || to == from) + return 0; + + if (to == NULL || !HAS_GPU_SEMAPHORES(obj->base.dev)) + return i915_gem_object_wait_rendering(obj); + + idx = intel_ring_sync_index(from, to); + + seqno = obj->last_rendering_seqno; + if (seqno <= from->sync_seqno[idx]) + return 0; + + if (seqno == from->outstanding_lazy_request) { + struct drm_i915_gem_request *request; + + request = kzalloc(sizeof(*request), GFP_KERNEL); + if (request == NULL) + return -ENOMEM; + + ret = i915_add_request(from, NULL, request); + if (ret) { + kfree(request); + return ret; + } + + seqno = request->seqno; + } + + from->sync_seqno[idx] = seqno; + return intel_ring_sync(to, from, seqno - 1); +} + void i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, struct intel_ring_buffer *ring, diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index b6f89f9..316603e 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -756,47 +756,6 @@ i915_gem_execbuffer_flush(struct drm_device *dev, } static int -i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj, - struct intel_ring_buffer *to) -{ - struct intel_ring_buffer *from = obj->ring; - u32 seqno; - int ret, idx; - - if (from == NULL || to == from) - return 0; - - /* XXX gpu semaphores are implicated in various hard hangs on SNB */ - if (INTEL_INFO(obj->base.dev)->gen < 6 || !i915_semaphores) - return i915_gem_object_wait_rendering(obj); - - idx = intel_ring_sync_index(from, to); - - seqno = obj->last_rendering_seqno; - if (seqno <= from->sync_seqno[idx]) - return 0; - - if (seqno == from->outstanding_lazy_request) { - struct drm_i915_gem_request *request; - - request = kzalloc(sizeof(*request), GFP_KERNEL); - if (request == NULL) - return -ENOMEM; - - ret = i915_add_request(from, NULL, request); - if (ret) { - kfree(request); - return ret; - } - - seqno = request->seqno; - } - - from->sync_seqno[idx] = seqno; - return intel_ring_sync(to, from, seqno - 1); -} - -static int i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips) { u32 plane, flip_mask; @@ -857,7 +816,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring, } list_for_each_entry(obj, objects, exec_list) { - ret = i915_gem_execbuffer_sync_rings(obj, ring); + ret = i915_gem_object_move_to_ring(obj, ring); if (ret) return ret; }
As we can make use of the ability to insert semaphores to serialise accessing buffers between ring elsewhere, separate out the function from the execbuffer code and make it generic. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/i915_drv.h | 5 +++ drivers/gpu/drm/i915/i915_gem.c | 40 ++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 43 +--------------------------- 3 files changed, 46 insertions(+), 42 deletions(-)