| Message ID | 1419000065-7533-1-git-send-email-John.C.Harrison@Intel.com (mailing list archive) |
| --- | --- |
| State | New, archived |
Doh! The subject was meant to be RFC not PATCH.

On 19/12/2014 14:41, John.C.Harrison@Intel.com wrote:
> From: John Harrison <John.C.Harrison@Intel.com>
>
> The outstanding lazy request mechanism does not really work well with
> a GPU scheduler. The scheduler expects each work packet, i.e. request
> structure, to be a complete entity and to belong to one and only one
> submitter. Whereas the whole lazy mechanism allows lots of work from
> lots of different places to all be lumped together into a single
> request. It also means that work is floating around in the system
> unowned and untracked at various random points in time. This all
> causes headaches for the scheduler.
>
> This patch removes the need for the outstanding lazy request. It
> converts all functions which would otherwise be relying on the OLR to
> explicitly manage the request. Either by allocating, passing and
> submitting the request if they are the top level owner. Or by simply
> taking a request in as a parameter rather than pulling it out of the
> magic global variable if they are a client. The OLR itself is left in
> along with a bunch of sanity check asserts that it matches the request
> being passed in as a parameter. However, it should now be safe to
> remove completely.
>
> Note that this patch is not intended as a final, shipping, isn't it
> gorgeous, end product. It is merely a quick hack that I went through
> as being the simplest way to actually work out what the real sequence
> of events and the real ownership of work is in certain circumstances.
> Most particularly to do with display and overlay work. However, I
> would like to get agreement that it is a good direction to go in and
> that removing the OLR would be a good thing. Or, to put it another
> way, is it worth me trying to break this patch into a set of
> manageable items or do I just abandon it and give up?
>
> Note also that the patch is based on a tree including the scheduler
> prep-work patches posted earlier. So it will not apply to a clean
> nightly tree.
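For readers unfamiliar with the ownership model the commit message describes, the sketch below is a minimal, self-contained illustration of the pattern, not i915 code: all names (`toy_request`, `toy_emit_work`, and so on) are invented for this example. The idea is simply that the top-level caller allocates the request, every helper takes it as an explicit parameter, and the same caller submits it, so no work is ever left floating in a per-ring "lazy" slot.

```c
/*
 * Toy model of the explicit-request ownership pattern described above.
 * This is an illustrative sketch only; none of these names exist in the
 * i915 driver.
 */
#include <stdio.h>
#include <stdlib.h>

struct toy_request {
	int seqno;	/* identifies this unit of work */
	int emitted;	/* commands written on behalf of this request */
};

/* Client-style helper: takes the request as a parameter, never creates one. */
static void toy_emit_work(struct toy_request *req, const char *what)
{
	req->emitted++;
	printf("req %d: emit %s\n", req->seqno, what);
}

/* Top-level owner: allocates, passes around and submits exactly one request. */
static int toy_do_operation(void)
{
	static int next_seqno = 1;
	struct toy_request *req = calloc(1, sizeof(*req));

	if (!req)
		return -1;
	req->seqno = next_seqno++;

	toy_emit_work(req, "context switch");
	toy_emit_work(req, "batch buffer");

	printf("req %d: submitted with %d commands\n", req->seqno, req->emitted);
	free(req);	/* stands in for add_request + unreference */
	return 0;
}

int main(void)
{
	return toy_do_operation();
}
```

In the actual patch the same shape shows up in places such as i915_gem_init_hw() and the execbuffer path, where dev_priv->gt.alloc_request() now hands back the request, which is then passed down to the helpers and finally given to i915_add_request(), with the OLR kept only for sanity-check asserts.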
> > Signed-off-by: John Harrison <John.C.Harrison@Intel.com> > --- > drivers/gpu/drm/i915/i915_drv.h | 29 ++-- > drivers/gpu/drm/i915/i915_gem.c | 182 ++++++++++++-------- > drivers/gpu/drm/i915/i915_gem_context.c | 69 +++----- > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 62 +++---- > drivers/gpu/drm/i915/i915_gem_gtt.c | 64 ++++---- > drivers/gpu/drm/i915/i915_gem_gtt.h | 3 +- > drivers/gpu/drm/i915/i915_gem_render_state.c | 10 +- > drivers/gpu/drm/i915/i915_gem_render_state.h | 2 +- > drivers/gpu/drm/i915/intel_display.c | 68 ++++---- > drivers/gpu/drm/i915/intel_lrc.c | 145 +++++++++------- > drivers/gpu/drm/i915/intel_lrc.h | 8 +- > drivers/gpu/drm/i915/intel_overlay.c | 58 ++++--- > drivers/gpu/drm/i915/intel_pm.c | 33 ++-- > drivers/gpu/drm/i915/intel_ringbuffer.c | 228 ++++++++++++++------------ > drivers/gpu/drm/i915/intel_ringbuffer.h | 38 ++--- > 15 files changed, 553 insertions(+), 446 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 511f55f..7b4309e 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -513,7 +513,7 @@ struct drm_i915_display_funcs { > int (*queue_flip)(struct drm_device *dev, struct drm_crtc *crtc, > struct drm_framebuffer *fb, > struct drm_i915_gem_object *obj, > - struct intel_engine_cs *ring, > + struct drm_i915_gem_request *req, > uint32_t flags); > void (*update_primary_plane)(struct drm_crtc *crtc, > struct drm_framebuffer *fb, > @@ -1796,7 +1796,8 @@ struct drm_i915_private { > /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */ > struct { > int (*alloc_request)(struct intel_engine_cs *ring, > - struct intel_context *ctx); > + struct intel_context *ctx, > + struct drm_i915_gem_request **req_out); > int (*do_execbuf)(struct i915_execbuffer_params *params, > struct drm_i915_gem_execbuffer2 *args, > struct list_head *vmas); > @@ -2511,10 +2512,10 @@ int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, > int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, > struct drm_file *file_priv); > void i915_gem_execbuffer_move_to_active(struct list_head *vmas, > - struct intel_engine_cs *ring); > + struct drm_i915_gem_request *req); > void i915_gem_execbuffer_retire_commands(struct drm_device *dev, > struct drm_file *file, > - struct intel_engine_cs *ring, > + struct drm_i915_gem_request *req, > struct drm_i915_gem_object *obj); > void i915_gem_execbuff_release_batch_obj(struct drm_i915_gem_object *batch_obj); > int i915_gem_ringbuffer_submission(struct i915_execbuffer_params *qe, > @@ -2609,9 +2610,9 @@ int __must_check __i915_mutex_lock_interruptible(struct drm_device *dev, const c > int __must_check i915_mutex_lock_interruptible(struct drm_device *dev); > #endif > int i915_gem_object_sync(struct drm_i915_gem_object *obj, > - struct intel_engine_cs *to, bool add_request); > + struct drm_i915_gem_request *to_req); > void i915_vma_move_to_active(struct i915_vma *vma, > - struct intel_engine_cs *ring); > + struct drm_i915_gem_request *req); > int i915_gem_dumb_create(struct drm_file *file_priv, > struct drm_device *dev, > struct drm_mode_create_dumb *args); > @@ -2678,19 +2679,19 @@ int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj); > int __must_check i915_gem_init(struct drm_device *dev); > int i915_gem_init_rings(struct drm_device *dev); > int __must_check i915_gem_init_hw(struct drm_device *dev); > -int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice); > +int i915_gem_l3_remap(struct 
drm_i915_gem_request *req, int slice); > void i915_gem_init_swizzling(struct drm_device *dev); > void i915_gem_cleanup_ringbuffer(struct drm_device *dev); > int __must_check i915_gpu_idle(struct drm_device *dev); > int __must_check i915_gem_suspend(struct drm_device *dev); > -int __i915_add_request(struct intel_engine_cs *ring, > +int __i915_add_request(struct drm_i915_gem_request *req, > struct drm_file *file, > struct drm_i915_gem_object *batch_obj, > bool flush_caches); > -#define i915_add_request(ring) \ > - __i915_add_request(ring, NULL, NULL, true) > -#define i915_add_request_no_flush(ring) \ > - __i915_add_request(ring, NULL, NULL, false) > +#define i915_add_request(req) \ > + __i915_add_request(req, NULL, NULL, true) > +#define i915_add_request_no_flush(req) \ > + __i915_add_request(req, NULL, NULL, false) > int __i915_wait_request(struct drm_i915_gem_request *req, > unsigned reset_counter, > bool interruptible, > @@ -2810,9 +2811,9 @@ int __must_check i915_gem_context_init(struct drm_device *dev); > void i915_gem_context_fini(struct drm_device *dev); > void i915_gem_context_reset(struct drm_device *dev); > int i915_gem_context_open(struct drm_device *dev, struct drm_file *file); > -int i915_gem_context_enable(struct drm_i915_private *dev_priv); > +int i915_gem_context_enable(struct drm_i915_gem_request *req); > void i915_gem_context_close(struct drm_device *dev, struct drm_file *file); > -int i915_switch_context(struct intel_engine_cs *ring, > +int i915_switch_context(struct drm_i915_gem_request *req, > struct intel_context *to); > struct intel_context * > i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id); > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 1d2cbfb..dbfb4e5 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -1178,7 +1178,7 @@ i915_gem_check_olr(struct drm_i915_gem_request *req) > > ret = 0; > if (req == req->ring->outstanding_lazy_request) > - ret = i915_add_request(req->ring); > + ret = i915_add_request(req); > > return ret; > } > @@ -2294,17 +2294,16 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj) > > static void > i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, > - struct intel_engine_cs *ring) > + struct drm_i915_gem_request *req) > { > - struct drm_i915_gem_request *req; > - struct intel_engine_cs *old_ring; > + struct intel_engine_cs *new_ring, *old_ring; > > - BUG_ON(ring == NULL); > + BUG_ON(req == NULL); > > - req = intel_ring_get_request(ring); > + new_ring = i915_gem_request_get_ring(req); > old_ring = i915_gem_request_get_ring(obj->last_read_req); > > - if (old_ring != ring && obj->last_write_req) { > + if (old_ring != new_ring && obj->last_write_req) { > /* Keep the request relative to the current ring */ > i915_gem_request_assign(&obj->last_write_req, req); > } > @@ -2315,17 +2314,17 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, > obj->active = 1; > } > > - list_move_tail(&obj->ring_list, &ring->active_list); > + list_move_tail(&obj->ring_list, &new_ring->active_list); > > - //printk(KERN_INFO "%s:%d> <%s> obj = %p, last_read_req <= 0x%p\n", __func__, __LINE__, ring->name, obj, req); > + //printk(KERN_INFO "%s:%d> <%s> obj = %p, last_read_req <= 0x%p\n", __func__, __LINE__, new_ring->name, obj, req); > i915_gem_request_assign(&obj->last_read_req, req); > } > > void i915_vma_move_to_active(struct i915_vma *vma, > - struct intel_engine_cs *ring) > + struct drm_i915_gem_request *req) > { > 
list_move_tail(&vma->mm_list, &vma->vm->active_list); > - return i915_gem_object_move_to_active(vma->obj, ring); > + return i915_gem_object_move_to_active(vma->obj, req); > } > > static void > @@ -2440,26 +2439,35 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) > return 0; > } > > -int __i915_add_request(struct intel_engine_cs *ring, > +int __i915_add_request(struct drm_i915_gem_request *request, > struct drm_file *file, > struct drm_i915_gem_object *obj, > bool flush_caches) > { > - struct drm_i915_private *dev_priv = ring->dev->dev_private; > - struct drm_i915_gem_request *request; > + struct intel_engine_cs *ring; > + struct drm_i915_private *dev_priv; > struct intel_ringbuffer *ringbuf; > u32 request_ring_position, request_start; > int ret; > > - request = ring->outstanding_lazy_request; > + /*printk( KERN_ERR "<%s> request %c %d:%d, OLR %c %d:%d\n", > + request ? request->ring->name : "???", > + request ? '=' : '?', > + request ? request->uniq : -1, > + request ? request->seqno : 0, > + request->ring->outstanding_lazy_request ? '=' : '?', > + request->ring->outstanding_lazy_request ? request->ring->outstanding_lazy_request->uniq : -1, > + request->ring->outstanding_lazy_request ? request->ring->outstanding_lazy_request->seqno : 0);*/ > + //dump_stack(); > + > if (WARN_ON(request == NULL)) > return -ENOMEM; > > - if (i915.enable_execlists) { > - struct intel_context *ctx = request->ctx; > - ringbuf = ctx->engine[ring->id].ringbuf; > - } else > - ringbuf = ring->buffer; > + ring = request->ring; > + dev_priv = ring->dev->dev_private; > + ringbuf = request->ringbuf; > + > + WARN_ON(request != ring->outstanding_lazy_request); > > request_start = intel_ring_get_tail(ringbuf); > /* > @@ -2471,9 +2479,9 @@ int __i915_add_request(struct intel_engine_cs *ring, > */ > if (flush_caches) { > if (i915.enable_execlists) > - ret = logical_ring_flush_all_caches(ringbuf); > + ret = logical_ring_flush_all_caches(request); > else > - ret = intel_ring_flush_all_caches(ring); > + ret = intel_ring_flush_all_caches(request); > if (ret) > return ret; > } > @@ -2488,9 +2496,9 @@ int __i915_add_request(struct intel_engine_cs *ring, > request_ring_position = intel_ring_get_tail(ringbuf); > > if (i915.enable_execlists) > - ret = ring->emit_request(ringbuf); > + ret = ring->emit_request(request); > else > - ret = ring->add_request(ring); > + ret = ring->add_request(request); > if (ret) > return ret; > > @@ -2504,7 +2512,8 @@ int __i915_add_request(struct intel_engine_cs *ring, > * inactive_list and lose its active reference. Hence we do not need > * to explicitly hold another reference here. > */ > - request->batch_obj = obj; > + if (obj) > + request->batch_obj = obj; > > if (!i915.enable_execlists) { > /* Hold a reference to the current context so that we can inspect > @@ -2744,6 +2753,7 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, > #endif > > /* This may not have been flushed before the reset, so clean it now */ > + WARN_ON(ring->outstanding_lazy_request); > i915_gem_request_assign(&ring->outstanding_lazy_request, NULL); > } > > @@ -3114,8 +3124,6 @@ out: > * > * @obj: object which may be in use on another ring. > * @to: ring we wish to use the object on. May be NULL. > - * @add_request: do we need to add a request to track operations > - * submitted on ring with sync_to function > * > * This code is meant to abstract object synchronization with the GPU. 
> * Calling with NULL implies synchronizing the object with the CPU > @@ -3125,8 +3133,9 @@ out: > */ > int > i915_gem_object_sync(struct drm_i915_gem_object *obj, > - struct intel_engine_cs *to, bool add_request) > + struct drm_i915_gem_request *to_req) > { > + struct intel_engine_cs *to = to_req->ring; > struct intel_engine_cs *from; > u32 seqno; > int ret, idx; > @@ -3152,7 +3161,7 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, > return ret; > > trace_i915_gem_ring_sync_to(from, to, obj->last_read_req); > - ret = to->semaphore.sync_to(to, from, seqno); > + ret = to->semaphore.sync_to(to_req, from, seqno); > if (!ret) { > /* We use last_read_req because sync_to() > * might have just caused seqno wrap under > @@ -3160,8 +3169,6 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, > */ > from->semaphore.sync_seqno[idx] = > i915_gem_request_get_seqno(obj->last_read_req); > - if (add_request) > - i915_add_request_no_flush(to); > } > > return ret; > @@ -3266,18 +3273,23 @@ int i915_gpu_idle(struct drm_device *dev) > /* Flush everything onto the inactive list. */ > for_each_ring(ring, dev_priv, i) { > if (!i915.enable_execlists) { > - ret = i915_switch_context(ring, ring->default_context); > + struct drm_i915_gem_request *req; > + > + ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req); > if (ret) > return ret; > - } > > - /* Make sure the context switch (if one actually happened) > - * gets wrapped up and finished rather than hanging around > - * and confusing things later. */ > - if (ring->outstanding_lazy_request) { > - ret = i915_add_request(ring); > - if (ret) > + ret = i915_switch_context(req, ring->default_context); > + if (ret) { > + i915_gem_request_unreference(req); > return ret; > + } > + > + ret = i915_add_request_no_flush(req); > + if (ret) { > + i915_gem_request_unreference(req); > + return ret; > + } > } > > ret = intel_ring_idle(ring); > @@ -4099,8 +4111,19 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, > bool was_pin_display; > int ret; > > - if (pipelined != i915_gem_request_get_ring(obj->last_read_req)) { > - ret = i915_gem_object_sync(obj, pipelined, true); > + if (pipelined && (pipelined != i915_gem_request_get_ring(obj->last_read_req))) { > + struct drm_i915_private *dev_priv = pipelined->dev->dev_private; > + struct drm_i915_gem_request *req; > + > + ret = dev_priv->gt.alloc_request(pipelined, pipelined->default_context, &req); > + if (ret) > + return ret; > + > + ret = i915_gem_object_sync(obj, req); > + if (ret) > + return ret; > + > + ret = i915_add_request_no_flush(req); > if (ret) > return ret; > } > @@ -4771,8 +4794,9 @@ err: > return ret; > } > > -int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice) > +int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice) > { > + struct intel_engine_cs *ring = req->ring; > struct drm_device *dev = ring->dev; > struct drm_i915_private *dev_priv = dev->dev_private; > u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200); > @@ -4782,7 +4806,7 @@ int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice) > if (!HAS_L3_DPF(dev) || !remap_info) > return 0; > > - ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3); > + ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3); > if (ret) > return ret; > > @@ -4962,37 +4986,67 @@ i915_gem_init_hw(struct drm_device *dev) > */ > init_unused_rings(dev); > > + BUG_ON(!dev_priv->ring[RCS].default_context); > + > + ret = i915_ppgtt_init_hw(dev); > + if (ret) { > + DRM_ERROR("PPGTT enable failed %d\n", ret); > + 
i915_gem_cleanup_ringbuffer(dev); > + return ret; > + } > + > for_each_ring(ring, dev_priv, i) { > + struct drm_i915_gem_request *req; > + > ret = ring->init_hw(ring); > if (ret) > return ret; > - } > > - for (i = 0; i < NUM_L3_SLICES(dev); i++) > - i915_gem_l3_remap(&dev_priv->ring[RCS], i); > + if (!ring->default_context) > + continue; > > - /* > - * XXX: Contexts should only be initialized once. Doing a switch to the > - * default context switch however is something we'd like to do after > - * reset or thaw (the latter may not actually be necessary for HW, but > - * goes with our code better). Context switching requires rings (for > - * the do_switch), but before enabling PPGTT. So don't move this. > - */ > - ret = i915_gem_context_enable(dev_priv); > - if (ret && ret != -EIO) { > - DRM_ERROR("Context enable failed %d\n", ret); > - i915_gem_cleanup_ringbuffer(dev); > + ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req); > + if (ret) > + return ret; > > - return ret; > - } > + if (ring->id == RCS) { > + for (i = 0; i < NUM_L3_SLICES(dev); i++) > + i915_gem_l3_remap(req, i); > + } > > - ret = i915_ppgtt_init_hw(dev); > - if (ret && ret != -EIO) { > - DRM_ERROR("PPGTT enable failed %d\n", ret); > - i915_gem_cleanup_ringbuffer(dev); > + /* > + * XXX: Contexts should only be initialized once. Doing a switch to the > + * default context switch however is something we'd like to do after > + * reset or thaw (the latter may not actually be necessary for HW, but > + * goes with our code better). Context switching requires rings (for > + * the do_switch), but before enabling PPGTT. So don't move this. > + */ > + ret = i915_gem_context_enable(req); > + if (ret && ret != -EIO) { > + DRM_ERROR("Context enable failed %d\n", ret); > + i915_gem_request_unreference(req); > + i915_gem_cleanup_ringbuffer(dev); > + > + return ret; > + } > + > + ret = i915_ppgtt_init_ring(req); > + if (ret && ret != -EIO) { > + DRM_ERROR("PPGTT enable failed %d\n", ret); > + i915_gem_request_unreference(req); > + i915_gem_cleanup_ringbuffer(dev); > + } > + > + ret = i915_add_request_no_flush(req); > + if (ret) { > + DRM_ERROR("Add request failed: %d\n", ret); > + i915_gem_request_unreference(req); > + i915_gem_cleanup_ringbuffer(dev); > + return ret; > + } > } > > - return ret; > + return 0; > } > > int i915_gem_init(struct drm_device *dev) > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c > index c5e1bfc..72e280b 100644 > --- a/drivers/gpu/drm/i915/i915_gem_context.c > +++ b/drivers/gpu/drm/i915/i915_gem_context.c > @@ -401,41 +401,23 @@ void i915_gem_context_fini(struct drm_device *dev) > i915_gem_context_unreference(dctx); > } > > -int i915_gem_context_enable(struct drm_i915_private *dev_priv) > +int i915_gem_context_enable(struct drm_i915_gem_request *req) > { > - struct intel_engine_cs *ring; > - int ret, i; > - > - BUG_ON(!dev_priv->ring[RCS].default_context); > + struct intel_engine_cs *ring = req->ring; > + int ret; > > if (i915.enable_execlists) { > - for_each_ring(ring, dev_priv, i) { > - if (ring->init_context) { > - ret = ring->init_context(ring, > - ring->default_context); > - if (ret) { > - DRM_ERROR("ring init context: %d\n", > - ret); > - return ret; > - } > - } > - } > + if (ring->init_context == NULL) > + return 0; > > + ret = ring->init_context(req, ring->default_context); > } else > - for_each_ring(ring, dev_priv, i) { > - ret = i915_switch_context(ring, ring->default_context); > - if (ret) > - return ret; > - > - /* Make sure the context 
switch (if one actually happened) > - * gets wrapped up and finished rather than hanging around > - * and confusing things later. */ > - if (ring->outstanding_lazy_request) { > - ret = i915_add_request_no_flush(ring); > - if (ret) > - return ret; > - } > - } > + ret = i915_switch_context(req, ring->default_context); > + > + if (ret) { > + DRM_ERROR("ring init context: %d\n", ret); > + return ret; > + } > > return 0; > } > @@ -488,10 +470,11 @@ i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id) > } > > static inline int > -mi_set_context(struct intel_engine_cs *ring, > +mi_set_context(struct drm_i915_gem_request *req, > struct intel_context *new_context, > u32 hw_flags) > { > + struct intel_engine_cs *ring = req->ring; > u32 flags = hw_flags | MI_MM_SPACE_GTT; > int ret; > > @@ -501,7 +484,7 @@ mi_set_context(struct intel_engine_cs *ring, > * itlb_before_ctx_switch. > */ > if (IS_GEN6(ring->dev)) { > - ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, 0); > + ret = ring->flush(req, I915_GEM_GPU_DOMAINS, 0); > if (ret) > return ret; > } > @@ -510,7 +493,7 @@ mi_set_context(struct intel_engine_cs *ring, > if (!IS_HASWELL(ring->dev) && INTEL_INFO(ring->dev)->gen < 8) > flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN); > > - ret = intel_ring_begin(ring, 6); > + ret = intel_ring_begin(req, 6); > if (ret) > return ret; > > @@ -540,9 +523,10 @@ mi_set_context(struct intel_engine_cs *ring, > return ret; > } > > -static int do_switch(struct intel_engine_cs *ring, > +static int do_switch(struct drm_i915_gem_request *req, > struct intel_context *to) > { > + struct intel_engine_cs *ring = req->ring; > struct drm_i915_private *dev_priv = ring->dev->dev_private; > struct intel_context *from = ring->last_context; > u32 hw_flags = 0; > @@ -577,7 +561,7 @@ static int do_switch(struct intel_engine_cs *ring, > > if (to->ppgtt) { > trace_switch_mm(ring, to); > - ret = to->ppgtt->switch_mm(to->ppgtt, ring); > + ret = to->ppgtt->switch_mm(to->ppgtt, req); > if (ret) > goto unpin_out; > } > @@ -608,7 +592,7 @@ static int do_switch(struct intel_engine_cs *ring, > if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to)) > hw_flags |= MI_RESTORE_INHIBIT; > > - ret = mi_set_context(ring, to, hw_flags); > + ret = mi_set_context(req, to, hw_flags); > if (ret) > goto unpin_out; > > @@ -616,7 +600,7 @@ static int do_switch(struct intel_engine_cs *ring, > if (!(to->remap_slice & (1<<i))) > continue; > > - ret = i915_gem_l3_remap(ring, i); > + ret = i915_gem_l3_remap(req, i); > /* If it failed, try again next round */ > if (ret) > DRM_DEBUG_DRIVER("L3 remapping failed\n"); > @@ -632,7 +616,7 @@ static int do_switch(struct intel_engine_cs *ring, > */ > if (from != NULL) { > from->legacy_hw_ctx.rcs_state->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION; > - i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->legacy_hw_ctx.rcs_state), ring); > + i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->legacy_hw_ctx.rcs_state), req); > /* As long as MI_SET_CONTEXT is serializing, ie. it flushes the > * whole damn pipeline, we don't need to explicitly mark the > * object dirty. 
The only exception is that the context must be > @@ -658,12 +642,12 @@ done: > > if (uninitialized) { > if (ring->init_context) { > - ret = ring->init_context(ring, to); > + ret = ring->init_context(req, to); > if (ret) > DRM_ERROR("ring init context: %d\n", ret); > } > > - ret = i915_gem_render_state_init(ring); > + ret = i915_gem_render_state_init(req); > if (ret) > DRM_ERROR("init render state: %d\n", ret); > } > @@ -690,9 +674,10 @@ unpin_out: > * switched by writing to the ELSP and requests keep a reference to their > * context. > */ > -int i915_switch_context(struct intel_engine_cs *ring, > +int i915_switch_context(struct drm_i915_gem_request *req, > struct intel_context *to) > { > + struct intel_engine_cs *ring = req->ring; > struct drm_i915_private *dev_priv = ring->dev->dev_private; > > WARN_ON(i915.enable_execlists); > @@ -708,7 +693,7 @@ int i915_switch_context(struct intel_engine_cs *ring, > return 0; > } > > - return do_switch(ring, to); > + return do_switch(req, to); > } > > static bool contexts_enabled(struct drm_device *dev) > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index ca31673..5caa2a2 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -822,7 +822,7 @@ err: > } > > static int > -i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring, > +i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, > struct list_head *vmas) > { > struct i915_vma *vma; > @@ -832,7 +832,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring, > > list_for_each_entry(vma, vmas, exec_list) { > struct drm_i915_gem_object *obj = vma->obj; > - ret = i915_gem_object_sync(obj, ring, false); > + ret = i915_gem_object_sync(obj, req); > if (ret) > return ret; > > @@ -843,7 +843,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring, > } > > if (flush_chipset) > - i915_gem_chipset_flush(ring->dev); > + i915_gem_chipset_flush(req->ring->dev); > > if (flush_domains & I915_GEM_DOMAIN_GTT) > wmb(); > @@ -941,9 +941,9 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, > > void > i915_gem_execbuffer_move_to_active(struct list_head *vmas, > - struct intel_engine_cs *ring) > + struct drm_i915_gem_request *req) > { > - struct drm_i915_gem_request *req = intel_ring_get_request(ring); > + struct intel_engine_cs *ring = i915_gem_request_get_ring(req); > struct i915_vma *vma; > > list_for_each_entry(vma, vmas, exec_list) { > @@ -957,7 +957,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, > obj->base.pending_read_domains |= obj->base.read_domains; > obj->base.read_domains = obj->base.pending_read_domains; > > - i915_vma_move_to_active(vma, ring); > + i915_vma_move_to_active(vma, req); > if (obj->base.write_domain) { > obj->dirty = 1; > i915_gem_request_assign(&obj->last_write_req, req); > @@ -983,20 +983,21 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, > void > i915_gem_execbuffer_retire_commands(struct drm_device *dev, > struct drm_file *file, > - struct intel_engine_cs *ring, > + struct drm_i915_gem_request *req, > struct drm_i915_gem_object *obj) > { > /* Unconditionally force add_request to emit a full flush. 
*/ > - ring->gpu_caches_dirty = true; > + req->ring->gpu_caches_dirty = true; > > /* Add a breadcrumb for the completion of the batch buffer */ > - (void)__i915_add_request(ring, file, obj, true); > + (void)__i915_add_request(req, file, obj, true); > } > > static int > i915_reset_gen7_sol_offsets(struct drm_device *dev, > - struct intel_engine_cs *ring) > + struct drm_i915_gem_request *req) > { > + struct intel_engine_cs *ring = req->ring; > struct drm_i915_private *dev_priv = dev->dev_private; > int ret, i; > > @@ -1005,7 +1006,7 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev, > return -EINVAL; > } > > - ret = intel_ring_begin(ring, 4 * 3); > + ret = intel_ring_begin(req, 4 * 3); > if (ret) > return ret; > > @@ -1021,10 +1022,11 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev, > } > > static int > -i915_emit_box(struct intel_engine_cs *ring, > +i915_emit_box(struct drm_i915_gem_request *req, > struct drm_clip_rect *box, > int DR1, int DR4) > { > + struct intel_engine_cs *ring = req->ring; > int ret; > > if (box->y2 <= box->y1 || box->x2 <= box->x1 || > @@ -1035,7 +1037,7 @@ i915_emit_box(struct intel_engine_cs *ring, > } > > if (INTEL_INFO(ring->dev)->gen >= 4) { > - ret = intel_ring_begin(ring, 4); > + ret = intel_ring_begin(req, 4); > if (ret) > return ret; > > @@ -1044,7 +1046,7 @@ i915_emit_box(struct intel_engine_cs *ring, > intel_ring_emit(ring, ((box->x2 - 1) & 0xffff) | (box->y2 - 1) << 16); > intel_ring_emit(ring, DR4); > } else { > - ret = intel_ring_begin(ring, 6); > + ret = intel_ring_begin(req, 6); > if (ret) > return ret; > > @@ -1151,11 +1153,11 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, > goto error; > } > > - ret = i915_gem_execbuffer_move_to_gpu(ring, vmas); > + ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas); > if (ret) > goto error; > > - i915_gem_execbuffer_move_to_active(vmas, ring); > + i915_gem_execbuffer_move_to_active(vmas, params->request); > > /* Make sure the OLR hasn't advanced (which would indicate a flush > * of the work in progress which in turn would be a Bad Thing). */ > @@ -1200,18 +1202,18 @@ int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params) > /* Unconditionally invalidate gpu caches and ensure that we do flush > * any residual writes from the previous batch. 
> */ > - ret = intel_ring_invalidate_all_caches(ring); > + ret = intel_ring_invalidate_all_caches(params->request); > if (ret) > goto error; > > /* Switch to the correct context for the batch */ > - ret = i915_switch_context(ring, params->ctx); > + ret = i915_switch_context(params->request, params->ctx); > if (ret) > goto error; > > if (ring == &dev_priv->ring[RCS] && > params->instp_mode != dev_priv->relative_constants_mode) { > - ret = intel_ring_begin(ring, 4); > + ret = intel_ring_begin(params->request, 4); > if (ret) > goto error; > > @@ -1225,7 +1227,7 @@ int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params) > } > > if (params->args_flags & I915_EXEC_GEN7_SOL_RESET) { > - ret = i915_reset_gen7_sol_offsets(params->dev, ring); > + ret = i915_reset_gen7_sol_offsets(params->dev, params->request); > if (ret) > goto error; > } > @@ -1236,29 +1238,31 @@ int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params) > > if (params->cliprects) { > for (i = 0; i < params->args_num_cliprects; i++) { > - ret = i915_emit_box(ring, ¶ms->cliprects[i], > - params->args_DR1, params->args_DR4); > + ret = i915_emit_box(params->request, > + ¶ms->cliprects[i], > + params->args_DR1, > + params->args_DR4); > if (ret) > goto error; > > - ret = ring->dispatch_execbuffer(ring, > + ret = ring->dispatch_execbuffer(params->request, > exec_start, exec_len, > params->dispatch_flags); > if (ret) > goto error; > } > } else { > - ret = ring->dispatch_execbuffer(ring, > + ret = ring->dispatch_execbuffer(params->request, > exec_start, exec_len, > params->dispatch_flags); > if (ret) > goto error; > } > > - trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), params->dispatch_flags); > + trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags); > > - i915_gem_execbuffer_retire_commands(params->dev, params->file, ring, > - params->batch_obj); > + i915_gem_execbuffer_retire_commands(params->dev, params->file, > + params->request, params->batch_obj); > > error: > /* intel_gpu_busy should also get a ref, so it will free when the device > @@ -1490,10 +1494,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, > params->batch_obj_vm_offset = i915_gem_obj_offset(batch_obj, vm); > > /* Allocate a request for this batch buffer nice and early. 
*/ > - ret = dev_priv->gt.alloc_request(ring, ctx); > + ret = dev_priv->gt.alloc_request(ring, ctx, ¶ms->request); > if (ret) > goto err; > - params->request = ring->outstanding_lazy_request; > + WARN_ON(params->request != ring->outstanding_lazy_request); > > /* Save assorted stuff away to pass through to *_submission_final() */ > params->dev = dev; > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c > index 7eead93..776776e 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > @@ -213,14 +213,15 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr, > } > > /* Broadwell Page Directory Pointer Descriptors */ > -static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry, > - uint64_t val) > +static int gen8_write_pdp(struct drm_i915_gem_request *req, unsigned entry, > + uint64_t val) > { > + struct intel_engine_cs *ring = req->ring; > int ret; > > BUG_ON(entry >= 4); > > - ret = intel_ring_begin(ring, 6); > + ret = intel_ring_begin(req, 6); > if (ret) > return ret; > > @@ -236,7 +237,7 @@ static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry, > } > > static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt, > - struct intel_engine_cs *ring) > + struct drm_i915_gem_request *req) > { > int i, ret; > > @@ -245,7 +246,7 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt, > > for (i = used_pd - 1; i >= 0; i--) { > dma_addr_t addr = ppgtt->pd_dma_addr[i]; > - ret = gen8_write_pdp(ring, i, addr); > + ret = gen8_write_pdp(req, i, addr); > if (ret) > return ret; > } > @@ -710,16 +711,17 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) > } > > static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, > - struct intel_engine_cs *ring) > + struct drm_i915_gem_request *req) > { > + struct intel_engine_cs *ring = req->ring; > int ret; > > /* NB: TLBs must be flushed and invalidated before a switch */ > - ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); > + ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); > if (ret) > return ret; > > - ret = intel_ring_begin(ring, 6); > + ret = intel_ring_begin(req, 6); > if (ret) > return ret; > > @@ -735,16 +737,17 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, > } > > static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, > - struct intel_engine_cs *ring) > + struct drm_i915_gem_request *req) > { > + struct intel_engine_cs *ring = req->ring; > int ret; > > /* NB: TLBs must be flushed and invalidated before a switch */ > - ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); > + ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); > if (ret) > return ret; > > - ret = intel_ring_begin(ring, 6); > + ret = intel_ring_begin(req, 6); > if (ret) > return ret; > > @@ -758,7 +761,7 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, > > /* XXX: RCS is the only one to auto invalidate the TLBs? 
*/ > if (ring->id != RCS) { > - ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); > + ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); > if (ret) > return ret; > } > @@ -767,8 +770,9 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, > } > > static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt, > - struct intel_engine_cs *ring) > + struct drm_i915_gem_request *req) > { > + struct intel_engine_cs *ring = req->ring; > struct drm_device *dev = ppgtt->base.dev; > struct drm_i915_private *dev_priv = dev->dev_private; > > @@ -1125,11 +1129,6 @@ int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) > > int i915_ppgtt_init_hw(struct drm_device *dev) > { > - struct drm_i915_private *dev_priv = dev->dev_private; > - struct intel_engine_cs *ring; > - struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; > - int i, ret = 0; > - > /* In the case of execlists, PPGTT is enabled by the context descriptor > * and the PDPs are contained within the context itself. We don't > * need to do anything here. */ > @@ -1148,25 +1147,20 @@ int i915_ppgtt_init_hw(struct drm_device *dev) > else > WARN_ON(1); > > - if (ppgtt) { > - for_each_ring(ring, dev_priv, i) { > - ret = ppgtt->switch_mm(ppgtt, ring); > - if (ret != 0) > - return ret; > - > - /* Make sure the context switch (if one actually happened) > - * gets wrapped up and finished rather than hanging around > - * and confusing things later. */ > - if (ring->outstanding_lazy_request) { > - ret = i915_add_request_no_flush(ring); > - if (ret) > - return ret; > - } > - } > - } > + return 0; > +} > > - return ret; > +int i915_ppgtt_init_ring(struct drm_i915_gem_request *req) > +{ > + struct drm_i915_private *dev_priv = req->ring->dev->dev_private; > + struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; > + > + if (!ppgtt) > + return 0; > + > + return ppgtt->switch_mm(ppgtt, req); > } > + > struct i915_hw_ppgtt * > i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv) > { > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h > index dd849df..bee3e2a 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.h > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h > @@ -267,7 +267,7 @@ struct i915_hw_ppgtt { > > int (*enable)(struct i915_hw_ppgtt *ppgtt); > int (*switch_mm)(struct i915_hw_ppgtt *ppgtt, > - struct intel_engine_cs *ring); > + struct drm_i915_gem_request *req); > void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m); > }; > > @@ -278,6 +278,7 @@ void i915_global_gtt_cleanup(struct drm_device *dev); > > int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt); > int i915_ppgtt_init_hw(struct drm_device *dev); > +int i915_ppgtt_init_ring(struct drm_i915_gem_request *req); > void i915_ppgtt_release(struct kref *kref); > struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_device *dev, > struct drm_i915_file_private *fpriv); > diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c > index aba39c3..0e0c23fe 100644 > --- a/drivers/gpu/drm/i915/i915_gem_render_state.c > +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c > @@ -152,8 +152,9 @@ int i915_gem_render_state_prepare(struct intel_engine_cs *ring, > return 0; > } > > -int i915_gem_render_state_init(struct intel_engine_cs *ring) > +int i915_gem_render_state_init(struct drm_i915_gem_request *req) > { > + struct intel_engine_cs *ring = i915_gem_request_get_ring(req); > struct render_state so; > int ret; > > @@ 
-164,16 +165,17 @@ int i915_gem_render_state_init(struct intel_engine_cs *ring) > if (so.rodata == NULL) > return 0; > > - ret = ring->dispatch_execbuffer(ring, > + ret = ring->dispatch_execbuffer(req, > so.ggtt_offset, > so.rodata->batch_items * 4, > I915_DISPATCH_SECURE); > if (ret) > goto out; > > - i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring); > + i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req); > > - ret = __i915_add_request(ring, NULL, so.obj, true); > +// ret = __i915_add_request(req, NULL, so.obj, true); > + req->batch_obj = so.obj; > /* __i915_add_request moves object to inactive if it fails */ > out: > i915_gem_render_state_fini(&so); > diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.h b/drivers/gpu/drm/i915/i915_gem_render_state.h > index c44961e..7aa7372 100644 > --- a/drivers/gpu/drm/i915/i915_gem_render_state.h > +++ b/drivers/gpu/drm/i915/i915_gem_render_state.h > @@ -39,7 +39,7 @@ struct render_state { > int gen; > }; > > -int i915_gem_render_state_init(struct intel_engine_cs *ring); > +int i915_gem_render_state_init(struct drm_i915_gem_request *req); > void i915_gem_render_state_fini(struct render_state *so); > int i915_gem_render_state_prepare(struct intel_engine_cs *ring, > struct render_state *so); > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c > index f0cf421..c0b0e37 100644 > --- a/drivers/gpu/drm/i915/intel_display.c > +++ b/drivers/gpu/drm/i915/intel_display.c > @@ -9089,14 +9089,15 @@ static int intel_gen2_queue_flip(struct drm_device *dev, > struct drm_crtc *crtc, > struct drm_framebuffer *fb, > struct drm_i915_gem_object *obj, > - struct intel_engine_cs *ring, > + struct drm_i915_gem_request *req, > uint32_t flags) > { > + struct intel_engine_cs *ring = req->ring; > struct intel_crtc *intel_crtc = to_intel_crtc(crtc); > u32 flip_mask; > int ret; > > - ret = intel_ring_begin(ring, 6); > + ret = intel_ring_begin(req, 6); > if (ret) > return ret; > > @@ -9116,7 +9117,7 @@ static int intel_gen2_queue_flip(struct drm_device *dev, > intel_ring_emit(ring, 0); /* aux display base address, unused */ > > intel_mark_page_flip_active(intel_crtc); > - i915_add_request_no_flush(ring); > + i915_add_request_no_flush(req); > return 0; > } > > @@ -9124,14 +9125,15 @@ static int intel_gen3_queue_flip(struct drm_device *dev, > struct drm_crtc *crtc, > struct drm_framebuffer *fb, > struct drm_i915_gem_object *obj, > - struct intel_engine_cs *ring, > + struct drm_i915_gem_request *req, > uint32_t flags) > { > + struct intel_engine_cs *ring = req->ring; > struct intel_crtc *intel_crtc = to_intel_crtc(crtc); > u32 flip_mask; > int ret; > > - ret = intel_ring_begin(ring, 6); > + ret = intel_ring_begin(req, 6); > if (ret) > return ret; > > @@ -9148,7 +9150,7 @@ static int intel_gen3_queue_flip(struct drm_device *dev, > intel_ring_emit(ring, MI_NOOP); > > intel_mark_page_flip_active(intel_crtc); > - i915_add_request_no_flush(ring); > + i915_add_request_no_flush(req); > return 0; > } > > @@ -9156,15 +9158,16 @@ static int intel_gen4_queue_flip(struct drm_device *dev, > struct drm_crtc *crtc, > struct drm_framebuffer *fb, > struct drm_i915_gem_object *obj, > - struct intel_engine_cs *ring, > + struct drm_i915_gem_request *req, > uint32_t flags) > { > + struct intel_engine_cs *ring = req->ring; > struct drm_i915_private *dev_priv = dev->dev_private; > struct intel_crtc *intel_crtc = to_intel_crtc(crtc); > uint32_t pf, pipesrc; > int ret; > > - ret = intel_ring_begin(ring, 4); > + ret = intel_ring_begin(req, 4); > if 
(ret) > return ret; > > @@ -9187,7 +9190,7 @@ static int intel_gen4_queue_flip(struct drm_device *dev, > intel_ring_emit(ring, pf | pipesrc); > > intel_mark_page_flip_active(intel_crtc); > - i915_add_request_no_flush(ring); > + i915_add_request_no_flush(req); > return 0; > } > > @@ -9195,15 +9198,16 @@ static int intel_gen6_queue_flip(struct drm_device *dev, > struct drm_crtc *crtc, > struct drm_framebuffer *fb, > struct drm_i915_gem_object *obj, > - struct intel_engine_cs *ring, > + struct drm_i915_gem_request *req, > uint32_t flags) > { > + struct intel_engine_cs *ring = req->ring; > struct drm_i915_private *dev_priv = dev->dev_private; > struct intel_crtc *intel_crtc = to_intel_crtc(crtc); > uint32_t pf, pipesrc; > int ret; > > - ret = intel_ring_begin(ring, 4); > + ret = intel_ring_begin(req, 4); > if (ret) > return ret; > > @@ -9223,7 +9227,7 @@ static int intel_gen6_queue_flip(struct drm_device *dev, > intel_ring_emit(ring, pf | pipesrc); > > intel_mark_page_flip_active(intel_crtc); > - i915_add_request_no_flush(ring); > + i915_add_request_no_flush(req); > return 0; > } > > @@ -9231,9 +9235,10 @@ static int intel_gen7_queue_flip(struct drm_device *dev, > struct drm_crtc *crtc, > struct drm_framebuffer *fb, > struct drm_i915_gem_object *obj, > - struct intel_engine_cs *ring, > + struct drm_i915_gem_request *req, > uint32_t flags) > { > + struct intel_engine_cs *ring = req->ring; > struct intel_crtc *intel_crtc = to_intel_crtc(crtc); > uint32_t plane_bit = 0; > int len, ret; > @@ -9275,11 +9280,11 @@ static int intel_gen7_queue_flip(struct drm_device *dev, > * then do the cacheline alignment, and finally emit the > * MI_DISPLAY_FLIP. > */ > - ret = intel_ring_cacheline_align(ring); > + ret = intel_ring_cacheline_align(req); > if (ret) > return ret; > > - ret = intel_ring_begin(ring, len); > + ret = intel_ring_begin(req, len); > if (ret) > return ret; > > @@ -9318,7 +9323,7 @@ static int intel_gen7_queue_flip(struct drm_device *dev, > intel_ring_emit(ring, (MI_NOOP)); > > intel_mark_page_flip_active(intel_crtc); > - i915_add_request_no_flush(ring); > + i915_add_request_no_flush(req); > return 0; > } > > @@ -9474,9 +9479,10 @@ static int intel_gen9_queue_flip(struct drm_device *dev, > struct drm_crtc *crtc, > struct drm_framebuffer *fb, > struct drm_i915_gem_object *obj, > - struct intel_engine_cs *ring, > + struct drm_i915_gem_request *req, > uint32_t flags) > { > + struct intel_engine_cs *ring = req->ring; > struct intel_crtc *intel_crtc = to_intel_crtc(crtc); > uint32_t plane = 0, stride; > int ret; > @@ -9508,7 +9514,7 @@ static int intel_gen9_queue_flip(struct drm_device *dev, > return -ENODEV; > } > > - ret = intel_ring_begin(ring, 10); > + ret = intel_ring_begin(req, 10); > if (ret) > return ret; > > @@ -9528,7 +9534,7 @@ static int intel_gen9_queue_flip(struct drm_device *dev, > intel_ring_emit(ring, intel_crtc->unpin_work->gtt_offset); > > intel_mark_page_flip_active(intel_crtc); > - i915_add_request_no_flush(ring); > + i915_add_request_no_flush(req); > > return 0; > } > @@ -9537,7 +9543,7 @@ static int intel_default_queue_flip(struct drm_device *dev, > struct drm_crtc *crtc, > struct drm_framebuffer *fb, > struct drm_i915_gem_object *obj, > - struct intel_engine_cs *ring, > + struct drm_i915_gem_request *req, > uint32_t flags) > { > return -ENODEV; > @@ -9729,22 +9735,18 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, > i915_gem_request_assign(&work->flip_queued_req, > obj->last_write_req); > } else { > - ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, ring, > 
+ struct drm_i915_gem_request *req; > + > + ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req); > + if (ret) > + return ret; > + > + i915_gem_request_assign(&work->flip_queued_req, req); > + > + ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, req, > page_flip_flags); > if (ret) > goto cleanup_unpin; > - > - /* Borked: need to get the seqno for the request submitted in > - * 'queue_flip()' above. However, either the request has been > - * posted already and the seqno is gone (q_f calls add_request), > - * or the request never gets posted and is merged into whatever > - * render comes along next (q_f calls ring_advance). > - * > - * On the other hand, seqnos are going away soon anyway! So > - * hopefully the problem will disappear... > - */ > - i915_gem_request_assign(&work->flip_queued_req, > - ring->outstanding_lazy_request ? intel_ring_get_request(ring) : NULL); > } > > work->flip_queued_vblank = drm_vblank_count(dev, intel_crtc->pipe); > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index 80cb87e..5077a77 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -203,6 +203,10 @@ enum { > }; > #define GEN8_CTX_ID_SHIFT 32 > > +static int intel_logical_ring_begin(struct drm_i915_gem_request *req, > + int num_dwords); > +static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req, > + struct intel_context *ctx); > static int intel_lr_context_pin(struct intel_engine_cs *ring, > struct intel_context *ctx); > > @@ -587,9 +591,9 @@ static int execlists_context_queue(struct intel_engine_cs *ring, > return 0; > } > > -static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf) > +static int logical_ring_invalidate_all_caches(struct drm_i915_gem_request *req) > { > - struct intel_engine_cs *ring = ringbuf->ring; > + struct intel_engine_cs *ring = req->ring; > uint32_t flush_domains; > int ret; > > @@ -597,7 +601,7 @@ static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf) > if (ring->gpu_caches_dirty) > flush_domains = I915_GEM_GPU_DOMAINS; > > - ret = ring->emit_flush(ringbuf, I915_GEM_GPU_DOMAINS, flush_domains); > + ret = ring->emit_flush(req, I915_GEM_GPU_DOMAINS, flush_domains); > if (ret) > return ret; > > @@ -605,10 +609,9 @@ static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf) > return 0; > } > > -static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf, > +static int execlists_move_to_gpu(struct drm_i915_gem_request *req, > struct list_head *vmas) > { > - struct intel_engine_cs *ring = ringbuf->ring; > struct i915_vma *vma; > uint32_t flush_domains = 0; > bool flush_chipset = false; > @@ -617,7 +620,7 @@ static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf, > list_for_each_entry(vma, vmas, exec_list) { > struct drm_i915_gem_object *obj = vma->obj; > > - ret = i915_gem_object_sync(obj, ring, true); > + ret = i915_gem_object_sync(obj, req); > if (ret) > return ret; > > @@ -657,7 +660,6 @@ int intel_execlists_submission(struct i915_execbuffer_params *params, > struct drm_device *dev = params->dev; > struct intel_engine_cs *ring = params->ring; > struct drm_i915_private *dev_priv = dev->dev_private; > - struct intel_ringbuffer *ringbuf = params->ctx->engine[ring->id].ringbuf; > int ret; > > params->instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK; > @@ -706,11 +708,11 @@ int intel_execlists_submission(struct i915_execbuffer_params *params, > return -EINVAL; > } > > - ret = 
execlists_move_to_gpu(ringbuf, vmas); > + ret = execlists_move_to_gpu(params->request, vmas); > if (ret) > return ret; > > - i915_gem_execbuffer_move_to_active(vmas, ring); > + i915_gem_execbuffer_move_to_active(vmas, params->request); > > ret = dev_priv->gt.do_execfinal(params); > if (ret) > @@ -742,13 +744,13 @@ int intel_execlists_submission_final(struct i915_execbuffer_params *params) > /* Unconditionally invalidate gpu caches and ensure that we do flush > * any residual writes from the previous batch. > */ > - ret = logical_ring_invalidate_all_caches(ringbuf); > + ret = logical_ring_invalidate_all_caches(params->request); > if (ret) > return ret; > > if (ring == &dev_priv->ring[RCS] && > params->instp_mode != dev_priv->relative_constants_mode) { > - ret = intel_logical_ring_begin(ringbuf, 4); > + ret = intel_logical_ring_begin(params->request, 4); > if (ret) > return ret; > > @@ -764,13 +766,14 @@ int intel_execlists_submission_final(struct i915_execbuffer_params *params) > exec_start = params->batch_obj_vm_offset + > params->args_batch_start_offset; > > - ret = ring->emit_bb_start(ringbuf, exec_start, params->dispatch_flags); > + ret = ring->emit_bb_start(params->request, exec_start, params->dispatch_flags); > if (ret) > return ret; > > - trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), params->dispatch_flags); > + trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags); > > - i915_gem_execbuffer_retire_commands(params->dev, params->file, ring, params->batch_obj); > + i915_gem_execbuffer_retire_commands(params->dev, params->file, > + params->request, params->batch_obj); > > return 0; > } > @@ -827,15 +830,15 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring) > I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING)); > } > > -int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf) > +int logical_ring_flush_all_caches(struct drm_i915_gem_request *req) > { > - struct intel_engine_cs *ring = ringbuf->ring; > + struct intel_engine_cs *ring = req->ring; > int ret; > > if (!ring->gpu_caches_dirty) > return 0; > > - ret = ring->emit_flush(ringbuf, 0, I915_GEM_GPU_DOMAINS); > + ret = ring->emit_flush(req, 0, I915_GEM_GPU_DOMAINS); > if (ret) > return ret; > > @@ -910,13 +913,17 @@ void intel_lr_context_unpin(struct intel_engine_cs *ring, > } > > int intel_logical_ring_alloc_request(struct intel_engine_cs *ring, > - struct intel_context *ctx) > + struct intel_context *ctx, > + struct drm_i915_gem_request **req_out) > { > struct drm_i915_gem_request *request; > struct drm_i915_private *dev_private = ring->dev->dev_private; > int ret; > > - if (ring->outstanding_lazy_request) > + if (!req_out) > + return -EINVAL; > + > + if ((*req_out = ring->outstanding_lazy_request) != NULL) > return 0; > > request = kzalloc(sizeof(*request), GFP_KERNEL); > @@ -953,7 +960,7 @@ int intel_logical_ring_alloc_request(struct intel_engine_cs *ring, > i915_gem_context_reference(request->ctx); > request->ringbuf = ctx->engine[ring->id].ringbuf; > > - ring->outstanding_lazy_request = request; > + *req_out = ring->outstanding_lazy_request = request; > return 0; > } > > @@ -1090,7 +1097,7 @@ static int logical_ring_prepare(struct intel_ringbuffer *ringbuf, int bytes) > /** > * intel_logical_ring_begin() - prepare the logical ringbuffer to accept some commands > * > - * @ringbuf: Logical ringbuffer. > + * @request: The request to start some new work for > * @num_dwords: number of DWORDs that we plan to write to the ringbuffer. 
> * > * The ringbuffer might not be ready to accept the commands right away (maybe it needs to > @@ -1100,8 +1107,9 @@ static int logical_ring_prepare(struct intel_ringbuffer *ringbuf, int bytes) > * > * Return: non-zero if the ringbuffer is not ready to be written to. > */ > -int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords) > +static int intel_logical_ring_begin(struct drm_i915_gem_request *req, int num_dwords) > { > + struct intel_ringbuffer *ringbuf = req->ringbuf; > struct intel_engine_cs *ring = ringbuf->ring; > struct drm_device *dev = ring->dev; > struct drm_i915_private *dev_priv = dev->dev_private; > @@ -1116,38 +1124,28 @@ int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords) > if (ret) > return ret; > > - if(!ring->outstanding_lazy_request) { > - printk(KERN_INFO "%s:%d> \x1B[31;1mring->outstanding_lazy_request = 0x%p\x1B[0m\n", __func__, __LINE__, ring->outstanding_lazy_request); > - dump_stack(); > - } > - > - /* Preallocate the olr before touching the ring */ > - ret = intel_logical_ring_alloc_request(ring, ringbuf->FIXME_lrc_ctx); > - if (ret) > - return ret; > - > ringbuf->space -= num_dwords * sizeof(uint32_t); > return 0; > } > > -static int intel_logical_ring_workarounds_emit(struct intel_engine_cs *ring, > +static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req, > struct intel_context *ctx) > { > int ret, i; > - struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf; > - struct drm_device *dev = ring->dev; > + struct intel_ringbuffer *ringbuf = req->ringbuf; > + struct drm_device *dev = req->ring->dev; > struct drm_i915_private *dev_priv = dev->dev_private; > struct i915_workarounds *w = &dev_priv->workarounds; > > if (WARN_ON(w->count == 0)) > return 0; > > - ring->gpu_caches_dirty = true; > - ret = logical_ring_flush_all_caches(ringbuf); > + req->ring->gpu_caches_dirty = true; > + ret = logical_ring_flush_all_caches(req); > if (ret) > return ret; > > - ret = intel_logical_ring_begin(ringbuf, w->count * 2 + 2); > + ret = intel_logical_ring_begin(req, w->count * 2 + 2); > if (ret) > return ret; > > @@ -1160,8 +1158,8 @@ static int intel_logical_ring_workarounds_emit(struct intel_engine_cs *ring, > > intel_logical_ring_advance(ringbuf); > > - ring->gpu_caches_dirty = true; > - ret = logical_ring_flush_all_caches(ringbuf); > + req->ring->gpu_caches_dirty = true; > + ret = logical_ring_flush_all_caches(req); > if (ret) > return ret; > > @@ -1210,13 +1208,14 @@ static int gen8_init_render_ring(struct intel_engine_cs *ring) > return init_workarounds_ring(ring); > } > > -static int gen8_emit_bb_start(struct intel_ringbuffer *ringbuf, > +static int gen8_emit_bb_start(struct drm_i915_gem_request *req, > u64 offset, unsigned flags) > { > + struct intel_ringbuffer *ringbuf = req->ringbuf; > bool ppgtt = !(flags & I915_DISPATCH_SECURE); > int ret; > > - ret = intel_logical_ring_begin(ringbuf, 4); > + ret = intel_logical_ring_begin(req, 4); > if (ret) > return ret; > > @@ -1263,17 +1262,18 @@ static void gen8_logical_ring_put_irq(struct intel_engine_cs *ring) > spin_unlock_irqrestore(&dev_priv->irq_lock, flags); > } > > -static int gen8_emit_flush(struct intel_ringbuffer *ringbuf, > +static int gen8_emit_flush(struct drm_i915_gem_request *req, > u32 invalidate_domains, > u32 unused) > { > + struct intel_ringbuffer *ringbuf = req->ringbuf; > struct intel_engine_cs *ring = ringbuf->ring; > struct drm_device *dev = ring->dev; > struct drm_i915_private *dev_priv = dev->dev_private; > uint32_t cmd; 
> int ret; > > - ret = intel_logical_ring_begin(ringbuf, 4); > + ret = intel_logical_ring_begin(req, 4); > if (ret) > return ret; > > @@ -1301,10 +1301,11 @@ static int gen8_emit_flush(struct intel_ringbuffer *ringbuf, > return 0; > } > > -static int gen8_emit_flush_render(struct intel_ringbuffer *ringbuf, > +static int gen8_emit_flush_render(struct drm_i915_gem_request *req, > u32 invalidate_domains, > u32 flush_domains) > { > + struct intel_ringbuffer *ringbuf = req->ringbuf; > struct intel_engine_cs *ring = ringbuf->ring; > u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; > u32 flags = 0; > @@ -1328,7 +1329,7 @@ static int gen8_emit_flush_render(struct intel_ringbuffer *ringbuf, > flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; > } > > - ret = intel_logical_ring_begin(ringbuf, 6); > + ret = intel_logical_ring_begin(req, 6); > if (ret) > return ret; > > @@ -1353,13 +1354,14 @@ static void gen8_set_seqno(struct intel_engine_cs *ring, u32 seqno) > intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno); > } > > -static int gen8_emit_request(struct intel_ringbuffer *ringbuf) > +static int gen8_emit_request(struct drm_i915_gem_request *req) > { > + struct intel_ringbuffer *ringbuf = req->ringbuf; > struct intel_engine_cs *ring = ringbuf->ring; > u32 cmd; > int ret; > > - ret = intel_logical_ring_begin(ringbuf, 6); > + ret = intel_logical_ring_begin(req, 6); > if (ret) > return ret; > > @@ -1371,8 +1373,7 @@ static int gen8_emit_request(struct intel_ringbuffer *ringbuf) > (ring->status_page.gfx_addr + > (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT))); > intel_logical_ring_emit(ringbuf, 0); > - intel_logical_ring_emit(ringbuf, > - i915_gem_request_get_seqno(ring->outstanding_lazy_request)); > + intel_logical_ring_emit(ringbuf, i915_gem_request_get_seqno(req)); > intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT); > intel_logical_ring_emit(ringbuf, MI_NOOP); > intel_logical_ring_advance_and_submit(ringbuf); > @@ -1380,16 +1381,20 @@ static int gen8_emit_request(struct intel_ringbuffer *ringbuf) > return 0; > } > > -static int gen8_init_rcs_context(struct intel_engine_cs *ring, > - struct intel_context *ctx) > +static int gen8_init_rcs_context(struct drm_i915_gem_request *req, > + struct intel_context *ctx) > { > int ret; > > - ret = intel_logical_ring_workarounds_emit(ring, ctx); > + ret = intel_logical_ring_workarounds_emit(req, ctx); > if (ret) > return ret; > > - return intel_lr_context_render_state_init(ring, ctx); > + ret = intel_lr_context_render_state_init(req, ctx); > + if (ret) > + return ret; > + > + return 0; > } > > /** > @@ -1409,6 +1414,7 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *ring) > > intel_logical_ring_stop(ring); > WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0); > + WARN_ON(ring->outstanding_lazy_request); > i915_gem_request_assign(&ring->outstanding_lazy_request, NULL); > > if (ring->cleanup) > @@ -1648,10 +1654,10 @@ cleanup_render_ring: > return ret; > } > > -int intel_lr_context_render_state_init(struct intel_engine_cs *ring, > - struct intel_context *ctx) > +static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req, > + struct intel_context *ctx) > { > - struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf; > + struct intel_engine_cs *ring = i915_gem_request_get_ring(req); > struct render_state so; > struct drm_i915_file_private *file_priv = ctx->file_priv; > struct drm_file *file = file_priv ? 
file_priv->file : NULL; > @@ -1664,15 +1670,13 @@ int intel_lr_context_render_state_init(struct intel_engine_cs *ring, > if (so.rodata == NULL) > return 0; > > - ret = ring->emit_bb_start(ringbuf, > - so.ggtt_offset, > - I915_DISPATCH_SECURE); > + ret = ring->emit_bb_start(req, so.ggtt_offset, I915_DISPATCH_SECURE); > if (ret) > goto out; > > - i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring); > + i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req); > > - ret = __i915_add_request(ring, file, so.obj, true); > + ret = __i915_add_request(req, file, so.obj, true); > /* intel_logical_ring_add_request moves object to inactive if it > * fails */ > out: > @@ -1883,6 +1887,7 @@ static void lrc_setup_hardware_status_page(struct intel_engine_cs *ring, > int intel_lr_context_deferred_create(struct intel_context *ctx, > struct intel_engine_cs *ring) > { > + struct drm_i915_private *dev_priv = ring->dev->dev_private; > const bool is_global_default_ctx = (ctx == ring->default_context); > struct drm_device *dev = ring->dev; > struct drm_i915_gem_object *ctx_obj; > @@ -1964,13 +1969,27 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, > lrc_setup_hardware_status_page(ring, ctx_obj); > else if (ring->id == RCS && !ctx->rcs_initialized) { > if (ring->init_context) { > - ret = ring->init_context(ring, ctx); > + struct drm_i915_gem_request *req; > + > + ret = dev_priv->gt.alloc_request(ring, ctx, &req); > + if (ret) > + return ret; > + > + ret = ring->init_context(req, ctx); > if (ret) { > DRM_ERROR("ring init context: %d\n", ret); > + i915_gem_request_unreference(req); > ctx->engine[ring->id].ringbuf = NULL; > ctx->engine[ring->id].state = NULL; > goto error; > } > + > + ret = i915_add_request_no_flush(req); > + if (ret) { > + DRM_ERROR("ring init context: %d\n", ret); > + i915_gem_request_unreference(req); > + goto error; > + } > } > > ctx->rcs_initialized = true; > diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h > index ea083d9..a2981ba 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.h > +++ b/drivers/gpu/drm/i915/intel_lrc.h > @@ -35,12 +35,13 @@ > > /* Logical Rings */ > int __must_check intel_logical_ring_alloc_request(struct intel_engine_cs *ring, > - struct intel_context *ctx); > + struct intel_context *ctx, > + struct drm_i915_gem_request **req_out); > void intel_logical_ring_stop(struct intel_engine_cs *ring); > void intel_logical_ring_cleanup(struct intel_engine_cs *ring); > int intel_logical_rings_init(struct drm_device *dev); > > -int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf); > +int logical_ring_flush_all_caches(struct drm_i915_gem_request *req); > void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf); > /** > * intel_logical_ring_advance() - advance the ringbuffer tail > @@ -63,11 +64,8 @@ static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf, > iowrite32(data, ringbuf->virtual_start + ringbuf->tail); > ringbuf->tail += 4; > } > -int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords); > > /* Logical Ring Contexts */ > -int intel_lr_context_render_state_init(struct intel_engine_cs *ring, > - struct intel_context *ctx); > void intel_lr_context_free(struct intel_context *ctx); > int intel_lr_context_deferred_create(struct intel_context *ctx, > struct intel_engine_cs *ring); > diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c > index 973c9de..2d2ce59 100644 > --- a/drivers/gpu/drm/i915/intel_overlay.c > +++ 
b/drivers/gpu/drm/i915/intel_overlay.c > @@ -209,17 +209,15 @@ static void intel_overlay_unmap_regs(struct intel_overlay *overlay, > } > > static int intel_overlay_do_wait_request(struct intel_overlay *overlay, > + struct drm_i915_gem_request *req, > void (*tail)(struct intel_overlay *)) > { > struct drm_device *dev = overlay->dev; > - struct drm_i915_private *dev_priv = dev->dev_private; > - struct intel_engine_cs *ring = &dev_priv->ring[RCS]; > int ret; > > BUG_ON(overlay->last_flip_req); > - i915_gem_request_assign(&overlay->last_flip_req, > - ring->outstanding_lazy_request); > - ret = i915_add_request(ring); > + i915_gem_request_assign(&overlay->last_flip_req, req); > + ret = i915_add_request(overlay->last_flip_req); > if (ret) > return ret; > > @@ -239,6 +237,7 @@ static int intel_overlay_on(struct intel_overlay *overlay) > struct drm_device *dev = overlay->dev; > struct drm_i915_private *dev_priv = dev->dev_private; > struct intel_engine_cs *ring = &dev_priv->ring[RCS]; > + struct drm_i915_gem_request *req; > int ret; > > BUG_ON(overlay->active); > @@ -246,17 +245,21 @@ static int intel_overlay_on(struct intel_overlay *overlay) > > WARN_ON(IS_I830(dev) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE)); > > - ret = intel_ring_begin(ring, 4); > + ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req); > if (ret) > return ret; > > - intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_ON); > - intel_ring_emit(ring, overlay->flip_addr | OFC_UPDATE); > - intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); > - intel_ring_emit(ring, MI_NOOP); > - intel_ring_advance(ring); > + ret = intel_ring_begin(req, 4); > + if (ret) > + return ret; > + > + intel_ring_emit(req->ring, MI_OVERLAY_FLIP | MI_OVERLAY_ON); > + intel_ring_emit(req->ring, overlay->flip_addr | OFC_UPDATE); > + intel_ring_emit(req->ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); > + intel_ring_emit(req->ring, MI_NOOP); > + intel_ring_advance(req->ring); > > - return intel_overlay_do_wait_request(overlay, NULL); > + return intel_overlay_do_wait_request(overlay, req, NULL); > } > > /* overlay needs to be enabled in OCMD reg */ > @@ -266,6 +269,7 @@ static int intel_overlay_continue(struct intel_overlay *overlay, > struct drm_device *dev = overlay->dev; > struct drm_i915_private *dev_priv = dev->dev_private; > struct intel_engine_cs *ring = &dev_priv->ring[RCS]; > + struct drm_i915_gem_request *req; > u32 flip_addr = overlay->flip_addr; > u32 tmp; > int ret; > @@ -280,7 +284,11 @@ static int intel_overlay_continue(struct intel_overlay *overlay, > if (tmp & (1 << 17)) > DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp); > > - ret = intel_ring_begin(ring, 2); > + ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req); > + if (ret) > + return ret; > + > + ret = intel_ring_begin(req, 2); > if (ret) > return ret; > > @@ -289,9 +297,8 @@ static int intel_overlay_continue(struct intel_overlay *overlay, > intel_ring_advance(ring); > > WARN_ON(overlay->last_flip_req); > - i915_gem_request_assign(&overlay->last_flip_req, > - ring->outstanding_lazy_request); > - return i915_add_request(ring); > + i915_gem_request_assign(&overlay->last_flip_req, req); > + return i915_add_request(req); > } > > static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay) > @@ -326,6 +333,7 @@ static int intel_overlay_off(struct intel_overlay *overlay) > struct drm_device *dev = overlay->dev; > struct drm_i915_private *dev_priv = dev->dev_private; > struct intel_engine_cs *ring = &dev_priv->ring[RCS]; > + 
struct drm_i915_gem_request *req; > u32 flip_addr = overlay->flip_addr; > int ret; > > @@ -337,7 +345,11 @@ static int intel_overlay_off(struct intel_overlay *overlay) > * of the hw. Do it in both cases */ > flip_addr |= OFC_UPDATE; > > - ret = intel_ring_begin(ring, 6); > + ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req); > + if (ret) > + return ret; > + > + ret = intel_ring_begin(req, 6); > if (ret) > return ret; > > @@ -359,7 +371,7 @@ static int intel_overlay_off(struct intel_overlay *overlay) > } > intel_ring_advance(ring); > > - return intel_overlay_do_wait_request(overlay, intel_overlay_off_tail); > + return intel_overlay_do_wait_request(overlay, req, intel_overlay_off_tail); > } > > /* recover from an interruption due to a signal > @@ -404,7 +416,13 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay) > > if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) { > /* synchronous slowpath */ > - ret = intel_ring_begin(ring, 2); > + struct drm_i915_gem_request *req; > + > + ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req); > + if (ret) > + return ret; > + > + ret = intel_ring_begin(req, 2); > if (ret) > return ret; > > @@ -412,7 +430,7 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay) > intel_ring_emit(ring, MI_NOOP); > intel_ring_advance(ring); > > - ret = intel_overlay_do_wait_request(overlay, > + ret = intel_overlay_do_wait_request(overlay, req, > intel_overlay_release_old_vid_tail); > if (ret) > return ret; > diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c > index 78911e2..5905fa5 100644 > --- a/drivers/gpu/drm/i915/intel_pm.c > +++ b/drivers/gpu/drm/i915/intel_pm.c > @@ -5506,6 +5506,7 @@ static void ironlake_enable_rc6(struct drm_device *dev) > { > struct drm_i915_private *dev_priv = dev->dev_private; > struct intel_engine_cs *ring = &dev_priv->ring[RCS]; > + struct drm_i915_gem_request *req = NULL; > bool was_interruptible; > int ret; > > @@ -5524,16 +5525,17 @@ static void ironlake_enable_rc6(struct drm_device *dev) > was_interruptible = dev_priv->mm.interruptible; > dev_priv->mm.interruptible = false; > > + ret = dev_priv->gt.alloc_request(ring, NULL, &req); > + if (ret) > + goto err; > + > /* > * GPU can automatically power down the render unit if given a page > * to save state. > */ > - ret = intel_ring_begin(ring, 6); > - if (ret) { > - ironlake_teardown_rc6(dev); > - dev_priv->mm.interruptible = was_interruptible; > - return; > - } > + ret = intel_ring_begin(req, 6); > + if (ret) > + goto err; > > intel_ring_emit(ring, MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN); > intel_ring_emit(ring, MI_SET_CONTEXT); > @@ -5547,6 +5549,11 @@ static void ironlake_enable_rc6(struct drm_device *dev) > intel_ring_emit(ring, MI_FLUSH); > intel_ring_advance(ring); > > + ret = i915_add_request_no_flush(req); > + if (ret) > + goto err; > + req = NULL; > + > /* > * Wait for the command parser to advance past MI_SET_CONTEXT. 
The HW > * does an implicit flush, combined with MI_FLUSH above, it should be > @@ -5554,16 +5561,20 @@ static void ironlake_enable_rc6(struct drm_device *dev) > */ > ret = intel_ring_idle(ring); > dev_priv->mm.interruptible = was_interruptible; > - if (ret) { > - DRM_ERROR("failed to enable ironlake power savings\n"); > - ironlake_teardown_rc6(dev); > - return; > - } > + if (ret) > + goto err; > > I915_WRITE(PWRCTXA, i915_gem_obj_ggtt_offset(dev_priv->ips.pwrctx) | PWRCTX_EN); > I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT); > > intel_print_rc6_info(dev, GEN6_RC_CTL_RC6_ENABLE); > + > +err: > + DRM_ERROR("failed to enable ironlake power savings\n"); > + ironlake_teardown_rc6(dev); > + dev_priv->mm.interruptible = was_interruptible; > + if (req) > + i915_gem_request_unreference(req); > } > > static unsigned long intel_pxfreq(u32 vidfreq) > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c > index b60e59b..e6e7bb5 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c > @@ -91,10 +91,11 @@ void __intel_ring_advance(struct intel_engine_cs *ring) > } > > static int > -gen2_render_ring_flush(struct intel_engine_cs *ring, > +gen2_render_ring_flush(struct drm_i915_gem_request *req, > u32 invalidate_domains, > u32 flush_domains) > { > + struct intel_engine_cs *ring = req->ring; > u32 cmd; > int ret; > > @@ -105,7 +106,7 @@ gen2_render_ring_flush(struct intel_engine_cs *ring, > if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER) > cmd |= MI_READ_FLUSH; > > - ret = intel_ring_begin(ring, 2); > + ret = intel_ring_begin(req, 2); > if (ret) > return ret; > > @@ -117,10 +118,11 @@ gen2_render_ring_flush(struct intel_engine_cs *ring, > } > > static int > -gen4_render_ring_flush(struct intel_engine_cs *ring, > +gen4_render_ring_flush(struct drm_i915_gem_request *req, > u32 invalidate_domains, > u32 flush_domains) > { > + struct intel_engine_cs *ring = req->ring; > struct drm_device *dev = ring->dev; > u32 cmd; > int ret; > @@ -163,7 +165,7 @@ gen4_render_ring_flush(struct intel_engine_cs *ring, > (IS_G4X(dev) || IS_GEN5(dev))) > cmd |= MI_INVALIDATE_ISP; > > - ret = intel_ring_begin(ring, 2); > + ret = intel_ring_begin(req, 2); > if (ret) > return ret; > > @@ -212,12 +214,13 @@ gen4_render_ring_flush(struct intel_engine_cs *ring, > * really our business. That leaves only stall at scoreboard. 
> */ > static int > -intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring) > +intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req) > { > + struct intel_engine_cs *ring = req->ring; > u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; > int ret; > > - ret = intel_ring_begin(ring, 6); > + ret = intel_ring_begin(req, 6); > if (ret) > return ret; > > @@ -230,7 +233,7 @@ intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring) > intel_ring_emit(ring, MI_NOOP); > intel_ring_advance(ring); > > - ret = intel_ring_begin(ring, 6); > + ret = intel_ring_begin(req, 6); > if (ret) > return ret; > > @@ -246,15 +249,16 @@ intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring) > } > > static int > -gen6_render_ring_flush(struct intel_engine_cs *ring, > - u32 invalidate_domains, u32 flush_domains) > +gen6_render_ring_flush(struct drm_i915_gem_request *req, > + u32 invalidate_domains, u32 flush_domains) > { > + struct intel_engine_cs *ring = req->ring; > u32 flags = 0; > u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; > int ret; > > /* Force SNB workarounds for PIPE_CONTROL flushes */ > - ret = intel_emit_post_sync_nonzero_flush(ring); > + ret = intel_emit_post_sync_nonzero_flush(req); > if (ret) > return ret; > > @@ -284,7 +288,7 @@ gen6_render_ring_flush(struct intel_engine_cs *ring, > flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; > } > > - ret = intel_ring_begin(ring, 4); > + ret = intel_ring_begin(req, 4); > if (ret) > return ret; > > @@ -298,11 +302,12 @@ gen6_render_ring_flush(struct intel_engine_cs *ring, > } > > static int > -gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring) > +gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req) > { > + struct intel_engine_cs *ring = req->ring; > int ret; > > - ret = intel_ring_begin(ring, 4); > + ret = intel_ring_begin(req, 4); > if (ret) > return ret; > > @@ -316,14 +321,15 @@ gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring) > return 0; > } > > -static int gen7_ring_fbc_flush(struct intel_engine_cs *ring, u32 value) > +static int gen7_ring_fbc_flush(struct drm_i915_gem_request *req, u32 value) > { > + struct intel_engine_cs *ring = req->ring; > int ret; > > if (!ring->fbc_dirty) > return 0; > > - ret = intel_ring_begin(ring, 6); > + ret = intel_ring_begin(req, 6); > if (ret) > return ret; > /* WaFbcNukeOn3DBlt:ivb/hsw */ > @@ -340,9 +346,10 @@ static int gen7_ring_fbc_flush(struct intel_engine_cs *ring, u32 value) > } > > static int > -gen7_render_ring_flush(struct intel_engine_cs *ring, > +gen7_render_ring_flush(struct drm_i915_gem_request *req, > u32 invalidate_domains, u32 flush_domains) > { > + struct intel_engine_cs *ring = req->ring; > u32 flags = 0; > u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; > int ret; > @@ -381,10 +388,10 @@ gen7_render_ring_flush(struct intel_engine_cs *ring, > /* Workaround: we must issue a pipe_control with CS-stall bit > * set before a pipe_control command that has the state cache > * invalidate bit set. 
*/ > - gen7_render_ring_cs_stall_wa(ring); > + gen7_render_ring_cs_stall_wa(req); > } > > - ret = intel_ring_begin(ring, 4); > + ret = intel_ring_begin(req, 4); > if (ret) > return ret; > > @@ -395,18 +402,19 @@ gen7_render_ring_flush(struct intel_engine_cs *ring, > intel_ring_advance(ring); > > if (!invalidate_domains && flush_domains) > - return gen7_ring_fbc_flush(ring, FBC_REND_NUKE); > + return gen7_ring_fbc_flush(req, FBC_REND_NUKE); > > return 0; > } > > static int > -gen8_emit_pipe_control(struct intel_engine_cs *ring, > +gen8_emit_pipe_control(struct drm_i915_gem_request *req, > u32 flags, u32 scratch_addr) > { > + struct intel_engine_cs *ring = req->ring; > int ret; > > - ret = intel_ring_begin(ring, 6); > + ret = intel_ring_begin(req, 6); > if (ret) > return ret; > > @@ -422,11 +430,11 @@ gen8_emit_pipe_control(struct intel_engine_cs *ring, > } > > static int > -gen8_render_ring_flush(struct intel_engine_cs *ring, > +gen8_render_ring_flush(struct drm_i915_gem_request *req, > u32 invalidate_domains, u32 flush_domains) > { > u32 flags = 0; > - u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; > + u32 scratch_addr = req->ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; > int ret; > > flags |= PIPE_CONTROL_CS_STALL; > @@ -446,7 +454,7 @@ gen8_render_ring_flush(struct intel_engine_cs *ring, > flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; > > /* WaCsStallBeforeStateCacheInvalidate:bdw,chv */ > - ret = gen8_emit_pipe_control(ring, > + ret = gen8_emit_pipe_control(req, > PIPE_CONTROL_CS_STALL | > PIPE_CONTROL_STALL_AT_SCOREBOARD, > 0); > @@ -454,12 +462,12 @@ gen8_render_ring_flush(struct intel_engine_cs *ring, > return ret; > } > > - ret = gen8_emit_pipe_control(ring, flags, scratch_addr); > + ret = gen8_emit_pipe_control(req, flags, scratch_addr); > if (ret) > return ret; > > if (!invalidate_domains && flush_domains) > - return gen7_ring_fbc_flush(ring, FBC_REND_NUKE); > + return gen7_ring_fbc_flush(req, FBC_REND_NUKE); > > return 0; > } > @@ -670,9 +678,10 @@ err: > return ret; > } > > -static int intel_ring_workarounds_emit(struct intel_engine_cs *ring, > +static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req, > struct intel_context *ctx) > { > + struct intel_engine_cs *ring = req->ring; > int ret, i; > struct drm_device *dev = ring->dev; > struct drm_i915_private *dev_priv = dev->dev_private; > @@ -682,11 +691,11 @@ static int intel_ring_workarounds_emit(struct intel_engine_cs *ring, > return 0; > > ring->gpu_caches_dirty = true; > - ret = intel_ring_flush_all_caches(ring); > + ret = intel_ring_flush_all_caches(req); > if (ret) > return ret; > > - ret = intel_ring_begin(ring, (w->count * 2 + 2)); > + ret = intel_ring_begin(req, (w->count * 2 + 2)); > if (ret) > return ret; > > @@ -700,7 +709,7 @@ static int intel_ring_workarounds_emit(struct intel_engine_cs *ring, > intel_ring_advance(ring); > > ring->gpu_caches_dirty = true; > - ret = intel_ring_flush_all_caches(ring); > + ret = intel_ring_flush_all_caches(req); > if (ret) > return ret; > > @@ -898,10 +907,11 @@ static void render_ring_cleanup(struct intel_engine_cs *ring) > intel_fini_pipe_control(ring); > } > > -static int gen8_rcs_signal(struct intel_engine_cs *signaller, > +static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req, > unsigned int num_dwords) > { > #define MBOX_UPDATE_DWORDS 8 > + struct intel_engine_cs *signaller = signaller_req->ring; > struct drm_device *dev = signaller->dev; > struct drm_i915_private *dev_priv = dev->dev_private; > struct intel_engine_cs *waiter; > @@ 
-911,7 +921,7 @@ static int gen8_rcs_signal(struct intel_engine_cs *signaller, > num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS; > #undef MBOX_UPDATE_DWORDS > > - ret = intel_ring_begin(signaller, num_dwords); > + ret = intel_ring_begin(signaller_req, num_dwords); > if (ret) > return ret; > > @@ -921,8 +931,7 @@ static int gen8_rcs_signal(struct intel_engine_cs *signaller, > if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) > continue; > > - seqno = i915_gem_request_get_seqno( > - signaller->outstanding_lazy_request); > + seqno = i915_gem_request_get_seqno(signaller_req); > intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6)); > intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB | > PIPE_CONTROL_QW_WRITE | > @@ -939,10 +948,11 @@ static int gen8_rcs_signal(struct intel_engine_cs *signaller, > return 0; > } > > -static int gen8_xcs_signal(struct intel_engine_cs *signaller, > +static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req, > unsigned int num_dwords) > { > #define MBOX_UPDATE_DWORDS 6 > + struct intel_engine_cs *signaller = signaller_req->ring; > struct drm_device *dev = signaller->dev; > struct drm_i915_private *dev_priv = dev->dev_private; > struct intel_engine_cs *waiter; > @@ -952,7 +962,7 @@ static int gen8_xcs_signal(struct intel_engine_cs *signaller, > num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS; > #undef MBOX_UPDATE_DWORDS > > - ret = intel_ring_begin(signaller, num_dwords); > + ret = intel_ring_begin(signaller_req, num_dwords); > if (ret) > return ret; > > @@ -962,8 +972,7 @@ static int gen8_xcs_signal(struct intel_engine_cs *signaller, > if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) > continue; > > - seqno = i915_gem_request_get_seqno( > - signaller->outstanding_lazy_request); > + seqno = i915_gem_request_get_seqno(signaller_req); > intel_ring_emit(signaller, (MI_FLUSH_DW + 1) | > MI_FLUSH_DW_OP_STOREDW); > intel_ring_emit(signaller, lower_32_bits(gtt_offset) | > @@ -978,9 +987,10 @@ static int gen8_xcs_signal(struct intel_engine_cs *signaller, > return 0; > } > > -static int gen6_signal(struct intel_engine_cs *signaller, > +static int gen6_signal(struct drm_i915_gem_request *signaller_req, > unsigned int num_dwords) > { > + struct intel_engine_cs *signaller = signaller_req->ring; > struct drm_device *dev = signaller->dev; > struct drm_i915_private *dev_priv = dev->dev_private; > struct intel_engine_cs *useless; > @@ -991,15 +1001,14 @@ static int gen6_signal(struct intel_engine_cs *signaller, > num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2); > #undef MBOX_UPDATE_DWORDS > > - ret = intel_ring_begin(signaller, num_dwords); > + ret = intel_ring_begin(signaller_req, num_dwords); > if (ret) > return ret; > > for_each_ring(useless, dev_priv, i) { > u32 mbox_reg = signaller->semaphore.mbox.signal[i]; > if (mbox_reg != GEN6_NOSYNC) { > - u32 seqno = i915_gem_request_get_seqno( > - signaller->outstanding_lazy_request); > + u32 seqno = i915_gem_request_get_seqno(signaller_req); > intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1)); > intel_ring_emit(signaller, mbox_reg); > intel_ring_emit(signaller, seqno); > @@ -1016,29 +1025,28 @@ static int gen6_signal(struct intel_engine_cs *signaller, > /** > * gen6_add_request - Update the semaphore mailbox registers > * > - * @ring - ring that is adding a request > - * @seqno - return seqno stuck into the ring > + * @request - request to write to the ring > * > * Update the mailbox registers in the *other* rings with the current seqno. > * This acts like a signal in the canonical semaphore. 
> */ > static int > -gen6_add_request(struct intel_engine_cs *ring) > +gen6_add_request(struct drm_i915_gem_request *req) > { > + struct intel_engine_cs *ring = req->ring; > int ret; > > if (ring->semaphore.signal) > - ret = ring->semaphore.signal(ring, 4); > + ret = ring->semaphore.signal(req, 4); > else > - ret = intel_ring_begin(ring, 4); > + ret = intel_ring_begin(req, 4); > > if (ret) > return ret; > > intel_ring_emit(ring, MI_STORE_DWORD_INDEX); > intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); > - intel_ring_emit(ring, > - i915_gem_request_get_seqno(ring->outstanding_lazy_request)); > + intel_ring_emit(ring, i915_gem_request_get_seqno(req)); > intel_ring_emit(ring, MI_USER_INTERRUPT); > __intel_ring_advance(ring); > > @@ -1061,14 +1069,15 @@ static inline bool i915_gem_has_seqno_wrapped(struct drm_device *dev, > */ > > static int > -gen8_ring_sync(struct intel_engine_cs *waiter, > +gen8_ring_sync(struct drm_i915_gem_request *waiter_req, > struct intel_engine_cs *signaller, > u32 seqno) > { > + struct intel_engine_cs *waiter = waiter_req->ring; > struct drm_i915_private *dev_priv = waiter->dev->dev_private; > int ret; > > - ret = intel_ring_begin(waiter, 4); > + ret = intel_ring_begin(waiter_req, 4); > if (ret) > return ret; > > @@ -1086,10 +1095,11 @@ gen8_ring_sync(struct intel_engine_cs *waiter, > } > > static int > -gen6_ring_sync(struct intel_engine_cs *waiter, > +gen6_ring_sync(struct drm_i915_gem_request *waiter_req, > struct intel_engine_cs *signaller, > u32 seqno) > { > + struct intel_engine_cs *waiter = waiter_req->ring; > u32 dw1 = MI_SEMAPHORE_MBOX | > MI_SEMAPHORE_COMPARE | > MI_SEMAPHORE_REGISTER; > @@ -1104,7 +1114,7 @@ gen6_ring_sync(struct intel_engine_cs *waiter, > > WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID); > > - ret = intel_ring_begin(waiter, 4); > + ret = intel_ring_begin(waiter_req, 4); > if (ret) > return ret; > > @@ -1135,8 +1145,9 @@ do { \ > } while (0) > > static int > -pc_render_add_request(struct intel_engine_cs *ring) > +pc_render_add_request(struct drm_i915_gem_request *req) > { > + struct intel_engine_cs *ring = req->ring; > u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; > int ret; > > @@ -1148,7 +1159,7 @@ pc_render_add_request(struct intel_engine_cs *ring) > * incoherence by flushing the 6 PIPE_NOTIFY buffers out to > * memory before requesting an interrupt. 
> */ > - ret = intel_ring_begin(ring, 32); > + ret = intel_ring_begin(req, 32); > if (ret) > return ret; > > @@ -1156,8 +1167,7 @@ pc_render_add_request(struct intel_engine_cs *ring) > PIPE_CONTROL_WRITE_FLUSH | > PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); > intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT); > - intel_ring_emit(ring, > - i915_gem_request_get_seqno(ring->outstanding_lazy_request)); > + intel_ring_emit(ring, i915_gem_request_get_seqno(req)); > intel_ring_emit(ring, 0); > PIPE_CONTROL_FLUSH(ring, scratch_addr); > scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */ > @@ -1176,8 +1186,7 @@ pc_render_add_request(struct intel_engine_cs *ring) > PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | > PIPE_CONTROL_NOTIFY); > intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT); > - intel_ring_emit(ring, > - i915_gem_request_get_seqno(ring->outstanding_lazy_request)); > + intel_ring_emit(ring, i915_gem_request_get_seqno(req)); > intel_ring_emit(ring, 0); > __intel_ring_advance(ring); > > @@ -1390,13 +1399,14 @@ void intel_ring_setup_status_page(struct intel_engine_cs *ring) > } > > static int > -bsd_ring_flush(struct intel_engine_cs *ring, > +bsd_ring_flush(struct drm_i915_gem_request *req, > u32 invalidate_domains, > u32 flush_domains) > { > + struct intel_engine_cs *ring = req->ring; > int ret; > > - ret = intel_ring_begin(ring, 2); > + ret = intel_ring_begin(req, 2); > if (ret) > return ret; > > @@ -1407,18 +1417,18 @@ bsd_ring_flush(struct intel_engine_cs *ring, > } > > static int > -i9xx_add_request(struct intel_engine_cs *ring) > +i9xx_add_request(struct drm_i915_gem_request *req) > { > + struct intel_engine_cs *ring = req->ring; > int ret; > > - ret = intel_ring_begin(ring, 4); > + ret = intel_ring_begin(req, 4); > if (ret) > return ret; > > intel_ring_emit(ring, MI_STORE_DWORD_INDEX); > intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); > - intel_ring_emit(ring, > - i915_gem_request_get_seqno(ring->outstanding_lazy_request)); > + intel_ring_emit(ring, i915_gem_request_get_seqno(req)); > intel_ring_emit(ring, MI_USER_INTERRUPT); > __intel_ring_advance(ring); > > @@ -1550,13 +1560,14 @@ gen8_ring_put_irq(struct intel_engine_cs *ring) > } > > static int > -i965_dispatch_execbuffer(struct intel_engine_cs *ring, > +i965_dispatch_execbuffer(struct drm_i915_gem_request *req, > u64 offset, u32 length, > unsigned flags) > { > + struct intel_engine_cs *ring = req->ring; > int ret; > > - ret = intel_ring_begin(ring, 2); > + ret = intel_ring_begin(req, 2); > if (ret) > return ret; > > @@ -1575,14 +1586,15 @@ i965_dispatch_execbuffer(struct intel_engine_cs *ring, > #define I830_TLB_ENTRIES (2) > #define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT) > static int > -i830_dispatch_execbuffer(struct intel_engine_cs *ring, > - u64 offset, u32 len, > - unsigned flags) > +i830_dispatch_execbuffer(struct drm_i915_gem_request *req, > + u64 offset, u32 len, > + unsigned flags) > { > + struct intel_engine_cs *ring = req->ring; > u32 cs_offset = ring->scratch.gtt_offset; > int ret; > > - ret = intel_ring_begin(ring, 6); > + ret = intel_ring_begin(req, 6); > if (ret) > return ret; > > @@ -1599,7 +1611,7 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring, > if (len > I830_BATCH_LIMIT) > return -ENOSPC; > > - ret = intel_ring_begin(ring, 6 + 2); > + ret = intel_ring_begin(req, 6 + 2); > if (ret) > return ret; > > @@ -1622,7 +1634,7 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring, > offset = cs_offset; > } > 
> - ret = intel_ring_begin(ring, 4); > + ret = intel_ring_begin(req, 4); > if (ret) > return ret; > > @@ -1636,13 +1648,14 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring, > } > > static int > -i915_dispatch_execbuffer(struct intel_engine_cs *ring, > +i915_dispatch_execbuffer(struct drm_i915_gem_request *req, > u64 offset, u32 len, > unsigned flags) > { > + struct intel_engine_cs *ring = req->ring; > int ret; > > - ret = intel_ring_begin(ring, 2); > + ret = intel_ring_begin(req, 2); > if (ret) > return ret; > > @@ -1885,6 +1898,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring) > > intel_unpin_ringbuffer_obj(ringbuf); > intel_destroy_ringbuffer_obj(ringbuf); > + WARN_ON(ring->outstanding_lazy_request); > i915_gem_request_assign(&ring->outstanding_lazy_request, NULL); > > if (ring->cleanup) > @@ -2007,8 +2021,9 @@ int intel_ring_idle(struct intel_engine_cs *ring) > int ret; > > /* We need to add any requests required to flush the objects and ring */ > + WARN_ON(ring->outstanding_lazy_request); > if (ring->outstanding_lazy_request) { > - ret = i915_add_request(ring); > + ret = i915_add_request(ring->outstanding_lazy_request); > if (ret) > return ret; > } > @@ -2025,13 +2040,18 @@ int intel_ring_idle(struct intel_engine_cs *ring) > } > > int > -intel_ring_alloc_request(struct intel_engine_cs *ring, struct intel_context *ctx) > +intel_ring_alloc_request(struct intel_engine_cs *ring, > + struct intel_context *ctx, > + struct drm_i915_gem_request **req_out) > { > int ret; > struct drm_i915_gem_request *request; > struct drm_i915_private *dev_private = ring->dev->dev_private; > > - if (ring->outstanding_lazy_request) > + if (!req_out) > + return -EINVAL; > + > + if ((*req_out = ring->outstanding_lazy_request) != NULL) > return 0; > > request = kzalloc(sizeof(*request), GFP_KERNEL); > @@ -2053,7 +2073,7 @@ intel_ring_alloc_request(struct intel_engine_cs *ring, struct intel_context *ctx > spewThisReq(request, "\x1B[32mCreated: %d:%d, ref => %d\x1B[0m", request->uniq, request->seqno, request->ref.refcount.counter); > > //printk(KERN_INFO "%s:%d> <%s> OLR = 0x%p, uniq = %d, seqno = %d\n", __func__, __LINE__, ring->name, request, request->uniq, request->seqno); > - ring->outstanding_lazy_request = request; > + *req_out = ring->outstanding_lazy_request = request; > return 0; > } > > @@ -2078,9 +2098,10 @@ static int __intel_ring_prepare(struct intel_engine_cs *ring, > return 0; > } > > -int intel_ring_begin(struct intel_engine_cs *ring, > +int intel_ring_begin(struct drm_i915_gem_request *req, > int num_dwords) > { > + struct intel_engine_cs *ring = req->ring; > struct drm_i915_private *dev_priv = ring->dev->dev_private; > int ret; > > @@ -2093,18 +2114,14 @@ int intel_ring_begin(struct intel_engine_cs *ring, > if (ret) > return ret; > > - /* Preallocate the olr before touching the ring */ > - ret = intel_ring_alloc_request(ring, NULL); > - if (ret) > - return ret; > - > ring->buffer->space -= num_dwords * sizeof(uint32_t); > return 0; > } > > /* Align the ring tail to a cacheline boundary */ > -int intel_ring_cacheline_align(struct intel_engine_cs *ring) > +int intel_ring_cacheline_align(struct drm_i915_gem_request *req) > { > + struct intel_engine_cs *ring = req->ring; > int num_dwords = (ring->buffer->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); > int ret; > > @@ -2112,7 +2129,7 @@ int intel_ring_cacheline_align(struct intel_engine_cs *ring) > return 0; > > num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords; > - ret = intel_ring_begin(ring, num_dwords); > + ret 
= intel_ring_begin(req, num_dwords); > if (ret) > return ret; > > @@ -2176,13 +2193,14 @@ static void gen6_bsd_ring_write_tail(struct intel_engine_cs *ring, > _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE)); > } > > -static int gen6_bsd_ring_flush(struct intel_engine_cs *ring, > +static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, > u32 invalidate, u32 flush) > { > + struct intel_engine_cs *ring = req->ring; > uint32_t cmd; > int ret; > > - ret = intel_ring_begin(ring, 4); > + ret = intel_ring_begin(req, 4); > if (ret) > return ret; > > @@ -2212,14 +2230,15 @@ static int gen6_bsd_ring_flush(struct intel_engine_cs *ring, > } > > static int > -gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring, > +gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, > u64 offset, u32 len, > unsigned flags) > { > + struct intel_engine_cs *ring = req->ring; > bool ppgtt = USES_PPGTT(ring->dev) && !(flags & I915_DISPATCH_SECURE); > int ret; > > - ret = intel_ring_begin(ring, 4); > + ret = intel_ring_begin(req, 4); > if (ret) > return ret; > > @@ -2234,13 +2253,14 @@ gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring, > } > > static int > -hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring, > +hsw_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, > u64 offset, u32 len, > unsigned flags) > { > + struct intel_engine_cs *ring = req->ring; > int ret; > > - ret = intel_ring_begin(ring, 2); > + ret = intel_ring_begin(req, 2); > if (ret) > return ret; > > @@ -2256,13 +2276,14 @@ hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring, > } > > static int > -gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring, > +gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, > u64 offset, u32 len, > unsigned flags) > { > + struct intel_engine_cs *ring = req->ring; > int ret; > > - ret = intel_ring_begin(ring, 2); > + ret = intel_ring_begin(req, 2); > if (ret) > return ret; > > @@ -2278,15 +2299,16 @@ gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring, > > /* Blitter support (SandyBridge+) */ > > -static int gen6_ring_flush(struct intel_engine_cs *ring, > +static int gen6_ring_flush(struct drm_i915_gem_request *req, > u32 invalidate, u32 flush) > { > + struct intel_engine_cs *ring = req->ring; > struct drm_device *dev = ring->dev; > struct drm_i915_private *dev_priv = dev->dev_private; > uint32_t cmd; > int ret; > > - ret = intel_ring_begin(ring, 4); > + ret = intel_ring_begin(req, 4); > if (ret) > return ret; > > @@ -2315,7 +2337,7 @@ static int gen6_ring_flush(struct intel_engine_cs *ring, > > if (!invalidate && flush) { > if (IS_GEN7(dev)) > - return gen7_ring_fbc_flush(ring, FBC_REND_CACHE_CLEAN); > + return gen7_ring_fbc_flush(req, FBC_REND_CACHE_CLEAN); > else if (IS_BROADWELL(dev)) > dev_priv->fbc.need_sw_cache_clean = true; > } > @@ -2696,14 +2718,15 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev) > } > > int > -intel_ring_flush_all_caches(struct intel_engine_cs *ring) > +intel_ring_flush_all_caches(struct drm_i915_gem_request *req) > { > + struct intel_engine_cs *ring = req->ring; > int ret; > > if (!ring->gpu_caches_dirty) > return 0; > > - ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS); > + ret = ring->flush(req, 0, I915_GEM_GPU_DOMAINS); > if (ret) > return ret; > > @@ -2714,8 +2737,9 @@ intel_ring_flush_all_caches(struct intel_engine_cs *ring) > } > > int > -intel_ring_invalidate_all_caches(struct intel_engine_cs *ring) > +intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req) > { > + struct intel_engine_cs 
*ring = req->ring; > uint32_t flush_domains; > int ret; > > @@ -2723,7 +2747,7 @@ intel_ring_invalidate_all_caches(struct intel_engine_cs *ring) > if (ring->gpu_caches_dirty) > flush_domains = I915_GEM_GPU_DOMAINS; > > - ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains); > + ret = ring->flush(req, I915_GEM_GPU_DOMAINS, flush_domains); > if (ret) > return ret; > > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h > index 48cbb00..a7e47ad 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > @@ -154,15 +154,15 @@ struct intel_engine_cs { > > int (*init_hw)(struct intel_engine_cs *ring); > > - int (*init_context)(struct intel_engine_cs *ring, > + int (*init_context)(struct drm_i915_gem_request *req, > struct intel_context *ctx); > > void (*write_tail)(struct intel_engine_cs *ring, > u32 value); > - int __must_check (*flush)(struct intel_engine_cs *ring, > + int __must_check (*flush)(struct drm_i915_gem_request *req, > u32 invalidate_domains, > u32 flush_domains); > - int (*add_request)(struct intel_engine_cs *ring); > + int (*add_request)(struct drm_i915_gem_request *req); > /* Some chipsets are not quite as coherent as advertised and need > * an expensive kick to force a true read of the up-to-date seqno. > * However, the up-to-date seqno is not always required and the last > @@ -173,7 +173,7 @@ struct intel_engine_cs { > bool lazy_coherency); > void (*set_seqno)(struct intel_engine_cs *ring, > u32 seqno); > - int (*dispatch_execbuffer)(struct intel_engine_cs *ring, > + int (*dispatch_execbuffer)(struct drm_i915_gem_request *req, > u64 offset, u32 length, > unsigned dispatch_flags); > #define I915_DISPATCH_SECURE 0x1 > @@ -231,10 +231,10 @@ struct intel_engine_cs { > }; > > /* AKA wait() */ > - int (*sync_to)(struct intel_engine_cs *ring, > - struct intel_engine_cs *to, > + int (*sync_to)(struct drm_i915_gem_request *to_req, > + struct intel_engine_cs *from, > u32 seqno); > - int (*signal)(struct intel_engine_cs *signaller, > + int (*signal)(struct drm_i915_gem_request *signaller_req, > /* num_dwords needed by caller */ > unsigned int num_dwords); > } semaphore; > @@ -245,11 +245,11 @@ struct intel_engine_cs { > struct list_head execlist_retired_req_list; > u8 next_context_status_buffer; > u32 irq_keep_mask; /* bitmask for interrupts that should not be masked */ > - int (*emit_request)(struct intel_ringbuffer *ringbuf); > - int (*emit_flush)(struct intel_ringbuffer *ringbuf, > + int (*emit_request)(struct drm_i915_gem_request *req); > + int (*emit_flush)(struct drm_i915_gem_request *req, > u32 invalidate_domains, > u32 flush_domains); > - int (*emit_bb_start)(struct intel_ringbuffer *ringbuf, > + int (*emit_bb_start)(struct drm_i915_gem_request *req, > u64 offset, unsigned flags); > > /** > @@ -433,10 +433,11 @@ int intel_alloc_ringbuffer_obj(struct drm_device *dev, > void intel_stop_ring_buffer(struct intel_engine_cs *ring); > void intel_cleanup_ring_buffer(struct intel_engine_cs *ring); > > -int __must_check intel_ring_begin(struct intel_engine_cs *ring, int n); > -int __must_check intel_ring_cacheline_align(struct intel_engine_cs *ring); > +int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n); > +int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req); > int __must_check intel_ring_alloc_request(struct intel_engine_cs *ring, > - struct intel_context *ctx); > + struct intel_context *ctx, > + struct drm_i915_gem_request **req_out); > static 
inline void intel_ring_emit(struct intel_engine_cs *ring, > u32 data) > { > @@ -457,8 +458,8 @@ void __intel_ring_advance(struct intel_engine_cs *ring); > > int __must_check intel_ring_idle(struct intel_engine_cs *ring); > void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno); > -int intel_ring_flush_all_caches(struct intel_engine_cs *ring); > -int intel_ring_invalidate_all_caches(struct intel_engine_cs *ring); > +int intel_ring_flush_all_caches(struct drm_i915_gem_request *req); > +int intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req); > > void intel_fini_pipe_control(struct intel_engine_cs *ring); > int intel_init_pipe_control(struct intel_engine_cs *ring); > @@ -479,11 +480,4 @@ static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf) > return ringbuf->tail; > } > > -static inline struct drm_i915_gem_request * > -intel_ring_get_request(struct intel_engine_cs *ring) > -{ > - BUG_ON(ring->outstanding_lazy_request == NULL); > - return ring->outstanding_lazy_request; > -} > - > #endif /* _INTEL_RINGBUFFER_H_ */
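[Editor's note: to make the shape of the conversion easier to see at a glance, below is a condensed sketch of the request-owned submission pattern that the converted call sites above (e.g. intel_overlay_on() or ironlake_enable_rc6()) follow. It is illustration only, built from the new signatures in the diff: the function name and the commands emitted are hypothetical, and error unwind is simplified compared to the real call sites.]

/*
 * Illustrative sketch only (not part of the patch): a top-level caller
 * allocates, fills and submits its own request instead of leaning on
 * the ring's outstanding lazy request.
 */
static int emit_example_commands(struct drm_i915_private *dev_priv,
				 struct intel_engine_cs *ring)
{
	struct drm_i915_gem_request *req;
	int ret;

	/* Explicitly allocate the request up front (legacy ring or execlists
	 * behind the same vfunc) rather than relying on the OLR. */
	ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
	if (ret)
		return ret;

	/* Reserve ring space against the request rather than the engine. */
	ret = intel_ring_begin(req, 2);
	if (ret)
		return ret; /* error unwind simplified; see the real call sites */

	/* Emit commands exactly as before. */
	intel_ring_emit(req->ring, MI_NOOP);
	intel_ring_emit(req->ring, MI_NOOP);
	intel_ring_advance(req->ring);

	/* The caller owns the request, so the caller submits it. */
	return i915_add_request(req);
}

[The point of the pattern is that allocation, emission and submission all happen in one place, so ownership of the work is explicit instead of being hidden behind the lazy request.]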
On Fri, Dec 19, 2014 at 02:41:05PM +0000, John.C.Harrison@Intel.com wrote:
> [snip quoted commit message]

Summarizing offline discussions from a meeting about John's RFC here:

I definitely like where this is going: using requests as the primary object to submit work to the GPU should simplify our code a lot, and getting rid of the OLR will remove a lot of the accidental complexity in GEM.

I also looked at some of the details here with John, specifically how he shuffles the init_hw functions around a bit so that a single request wraps all of the ring init (default ctx, ppgtt, L3 remapping). For the details it'd be good to discuss this all with Chris, since he's got a working PoC for this, just to make sure you know about all the dragons potentially lurking around.
-Daniel > --- > drivers/gpu/drm/i915/i915_drv.h | 29 ++-- > drivers/gpu/drm/i915/i915_gem.c | 182 ++++++++++++-------- > drivers/gpu/drm/i915/i915_gem_context.c | 69 +++----- > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 62 +++---- > drivers/gpu/drm/i915/i915_gem_gtt.c | 64 ++++---- > drivers/gpu/drm/i915/i915_gem_gtt.h | 3 +- > drivers/gpu/drm/i915/i915_gem_render_state.c | 10 +- > drivers/gpu/drm/i915/i915_gem_render_state.h | 2 +- > drivers/gpu/drm/i915/intel_display.c | 68 ++++---- > drivers/gpu/drm/i915/intel_lrc.c | 145 +++++++++------- > drivers/gpu/drm/i915/intel_lrc.h | 8 +- > drivers/gpu/drm/i915/intel_overlay.c | 58 ++++--- > drivers/gpu/drm/i915/intel_pm.c | 33 ++-- > drivers/gpu/drm/i915/intel_ringbuffer.c | 228 ++++++++++++++------------ > drivers/gpu/drm/i915/intel_ringbuffer.h | 38 ++--- > 15 files changed, 553 insertions(+), 446 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 511f55f..7b4309e 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -513,7 +513,7 @@ struct drm_i915_display_funcs { > int (*queue_flip)(struct drm_device *dev, struct drm_crtc *crtc, > struct drm_framebuffer *fb, > struct drm_i915_gem_object *obj, > - struct intel_engine_cs *ring, > + struct drm_i915_gem_request *req, > uint32_t flags); > void (*update_primary_plane)(struct drm_crtc *crtc, > struct drm_framebuffer *fb, > @@ -1796,7 +1796,8 @@ struct drm_i915_private { > /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */ > struct { > int (*alloc_request)(struct intel_engine_cs *ring, > - struct intel_context *ctx); > + struct intel_context *ctx, > + struct drm_i915_gem_request **req_out); > int (*do_execbuf)(struct i915_execbuffer_params *params, > struct drm_i915_gem_execbuffer2 *args, > struct list_head *vmas); > @@ -2511,10 +2512,10 @@ int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, > int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, > struct drm_file *file_priv); > void i915_gem_execbuffer_move_to_active(struct list_head *vmas, > - struct intel_engine_cs *ring); > + struct drm_i915_gem_request *req); > void i915_gem_execbuffer_retire_commands(struct drm_device *dev, > struct drm_file *file, > - struct intel_engine_cs *ring, > + struct drm_i915_gem_request *req, > struct drm_i915_gem_object *obj); > void i915_gem_execbuff_release_batch_obj(struct drm_i915_gem_object *batch_obj); > int i915_gem_ringbuffer_submission(struct i915_execbuffer_params *qe, > @@ -2609,9 +2610,9 @@ int __must_check __i915_mutex_lock_interruptible(struct drm_device *dev, const c > int __must_check i915_mutex_lock_interruptible(struct drm_device *dev); > #endif > int i915_gem_object_sync(struct drm_i915_gem_object *obj, > - struct intel_engine_cs *to, bool add_request); > + struct drm_i915_gem_request *to_req); > void i915_vma_move_to_active(struct i915_vma *vma, > - struct intel_engine_cs *ring); > + struct drm_i915_gem_request *req); > int i915_gem_dumb_create(struct drm_file *file_priv, > struct drm_device *dev, > struct drm_mode_create_dumb *args); > @@ -2678,19 +2679,19 @@ int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj); > int __must_check i915_gem_init(struct drm_device *dev); > int i915_gem_init_rings(struct drm_device *dev); > int __must_check i915_gem_init_hw(struct drm_device *dev); > -int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice); > +int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice); > void 
i915_gem_init_swizzling(struct drm_device *dev); > void i915_gem_cleanup_ringbuffer(struct drm_device *dev); > int __must_check i915_gpu_idle(struct drm_device *dev); > int __must_check i915_gem_suspend(struct drm_device *dev); > -int __i915_add_request(struct intel_engine_cs *ring, > +int __i915_add_request(struct drm_i915_gem_request *req, > struct drm_file *file, > struct drm_i915_gem_object *batch_obj, > bool flush_caches); > -#define i915_add_request(ring) \ > - __i915_add_request(ring, NULL, NULL, true) > -#define i915_add_request_no_flush(ring) \ > - __i915_add_request(ring, NULL, NULL, false) > +#define i915_add_request(req) \ > + __i915_add_request(req, NULL, NULL, true) > +#define i915_add_request_no_flush(req) \ > + __i915_add_request(req, NULL, NULL, false) > int __i915_wait_request(struct drm_i915_gem_request *req, > unsigned reset_counter, > bool interruptible, > @@ -2810,9 +2811,9 @@ int __must_check i915_gem_context_init(struct drm_device *dev); > void i915_gem_context_fini(struct drm_device *dev); > void i915_gem_context_reset(struct drm_device *dev); > int i915_gem_context_open(struct drm_device *dev, struct drm_file *file); > -int i915_gem_context_enable(struct drm_i915_private *dev_priv); > +int i915_gem_context_enable(struct drm_i915_gem_request *req); > void i915_gem_context_close(struct drm_device *dev, struct drm_file *file); > -int i915_switch_context(struct intel_engine_cs *ring, > +int i915_switch_context(struct drm_i915_gem_request *req, > struct intel_context *to); > struct intel_context * > i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id); > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 1d2cbfb..dbfb4e5 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -1178,7 +1178,7 @@ i915_gem_check_olr(struct drm_i915_gem_request *req) > > ret = 0; > if (req == req->ring->outstanding_lazy_request) > - ret = i915_add_request(req->ring); > + ret = i915_add_request(req); > > return ret; > } > @@ -2294,17 +2294,16 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj) > > static void > i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, > - struct intel_engine_cs *ring) > + struct drm_i915_gem_request *req) > { > - struct drm_i915_gem_request *req; > - struct intel_engine_cs *old_ring; > + struct intel_engine_cs *new_ring, *old_ring; > > - BUG_ON(ring == NULL); > + BUG_ON(req == NULL); > > - req = intel_ring_get_request(ring); > + new_ring = i915_gem_request_get_ring(req); > old_ring = i915_gem_request_get_ring(obj->last_read_req); > > - if (old_ring != ring && obj->last_write_req) { > + if (old_ring != new_ring && obj->last_write_req) { > /* Keep the request relative to the current ring */ > i915_gem_request_assign(&obj->last_write_req, req); > } > @@ -2315,17 +2314,17 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, > obj->active = 1; > } > > - list_move_tail(&obj->ring_list, &ring->active_list); > + list_move_tail(&obj->ring_list, &new_ring->active_list); > > - //printk(KERN_INFO "%s:%d> <%s> obj = %p, last_read_req <= 0x%p\n", __func__, __LINE__, ring->name, obj, req); > + //printk(KERN_INFO "%s:%d> <%s> obj = %p, last_read_req <= 0x%p\n", __func__, __LINE__, new_ring->name, obj, req); > i915_gem_request_assign(&obj->last_read_req, req); > } > > void i915_vma_move_to_active(struct i915_vma *vma, > - struct intel_engine_cs *ring) > + struct drm_i915_gem_request *req) > { > list_move_tail(&vma->mm_list, &vma->vm->active_list); > - 
return i915_gem_object_move_to_active(vma->obj, ring); > + return i915_gem_object_move_to_active(vma->obj, req); > } > > static void > @@ -2440,26 +2439,35 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) > return 0; > } > > -int __i915_add_request(struct intel_engine_cs *ring, > +int __i915_add_request(struct drm_i915_gem_request *request, > struct drm_file *file, > struct drm_i915_gem_object *obj, > bool flush_caches) > { > - struct drm_i915_private *dev_priv = ring->dev->dev_private; > - struct drm_i915_gem_request *request; > + struct intel_engine_cs *ring; > + struct drm_i915_private *dev_priv; > struct intel_ringbuffer *ringbuf; > u32 request_ring_position, request_start; > int ret; > > - request = ring->outstanding_lazy_request; > + /*printk( KERN_ERR "<%s> request %c %d:%d, OLR %c %d:%d\n", > + request ? request->ring->name : "???", > + request ? '=' : '?', > + request ? request->uniq : -1, > + request ? request->seqno : 0, > + request->ring->outstanding_lazy_request ? '=' : '?', > + request->ring->outstanding_lazy_request ? request->ring->outstanding_lazy_request->uniq : -1, > + request->ring->outstanding_lazy_request ? request->ring->outstanding_lazy_request->seqno : 0);*/ > + //dump_stack(); > + > if (WARN_ON(request == NULL)) > return -ENOMEM; > > - if (i915.enable_execlists) { > - struct intel_context *ctx = request->ctx; > - ringbuf = ctx->engine[ring->id].ringbuf; > - } else > - ringbuf = ring->buffer; > + ring = request->ring; > + dev_priv = ring->dev->dev_private; > + ringbuf = request->ringbuf; > + > + WARN_ON(request != ring->outstanding_lazy_request); > > request_start = intel_ring_get_tail(ringbuf); > /* > @@ -2471,9 +2479,9 @@ int __i915_add_request(struct intel_engine_cs *ring, > */ > if (flush_caches) { > if (i915.enable_execlists) > - ret = logical_ring_flush_all_caches(ringbuf); > + ret = logical_ring_flush_all_caches(request); > else > - ret = intel_ring_flush_all_caches(ring); > + ret = intel_ring_flush_all_caches(request); > if (ret) > return ret; > } > @@ -2488,9 +2496,9 @@ int __i915_add_request(struct intel_engine_cs *ring, > request_ring_position = intel_ring_get_tail(ringbuf); > > if (i915.enable_execlists) > - ret = ring->emit_request(ringbuf); > + ret = ring->emit_request(request); > else > - ret = ring->add_request(ring); > + ret = ring->add_request(request); > if (ret) > return ret; > > @@ -2504,7 +2512,8 @@ int __i915_add_request(struct intel_engine_cs *ring, > * inactive_list and lose its active reference. Hence we do not need > * to explicitly hold another reference here. > */ > - request->batch_obj = obj; > + if (obj) > + request->batch_obj = obj; > > if (!i915.enable_execlists) { > /* Hold a reference to the current context so that we can inspect > @@ -2744,6 +2753,7 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, > #endif > > /* This may not have been flushed before the reset, so clean it now */ > + WARN_ON(ring->outstanding_lazy_request); > i915_gem_request_assign(&ring->outstanding_lazy_request, NULL); > } > > @@ -3114,8 +3124,6 @@ out: > * > * @obj: object which may be in use on another ring. > * @to: ring we wish to use the object on. May be NULL. > - * @add_request: do we need to add a request to track operations > - * submitted on ring with sync_to function > * > * This code is meant to abstract object synchronization with the GPU. 
> * Calling with NULL implies synchronizing the object with the CPU > @@ -3125,8 +3133,9 @@ out: > */ > int > i915_gem_object_sync(struct drm_i915_gem_object *obj, > - struct intel_engine_cs *to, bool add_request) > + struct drm_i915_gem_request *to_req) > { > + struct intel_engine_cs *to = to_req->ring; > struct intel_engine_cs *from; > u32 seqno; > int ret, idx; > @@ -3152,7 +3161,7 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, > return ret; > > trace_i915_gem_ring_sync_to(from, to, obj->last_read_req); > - ret = to->semaphore.sync_to(to, from, seqno); > + ret = to->semaphore.sync_to(to_req, from, seqno); > if (!ret) { > /* We use last_read_req because sync_to() > * might have just caused seqno wrap under > @@ -3160,8 +3169,6 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, > */ > from->semaphore.sync_seqno[idx] = > i915_gem_request_get_seqno(obj->last_read_req); > - if (add_request) > - i915_add_request_no_flush(to); > } > > return ret; > @@ -3266,18 +3273,23 @@ int i915_gpu_idle(struct drm_device *dev) > /* Flush everything onto the inactive list. */ > for_each_ring(ring, dev_priv, i) { > if (!i915.enable_execlists) { > - ret = i915_switch_context(ring, ring->default_context); > + struct drm_i915_gem_request *req; > + > + ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req); > if (ret) > return ret; > - } > > - /* Make sure the context switch (if one actually happened) > - * gets wrapped up and finished rather than hanging around > - * and confusing things later. */ > - if (ring->outstanding_lazy_request) { > - ret = i915_add_request(ring); > - if (ret) > + ret = i915_switch_context(req, ring->default_context); > + if (ret) { > + i915_gem_request_unreference(req); > return ret; > + } > + > + ret = i915_add_request_no_flush(req); > + if (ret) { > + i915_gem_request_unreference(req); > + return ret; > + } > } > > ret = intel_ring_idle(ring); > @@ -4099,8 +4111,19 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, > bool was_pin_display; > int ret; > > - if (pipelined != i915_gem_request_get_ring(obj->last_read_req)) { > - ret = i915_gem_object_sync(obj, pipelined, true); > + if (pipelined && (pipelined != i915_gem_request_get_ring(obj->last_read_req))) { > + struct drm_i915_private *dev_priv = pipelined->dev->dev_private; > + struct drm_i915_gem_request *req; > + > + ret = dev_priv->gt.alloc_request(pipelined, pipelined->default_context, &req); > + if (ret) > + return ret; > + > + ret = i915_gem_object_sync(obj, req); > + if (ret) > + return ret; > + > + ret = i915_add_request_no_flush(req); > if (ret) > return ret; > } > @@ -4771,8 +4794,9 @@ err: > return ret; > } > > -int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice) > +int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice) > { > + struct intel_engine_cs *ring = req->ring; > struct drm_device *dev = ring->dev; > struct drm_i915_private *dev_priv = dev->dev_private; > u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200); > @@ -4782,7 +4806,7 @@ int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice) > if (!HAS_L3_DPF(dev) || !remap_info) > return 0; > > - ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3); > + ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3); > if (ret) > return ret; > > @@ -4962,37 +4986,67 @@ i915_gem_init_hw(struct drm_device *dev) > */ > init_unused_rings(dev); > > + BUG_ON(!dev_priv->ring[RCS].default_context); > + > + ret = i915_ppgtt_init_hw(dev); > + if (ret) { > + DRM_ERROR("PPGTT enable failed %d\n", ret); > + 
i915_gem_cleanup_ringbuffer(dev); > + return ret; > + } > + > for_each_ring(ring, dev_priv, i) { > + struct drm_i915_gem_request *req; > + > ret = ring->init_hw(ring); > if (ret) > return ret; > - } > > - for (i = 0; i < NUM_L3_SLICES(dev); i++) > - i915_gem_l3_remap(&dev_priv->ring[RCS], i); > + if (!ring->default_context) > + continue; > > - /* > - * XXX: Contexts should only be initialized once. Doing a switch to the > - * default context switch however is something we'd like to do after > - * reset or thaw (the latter may not actually be necessary for HW, but > - * goes with our code better). Context switching requires rings (for > - * the do_switch), but before enabling PPGTT. So don't move this. > - */ > - ret = i915_gem_context_enable(dev_priv); > - if (ret && ret != -EIO) { > - DRM_ERROR("Context enable failed %d\n", ret); > - i915_gem_cleanup_ringbuffer(dev); > + ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req); > + if (ret) > + return ret; > > - return ret; > - } > + if (ring->id == RCS) { > + for (i = 0; i < NUM_L3_SLICES(dev); i++) > + i915_gem_l3_remap(req, i); > + } > > - ret = i915_ppgtt_init_hw(dev); > - if (ret && ret != -EIO) { > - DRM_ERROR("PPGTT enable failed %d\n", ret); > - i915_gem_cleanup_ringbuffer(dev); > + /* > + * XXX: Contexts should only be initialized once. Doing a switch to the > + * default context switch however is something we'd like to do after > + * reset or thaw (the latter may not actually be necessary for HW, but > + * goes with our code better). Context switching requires rings (for > + * the do_switch), but before enabling PPGTT. So don't move this. > + */ > + ret = i915_gem_context_enable(req); > + if (ret && ret != -EIO) { > + DRM_ERROR("Context enable failed %d\n", ret); > + i915_gem_request_unreference(req); > + i915_gem_cleanup_ringbuffer(dev); > + > + return ret; > + } > + > + ret = i915_ppgtt_init_ring(req); > + if (ret && ret != -EIO) { > + DRM_ERROR("PPGTT enable failed %d\n", ret); > + i915_gem_request_unreference(req); > + i915_gem_cleanup_ringbuffer(dev); > + } > + > + ret = i915_add_request_no_flush(req); > + if (ret) { > + DRM_ERROR("Add request failed: %d\n", ret); > + i915_gem_request_unreference(req); > + i915_gem_cleanup_ringbuffer(dev); > + return ret; > + } > } > > - return ret; > + return 0; > } > > int i915_gem_init(struct drm_device *dev) > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c > index c5e1bfc..72e280b 100644 > --- a/drivers/gpu/drm/i915/i915_gem_context.c > +++ b/drivers/gpu/drm/i915/i915_gem_context.c > @@ -401,41 +401,23 @@ void i915_gem_context_fini(struct drm_device *dev) > i915_gem_context_unreference(dctx); > } > > -int i915_gem_context_enable(struct drm_i915_private *dev_priv) > +int i915_gem_context_enable(struct drm_i915_gem_request *req) > { > - struct intel_engine_cs *ring; > - int ret, i; > - > - BUG_ON(!dev_priv->ring[RCS].default_context); > + struct intel_engine_cs *ring = req->ring; > + int ret; > > if (i915.enable_execlists) { > - for_each_ring(ring, dev_priv, i) { > - if (ring->init_context) { > - ret = ring->init_context(ring, > - ring->default_context); > - if (ret) { > - DRM_ERROR("ring init context: %d\n", > - ret); > - return ret; > - } > - } > - } > + if (ring->init_context == NULL) > + return 0; > > + ret = ring->init_context(req, ring->default_context); > } else > - for_each_ring(ring, dev_priv, i) { > - ret = i915_switch_context(ring, ring->default_context); > - if (ret) > - return ret; > - > - /* Make sure the context 
switch (if one actually happened) > - * gets wrapped up and finished rather than hanging around > - * and confusing things later. */ > - if (ring->outstanding_lazy_request) { > - ret = i915_add_request_no_flush(ring); > - if (ret) > - return ret; > - } > - } > + ret = i915_switch_context(req, ring->default_context); > + > + if (ret) { > + DRM_ERROR("ring init context: %d\n", ret); > + return ret; > + } > > return 0; > } > @@ -488,10 +470,11 @@ i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id) > } > > static inline int > -mi_set_context(struct intel_engine_cs *ring, > +mi_set_context(struct drm_i915_gem_request *req, > struct intel_context *new_context, > u32 hw_flags) > { > + struct intel_engine_cs *ring = req->ring; > u32 flags = hw_flags | MI_MM_SPACE_GTT; > int ret; > > @@ -501,7 +484,7 @@ mi_set_context(struct intel_engine_cs *ring, > * itlb_before_ctx_switch. > */ > if (IS_GEN6(ring->dev)) { > - ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, 0); > + ret = ring->flush(req, I915_GEM_GPU_DOMAINS, 0); > if (ret) > return ret; > } > @@ -510,7 +493,7 @@ mi_set_context(struct intel_engine_cs *ring, > if (!IS_HASWELL(ring->dev) && INTEL_INFO(ring->dev)->gen < 8) > flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN); > > - ret = intel_ring_begin(ring, 6); > + ret = intel_ring_begin(req, 6); > if (ret) > return ret; > > @@ -540,9 +523,10 @@ mi_set_context(struct intel_engine_cs *ring, > return ret; > } > > -static int do_switch(struct intel_engine_cs *ring, > +static int do_switch(struct drm_i915_gem_request *req, > struct intel_context *to) > { > + struct intel_engine_cs *ring = req->ring; > struct drm_i915_private *dev_priv = ring->dev->dev_private; > struct intel_context *from = ring->last_context; > u32 hw_flags = 0; > @@ -577,7 +561,7 @@ static int do_switch(struct intel_engine_cs *ring, > > if (to->ppgtt) { > trace_switch_mm(ring, to); > - ret = to->ppgtt->switch_mm(to->ppgtt, ring); > + ret = to->ppgtt->switch_mm(to->ppgtt, req); > if (ret) > goto unpin_out; > } > @@ -608,7 +592,7 @@ static int do_switch(struct intel_engine_cs *ring, > if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to)) > hw_flags |= MI_RESTORE_INHIBIT; > > - ret = mi_set_context(ring, to, hw_flags); > + ret = mi_set_context(req, to, hw_flags); > if (ret) > goto unpin_out; > > @@ -616,7 +600,7 @@ static int do_switch(struct intel_engine_cs *ring, > if (!(to->remap_slice & (1<<i))) > continue; > > - ret = i915_gem_l3_remap(ring, i); > + ret = i915_gem_l3_remap(req, i); > /* If it failed, try again next round */ > if (ret) > DRM_DEBUG_DRIVER("L3 remapping failed\n"); > @@ -632,7 +616,7 @@ static int do_switch(struct intel_engine_cs *ring, > */ > if (from != NULL) { > from->legacy_hw_ctx.rcs_state->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION; > - i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->legacy_hw_ctx.rcs_state), ring); > + i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->legacy_hw_ctx.rcs_state), req); > /* As long as MI_SET_CONTEXT is serializing, ie. it flushes the > * whole damn pipeline, we don't need to explicitly mark the > * object dirty. 
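
(Stepping outside the diff for a moment, since this is the crux of the change: every converted top-level call site now follows the same ownership pattern. The fragment below is a hand-written sketch of that pattern, not a hunk from the patch - do_work_on_request() is a made-up placeholder for whatever gets emitted on the request, e.g. i915_switch_context(), and the error handling is condensed compared with the real call sites.)

    struct drm_i915_gem_request *req;
    int ret;

    /* The top level owner explicitly allocates the request... */
    ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
    if (ret)
        return ret;

    /* ...hands it to whoever emits commands on its behalf... */
    ret = do_work_on_request(req);
    if (ret) {
        i915_gem_request_unreference(req);
        return ret;
    }

    /* ...and then either submits it or, on failure, drops the reference. */
    return i915_add_request_no_flush(req);

The point is that nothing is ever left floating on the ring as an outstanding lazy request: one caller owns the request from allocation through to submission.
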
The only exception is that the context must be > @@ -658,12 +642,12 @@ done: > > if (uninitialized) { > if (ring->init_context) { > - ret = ring->init_context(ring, to); > + ret = ring->init_context(req, to); > if (ret) > DRM_ERROR("ring init context: %d\n", ret); > } > > - ret = i915_gem_render_state_init(ring); > + ret = i915_gem_render_state_init(req); > if (ret) > DRM_ERROR("init render state: %d\n", ret); > } > @@ -690,9 +674,10 @@ unpin_out: > * switched by writing to the ELSP and requests keep a reference to their > * context. > */ > -int i915_switch_context(struct intel_engine_cs *ring, > +int i915_switch_context(struct drm_i915_gem_request *req, > struct intel_context *to) > { > + struct intel_engine_cs *ring = req->ring; > struct drm_i915_private *dev_priv = ring->dev->dev_private; > > WARN_ON(i915.enable_execlists); > @@ -708,7 +693,7 @@ int i915_switch_context(struct intel_engine_cs *ring, > return 0; > } > > - return do_switch(ring, to); > + return do_switch(req, to); > } > > static bool contexts_enabled(struct drm_device *dev) > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index ca31673..5caa2a2 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -822,7 +822,7 @@ err: > } > > static int > -i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring, > +i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, > struct list_head *vmas) > { > struct i915_vma *vma; > @@ -832,7 +832,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring, > > list_for_each_entry(vma, vmas, exec_list) { > struct drm_i915_gem_object *obj = vma->obj; > - ret = i915_gem_object_sync(obj, ring, false); > + ret = i915_gem_object_sync(obj, req); > if (ret) > return ret; > > @@ -843,7 +843,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring, > } > > if (flush_chipset) > - i915_gem_chipset_flush(ring->dev); > + i915_gem_chipset_flush(req->ring->dev); > > if (flush_domains & I915_GEM_DOMAIN_GTT) > wmb(); > @@ -941,9 +941,9 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, > > void > i915_gem_execbuffer_move_to_active(struct list_head *vmas, > - struct intel_engine_cs *ring) > + struct drm_i915_gem_request *req) > { > - struct drm_i915_gem_request *req = intel_ring_get_request(ring); > + struct intel_engine_cs *ring = i915_gem_request_get_ring(req); > struct i915_vma *vma; > > list_for_each_entry(vma, vmas, exec_list) { > @@ -957,7 +957,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, > obj->base.pending_read_domains |= obj->base.read_domains; > obj->base.read_domains = obj->base.pending_read_domains; > > - i915_vma_move_to_active(vma, ring); > + i915_vma_move_to_active(vma, req); > if (obj->base.write_domain) { > obj->dirty = 1; > i915_gem_request_assign(&obj->last_write_req, req); > @@ -983,20 +983,21 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, > void > i915_gem_execbuffer_retire_commands(struct drm_device *dev, > struct drm_file *file, > - struct intel_engine_cs *ring, > + struct drm_i915_gem_request *req, > struct drm_i915_gem_object *obj) > { > /* Unconditionally force add_request to emit a full flush. 
*/ > - ring->gpu_caches_dirty = true; > + req->ring->gpu_caches_dirty = true; > > /* Add a breadcrumb for the completion of the batch buffer */ > - (void)__i915_add_request(ring, file, obj, true); > + (void)__i915_add_request(req, file, obj, true); > } > > static int > i915_reset_gen7_sol_offsets(struct drm_device *dev, > - struct intel_engine_cs *ring) > + struct drm_i915_gem_request *req) > { > + struct intel_engine_cs *ring = req->ring; > struct drm_i915_private *dev_priv = dev->dev_private; > int ret, i; > > @@ -1005,7 +1006,7 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev, > return -EINVAL; > } > > - ret = intel_ring_begin(ring, 4 * 3); > + ret = intel_ring_begin(req, 4 * 3); > if (ret) > return ret; > > @@ -1021,10 +1022,11 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev, > } > > static int > -i915_emit_box(struct intel_engine_cs *ring, > +i915_emit_box(struct drm_i915_gem_request *req, > struct drm_clip_rect *box, > int DR1, int DR4) > { > + struct intel_engine_cs *ring = req->ring; > int ret; > > if (box->y2 <= box->y1 || box->x2 <= box->x1 || > @@ -1035,7 +1037,7 @@ i915_emit_box(struct intel_engine_cs *ring, > } > > if (INTEL_INFO(ring->dev)->gen >= 4) { > - ret = intel_ring_begin(ring, 4); > + ret = intel_ring_begin(req, 4); > if (ret) > return ret; > > @@ -1044,7 +1046,7 @@ i915_emit_box(struct intel_engine_cs *ring, > intel_ring_emit(ring, ((box->x2 - 1) & 0xffff) | (box->y2 - 1) << 16); > intel_ring_emit(ring, DR4); > } else { > - ret = intel_ring_begin(ring, 6); > + ret = intel_ring_begin(req, 6); > if (ret) > return ret; > > @@ -1151,11 +1153,11 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, > goto error; > } > > - ret = i915_gem_execbuffer_move_to_gpu(ring, vmas); > + ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas); > if (ret) > goto error; > > - i915_gem_execbuffer_move_to_active(vmas, ring); > + i915_gem_execbuffer_move_to_active(vmas, params->request); > > /* Make sure the OLR hasn't advanced (which would indicate a flush > * of the work in progress which in turn would be a Bad Thing). */ > @@ -1200,18 +1202,18 @@ int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params) > /* Unconditionally invalidate gpu caches and ensure that we do flush > * any residual writes from the previous batch. 
> */ > - ret = intel_ring_invalidate_all_caches(ring); > + ret = intel_ring_invalidate_all_caches(params->request); > if (ret) > goto error; > > /* Switch to the correct context for the batch */ > - ret = i915_switch_context(ring, params->ctx); > + ret = i915_switch_context(params->request, params->ctx); > if (ret) > goto error; > > if (ring == &dev_priv->ring[RCS] && > params->instp_mode != dev_priv->relative_constants_mode) { > - ret = intel_ring_begin(ring, 4); > + ret = intel_ring_begin(params->request, 4); > if (ret) > goto error; > > @@ -1225,7 +1227,7 @@ int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params) > } > > if (params->args_flags & I915_EXEC_GEN7_SOL_RESET) { > - ret = i915_reset_gen7_sol_offsets(params->dev, ring); > + ret = i915_reset_gen7_sol_offsets(params->dev, params->request); > if (ret) > goto error; > } > @@ -1236,29 +1238,31 @@ int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params) > > if (params->cliprects) { > for (i = 0; i < params->args_num_cliprects; i++) { > - ret = i915_emit_box(ring, ¶ms->cliprects[i], > - params->args_DR1, params->args_DR4); > + ret = i915_emit_box(params->request, > + ¶ms->cliprects[i], > + params->args_DR1, > + params->args_DR4); > if (ret) > goto error; > > - ret = ring->dispatch_execbuffer(ring, > + ret = ring->dispatch_execbuffer(params->request, > exec_start, exec_len, > params->dispatch_flags); > if (ret) > goto error; > } > } else { > - ret = ring->dispatch_execbuffer(ring, > + ret = ring->dispatch_execbuffer(params->request, > exec_start, exec_len, > params->dispatch_flags); > if (ret) > goto error; > } > > - trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), params->dispatch_flags); > + trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags); > > - i915_gem_execbuffer_retire_commands(params->dev, params->file, ring, > - params->batch_obj); > + i915_gem_execbuffer_retire_commands(params->dev, params->file, > + params->request, params->batch_obj); > > error: > /* intel_gpu_busy should also get a ref, so it will free when the device > @@ -1490,10 +1494,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, > params->batch_obj_vm_offset = i915_gem_obj_offset(batch_obj, vm); > > /* Allocate a request for this batch buffer nice and early. 
*/ > - ret = dev_priv->gt.alloc_request(ring, ctx); > + ret = dev_priv->gt.alloc_request(ring, ctx, ¶ms->request); > if (ret) > goto err; > - params->request = ring->outstanding_lazy_request; > + WARN_ON(params->request != ring->outstanding_lazy_request); > > /* Save assorted stuff away to pass through to *_submission_final() */ > params->dev = dev; > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c > index 7eead93..776776e 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > @@ -213,14 +213,15 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr, > } > > /* Broadwell Page Directory Pointer Descriptors */ > -static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry, > - uint64_t val) > +static int gen8_write_pdp(struct drm_i915_gem_request *req, unsigned entry, > + uint64_t val) > { > + struct intel_engine_cs *ring = req->ring; > int ret; > > BUG_ON(entry >= 4); > > - ret = intel_ring_begin(ring, 6); > + ret = intel_ring_begin(req, 6); > if (ret) > return ret; > > @@ -236,7 +237,7 @@ static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry, > } > > static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt, > - struct intel_engine_cs *ring) > + struct drm_i915_gem_request *req) > { > int i, ret; > > @@ -245,7 +246,7 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt, > > for (i = used_pd - 1; i >= 0; i--) { > dma_addr_t addr = ppgtt->pd_dma_addr[i]; > - ret = gen8_write_pdp(ring, i, addr); > + ret = gen8_write_pdp(req, i, addr); > if (ret) > return ret; > } > @@ -710,16 +711,17 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) > } > > static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, > - struct intel_engine_cs *ring) > + struct drm_i915_gem_request *req) > { > + struct intel_engine_cs *ring = req->ring; > int ret; > > /* NB: TLBs must be flushed and invalidated before a switch */ > - ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); > + ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); > if (ret) > return ret; > > - ret = intel_ring_begin(ring, 6); > + ret = intel_ring_begin(req, 6); > if (ret) > return ret; > > @@ -735,16 +737,17 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, > } > > static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, > - struct intel_engine_cs *ring) > + struct drm_i915_gem_request *req) > { > + struct intel_engine_cs *ring = req->ring; > int ret; > > /* NB: TLBs must be flushed and invalidated before a switch */ > - ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); > + ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); > if (ret) > return ret; > > - ret = intel_ring_begin(ring, 6); > + ret = intel_ring_begin(req, 6); > if (ret) > return ret; > > @@ -758,7 +761,7 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, > > /* XXX: RCS is the only one to auto invalidate the TLBs? 
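
(A note on the execbuffer path a few hunks up: the request is now allocated right at the start of i915_gem_do_execbuffer() and carried inside the execbuffer params block all the way to the deferred *_submission_final() stage, so nothing has to be fished back out of the ring later. The lines below are a condensed illustration only - in the patch they are spread across the real functions:)

    ret = dev_priv->gt.alloc_request(ring, ctx, &params->request);
    if (ret)
        goto err;

    /* ...much later, in the deferred submission stage, everything keys
     * off params->request rather than off the ring: */
    ret = intel_ring_begin(params->request, 4);
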
*/ > if (ring->id != RCS) { > - ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); > + ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); > if (ret) > return ret; > } > @@ -767,8 +770,9 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, > } > > static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt, > - struct intel_engine_cs *ring) > + struct drm_i915_gem_request *req) > { > + struct intel_engine_cs *ring = req->ring; > struct drm_device *dev = ppgtt->base.dev; > struct drm_i915_private *dev_priv = dev->dev_private; > > @@ -1125,11 +1129,6 @@ int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) > > int i915_ppgtt_init_hw(struct drm_device *dev) > { > - struct drm_i915_private *dev_priv = dev->dev_private; > - struct intel_engine_cs *ring; > - struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; > - int i, ret = 0; > - > /* In the case of execlists, PPGTT is enabled by the context descriptor > * and the PDPs are contained within the context itself. We don't > * need to do anything here. */ > @@ -1148,25 +1147,20 @@ int i915_ppgtt_init_hw(struct drm_device *dev) > else > WARN_ON(1); > > - if (ppgtt) { > - for_each_ring(ring, dev_priv, i) { > - ret = ppgtt->switch_mm(ppgtt, ring); > - if (ret != 0) > - return ret; > - > - /* Make sure the context switch (if one actually happened) > - * gets wrapped up and finished rather than hanging around > - * and confusing things later. */ > - if (ring->outstanding_lazy_request) { > - ret = i915_add_request_no_flush(ring); > - if (ret) > - return ret; > - } > - } > - } > + return 0; > +} > > - return ret; > +int i915_ppgtt_init_ring(struct drm_i915_gem_request *req) > +{ > + struct drm_i915_private *dev_priv = req->ring->dev->dev_private; > + struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; > + > + if (!ppgtt) > + return 0; > + > + return ppgtt->switch_mm(ppgtt, req); > } > + > struct i915_hw_ppgtt * > i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv) > { > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h > index dd849df..bee3e2a 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.h > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h > @@ -267,7 +267,7 @@ struct i915_hw_ppgtt { > > int (*enable)(struct i915_hw_ppgtt *ppgtt); > int (*switch_mm)(struct i915_hw_ppgtt *ppgtt, > - struct intel_engine_cs *ring); > + struct drm_i915_gem_request *req); > void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m); > }; > > @@ -278,6 +278,7 @@ void i915_global_gtt_cleanup(struct drm_device *dev); > > int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt); > int i915_ppgtt_init_hw(struct drm_device *dev); > +int i915_ppgtt_init_ring(struct drm_i915_gem_request *req); > void i915_ppgtt_release(struct kref *kref); > struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_device *dev, > struct drm_i915_file_private *fpriv); > diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c > index aba39c3..0e0c23fe 100644 > --- a/drivers/gpu/drm/i915/i915_gem_render_state.c > +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c > @@ -152,8 +152,9 @@ int i915_gem_render_state_prepare(struct intel_engine_cs *ring, > return 0; > } > > -int i915_gem_render_state_init(struct intel_engine_cs *ring) > +int i915_gem_render_state_init(struct drm_i915_gem_request *req) > { > + struct intel_engine_cs *ring = i915_gem_request_get_ring(req); > struct render_state so; > int ret; > > @@ 
-164,16 +165,17 @@ int i915_gem_render_state_init(struct intel_engine_cs *ring) > if (so.rodata == NULL) > return 0; > > - ret = ring->dispatch_execbuffer(ring, > + ret = ring->dispatch_execbuffer(req, > so.ggtt_offset, > so.rodata->batch_items * 4, > I915_DISPATCH_SECURE); > if (ret) > goto out; > > - i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring); > + i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req); > > - ret = __i915_add_request(ring, NULL, so.obj, true); > +// ret = __i915_add_request(req, NULL, so.obj, true); > + req->batch_obj = so.obj; > /* __i915_add_request moves object to inactive if it fails */ > out: > i915_gem_render_state_fini(&so); > diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.h b/drivers/gpu/drm/i915/i915_gem_render_state.h > index c44961e..7aa7372 100644 > --- a/drivers/gpu/drm/i915/i915_gem_render_state.h > +++ b/drivers/gpu/drm/i915/i915_gem_render_state.h > @@ -39,7 +39,7 @@ struct render_state { > int gen; > }; > > -int i915_gem_render_state_init(struct intel_engine_cs *ring); > +int i915_gem_render_state_init(struct drm_i915_gem_request *req); > void i915_gem_render_state_fini(struct render_state *so); > int i915_gem_render_state_prepare(struct intel_engine_cs *ring, > struct render_state *so); > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c > index f0cf421..c0b0e37 100644 > --- a/drivers/gpu/drm/i915/intel_display.c > +++ b/drivers/gpu/drm/i915/intel_display.c > @@ -9089,14 +9089,15 @@ static int intel_gen2_queue_flip(struct drm_device *dev, > struct drm_crtc *crtc, > struct drm_framebuffer *fb, > struct drm_i915_gem_object *obj, > - struct intel_engine_cs *ring, > + struct drm_i915_gem_request *req, > uint32_t flags) > { > + struct intel_engine_cs *ring = req->ring; > struct intel_crtc *intel_crtc = to_intel_crtc(crtc); > u32 flip_mask; > int ret; > > - ret = intel_ring_begin(ring, 6); > + ret = intel_ring_begin(req, 6); > if (ret) > return ret; > > @@ -9116,7 +9117,7 @@ static int intel_gen2_queue_flip(struct drm_device *dev, > intel_ring_emit(ring, 0); /* aux display base address, unused */ > > intel_mark_page_flip_active(intel_crtc); > - i915_add_request_no_flush(ring); > + i915_add_request_no_flush(req); > return 0; > } > > @@ -9124,14 +9125,15 @@ static int intel_gen3_queue_flip(struct drm_device *dev, > struct drm_crtc *crtc, > struct drm_framebuffer *fb, > struct drm_i915_gem_object *obj, > - struct intel_engine_cs *ring, > + struct drm_i915_gem_request *req, > uint32_t flags) > { > + struct intel_engine_cs *ring = req->ring; > struct intel_crtc *intel_crtc = to_intel_crtc(crtc); > u32 flip_mask; > int ret; > > - ret = intel_ring_begin(ring, 6); > + ret = intel_ring_begin(req, 6); > if (ret) > return ret; > > @@ -9148,7 +9150,7 @@ static int intel_gen3_queue_flip(struct drm_device *dev, > intel_ring_emit(ring, MI_NOOP); > > intel_mark_page_flip_active(intel_crtc); > - i915_add_request_no_flush(ring); > + i915_add_request_no_flush(req); > return 0; > } > > @@ -9156,15 +9158,16 @@ static int intel_gen4_queue_flip(struct drm_device *dev, > struct drm_crtc *crtc, > struct drm_framebuffer *fb, > struct drm_i915_gem_object *obj, > - struct intel_engine_cs *ring, > + struct drm_i915_gem_request *req, > uint32_t flags) > { > + struct intel_engine_cs *ring = req->ring; > struct drm_i915_private *dev_priv = dev->dev_private; > struct intel_crtc *intel_crtc = to_intel_crtc(crtc); > uint32_t pf, pipesrc; > int ret; > > - ret = intel_ring_begin(ring, 4); > + ret = intel_ring_begin(req, 4); > if 
(ret) > return ret; > > @@ -9187,7 +9190,7 @@ static int intel_gen4_queue_flip(struct drm_device *dev, > intel_ring_emit(ring, pf | pipesrc); > > intel_mark_page_flip_active(intel_crtc); > - i915_add_request_no_flush(ring); > + i915_add_request_no_flush(req); > return 0; > } > > @@ -9195,15 +9198,16 @@ static int intel_gen6_queue_flip(struct drm_device *dev, > struct drm_crtc *crtc, > struct drm_framebuffer *fb, > struct drm_i915_gem_object *obj, > - struct intel_engine_cs *ring, > + struct drm_i915_gem_request *req, > uint32_t flags) > { > + struct intel_engine_cs *ring = req->ring; > struct drm_i915_private *dev_priv = dev->dev_private; > struct intel_crtc *intel_crtc = to_intel_crtc(crtc); > uint32_t pf, pipesrc; > int ret; > > - ret = intel_ring_begin(ring, 4); > + ret = intel_ring_begin(req, 4); > if (ret) > return ret; > > @@ -9223,7 +9227,7 @@ static int intel_gen6_queue_flip(struct drm_device *dev, > intel_ring_emit(ring, pf | pipesrc); > > intel_mark_page_flip_active(intel_crtc); > - i915_add_request_no_flush(ring); > + i915_add_request_no_flush(req); > return 0; > } > > @@ -9231,9 +9235,10 @@ static int intel_gen7_queue_flip(struct drm_device *dev, > struct drm_crtc *crtc, > struct drm_framebuffer *fb, > struct drm_i915_gem_object *obj, > - struct intel_engine_cs *ring, > + struct drm_i915_gem_request *req, > uint32_t flags) > { > + struct intel_engine_cs *ring = req->ring; > struct intel_crtc *intel_crtc = to_intel_crtc(crtc); > uint32_t plane_bit = 0; > int len, ret; > @@ -9275,11 +9280,11 @@ static int intel_gen7_queue_flip(struct drm_device *dev, > * then do the cacheline alignment, and finally emit the > * MI_DISPLAY_FLIP. > */ > - ret = intel_ring_cacheline_align(ring); > + ret = intel_ring_cacheline_align(req); > if (ret) > return ret; > > - ret = intel_ring_begin(ring, len); > + ret = intel_ring_begin(req, len); > if (ret) > return ret; > > @@ -9318,7 +9323,7 @@ static int intel_gen7_queue_flip(struct drm_device *dev, > intel_ring_emit(ring, (MI_NOOP)); > > intel_mark_page_flip_active(intel_crtc); > - i915_add_request_no_flush(ring); > + i915_add_request_no_flush(req); > return 0; > } > > @@ -9474,9 +9479,10 @@ static int intel_gen9_queue_flip(struct drm_device *dev, > struct drm_crtc *crtc, > struct drm_framebuffer *fb, > struct drm_i915_gem_object *obj, > - struct intel_engine_cs *ring, > + struct drm_i915_gem_request *req, > uint32_t flags) > { > + struct intel_engine_cs *ring = req->ring; > struct intel_crtc *intel_crtc = to_intel_crtc(crtc); > uint32_t plane = 0, stride; > int ret; > @@ -9508,7 +9514,7 @@ static int intel_gen9_queue_flip(struct drm_device *dev, > return -ENODEV; > } > > - ret = intel_ring_begin(ring, 10); > + ret = intel_ring_begin(req, 10); > if (ret) > return ret; > > @@ -9528,7 +9534,7 @@ static int intel_gen9_queue_flip(struct drm_device *dev, > intel_ring_emit(ring, intel_crtc->unpin_work->gtt_offset); > > intel_mark_page_flip_active(intel_crtc); > - i915_add_request_no_flush(ring); > + i915_add_request_no_flush(req); > > return 0; > } > @@ -9537,7 +9543,7 @@ static int intel_default_queue_flip(struct drm_device *dev, > struct drm_crtc *crtc, > struct drm_framebuffer *fb, > struct drm_i915_gem_object *obj, > - struct intel_engine_cs *ring, > + struct drm_i915_gem_request *req, > uint32_t flags) > { > return -ENODEV; > @@ -9729,22 +9735,18 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, > i915_gem_request_assign(&work->flip_queued_req, > obj->last_write_req); > } else { > - ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, ring, > 
+ struct drm_i915_gem_request *req; > + > + ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req); > + if (ret) > + return ret; > + > + i915_gem_request_assign(&work->flip_queued_req, req); > + > + ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, req, > page_flip_flags); > if (ret) > goto cleanup_unpin; > - > - /* Borked: need to get the seqno for the request submitted in > - * 'queue_flip()' above. However, either the request has been > - * posted already and the seqno is gone (q_f calls add_request), > - * or the request never gets posted and is merged into whatever > - * render comes along next (q_f calls ring_advance). > - * > - * On the other hand, seqnos are going away soon anyway! So > - * hopefully the problem will disappear... > - */ > - i915_gem_request_assign(&work->flip_queued_req, > - ring->outstanding_lazy_request ? intel_ring_get_request(ring) : NULL); > } > > work->flip_queued_vblank = drm_vblank_count(dev, intel_crtc->pipe); > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index 80cb87e..5077a77 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -203,6 +203,10 @@ enum { > }; > #define GEN8_CTX_ID_SHIFT 32 > > +static int intel_logical_ring_begin(struct drm_i915_gem_request *req, > + int num_dwords); > +static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req, > + struct intel_context *ctx); > static int intel_lr_context_pin(struct intel_engine_cs *ring, > struct intel_context *ctx); > > @@ -587,9 +591,9 @@ static int execlists_context_queue(struct intel_engine_cs *ring, > return 0; > } > > -static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf) > +static int logical_ring_invalidate_all_caches(struct drm_i915_gem_request *req) > { > - struct intel_engine_cs *ring = ringbuf->ring; > + struct intel_engine_cs *ring = req->ring; > uint32_t flush_domains; > int ret; > > @@ -597,7 +601,7 @@ static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf) > if (ring->gpu_caches_dirty) > flush_domains = I915_GEM_GPU_DOMAINS; > > - ret = ring->emit_flush(ringbuf, I915_GEM_GPU_DOMAINS, flush_domains); > + ret = ring->emit_flush(req, I915_GEM_GPU_DOMAINS, flush_domains); > if (ret) > return ret; > > @@ -605,10 +609,9 @@ static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf) > return 0; > } > > -static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf, > +static int execlists_move_to_gpu(struct drm_i915_gem_request *req, > struct list_head *vmas) > { > - struct intel_engine_cs *ring = ringbuf->ring; > struct i915_vma *vma; > uint32_t flush_domains = 0; > bool flush_chipset = false; > @@ -617,7 +620,7 @@ static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf, > list_for_each_entry(vma, vmas, exec_list) { > struct drm_i915_gem_object *obj = vma->obj; > > - ret = i915_gem_object_sync(obj, ring, true); > + ret = i915_gem_object_sync(obj, req); > if (ret) > return ret; > > @@ -657,7 +660,6 @@ int intel_execlists_submission(struct i915_execbuffer_params *params, > struct drm_device *dev = params->dev; > struct intel_engine_cs *ring = params->ring; > struct drm_i915_private *dev_priv = dev->dev_private; > - struct intel_ringbuffer *ringbuf = params->ctx->engine[ring->id].ringbuf; > int ret; > > params->instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK; > @@ -706,11 +708,11 @@ int intel_execlists_submission(struct i915_execbuffer_params *params, > return -EINVAL; > } > > - ret = 
execlists_move_to_gpu(ringbuf, vmas); > + ret = execlists_move_to_gpu(params->request, vmas); > if (ret) > return ret; > > - i915_gem_execbuffer_move_to_active(vmas, ring); > + i915_gem_execbuffer_move_to_active(vmas, params->request); > > ret = dev_priv->gt.do_execfinal(params); > if (ret) > @@ -742,13 +744,13 @@ int intel_execlists_submission_final(struct i915_execbuffer_params *params) > /* Unconditionally invalidate gpu caches and ensure that we do flush > * any residual writes from the previous batch. > */ > - ret = logical_ring_invalidate_all_caches(ringbuf); > + ret = logical_ring_invalidate_all_caches(params->request); > if (ret) > return ret; > > if (ring == &dev_priv->ring[RCS] && > params->instp_mode != dev_priv->relative_constants_mode) { > - ret = intel_logical_ring_begin(ringbuf, 4); > + ret = intel_logical_ring_begin(params->request, 4); > if (ret) > return ret; > > @@ -764,13 +766,14 @@ int intel_execlists_submission_final(struct i915_execbuffer_params *params) > exec_start = params->batch_obj_vm_offset + > params->args_batch_start_offset; > > - ret = ring->emit_bb_start(ringbuf, exec_start, params->dispatch_flags); > + ret = ring->emit_bb_start(params->request, exec_start, params->dispatch_flags); > if (ret) > return ret; > > - trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), params->dispatch_flags); > + trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags); > > - i915_gem_execbuffer_retire_commands(params->dev, params->file, ring, params->batch_obj); > + i915_gem_execbuffer_retire_commands(params->dev, params->file, > + params->request, params->batch_obj); > > return 0; > } > @@ -827,15 +830,15 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring) > I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING)); > } > > -int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf) > +int logical_ring_flush_all_caches(struct drm_i915_gem_request *req) > { > - struct intel_engine_cs *ring = ringbuf->ring; > + struct intel_engine_cs *ring = req->ring; > int ret; > > if (!ring->gpu_caches_dirty) > return 0; > > - ret = ring->emit_flush(ringbuf, 0, I915_GEM_GPU_DOMAINS); > + ret = ring->emit_flush(req, 0, I915_GEM_GPU_DOMAINS); > if (ret) > return ret; > > @@ -910,13 +913,17 @@ void intel_lr_context_unpin(struct intel_engine_cs *ring, > } > > int intel_logical_ring_alloc_request(struct intel_engine_cs *ring, > - struct intel_context *ctx) > + struct intel_context *ctx, > + struct drm_i915_gem_request **req_out) > { > struct drm_i915_gem_request *request; > struct drm_i915_private *dev_private = ring->dev->dev_private; > int ret; > > - if (ring->outstanding_lazy_request) > + if (!req_out) > + return -EINVAL; > + > + if ((*req_out = ring->outstanding_lazy_request) != NULL) > return 0; > > request = kzalloc(sizeof(*request), GFP_KERNEL); > @@ -953,7 +960,7 @@ int intel_logical_ring_alloc_request(struct intel_engine_cs *ring, > i915_gem_context_reference(request->ctx); > request->ringbuf = ctx->engine[ring->id].ringbuf; > > - ring->outstanding_lazy_request = request; > + *req_out = ring->outstanding_lazy_request = request; > return 0; > } > > @@ -1090,7 +1097,7 @@ static int logical_ring_prepare(struct intel_ringbuffer *ringbuf, int bytes) > /** > * intel_logical_ring_begin() - prepare the logical ringbuffer to accept some commands > * > - * @ringbuf: Logical ringbuffer. > + * @request: The request to start some new work for > * @num_dwords: number of DWORDs that we plan to write to the ringbuffer. 
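
(One behavioural point worth calling out from the intel_display.c hunk above: because the flip request is now allocated explicitly before queue_flip() runs, work->flip_queued_req can be assigned up front from a request that is known to be the right one, instead of being guessed afterwards from whatever the ring's lazy request happened to be. Condensed from the intel_crtc_page_flip() hunk:)

    struct drm_i915_gem_request *req;

    ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
    if (ret)
        return ret;

    /* The request is known before anything is queued, so it can be
     * tracked for the vblank/unpin machinery straight away: */
    i915_gem_request_assign(&work->flip_queued_req, req);

    ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, req, page_flip_flags);
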
> * > * The ringbuffer might not be ready to accept the commands right away (maybe it needs to > @@ -1100,8 +1107,9 @@ static int logical_ring_prepare(struct intel_ringbuffer *ringbuf, int bytes) > * > * Return: non-zero if the ringbuffer is not ready to be written to. > */ > -int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords) > +static int intel_logical_ring_begin(struct drm_i915_gem_request *req, int num_dwords) > { > + struct intel_ringbuffer *ringbuf = req->ringbuf; > struct intel_engine_cs *ring = ringbuf->ring; > struct drm_device *dev = ring->dev; > struct drm_i915_private *dev_priv = dev->dev_private; > @@ -1116,38 +1124,28 @@ int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords) > if (ret) > return ret; > > - if(!ring->outstanding_lazy_request) { > - printk(KERN_INFO "%s:%d> \x1B[31;1mring->outstanding_lazy_request = 0x%p\x1B[0m\n", __func__, __LINE__, ring->outstanding_lazy_request); > - dump_stack(); > - } > - > - /* Preallocate the olr before touching the ring */ > - ret = intel_logical_ring_alloc_request(ring, ringbuf->FIXME_lrc_ctx); > - if (ret) > - return ret; > - > ringbuf->space -= num_dwords * sizeof(uint32_t); > return 0; > } > > -static int intel_logical_ring_workarounds_emit(struct intel_engine_cs *ring, > +static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req, > struct intel_context *ctx) > { > int ret, i; > - struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf; > - struct drm_device *dev = ring->dev; > + struct intel_ringbuffer *ringbuf = req->ringbuf; > + struct drm_device *dev = req->ring->dev; > struct drm_i915_private *dev_priv = dev->dev_private; > struct i915_workarounds *w = &dev_priv->workarounds; > > if (WARN_ON(w->count == 0)) > return 0; > > - ring->gpu_caches_dirty = true; > - ret = logical_ring_flush_all_caches(ringbuf); > + req->ring->gpu_caches_dirty = true; > + ret = logical_ring_flush_all_caches(req); > if (ret) > return ret; > > - ret = intel_logical_ring_begin(ringbuf, w->count * 2 + 2); > + ret = intel_logical_ring_begin(req, w->count * 2 + 2); > if (ret) > return ret; > > @@ -1160,8 +1158,8 @@ static int intel_logical_ring_workarounds_emit(struct intel_engine_cs *ring, > > intel_logical_ring_advance(ringbuf); > > - ring->gpu_caches_dirty = true; > - ret = logical_ring_flush_all_caches(ringbuf); > + req->ring->gpu_caches_dirty = true; > + ret = logical_ring_flush_all_caches(req); > if (ret) > return ret; > > @@ -1210,13 +1208,14 @@ static int gen8_init_render_ring(struct intel_engine_cs *ring) > return init_workarounds_ring(ring); > } > > -static int gen8_emit_bb_start(struct intel_ringbuffer *ringbuf, > +static int gen8_emit_bb_start(struct drm_i915_gem_request *req, > u64 offset, unsigned flags) > { > + struct intel_ringbuffer *ringbuf = req->ringbuf; > bool ppgtt = !(flags & I915_DISPATCH_SECURE); > int ret; > > - ret = intel_logical_ring_begin(ringbuf, 4); > + ret = intel_logical_ring_begin(req, 4); > if (ret) > return ret; > > @@ -1263,17 +1262,18 @@ static void gen8_logical_ring_put_irq(struct intel_engine_cs *ring) > spin_unlock_irqrestore(&dev_priv->irq_lock, flags); > } > > -static int gen8_emit_flush(struct intel_ringbuffer *ringbuf, > +static int gen8_emit_flush(struct drm_i915_gem_request *req, > u32 invalidate_domains, > u32 unused) > { > + struct intel_ringbuffer *ringbuf = req->ringbuf; > struct intel_engine_cs *ring = ringbuf->ring; > struct drm_device *dev = ring->dev; > struct drm_i915_private *dev_priv = dev->dev_private; > uint32_t cmd; 
> int ret; > > - ret = intel_logical_ring_begin(ringbuf, 4); > + ret = intel_logical_ring_begin(req, 4); > if (ret) > return ret; > > @@ -1301,10 +1301,11 @@ static int gen8_emit_flush(struct intel_ringbuffer *ringbuf, > return 0; > } > > -static int gen8_emit_flush_render(struct intel_ringbuffer *ringbuf, > +static int gen8_emit_flush_render(struct drm_i915_gem_request *req, > u32 invalidate_domains, > u32 flush_domains) > { > + struct intel_ringbuffer *ringbuf = req->ringbuf; > struct intel_engine_cs *ring = ringbuf->ring; > u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; > u32 flags = 0; > @@ -1328,7 +1329,7 @@ static int gen8_emit_flush_render(struct intel_ringbuffer *ringbuf, > flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; > } > > - ret = intel_logical_ring_begin(ringbuf, 6); > + ret = intel_logical_ring_begin(req, 6); > if (ret) > return ret; > > @@ -1353,13 +1354,14 @@ static void gen8_set_seqno(struct intel_engine_cs *ring, u32 seqno) > intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno); > } > > -static int gen8_emit_request(struct intel_ringbuffer *ringbuf) > +static int gen8_emit_request(struct drm_i915_gem_request *req) > { > + struct intel_ringbuffer *ringbuf = req->ringbuf; > struct intel_engine_cs *ring = ringbuf->ring; > u32 cmd; > int ret; > > - ret = intel_logical_ring_begin(ringbuf, 6); > + ret = intel_logical_ring_begin(req, 6); > if (ret) > return ret; > > @@ -1371,8 +1373,7 @@ static int gen8_emit_request(struct intel_ringbuffer *ringbuf) > (ring->status_page.gfx_addr + > (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT))); > intel_logical_ring_emit(ringbuf, 0); > - intel_logical_ring_emit(ringbuf, > - i915_gem_request_get_seqno(ring->outstanding_lazy_request)); > + intel_logical_ring_emit(ringbuf, i915_gem_request_get_seqno(req)); > intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT); > intel_logical_ring_emit(ringbuf, MI_NOOP); > intel_logical_ring_advance_and_submit(ringbuf); > @@ -1380,16 +1381,20 @@ static int gen8_emit_request(struct intel_ringbuffer *ringbuf) > return 0; > } > > -static int gen8_init_rcs_context(struct intel_engine_cs *ring, > - struct intel_context *ctx) > +static int gen8_init_rcs_context(struct drm_i915_gem_request *req, > + struct intel_context *ctx) > { > int ret; > > - ret = intel_logical_ring_workarounds_emit(ring, ctx); > + ret = intel_logical_ring_workarounds_emit(req, ctx); > if (ret) > return ret; > > - return intel_lr_context_render_state_init(ring, ctx); > + ret = intel_lr_context_render_state_init(req, ctx); > + if (ret) > + return ret; > + > + return 0; > } > > /** > @@ -1409,6 +1414,7 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *ring) > > intel_logical_ring_stop(ring); > WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0); > + WARN_ON(ring->outstanding_lazy_request); > i915_gem_request_assign(&ring->outstanding_lazy_request, NULL); > > if (ring->cleanup) > @@ -1648,10 +1654,10 @@ cleanup_render_ring: > return ret; > } > > -int intel_lr_context_render_state_init(struct intel_engine_cs *ring, > - struct intel_context *ctx) > +static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req, > + struct intel_context *ctx) > { > - struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf; > + struct intel_engine_cs *ring = i915_gem_request_get_ring(req); > struct render_state so; > struct drm_i915_file_private *file_priv = ctx->file_priv; > struct drm_file *file = file_priv ? 
file_priv->file : NULL; > @@ -1664,15 +1670,13 @@ int intel_lr_context_render_state_init(struct intel_engine_cs *ring, > if (so.rodata == NULL) > return 0; > > - ret = ring->emit_bb_start(ringbuf, > - so.ggtt_offset, > - I915_DISPATCH_SECURE); > + ret = ring->emit_bb_start(req, so.ggtt_offset, I915_DISPATCH_SECURE); > if (ret) > goto out; > > - i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring); > + i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req); > > - ret = __i915_add_request(ring, file, so.obj, true); > + ret = __i915_add_request(req, file, so.obj, true); > /* intel_logical_ring_add_request moves object to inactive if it > * fails */ > out: > @@ -1883,6 +1887,7 @@ static void lrc_setup_hardware_status_page(struct intel_engine_cs *ring, > int intel_lr_context_deferred_create(struct intel_context *ctx, > struct intel_engine_cs *ring) > { > + struct drm_i915_private *dev_priv = ring->dev->dev_private; > const bool is_global_default_ctx = (ctx == ring->default_context); > struct drm_device *dev = ring->dev; > struct drm_i915_gem_object *ctx_obj; > @@ -1964,13 +1969,27 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, > lrc_setup_hardware_status_page(ring, ctx_obj); > else if (ring->id == RCS && !ctx->rcs_initialized) { > if (ring->init_context) { > - ret = ring->init_context(ring, ctx); > + struct drm_i915_gem_request *req; > + > + ret = dev_priv->gt.alloc_request(ring, ctx, &req); > + if (ret) > + return ret; > + > + ret = ring->init_context(req, ctx); > if (ret) { > DRM_ERROR("ring init context: %d\n", ret); > + i915_gem_request_unreference(req); > ctx->engine[ring->id].ringbuf = NULL; > ctx->engine[ring->id].state = NULL; > goto error; > } > + > + ret = i915_add_request_no_flush(req); > + if (ret) { > + DRM_ERROR("ring init context: %d\n", ret); > + i915_gem_request_unreference(req); > + goto error; > + } > } > > ctx->rcs_initialized = true; > diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h > index ea083d9..a2981ba 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.h > +++ b/drivers/gpu/drm/i915/intel_lrc.h > @@ -35,12 +35,13 @@ > > /* Logical Rings */ > int __must_check intel_logical_ring_alloc_request(struct intel_engine_cs *ring, > - struct intel_context *ctx); > + struct intel_context *ctx, > + struct drm_i915_gem_request **req_out); > void intel_logical_ring_stop(struct intel_engine_cs *ring); > void intel_logical_ring_cleanup(struct intel_engine_cs *ring); > int intel_logical_rings_init(struct drm_device *dev); > > -int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf); > +int logical_ring_flush_all_caches(struct drm_i915_gem_request *req); > void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf); > /** > * intel_logical_ring_advance() - advance the ringbuffer tail > @@ -63,11 +64,8 @@ static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf, > iowrite32(data, ringbuf->virtual_start + ringbuf->tail); > ringbuf->tail += 4; > } > -int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords); > > /* Logical Ring Contexts */ > -int intel_lr_context_render_state_init(struct intel_engine_cs *ring, > - struct intel_context *ctx); > void intel_lr_context_free(struct intel_context *ctx); > int intel_lr_context_deferred_create(struct intel_context *ctx, > struct intel_engine_cs *ring); > diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c > index 973c9de..2d2ce59 100644 > --- a/drivers/gpu/drm/i915/intel_overlay.c > +++ 
b/drivers/gpu/drm/i915/intel_overlay.c > @@ -209,17 +209,15 @@ static void intel_overlay_unmap_regs(struct intel_overlay *overlay, > } > > static int intel_overlay_do_wait_request(struct intel_overlay *overlay, > + struct drm_i915_gem_request *req, > void (*tail)(struct intel_overlay *)) > { > struct drm_device *dev = overlay->dev; > - struct drm_i915_private *dev_priv = dev->dev_private; > - struct intel_engine_cs *ring = &dev_priv->ring[RCS]; > int ret; > > BUG_ON(overlay->last_flip_req); > - i915_gem_request_assign(&overlay->last_flip_req, > - ring->outstanding_lazy_request); > - ret = i915_add_request(ring); > + i915_gem_request_assign(&overlay->last_flip_req, req); > + ret = i915_add_request(overlay->last_flip_req); > if (ret) > return ret; > > @@ -239,6 +237,7 @@ static int intel_overlay_on(struct intel_overlay *overlay) > struct drm_device *dev = overlay->dev; > struct drm_i915_private *dev_priv = dev->dev_private; > struct intel_engine_cs *ring = &dev_priv->ring[RCS]; > + struct drm_i915_gem_request *req; > int ret; > > BUG_ON(overlay->active); > @@ -246,17 +245,21 @@ static int intel_overlay_on(struct intel_overlay *overlay) > > WARN_ON(IS_I830(dev) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE)); > > - ret = intel_ring_begin(ring, 4); > + ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req); > if (ret) > return ret; > > - intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_ON); > - intel_ring_emit(ring, overlay->flip_addr | OFC_UPDATE); > - intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); > - intel_ring_emit(ring, MI_NOOP); > - intel_ring_advance(ring); > + ret = intel_ring_begin(req, 4); > + if (ret) > + return ret; > + > + intel_ring_emit(req->ring, MI_OVERLAY_FLIP | MI_OVERLAY_ON); > + intel_ring_emit(req->ring, overlay->flip_addr | OFC_UPDATE); > + intel_ring_emit(req->ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); > + intel_ring_emit(req->ring, MI_NOOP); > + intel_ring_advance(req->ring); > > - return intel_overlay_do_wait_request(overlay, NULL); > + return intel_overlay_do_wait_request(overlay, req, NULL); > } > > /* overlay needs to be enabled in OCMD reg */ > @@ -266,6 +269,7 @@ static int intel_overlay_continue(struct intel_overlay *overlay, > struct drm_device *dev = overlay->dev; > struct drm_i915_private *dev_priv = dev->dev_private; > struct intel_engine_cs *ring = &dev_priv->ring[RCS]; > + struct drm_i915_gem_request *req; > u32 flip_addr = overlay->flip_addr; > u32 tmp; > int ret; > @@ -280,7 +284,11 @@ static int intel_overlay_continue(struct intel_overlay *overlay, > if (tmp & (1 << 17)) > DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp); > > - ret = intel_ring_begin(ring, 2); > + ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req); > + if (ret) > + return ret; > + > + ret = intel_ring_begin(req, 2); > if (ret) > return ret; > > @@ -289,9 +297,8 @@ static int intel_overlay_continue(struct intel_overlay *overlay, > intel_ring_advance(ring); > > WARN_ON(overlay->last_flip_req); > - i915_gem_request_assign(&overlay->last_flip_req, > - ring->outstanding_lazy_request); > - return i915_add_request(ring); > + i915_gem_request_assign(&overlay->last_flip_req, req); > + return i915_add_request(req); > } > > static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay) > @@ -326,6 +333,7 @@ static int intel_overlay_off(struct intel_overlay *overlay) > struct drm_device *dev = overlay->dev; > struct drm_i915_private *dev_priv = dev->dev_private; > struct intel_engine_cs *ring = &dev_priv->ring[RCS]; > + 
struct drm_i915_gem_request *req; > u32 flip_addr = overlay->flip_addr; > int ret; > > @@ -337,7 +345,11 @@ static int intel_overlay_off(struct intel_overlay *overlay) > * of the hw. Do it in both cases */ > flip_addr |= OFC_UPDATE; > > - ret = intel_ring_begin(ring, 6); > + ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req); > + if (ret) > + return ret; > + > + ret = intel_ring_begin(req, 6); > if (ret) > return ret; > > @@ -359,7 +371,7 @@ static int intel_overlay_off(struct intel_overlay *overlay) > } > intel_ring_advance(ring); > > - return intel_overlay_do_wait_request(overlay, intel_overlay_off_tail); > + return intel_overlay_do_wait_request(overlay, req, intel_overlay_off_tail); > } > > /* recover from an interruption due to a signal > @@ -404,7 +416,13 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay) > > if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) { > /* synchronous slowpath */ > - ret = intel_ring_begin(ring, 2); > + struct drm_i915_gem_request *req; > + > + ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req); > + if (ret) > + return ret; > + > + ret = intel_ring_begin(req, 2); > if (ret) > return ret; > > @@ -412,7 +430,7 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay) > intel_ring_emit(ring, MI_NOOP); > intel_ring_advance(ring); > > - ret = intel_overlay_do_wait_request(overlay, > + ret = intel_overlay_do_wait_request(overlay, req, > intel_overlay_release_old_vid_tail); > if (ret) > return ret; > diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c > index 78911e2..5905fa5 100644 > --- a/drivers/gpu/drm/i915/intel_pm.c > +++ b/drivers/gpu/drm/i915/intel_pm.c > @@ -5506,6 +5506,7 @@ static void ironlake_enable_rc6(struct drm_device *dev) > { > struct drm_i915_private *dev_priv = dev->dev_private; > struct intel_engine_cs *ring = &dev_priv->ring[RCS]; > + struct drm_i915_gem_request *req = NULL; > bool was_interruptible; > int ret; > > @@ -5524,16 +5525,17 @@ static void ironlake_enable_rc6(struct drm_device *dev) > was_interruptible = dev_priv->mm.interruptible; > dev_priv->mm.interruptible = false; > > + ret = dev_priv->gt.alloc_request(ring, NULL, &req); > + if (ret) > + goto err; > + > /* > * GPU can automatically power down the render unit if given a page > * to save state. > */ > - ret = intel_ring_begin(ring, 6); > - if (ret) { > - ironlake_teardown_rc6(dev); > - dev_priv->mm.interruptible = was_interruptible; > - return; > - } > + ret = intel_ring_begin(req, 6); > + if (ret) > + goto err; > > intel_ring_emit(ring, MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN); > intel_ring_emit(ring, MI_SET_CONTEXT); > @@ -5547,6 +5549,11 @@ static void ironlake_enable_rc6(struct drm_device *dev) > intel_ring_emit(ring, MI_FLUSH); > intel_ring_advance(ring); > > + ret = i915_add_request_no_flush(req); > + if (ret) > + goto err; > + req = NULL; > + > /* > * Wait for the command parser to advance past MI_SET_CONTEXT. 
The HW > * does an implicit flush, combined with MI_FLUSH above, it should be > @@ -5554,16 +5561,20 @@ static void ironlake_enable_rc6(struct drm_device *dev) > */ > ret = intel_ring_idle(ring); > dev_priv->mm.interruptible = was_interruptible; > - if (ret) { > - DRM_ERROR("failed to enable ironlake power savings\n"); > - ironlake_teardown_rc6(dev); > - return; > - } > + if (ret) > + goto err; > > I915_WRITE(PWRCTXA, i915_gem_obj_ggtt_offset(dev_priv->ips.pwrctx) | PWRCTX_EN); > I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT); > > intel_print_rc6_info(dev, GEN6_RC_CTL_RC6_ENABLE); > + > +err: > + DRM_ERROR("failed to enable ironlake power savings\n"); > + ironlake_teardown_rc6(dev); > + dev_priv->mm.interruptible = was_interruptible; > + if (req) > + i915_gem_request_unreference(req); > } > > static unsigned long intel_pxfreq(u32 vidfreq) > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c > index b60e59b..e6e7bb5 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c > @@ -91,10 +91,11 @@ void __intel_ring_advance(struct intel_engine_cs *ring) > } > > static int > -gen2_render_ring_flush(struct intel_engine_cs *ring, > +gen2_render_ring_flush(struct drm_i915_gem_request *req, > u32 invalidate_domains, > u32 flush_domains) > { > + struct intel_engine_cs *ring = req->ring; > u32 cmd; > int ret; > > @@ -105,7 +106,7 @@ gen2_render_ring_flush(struct intel_engine_cs *ring, > if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER) > cmd |= MI_READ_FLUSH; > > - ret = intel_ring_begin(ring, 2); > + ret = intel_ring_begin(req, 2); > if (ret) > return ret; > > @@ -117,10 +118,11 @@ gen2_render_ring_flush(struct intel_engine_cs *ring, > } > > static int > -gen4_render_ring_flush(struct intel_engine_cs *ring, > +gen4_render_ring_flush(struct drm_i915_gem_request *req, > u32 invalidate_domains, > u32 flush_domains) > { > + struct intel_engine_cs *ring = req->ring; > struct drm_device *dev = ring->dev; > u32 cmd; > int ret; > @@ -163,7 +165,7 @@ gen4_render_ring_flush(struct intel_engine_cs *ring, > (IS_G4X(dev) || IS_GEN5(dev))) > cmd |= MI_INVALIDATE_ISP; > > - ret = intel_ring_begin(ring, 2); > + ret = intel_ring_begin(req, 2); > if (ret) > return ret; > > @@ -212,12 +214,13 @@ gen4_render_ring_flush(struct intel_engine_cs *ring, > * really our business. That leaves only stall at scoreboard. 
> */ > static int > -intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring) > +intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req) > { > + struct intel_engine_cs *ring = req->ring; > u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; > int ret; > > - ret = intel_ring_begin(ring, 6); > + ret = intel_ring_begin(req, 6); > if (ret) > return ret; > > @@ -230,7 +233,7 @@ intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring) > intel_ring_emit(ring, MI_NOOP); > intel_ring_advance(ring); > > - ret = intel_ring_begin(ring, 6); > + ret = intel_ring_begin(req, 6); > if (ret) > return ret; > > @@ -246,15 +249,16 @@ intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring) > } > > static int > -gen6_render_ring_flush(struct intel_engine_cs *ring, > - u32 invalidate_domains, u32 flush_domains) > +gen6_render_ring_flush(struct drm_i915_gem_request *req, > + u32 invalidate_domains, u32 flush_domains) > { > + struct intel_engine_cs *ring = req->ring; > u32 flags = 0; > u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; > int ret; > > /* Force SNB workarounds for PIPE_CONTROL flushes */ > - ret = intel_emit_post_sync_nonzero_flush(ring); > + ret = intel_emit_post_sync_nonzero_flush(req); > if (ret) > return ret; > > @@ -284,7 +288,7 @@ gen6_render_ring_flush(struct intel_engine_cs *ring, > flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; > } > > - ret = intel_ring_begin(ring, 4); > + ret = intel_ring_begin(req, 4); > if (ret) > return ret; > > @@ -298,11 +302,12 @@ gen6_render_ring_flush(struct intel_engine_cs *ring, > } > > static int > -gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring) > +gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req) > { > + struct intel_engine_cs *ring = req->ring; > int ret; > > - ret = intel_ring_begin(ring, 4); > + ret = intel_ring_begin(req, 4); > if (ret) > return ret; > > @@ -316,14 +321,15 @@ gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring) > return 0; > } > > -static int gen7_ring_fbc_flush(struct intel_engine_cs *ring, u32 value) > +static int gen7_ring_fbc_flush(struct drm_i915_gem_request *req, u32 value) > { > + struct intel_engine_cs *ring = req->ring; > int ret; > > if (!ring->fbc_dirty) > return 0; > > - ret = intel_ring_begin(ring, 6); > + ret = intel_ring_begin(req, 6); > if (ret) > return ret; > /* WaFbcNukeOn3DBlt:ivb/hsw */ > @@ -340,9 +346,10 @@ static int gen7_ring_fbc_flush(struct intel_engine_cs *ring, u32 value) > } > > static int > -gen7_render_ring_flush(struct intel_engine_cs *ring, > +gen7_render_ring_flush(struct drm_i915_gem_request *req, > u32 invalidate_domains, u32 flush_domains) > { > + struct intel_engine_cs *ring = req->ring; > u32 flags = 0; > u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; > int ret; > @@ -381,10 +388,10 @@ gen7_render_ring_flush(struct intel_engine_cs *ring, > /* Workaround: we must issue a pipe_control with CS-stall bit > * set before a pipe_control command that has the state cache > * invalidate bit set. 
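
(The pattern for the individual emission helpers, here and on the intel_lrc.c side, is the mirror image of the caller-side change: each helper takes the request, derives the engine from it and reserves ring space against it. A minimal sketch, with a made-up helper name:)

    static int example_emit_flush(struct drm_i915_gem_request *req)
    {
        struct intel_engine_cs *ring = req->ring;   /* engine comes from the request */
        int ret;

        ret = intel_ring_begin(req, 2);             /* space reserved for this request */
        if (ret)
            return ret;

        intel_ring_emit(ring, MI_FLUSH);
        intel_ring_emit(ring, MI_NOOP);
        intel_ring_advance(ring);

        return 0;
    }
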
*/ > - gen7_render_ring_cs_stall_wa(ring); > + gen7_render_ring_cs_stall_wa(req); > } > > - ret = intel_ring_begin(ring, 4); > + ret = intel_ring_begin(req, 4); > if (ret) > return ret; > > @@ -395,18 +402,19 @@ gen7_render_ring_flush(struct intel_engine_cs *ring, > intel_ring_advance(ring); > > if (!invalidate_domains && flush_domains) > - return gen7_ring_fbc_flush(ring, FBC_REND_NUKE); > + return gen7_ring_fbc_flush(req, FBC_REND_NUKE); > > return 0; > } > > static int > -gen8_emit_pipe_control(struct intel_engine_cs *ring, > +gen8_emit_pipe_control(struct drm_i915_gem_request *req, > u32 flags, u32 scratch_addr) > { > + struct intel_engine_cs *ring = req->ring; > int ret; > > - ret = intel_ring_begin(ring, 6); > + ret = intel_ring_begin(req, 6); > if (ret) > return ret; > > @@ -422,11 +430,11 @@ gen8_emit_pipe_control(struct intel_engine_cs *ring, > } > > static int > -gen8_render_ring_flush(struct intel_engine_cs *ring, > +gen8_render_ring_flush(struct drm_i915_gem_request *req, > u32 invalidate_domains, u32 flush_domains) > { > u32 flags = 0; > - u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; > + u32 scratch_addr = req->ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; > int ret; > > flags |= PIPE_CONTROL_CS_STALL; > @@ -446,7 +454,7 @@ gen8_render_ring_flush(struct intel_engine_cs *ring, > flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; > > /* WaCsStallBeforeStateCacheInvalidate:bdw,chv */ > - ret = gen8_emit_pipe_control(ring, > + ret = gen8_emit_pipe_control(req, > PIPE_CONTROL_CS_STALL | > PIPE_CONTROL_STALL_AT_SCOREBOARD, > 0); > @@ -454,12 +462,12 @@ gen8_render_ring_flush(struct intel_engine_cs *ring, > return ret; > } > > - ret = gen8_emit_pipe_control(ring, flags, scratch_addr); > + ret = gen8_emit_pipe_control(req, flags, scratch_addr); > if (ret) > return ret; > > if (!invalidate_domains && flush_domains) > - return gen7_ring_fbc_flush(ring, FBC_REND_NUKE); > + return gen7_ring_fbc_flush(req, FBC_REND_NUKE); > > return 0; > } > @@ -670,9 +678,10 @@ err: > return ret; > } > > -static int intel_ring_workarounds_emit(struct intel_engine_cs *ring, > +static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req, > struct intel_context *ctx) > { > + struct intel_engine_cs *ring = req->ring; > int ret, i; > struct drm_device *dev = ring->dev; > struct drm_i915_private *dev_priv = dev->dev_private; > @@ -682,11 +691,11 @@ static int intel_ring_workarounds_emit(struct intel_engine_cs *ring, > return 0; > > ring->gpu_caches_dirty = true; > - ret = intel_ring_flush_all_caches(ring); > + ret = intel_ring_flush_all_caches(req); > if (ret) > return ret; > > - ret = intel_ring_begin(ring, (w->count * 2 + 2)); > + ret = intel_ring_begin(req, (w->count * 2 + 2)); > if (ret) > return ret; > > @@ -700,7 +709,7 @@ static int intel_ring_workarounds_emit(struct intel_engine_cs *ring, > intel_ring_advance(ring); > > ring->gpu_caches_dirty = true; > - ret = intel_ring_flush_all_caches(ring); > + ret = intel_ring_flush_all_caches(req); > if (ret) > return ret; > > @@ -898,10 +907,11 @@ static void render_ring_cleanup(struct intel_engine_cs *ring) > intel_fini_pipe_control(ring); > } > > -static int gen8_rcs_signal(struct intel_engine_cs *signaller, > +static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req, > unsigned int num_dwords) > { > #define MBOX_UPDATE_DWORDS 8 > + struct intel_engine_cs *signaller = signaller_req->ring; > struct drm_device *dev = signaller->dev; > struct drm_i915_private *dev_priv = dev->dev_private; > struct intel_engine_cs *waiter; > @@ 
-911,7 +921,7 @@ static int gen8_rcs_signal(struct intel_engine_cs *signaller, > num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS; > #undef MBOX_UPDATE_DWORDS > > - ret = intel_ring_begin(signaller, num_dwords); > + ret = intel_ring_begin(signaller_req, num_dwords); > if (ret) > return ret; > > @@ -921,8 +931,7 @@ static int gen8_rcs_signal(struct intel_engine_cs *signaller, > if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) > continue; > > - seqno = i915_gem_request_get_seqno( > - signaller->outstanding_lazy_request); > + seqno = i915_gem_request_get_seqno(signaller_req); > intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6)); > intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB | > PIPE_CONTROL_QW_WRITE | > @@ -939,10 +948,11 @@ static int gen8_rcs_signal(struct intel_engine_cs *signaller, > return 0; > } > > -static int gen8_xcs_signal(struct intel_engine_cs *signaller, > +static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req, > unsigned int num_dwords) > { > #define MBOX_UPDATE_DWORDS 6 > + struct intel_engine_cs *signaller = signaller_req->ring; > struct drm_device *dev = signaller->dev; > struct drm_i915_private *dev_priv = dev->dev_private; > struct intel_engine_cs *waiter; > @@ -952,7 +962,7 @@ static int gen8_xcs_signal(struct intel_engine_cs *signaller, > num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS; > #undef MBOX_UPDATE_DWORDS > > - ret = intel_ring_begin(signaller, num_dwords); > + ret = intel_ring_begin(signaller_req, num_dwords); > if (ret) > return ret; > > @@ -962,8 +972,7 @@ static int gen8_xcs_signal(struct intel_engine_cs *signaller, > if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) > continue; > > - seqno = i915_gem_request_get_seqno( > - signaller->outstanding_lazy_request); > + seqno = i915_gem_request_get_seqno(signaller_req); > intel_ring_emit(signaller, (MI_FLUSH_DW + 1) | > MI_FLUSH_DW_OP_STOREDW); > intel_ring_emit(signaller, lower_32_bits(gtt_offset) | > @@ -978,9 +987,10 @@ static int gen8_xcs_signal(struct intel_engine_cs *signaller, > return 0; > } > > -static int gen6_signal(struct intel_engine_cs *signaller, > +static int gen6_signal(struct drm_i915_gem_request *signaller_req, > unsigned int num_dwords) > { > + struct intel_engine_cs *signaller = signaller_req->ring; > struct drm_device *dev = signaller->dev; > struct drm_i915_private *dev_priv = dev->dev_private; > struct intel_engine_cs *useless; > @@ -991,15 +1001,14 @@ static int gen6_signal(struct intel_engine_cs *signaller, > num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2); > #undef MBOX_UPDATE_DWORDS > > - ret = intel_ring_begin(signaller, num_dwords); > + ret = intel_ring_begin(signaller_req, num_dwords); > if (ret) > return ret; > > for_each_ring(useless, dev_priv, i) { > u32 mbox_reg = signaller->semaphore.mbox.signal[i]; > if (mbox_reg != GEN6_NOSYNC) { > - u32 seqno = i915_gem_request_get_seqno( > - signaller->outstanding_lazy_request); > + u32 seqno = i915_gem_request_get_seqno(signaller_req); > intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1)); > intel_ring_emit(signaller, mbox_reg); > intel_ring_emit(signaller, seqno); > @@ -1016,29 +1025,28 @@ static int gen6_signal(struct intel_engine_cs *signaller, > /** > * gen6_add_request - Update the semaphore mailbox registers > * > - * @ring - ring that is adding a request > - * @seqno - return seqno stuck into the ring > + * @request - request to write to the ring > * > * Update the mailbox registers in the *other* rings with the current seqno. > * This acts like a signal in the canonical semaphore. 
> */ > static int > -gen6_add_request(struct intel_engine_cs *ring) > +gen6_add_request(struct drm_i915_gem_request *req) > { > + struct intel_engine_cs *ring = req->ring; > int ret; > > if (ring->semaphore.signal) > - ret = ring->semaphore.signal(ring, 4); > + ret = ring->semaphore.signal(req, 4); > else > - ret = intel_ring_begin(ring, 4); > + ret = intel_ring_begin(req, 4); > > if (ret) > return ret; > > intel_ring_emit(ring, MI_STORE_DWORD_INDEX); > intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); > - intel_ring_emit(ring, > - i915_gem_request_get_seqno(ring->outstanding_lazy_request)); > + intel_ring_emit(ring, i915_gem_request_get_seqno(req)); > intel_ring_emit(ring, MI_USER_INTERRUPT); > __intel_ring_advance(ring); > > @@ -1061,14 +1069,15 @@ static inline bool i915_gem_has_seqno_wrapped(struct drm_device *dev, > */ > > static int > -gen8_ring_sync(struct intel_engine_cs *waiter, > +gen8_ring_sync(struct drm_i915_gem_request *waiter_req, > struct intel_engine_cs *signaller, > u32 seqno) > { > + struct intel_engine_cs *waiter = waiter_req->ring; > struct drm_i915_private *dev_priv = waiter->dev->dev_private; > int ret; > > - ret = intel_ring_begin(waiter, 4); > + ret = intel_ring_begin(waiter_req, 4); > if (ret) > return ret; > > @@ -1086,10 +1095,11 @@ gen8_ring_sync(struct intel_engine_cs *waiter, > } > > static int > -gen6_ring_sync(struct intel_engine_cs *waiter, > +gen6_ring_sync(struct drm_i915_gem_request *waiter_req, > struct intel_engine_cs *signaller, > u32 seqno) > { > + struct intel_engine_cs *waiter = waiter_req->ring; > u32 dw1 = MI_SEMAPHORE_MBOX | > MI_SEMAPHORE_COMPARE | > MI_SEMAPHORE_REGISTER; > @@ -1104,7 +1114,7 @@ gen6_ring_sync(struct intel_engine_cs *waiter, > > WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID); > > - ret = intel_ring_begin(waiter, 4); > + ret = intel_ring_begin(waiter_req, 4); > if (ret) > return ret; > > @@ -1135,8 +1145,9 @@ do { \ > } while (0) > > static int > -pc_render_add_request(struct intel_engine_cs *ring) > +pc_render_add_request(struct drm_i915_gem_request *req) > { > + struct intel_engine_cs *ring = req->ring; > u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; > int ret; > > @@ -1148,7 +1159,7 @@ pc_render_add_request(struct intel_engine_cs *ring) > * incoherence by flushing the 6 PIPE_NOTIFY buffers out to > * memory before requesting an interrupt. 
> */ > - ret = intel_ring_begin(ring, 32); > + ret = intel_ring_begin(req, 32); > if (ret) > return ret; > > @@ -1156,8 +1167,7 @@ pc_render_add_request(struct intel_engine_cs *ring) > PIPE_CONTROL_WRITE_FLUSH | > PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); > intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT); > - intel_ring_emit(ring, > - i915_gem_request_get_seqno(ring->outstanding_lazy_request)); > + intel_ring_emit(ring, i915_gem_request_get_seqno(req)); > intel_ring_emit(ring, 0); > PIPE_CONTROL_FLUSH(ring, scratch_addr); > scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */ > @@ -1176,8 +1186,7 @@ pc_render_add_request(struct intel_engine_cs *ring) > PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | > PIPE_CONTROL_NOTIFY); > intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT); > - intel_ring_emit(ring, > - i915_gem_request_get_seqno(ring->outstanding_lazy_request)); > + intel_ring_emit(ring, i915_gem_request_get_seqno(req)); > intel_ring_emit(ring, 0); > __intel_ring_advance(ring); > > @@ -1390,13 +1399,14 @@ void intel_ring_setup_status_page(struct intel_engine_cs *ring) > } > > static int > -bsd_ring_flush(struct intel_engine_cs *ring, > +bsd_ring_flush(struct drm_i915_gem_request *req, > u32 invalidate_domains, > u32 flush_domains) > { > + struct intel_engine_cs *ring = req->ring; > int ret; > > - ret = intel_ring_begin(ring, 2); > + ret = intel_ring_begin(req, 2); > if (ret) > return ret; > > @@ -1407,18 +1417,18 @@ bsd_ring_flush(struct intel_engine_cs *ring, > } > > static int > -i9xx_add_request(struct intel_engine_cs *ring) > +i9xx_add_request(struct drm_i915_gem_request *req) > { > + struct intel_engine_cs *ring = req->ring; > int ret; > > - ret = intel_ring_begin(ring, 4); > + ret = intel_ring_begin(req, 4); > if (ret) > return ret; > > intel_ring_emit(ring, MI_STORE_DWORD_INDEX); > intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); > - intel_ring_emit(ring, > - i915_gem_request_get_seqno(ring->outstanding_lazy_request)); > + intel_ring_emit(ring, i915_gem_request_get_seqno(req)); > intel_ring_emit(ring, MI_USER_INTERRUPT); > __intel_ring_advance(ring); > > @@ -1550,13 +1560,14 @@ gen8_ring_put_irq(struct intel_engine_cs *ring) > } > > static int > -i965_dispatch_execbuffer(struct intel_engine_cs *ring, > +i965_dispatch_execbuffer(struct drm_i915_gem_request *req, > u64 offset, u32 length, > unsigned flags) > { > + struct intel_engine_cs *ring = req->ring; > int ret; > > - ret = intel_ring_begin(ring, 2); > + ret = intel_ring_begin(req, 2); > if (ret) > return ret; > > @@ -1575,14 +1586,15 @@ i965_dispatch_execbuffer(struct intel_engine_cs *ring, > #define I830_TLB_ENTRIES (2) > #define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT) > static int > -i830_dispatch_execbuffer(struct intel_engine_cs *ring, > - u64 offset, u32 len, > - unsigned flags) > +i830_dispatch_execbuffer(struct drm_i915_gem_request *req, > + u64 offset, u32 len, > + unsigned flags) > { > + struct intel_engine_cs *ring = req->ring; > u32 cs_offset = ring->scratch.gtt_offset; > int ret; > > - ret = intel_ring_begin(ring, 6); > + ret = intel_ring_begin(req, 6); > if (ret) > return ret; > > @@ -1599,7 +1611,7 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring, > if (len > I830_BATCH_LIMIT) > return -ENOSPC; > > - ret = intel_ring_begin(ring, 6 + 2); > + ret = intel_ring_begin(req, 6 + 2); > if (ret) > return ret; > > @@ -1622,7 +1634,7 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring, > offset = cs_offset; > } > 
> - ret = intel_ring_begin(ring, 4); > + ret = intel_ring_begin(req, 4); > if (ret) > return ret; > > @@ -1636,13 +1648,14 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring, > } > > static int > -i915_dispatch_execbuffer(struct intel_engine_cs *ring, > +i915_dispatch_execbuffer(struct drm_i915_gem_request *req, > u64 offset, u32 len, > unsigned flags) > { > + struct intel_engine_cs *ring = req->ring; > int ret; > > - ret = intel_ring_begin(ring, 2); > + ret = intel_ring_begin(req, 2); > if (ret) > return ret; > > @@ -1885,6 +1898,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring) > > intel_unpin_ringbuffer_obj(ringbuf); > intel_destroy_ringbuffer_obj(ringbuf); > + WARN_ON(ring->outstanding_lazy_request); > i915_gem_request_assign(&ring->outstanding_lazy_request, NULL); > > if (ring->cleanup) > @@ -2007,8 +2021,9 @@ int intel_ring_idle(struct intel_engine_cs *ring) > int ret; > > /* We need to add any requests required to flush the objects and ring */ > + WARN_ON(ring->outstanding_lazy_request); > if (ring->outstanding_lazy_request) { > - ret = i915_add_request(ring); > + ret = i915_add_request(ring->outstanding_lazy_request); > if (ret) > return ret; > } > @@ -2025,13 +2040,18 @@ int intel_ring_idle(struct intel_engine_cs *ring) > } > > int > -intel_ring_alloc_request(struct intel_engine_cs *ring, struct intel_context *ctx) > +intel_ring_alloc_request(struct intel_engine_cs *ring, > + struct intel_context *ctx, > + struct drm_i915_gem_request **req_out) > { > int ret; > struct drm_i915_gem_request *request; > struct drm_i915_private *dev_private = ring->dev->dev_private; > > - if (ring->outstanding_lazy_request) > + if (!req_out) > + return -EINVAL; > + > + if ((*req_out = ring->outstanding_lazy_request) != NULL) > return 0; > > request = kzalloc(sizeof(*request), GFP_KERNEL); > @@ -2053,7 +2073,7 @@ intel_ring_alloc_request(struct intel_engine_cs *ring, struct intel_context *ctx > spewThisReq(request, "\x1B[32mCreated: %d:%d, ref => %d\x1B[0m", request->uniq, request->seqno, request->ref.refcount.counter); > > //printk(KERN_INFO "%s:%d> <%s> OLR = 0x%p, uniq = %d, seqno = %d\n", __func__, __LINE__, ring->name, request, request->uniq, request->seqno); > - ring->outstanding_lazy_request = request; > + *req_out = ring->outstanding_lazy_request = request; > return 0; > } > > @@ -2078,9 +2098,10 @@ static int __intel_ring_prepare(struct intel_engine_cs *ring, > return 0; > } > > -int intel_ring_begin(struct intel_engine_cs *ring, > +int intel_ring_begin(struct drm_i915_gem_request *req, > int num_dwords) > { > + struct intel_engine_cs *ring = req->ring; > struct drm_i915_private *dev_priv = ring->dev->dev_private; > int ret; > > @@ -2093,18 +2114,14 @@ int intel_ring_begin(struct intel_engine_cs *ring, > if (ret) > return ret; > > - /* Preallocate the olr before touching the ring */ > - ret = intel_ring_alloc_request(ring, NULL); > - if (ret) > - return ret; > - > ring->buffer->space -= num_dwords * sizeof(uint32_t); > return 0; > } > > /* Align the ring tail to a cacheline boundary */ > -int intel_ring_cacheline_align(struct intel_engine_cs *ring) > +int intel_ring_cacheline_align(struct drm_i915_gem_request *req) > { > + struct intel_engine_cs *ring = req->ring; > int num_dwords = (ring->buffer->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); > int ret; > > @@ -2112,7 +2129,7 @@ int intel_ring_cacheline_align(struct intel_engine_cs *ring) > return 0; > > num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords; > - ret = intel_ring_begin(ring, num_dwords); > + ret 
= intel_ring_begin(req, num_dwords); > if (ret) > return ret; > > @@ -2176,13 +2193,14 @@ static void gen6_bsd_ring_write_tail(struct intel_engine_cs *ring, > _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE)); > } > > -static int gen6_bsd_ring_flush(struct intel_engine_cs *ring, > +static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, > u32 invalidate, u32 flush) > { > + struct intel_engine_cs *ring = req->ring; > uint32_t cmd; > int ret; > > - ret = intel_ring_begin(ring, 4); > + ret = intel_ring_begin(req, 4); > if (ret) > return ret; > > @@ -2212,14 +2230,15 @@ static int gen6_bsd_ring_flush(struct intel_engine_cs *ring, > } > > static int > -gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring, > +gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, > u64 offset, u32 len, > unsigned flags) > { > + struct intel_engine_cs *ring = req->ring; > bool ppgtt = USES_PPGTT(ring->dev) && !(flags & I915_DISPATCH_SECURE); > int ret; > > - ret = intel_ring_begin(ring, 4); > + ret = intel_ring_begin(req, 4); > if (ret) > return ret; > > @@ -2234,13 +2253,14 @@ gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring, > } > > static int > -hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring, > +hsw_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, > u64 offset, u32 len, > unsigned flags) > { > + struct intel_engine_cs *ring = req->ring; > int ret; > > - ret = intel_ring_begin(ring, 2); > + ret = intel_ring_begin(req, 2); > if (ret) > return ret; > > @@ -2256,13 +2276,14 @@ hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring, > } > > static int > -gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring, > +gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, > u64 offset, u32 len, > unsigned flags) > { > + struct intel_engine_cs *ring = req->ring; > int ret; > > - ret = intel_ring_begin(ring, 2); > + ret = intel_ring_begin(req, 2); > if (ret) > return ret; > > @@ -2278,15 +2299,16 @@ gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring, > > /* Blitter support (SandyBridge+) */ > > -static int gen6_ring_flush(struct intel_engine_cs *ring, > +static int gen6_ring_flush(struct drm_i915_gem_request *req, > u32 invalidate, u32 flush) > { > + struct intel_engine_cs *ring = req->ring; > struct drm_device *dev = ring->dev; > struct drm_i915_private *dev_priv = dev->dev_private; > uint32_t cmd; > int ret; > > - ret = intel_ring_begin(ring, 4); > + ret = intel_ring_begin(req, 4); > if (ret) > return ret; > > @@ -2315,7 +2337,7 @@ static int gen6_ring_flush(struct intel_engine_cs *ring, > > if (!invalidate && flush) { > if (IS_GEN7(dev)) > - return gen7_ring_fbc_flush(ring, FBC_REND_CACHE_CLEAN); > + return gen7_ring_fbc_flush(req, FBC_REND_CACHE_CLEAN); > else if (IS_BROADWELL(dev)) > dev_priv->fbc.need_sw_cache_clean = true; > } > @@ -2696,14 +2718,15 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev) > } > > int > -intel_ring_flush_all_caches(struct intel_engine_cs *ring) > +intel_ring_flush_all_caches(struct drm_i915_gem_request *req) > { > + struct intel_engine_cs *ring = req->ring; > int ret; > > if (!ring->gpu_caches_dirty) > return 0; > > - ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS); > + ret = ring->flush(req, 0, I915_GEM_GPU_DOMAINS); > if (ret) > return ret; > > @@ -2714,8 +2737,9 @@ intel_ring_flush_all_caches(struct intel_engine_cs *ring) > } > > int > -intel_ring_invalidate_all_caches(struct intel_engine_cs *ring) > +intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req) > { > + struct intel_engine_cs 
*ring = req->ring; > uint32_t flush_domains; > int ret; > > @@ -2723,7 +2747,7 @@ intel_ring_invalidate_all_caches(struct intel_engine_cs *ring) > if (ring->gpu_caches_dirty) > flush_domains = I915_GEM_GPU_DOMAINS; > > - ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains); > + ret = ring->flush(req, I915_GEM_GPU_DOMAINS, flush_domains); > if (ret) > return ret; > > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h > index 48cbb00..a7e47ad 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > @@ -154,15 +154,15 @@ struct intel_engine_cs { > > int (*init_hw)(struct intel_engine_cs *ring); > > - int (*init_context)(struct intel_engine_cs *ring, > + int (*init_context)(struct drm_i915_gem_request *req, > struct intel_context *ctx); > > void (*write_tail)(struct intel_engine_cs *ring, > u32 value); > - int __must_check (*flush)(struct intel_engine_cs *ring, > + int __must_check (*flush)(struct drm_i915_gem_request *req, > u32 invalidate_domains, > u32 flush_domains); > - int (*add_request)(struct intel_engine_cs *ring); > + int (*add_request)(struct drm_i915_gem_request *req); > /* Some chipsets are not quite as coherent as advertised and need > * an expensive kick to force a true read of the up-to-date seqno. > * However, the up-to-date seqno is not always required and the last > @@ -173,7 +173,7 @@ struct intel_engine_cs { > bool lazy_coherency); > void (*set_seqno)(struct intel_engine_cs *ring, > u32 seqno); > - int (*dispatch_execbuffer)(struct intel_engine_cs *ring, > + int (*dispatch_execbuffer)(struct drm_i915_gem_request *req, > u64 offset, u32 length, > unsigned dispatch_flags); > #define I915_DISPATCH_SECURE 0x1 > @@ -231,10 +231,10 @@ struct intel_engine_cs { > }; > > /* AKA wait() */ > - int (*sync_to)(struct intel_engine_cs *ring, > - struct intel_engine_cs *to, > + int (*sync_to)(struct drm_i915_gem_request *to_req, > + struct intel_engine_cs *from, > u32 seqno); > - int (*signal)(struct intel_engine_cs *signaller, > + int (*signal)(struct drm_i915_gem_request *signaller_req, > /* num_dwords needed by caller */ > unsigned int num_dwords); > } semaphore; > @@ -245,11 +245,11 @@ struct intel_engine_cs { > struct list_head execlist_retired_req_list; > u8 next_context_status_buffer; > u32 irq_keep_mask; /* bitmask for interrupts that should not be masked */ > - int (*emit_request)(struct intel_ringbuffer *ringbuf); > - int (*emit_flush)(struct intel_ringbuffer *ringbuf, > + int (*emit_request)(struct drm_i915_gem_request *req); > + int (*emit_flush)(struct drm_i915_gem_request *req, > u32 invalidate_domains, > u32 flush_domains); > - int (*emit_bb_start)(struct intel_ringbuffer *ringbuf, > + int (*emit_bb_start)(struct drm_i915_gem_request *req, > u64 offset, unsigned flags); > > /** > @@ -433,10 +433,11 @@ int intel_alloc_ringbuffer_obj(struct drm_device *dev, > void intel_stop_ring_buffer(struct intel_engine_cs *ring); > void intel_cleanup_ring_buffer(struct intel_engine_cs *ring); > > -int __must_check intel_ring_begin(struct intel_engine_cs *ring, int n); > -int __must_check intel_ring_cacheline_align(struct intel_engine_cs *ring); > +int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n); > +int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req); > int __must_check intel_ring_alloc_request(struct intel_engine_cs *ring, > - struct intel_context *ctx); > + struct intel_context *ctx, > + struct drm_i915_gem_request **req_out); > static 
inline void intel_ring_emit(struct intel_engine_cs *ring, > u32 data) > { > @@ -457,8 +458,8 @@ void __intel_ring_advance(struct intel_engine_cs *ring); > > int __must_check intel_ring_idle(struct intel_engine_cs *ring); > void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno); > -int intel_ring_flush_all_caches(struct intel_engine_cs *ring); > -int intel_ring_invalidate_all_caches(struct intel_engine_cs *ring); > +int intel_ring_flush_all_caches(struct drm_i915_gem_request *req); > +int intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req); > > void intel_fini_pipe_control(struct intel_engine_cs *ring); > int intel_init_pipe_control(struct intel_engine_cs *ring); > @@ -479,11 +480,4 @@ static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf) > return ringbuf->tail; > } > > -static inline struct drm_i915_gem_request * > -intel_ring_get_request(struct intel_engine_cs *ring) > -{ > - BUG_ON(ring->outstanding_lazy_request == NULL); > - return ring->outstanding_lazy_request; > -} > - > #endif /* _INTEL_RINGBUFFER_H_ */ > -- > 1.7.9.5 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
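The conversion pattern is the same throughout the patch: every function that emits commands now takes the request (and derives the engine from req->ring), while the top-level submitter explicitly allocates the request, passes it down, and either submits it or drops its reference on failure. A minimal sketch of that calling convention, modelled on the i915_gpu_idle() and gen*_ring_flush() hunks below — the emit_something()/submit_something() helpers are purely illustrative and not part of the patch:

static int emit_something(struct drm_i915_gem_request *req)
{
	struct intel_engine_cs *ring = req->ring;	/* engine comes from the request now */
	int ret;

	ret = intel_ring_begin(req, 2);			/* was intel_ring_begin(ring, 2) */
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_NOOP);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);
	return 0;
}

static int submit_something(struct intel_engine_cs *ring)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	struct drm_i915_gem_request *req;
	int ret;

	/* The top-level owner allocates the request up front... */
	ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
	if (ret)
		return ret;

	/* ...passes it to everything that writes to the ring... */
	ret = emit_something(req);
	if (ret) {
		i915_gem_request_unreference(req);
		return ret;
	}

	/* ...and submits it explicitly instead of leaving an OLR behind. */
	return i915_add_request_no_flush(req);
}

As the i915_gpu_idle() and i915_gem_init_hw() hunks use it, a successful i915_add_request_no_flush() appears to take over the submitter's reference, which is why the request is only unreferenced explicitly on the error paths.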
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 511f55f..7b4309e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -513,7 +513,7 @@ struct drm_i915_display_funcs { int (*queue_flip)(struct drm_device *dev, struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_i915_gem_object *obj, - struct intel_engine_cs *ring, + struct drm_i915_gem_request *req, uint32_t flags); void (*update_primary_plane)(struct drm_crtc *crtc, struct drm_framebuffer *fb, @@ -1796,7 +1796,8 @@ struct drm_i915_private { /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */ struct { int (*alloc_request)(struct intel_engine_cs *ring, - struct intel_context *ctx); + struct intel_context *ctx, + struct drm_i915_gem_request **req_out); int (*do_execbuf)(struct i915_execbuffer_params *params, struct drm_i915_gem_execbuffer2 *args, struct list_head *vmas); @@ -2511,10 +2512,10 @@ int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); void i915_gem_execbuffer_move_to_active(struct list_head *vmas, - struct intel_engine_cs *ring); + struct drm_i915_gem_request *req); void i915_gem_execbuffer_retire_commands(struct drm_device *dev, struct drm_file *file, - struct intel_engine_cs *ring, + struct drm_i915_gem_request *req, struct drm_i915_gem_object *obj); void i915_gem_execbuff_release_batch_obj(struct drm_i915_gem_object *batch_obj); int i915_gem_ringbuffer_submission(struct i915_execbuffer_params *qe, @@ -2609,9 +2610,9 @@ int __must_check __i915_mutex_lock_interruptible(struct drm_device *dev, const c int __must_check i915_mutex_lock_interruptible(struct drm_device *dev); #endif int i915_gem_object_sync(struct drm_i915_gem_object *obj, - struct intel_engine_cs *to, bool add_request); + struct drm_i915_gem_request *to_req); void i915_vma_move_to_active(struct i915_vma *vma, - struct intel_engine_cs *ring); + struct drm_i915_gem_request *req); int i915_gem_dumb_create(struct drm_file *file_priv, struct drm_device *dev, struct drm_mode_create_dumb *args); @@ -2678,19 +2679,19 @@ int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj); int __must_check i915_gem_init(struct drm_device *dev); int i915_gem_init_rings(struct drm_device *dev); int __must_check i915_gem_init_hw(struct drm_device *dev); -int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice); +int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice); void i915_gem_init_swizzling(struct drm_device *dev); void i915_gem_cleanup_ringbuffer(struct drm_device *dev); int __must_check i915_gpu_idle(struct drm_device *dev); int __must_check i915_gem_suspend(struct drm_device *dev); -int __i915_add_request(struct intel_engine_cs *ring, +int __i915_add_request(struct drm_i915_gem_request *req, struct drm_file *file, struct drm_i915_gem_object *batch_obj, bool flush_caches); -#define i915_add_request(ring) \ - __i915_add_request(ring, NULL, NULL, true) -#define i915_add_request_no_flush(ring) \ - __i915_add_request(ring, NULL, NULL, false) +#define i915_add_request(req) \ + __i915_add_request(req, NULL, NULL, true) +#define i915_add_request_no_flush(req) \ + __i915_add_request(req, NULL, NULL, false) int __i915_wait_request(struct drm_i915_gem_request *req, unsigned reset_counter, bool interruptible, @@ -2810,9 +2811,9 @@ int __must_check i915_gem_context_init(struct drm_device *dev); void i915_gem_context_fini(struct drm_device *dev); 
void i915_gem_context_reset(struct drm_device *dev); int i915_gem_context_open(struct drm_device *dev, struct drm_file *file); -int i915_gem_context_enable(struct drm_i915_private *dev_priv); +int i915_gem_context_enable(struct drm_i915_gem_request *req); void i915_gem_context_close(struct drm_device *dev, struct drm_file *file); -int i915_switch_context(struct intel_engine_cs *ring, +int i915_switch_context(struct drm_i915_gem_request *req, struct intel_context *to); struct intel_context * i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1d2cbfb..dbfb4e5 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1178,7 +1178,7 @@ i915_gem_check_olr(struct drm_i915_gem_request *req) ret = 0; if (req == req->ring->outstanding_lazy_request) - ret = i915_add_request(req->ring); + ret = i915_add_request(req); return ret; } @@ -2294,17 +2294,16 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj) static void i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, - struct intel_engine_cs *ring) + struct drm_i915_gem_request *req) { - struct drm_i915_gem_request *req; - struct intel_engine_cs *old_ring; + struct intel_engine_cs *new_ring, *old_ring; - BUG_ON(ring == NULL); + BUG_ON(req == NULL); - req = intel_ring_get_request(ring); + new_ring = i915_gem_request_get_ring(req); old_ring = i915_gem_request_get_ring(obj->last_read_req); - if (old_ring != ring && obj->last_write_req) { + if (old_ring != new_ring && obj->last_write_req) { /* Keep the request relative to the current ring */ i915_gem_request_assign(&obj->last_write_req, req); } @@ -2315,17 +2314,17 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, obj->active = 1; } - list_move_tail(&obj->ring_list, &ring->active_list); + list_move_tail(&obj->ring_list, &new_ring->active_list); - //printk(KERN_INFO "%s:%d> <%s> obj = %p, last_read_req <= 0x%p\n", __func__, __LINE__, ring->name, obj, req); + //printk(KERN_INFO "%s:%d> <%s> obj = %p, last_read_req <= 0x%p\n", __func__, __LINE__, new_ring->name, obj, req); i915_gem_request_assign(&obj->last_read_req, req); } void i915_vma_move_to_active(struct i915_vma *vma, - struct intel_engine_cs *ring) + struct drm_i915_gem_request *req) { list_move_tail(&vma->mm_list, &vma->vm->active_list); - return i915_gem_object_move_to_active(vma->obj, ring); + return i915_gem_object_move_to_active(vma->obj, req); } static void @@ -2440,26 +2439,35 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) return 0; } -int __i915_add_request(struct intel_engine_cs *ring, +int __i915_add_request(struct drm_i915_gem_request *request, struct drm_file *file, struct drm_i915_gem_object *obj, bool flush_caches) { - struct drm_i915_private *dev_priv = ring->dev->dev_private; - struct drm_i915_gem_request *request; + struct intel_engine_cs *ring; + struct drm_i915_private *dev_priv; struct intel_ringbuffer *ringbuf; u32 request_ring_position, request_start; int ret; - request = ring->outstanding_lazy_request; + /*printk( KERN_ERR "<%s> request %c %d:%d, OLR %c %d:%d\n", + request ? request->ring->name : "???", + request ? '=' : '?', + request ? request->uniq : -1, + request ? request->seqno : 0, + request->ring->outstanding_lazy_request ? '=' : '?', + request->ring->outstanding_lazy_request ? request->ring->outstanding_lazy_request->uniq : -1, + request->ring->outstanding_lazy_request ? 
request->ring->outstanding_lazy_request->seqno : 0);*/ + //dump_stack(); + if (WARN_ON(request == NULL)) return -ENOMEM; - if (i915.enable_execlists) { - struct intel_context *ctx = request->ctx; - ringbuf = ctx->engine[ring->id].ringbuf; - } else - ringbuf = ring->buffer; + ring = request->ring; + dev_priv = ring->dev->dev_private; + ringbuf = request->ringbuf; + + WARN_ON(request != ring->outstanding_lazy_request); request_start = intel_ring_get_tail(ringbuf); /* @@ -2471,9 +2479,9 @@ int __i915_add_request(struct intel_engine_cs *ring, */ if (flush_caches) { if (i915.enable_execlists) - ret = logical_ring_flush_all_caches(ringbuf); + ret = logical_ring_flush_all_caches(request); else - ret = intel_ring_flush_all_caches(ring); + ret = intel_ring_flush_all_caches(request); if (ret) return ret; } @@ -2488,9 +2496,9 @@ int __i915_add_request(struct intel_engine_cs *ring, request_ring_position = intel_ring_get_tail(ringbuf); if (i915.enable_execlists) - ret = ring->emit_request(ringbuf); + ret = ring->emit_request(request); else - ret = ring->add_request(ring); + ret = ring->add_request(request); if (ret) return ret; @@ -2504,7 +2512,8 @@ int __i915_add_request(struct intel_engine_cs *ring, * inactive_list and lose its active reference. Hence we do not need * to explicitly hold another reference here. */ - request->batch_obj = obj; + if (obj) + request->batch_obj = obj; if (!i915.enable_execlists) { /* Hold a reference to the current context so that we can inspect @@ -2744,6 +2753,7 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, #endif /* This may not have been flushed before the reset, so clean it now */ + WARN_ON(ring->outstanding_lazy_request); i915_gem_request_assign(&ring->outstanding_lazy_request, NULL); } @@ -3114,8 +3124,6 @@ out: * * @obj: object which may be in use on another ring. * @to: ring we wish to use the object on. May be NULL. - * @add_request: do we need to add a request to track operations - * submitted on ring with sync_to function * * This code is meant to abstract object synchronization with the GPU. * Calling with NULL implies synchronizing the object with the CPU @@ -3125,8 +3133,9 @@ out: */ int i915_gem_object_sync(struct drm_i915_gem_object *obj, - struct intel_engine_cs *to, bool add_request) + struct drm_i915_gem_request *to_req) { + struct intel_engine_cs *to = to_req->ring; struct intel_engine_cs *from; u32 seqno; int ret, idx; @@ -3152,7 +3161,7 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, return ret; trace_i915_gem_ring_sync_to(from, to, obj->last_read_req); - ret = to->semaphore.sync_to(to, from, seqno); + ret = to->semaphore.sync_to(to_req, from, seqno); if (!ret) { /* We use last_read_req because sync_to() * might have just caused seqno wrap under @@ -3160,8 +3169,6 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, */ from->semaphore.sync_seqno[idx] = i915_gem_request_get_seqno(obj->last_read_req); - if (add_request) - i915_add_request_no_flush(to); } return ret; @@ -3266,18 +3273,23 @@ int i915_gpu_idle(struct drm_device *dev) /* Flush everything onto the inactive list. */ for_each_ring(ring, dev_priv, i) { if (!i915.enable_execlists) { - ret = i915_switch_context(ring, ring->default_context); + struct drm_i915_gem_request *req; + + ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req); if (ret) return ret; - } - /* Make sure the context switch (if one actually happened) - * gets wrapped up and finished rather than hanging around - * and confusing things later. 
*/ - if (ring->outstanding_lazy_request) { - ret = i915_add_request(ring); - if (ret) + ret = i915_switch_context(req, ring->default_context); + if (ret) { + i915_gem_request_unreference(req); return ret; + } + + ret = i915_add_request_no_flush(req); + if (ret) { + i915_gem_request_unreference(req); + return ret; + } } ret = intel_ring_idle(ring); @@ -4099,8 +4111,19 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, bool was_pin_display; int ret; - if (pipelined != i915_gem_request_get_ring(obj->last_read_req)) { - ret = i915_gem_object_sync(obj, pipelined, true); + if (pipelined && (pipelined != i915_gem_request_get_ring(obj->last_read_req))) { + struct drm_i915_private *dev_priv = pipelined->dev->dev_private; + struct drm_i915_gem_request *req; + + ret = dev_priv->gt.alloc_request(pipelined, pipelined->default_context, &req); + if (ret) + return ret; + + ret = i915_gem_object_sync(obj, req); + if (ret) + return ret; + + ret = i915_add_request_no_flush(req); if (ret) return ret; } @@ -4771,8 +4794,9 @@ err: return ret; } -int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice) +int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice) { + struct intel_engine_cs *ring = req->ring; struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200); @@ -4782,7 +4806,7 @@ int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice) if (!HAS_L3_DPF(dev) || !remap_info) return 0; - ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3); + ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3); if (ret) return ret; @@ -4962,37 +4986,67 @@ i915_gem_init_hw(struct drm_device *dev) */ init_unused_rings(dev); + BUG_ON(!dev_priv->ring[RCS].default_context); + + ret = i915_ppgtt_init_hw(dev); + if (ret) { + DRM_ERROR("PPGTT enable failed %d\n", ret); + i915_gem_cleanup_ringbuffer(dev); + return ret; + } + for_each_ring(ring, dev_priv, i) { + struct drm_i915_gem_request *req; + ret = ring->init_hw(ring); if (ret) return ret; - } - for (i = 0; i < NUM_L3_SLICES(dev); i++) - i915_gem_l3_remap(&dev_priv->ring[RCS], i); + if (!ring->default_context) + continue; - /* - * XXX: Contexts should only be initialized once. Doing a switch to the - * default context switch however is something we'd like to do after - * reset or thaw (the latter may not actually be necessary for HW, but - * goes with our code better). Context switching requires rings (for - * the do_switch), but before enabling PPGTT. So don't move this. - */ - ret = i915_gem_context_enable(dev_priv); - if (ret && ret != -EIO) { - DRM_ERROR("Context enable failed %d\n", ret); - i915_gem_cleanup_ringbuffer(dev); + ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req); + if (ret) + return ret; - return ret; - } + if (ring->id == RCS) { + for (i = 0; i < NUM_L3_SLICES(dev); i++) + i915_gem_l3_remap(req, i); + } - ret = i915_ppgtt_init_hw(dev); - if (ret && ret != -EIO) { - DRM_ERROR("PPGTT enable failed %d\n", ret); - i915_gem_cleanup_ringbuffer(dev); + /* + * XXX: Contexts should only be initialized once. Doing a switch to the + * default context switch however is something we'd like to do after + * reset or thaw (the latter may not actually be necessary for HW, but + * goes with our code better). Context switching requires rings (for + * the do_switch), but before enabling PPGTT. So don't move this. 
+ */ + ret = i915_gem_context_enable(req); + if (ret && ret != -EIO) { + DRM_ERROR("Context enable failed %d\n", ret); + i915_gem_request_unreference(req); + i915_gem_cleanup_ringbuffer(dev); + + return ret; + } + + ret = i915_ppgtt_init_ring(req); + if (ret && ret != -EIO) { + DRM_ERROR("PPGTT enable failed %d\n", ret); + i915_gem_request_unreference(req); + i915_gem_cleanup_ringbuffer(dev); + } + + ret = i915_add_request_no_flush(req); + if (ret) { + DRM_ERROR("Add request failed: %d\n", ret); + i915_gem_request_unreference(req); + i915_gem_cleanup_ringbuffer(dev); + return ret; + } } - return ret; + return 0; } int i915_gem_init(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index c5e1bfc..72e280b 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -401,41 +401,23 @@ void i915_gem_context_fini(struct drm_device *dev) i915_gem_context_unreference(dctx); } -int i915_gem_context_enable(struct drm_i915_private *dev_priv) +int i915_gem_context_enable(struct drm_i915_gem_request *req) { - struct intel_engine_cs *ring; - int ret, i; - - BUG_ON(!dev_priv->ring[RCS].default_context); + struct intel_engine_cs *ring = req->ring; + int ret; if (i915.enable_execlists) { - for_each_ring(ring, dev_priv, i) { - if (ring->init_context) { - ret = ring->init_context(ring, - ring->default_context); - if (ret) { - DRM_ERROR("ring init context: %d\n", - ret); - return ret; - } - } - } + if (ring->init_context == NULL) + return 0; + ret = ring->init_context(req, ring->default_context); } else - for_each_ring(ring, dev_priv, i) { - ret = i915_switch_context(ring, ring->default_context); - if (ret) - return ret; - - /* Make sure the context switch (if one actually happened) - * gets wrapped up and finished rather than hanging around - * and confusing things later. */ - if (ring->outstanding_lazy_request) { - ret = i915_add_request_no_flush(ring); - if (ret) - return ret; - } - } + ret = i915_switch_context(req, ring->default_context); + + if (ret) { + DRM_ERROR("ring init context: %d\n", ret); + return ret; + } return 0; } @@ -488,10 +470,11 @@ i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id) } static inline int -mi_set_context(struct intel_engine_cs *ring, +mi_set_context(struct drm_i915_gem_request *req, struct intel_context *new_context, u32 hw_flags) { + struct intel_engine_cs *ring = req->ring; u32 flags = hw_flags | MI_MM_SPACE_GTT; int ret; @@ -501,7 +484,7 @@ mi_set_context(struct intel_engine_cs *ring, * itlb_before_ctx_switch. 
*/ if (IS_GEN6(ring->dev)) { - ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, 0); + ret = ring->flush(req, I915_GEM_GPU_DOMAINS, 0); if (ret) return ret; } @@ -510,7 +493,7 @@ mi_set_context(struct intel_engine_cs *ring, if (!IS_HASWELL(ring->dev) && INTEL_INFO(ring->dev)->gen < 8) flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN); - ret = intel_ring_begin(ring, 6); + ret = intel_ring_begin(req, 6); if (ret) return ret; @@ -540,9 +523,10 @@ mi_set_context(struct intel_engine_cs *ring, return ret; } -static int do_switch(struct intel_engine_cs *ring, +static int do_switch(struct drm_i915_gem_request *req, struct intel_context *to) { + struct intel_engine_cs *ring = req->ring; struct drm_i915_private *dev_priv = ring->dev->dev_private; struct intel_context *from = ring->last_context; u32 hw_flags = 0; @@ -577,7 +561,7 @@ static int do_switch(struct intel_engine_cs *ring, if (to->ppgtt) { trace_switch_mm(ring, to); - ret = to->ppgtt->switch_mm(to->ppgtt, ring); + ret = to->ppgtt->switch_mm(to->ppgtt, req); if (ret) goto unpin_out; } @@ -608,7 +592,7 @@ static int do_switch(struct intel_engine_cs *ring, if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to)) hw_flags |= MI_RESTORE_INHIBIT; - ret = mi_set_context(ring, to, hw_flags); + ret = mi_set_context(req, to, hw_flags); if (ret) goto unpin_out; @@ -616,7 +600,7 @@ static int do_switch(struct intel_engine_cs *ring, if (!(to->remap_slice & (1<<i))) continue; - ret = i915_gem_l3_remap(ring, i); + ret = i915_gem_l3_remap(req, i); /* If it failed, try again next round */ if (ret) DRM_DEBUG_DRIVER("L3 remapping failed\n"); @@ -632,7 +616,7 @@ static int do_switch(struct intel_engine_cs *ring, */ if (from != NULL) { from->legacy_hw_ctx.rcs_state->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION; - i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->legacy_hw_ctx.rcs_state), ring); + i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->legacy_hw_ctx.rcs_state), req); /* As long as MI_SET_CONTEXT is serializing, ie. it flushes the * whole damn pipeline, we don't need to explicitly mark the * object dirty. The only exception is that the context must be @@ -658,12 +642,12 @@ done: if (uninitialized) { if (ring->init_context) { - ret = ring->init_context(ring, to); + ret = ring->init_context(req, to); if (ret) DRM_ERROR("ring init context: %d\n", ret); } - ret = i915_gem_render_state_init(ring); + ret = i915_gem_render_state_init(req); if (ret) DRM_ERROR("init render state: %d\n", ret); } @@ -690,9 +674,10 @@ unpin_out: * switched by writing to the ELSP and requests keep a reference to their * context. 
*/ -int i915_switch_context(struct intel_engine_cs *ring, +int i915_switch_context(struct drm_i915_gem_request *req, struct intel_context *to) { + struct intel_engine_cs *ring = req->ring; struct drm_i915_private *dev_priv = ring->dev->dev_private; WARN_ON(i915.enable_execlists); @@ -708,7 +693,7 @@ int i915_switch_context(struct intel_engine_cs *ring, return 0; } - return do_switch(ring, to); + return do_switch(req, to); } static bool contexts_enabled(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index ca31673..5caa2a2 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -822,7 +822,7 @@ err: } static int -i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring, +i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, struct list_head *vmas) { struct i915_vma *vma; @@ -832,7 +832,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring, list_for_each_entry(vma, vmas, exec_list) { struct drm_i915_gem_object *obj = vma->obj; - ret = i915_gem_object_sync(obj, ring, false); + ret = i915_gem_object_sync(obj, req); if (ret) return ret; @@ -843,7 +843,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring, } if (flush_chipset) - i915_gem_chipset_flush(ring->dev); + i915_gem_chipset_flush(req->ring->dev); if (flush_domains & I915_GEM_DOMAIN_GTT) wmb(); @@ -941,9 +941,9 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, void i915_gem_execbuffer_move_to_active(struct list_head *vmas, - struct intel_engine_cs *ring) + struct drm_i915_gem_request *req) { - struct drm_i915_gem_request *req = intel_ring_get_request(ring); + struct intel_engine_cs *ring = i915_gem_request_get_ring(req); struct i915_vma *vma; list_for_each_entry(vma, vmas, exec_list) { @@ -957,7 +957,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, obj->base.pending_read_domains |= obj->base.read_domains; obj->base.read_domains = obj->base.pending_read_domains; - i915_vma_move_to_active(vma, ring); + i915_vma_move_to_active(vma, req); if (obj->base.write_domain) { obj->dirty = 1; i915_gem_request_assign(&obj->last_write_req, req); @@ -983,20 +983,21 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, void i915_gem_execbuffer_retire_commands(struct drm_device *dev, struct drm_file *file, - struct intel_engine_cs *ring, + struct drm_i915_gem_request *req, struct drm_i915_gem_object *obj) { /* Unconditionally force add_request to emit a full flush. 
*/ - ring->gpu_caches_dirty = true; + req->ring->gpu_caches_dirty = true; /* Add a breadcrumb for the completion of the batch buffer */ - (void)__i915_add_request(ring, file, obj, true); + (void)__i915_add_request(req, file, obj, true); } static int i915_reset_gen7_sol_offsets(struct drm_device *dev, - struct intel_engine_cs *ring) + struct drm_i915_gem_request *req) { + struct intel_engine_cs *ring = req->ring; struct drm_i915_private *dev_priv = dev->dev_private; int ret, i; @@ -1005,7 +1006,7 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev, return -EINVAL; } - ret = intel_ring_begin(ring, 4 * 3); + ret = intel_ring_begin(req, 4 * 3); if (ret) return ret; @@ -1021,10 +1022,11 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev, } static int -i915_emit_box(struct intel_engine_cs *ring, +i915_emit_box(struct drm_i915_gem_request *req, struct drm_clip_rect *box, int DR1, int DR4) { + struct intel_engine_cs *ring = req->ring; int ret; if (box->y2 <= box->y1 || box->x2 <= box->x1 || @@ -1035,7 +1037,7 @@ i915_emit_box(struct intel_engine_cs *ring, } if (INTEL_INFO(ring->dev)->gen >= 4) { - ret = intel_ring_begin(ring, 4); + ret = intel_ring_begin(req, 4); if (ret) return ret; @@ -1044,7 +1046,7 @@ i915_emit_box(struct intel_engine_cs *ring, intel_ring_emit(ring, ((box->x2 - 1) & 0xffff) | (box->y2 - 1) << 16); intel_ring_emit(ring, DR4); } else { - ret = intel_ring_begin(ring, 6); + ret = intel_ring_begin(req, 6); if (ret) return ret; @@ -1151,11 +1153,11 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, goto error; } - ret = i915_gem_execbuffer_move_to_gpu(ring, vmas); + ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas); if (ret) goto error; - i915_gem_execbuffer_move_to_active(vmas, ring); + i915_gem_execbuffer_move_to_active(vmas, params->request); /* Make sure the OLR hasn't advanced (which would indicate a flush * of the work in progress which in turn would be a Bad Thing). */ @@ -1200,18 +1202,18 @@ int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params) /* Unconditionally invalidate gpu caches and ensure that we do flush * any residual writes from the previous batch. 
*/ - ret = intel_ring_invalidate_all_caches(ring); + ret = intel_ring_invalidate_all_caches(params->request); if (ret) goto error; /* Switch to the correct context for the batch */ - ret = i915_switch_context(ring, params->ctx); + ret = i915_switch_context(params->request, params->ctx); if (ret) goto error; if (ring == &dev_priv->ring[RCS] && params->instp_mode != dev_priv->relative_constants_mode) { - ret = intel_ring_begin(ring, 4); + ret = intel_ring_begin(params->request, 4); if (ret) goto error; @@ -1225,7 +1227,7 @@ int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params) } if (params->args_flags & I915_EXEC_GEN7_SOL_RESET) { - ret = i915_reset_gen7_sol_offsets(params->dev, ring); + ret = i915_reset_gen7_sol_offsets(params->dev, params->request); if (ret) goto error; } @@ -1236,29 +1238,31 @@ if (params->cliprects) { for (i = 0; i < params->args_num_cliprects; i++) { - ret = i915_emit_box(ring, &params->cliprects[i], - params->args_DR1, params->args_DR4); + ret = i915_emit_box(params->request, + &params->cliprects[i], + params->args_DR1, + params->args_DR4); if (ret) goto error; - ret = ring->dispatch_execbuffer(ring, + ret = ring->dispatch_execbuffer(params->request, exec_start, exec_len, params->dispatch_flags); if (ret) goto error; } } else { - ret = ring->dispatch_execbuffer(ring, + ret = ring->dispatch_execbuffer(params->request, exec_start, exec_len, params->dispatch_flags); if (ret) goto error; } - trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), params->dispatch_flags); + trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags); - i915_gem_execbuffer_retire_commands(params->dev, params->file, ring, - params->batch_obj); + i915_gem_execbuffer_retire_commands(params->dev, params->file, + params->request, params->batch_obj); error: /* intel_gpu_busy should also get a ref, so it will free when the device @@ -1490,10 +1494,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, params->batch_obj_vm_offset = i915_gem_obj_offset(batch_obj, vm); /* Allocate a request for this batch buffer nice and early.
*/ - ret = dev_priv->gt.alloc_request(ring, ctx); + ret = dev_priv->gt.alloc_request(ring, ctx, &params->request); if (ret) goto err; - params->request = ring->outstanding_lazy_request; + WARN_ON(params->request != ring->outstanding_lazy_request); /* Save assorted stuff away to pass through to *_submission_final() */ params->dev = dev; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 7eead93..776776e 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -213,14 +213,15 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr, } /* Broadwell Page Directory Pointer Descriptors */ -static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry, - uint64_t val) +static int gen8_write_pdp(struct drm_i915_gem_request *req, unsigned entry, + uint64_t val) { + struct intel_engine_cs *ring = req->ring; int ret; BUG_ON(entry >= 4); - ret = intel_ring_begin(ring, 6); + ret = intel_ring_begin(req, 6); if (ret) return ret; @@ -236,7 +237,7 @@ static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry, } static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt, - struct intel_engine_cs *ring) + struct drm_i915_gem_request *req) { int i, ret; @@ -245,7 +246,7 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt, for (i = used_pd - 1; i >= 0; i--) { dma_addr_t addr = ppgtt->pd_dma_addr[i]; - ret = gen8_write_pdp(ring, i, addr); + ret = gen8_write_pdp(req, i, addr); if (ret) return ret; } @@ -710,16 +711,17 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) } static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, - struct intel_engine_cs *ring) + struct drm_i915_gem_request *req) { + struct intel_engine_cs *ring = req->ring; int ret; /* NB: TLBs must be flushed and invalidated before a switch */ - ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); + ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); if (ret) return ret; - ret = intel_ring_begin(ring, 6); + ret = intel_ring_begin(req, 6); if (ret) return ret; @@ -735,16 +737,17 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, } static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, - struct intel_engine_cs *ring) + struct drm_i915_gem_request *req) { + struct intel_engine_cs *ring = req->ring; int ret; /* NB: TLBs must be flushed and invalidated before a switch */ - ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); + ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); if (ret) return ret; - ret = intel_ring_begin(ring, 6); + ret = intel_ring_begin(req, 6); if (ret) return ret; @@ -758,7 +761,7 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, /* XXX: RCS is the only one to auto invalidate the TLBs?
*/ if (ring->id != RCS) { - ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); + ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); if (ret) return ret; } @@ -767,8 +770,9 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, } static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt, - struct intel_engine_cs *ring) + struct drm_i915_gem_request *req) { + struct intel_engine_cs *ring = req->ring; struct drm_device *dev = ppgtt->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; @@ -1125,11 +1129,6 @@ int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) int i915_ppgtt_init_hw(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_engine_cs *ring; - struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; - int i, ret = 0; - /* In the case of execlists, PPGTT is enabled by the context descriptor * and the PDPs are contained within the context itself. We don't * need to do anything here. */ @@ -1148,25 +1147,20 @@ int i915_ppgtt_init_hw(struct drm_device *dev) else WARN_ON(1); - if (ppgtt) { - for_each_ring(ring, dev_priv, i) { - ret = ppgtt->switch_mm(ppgtt, ring); - if (ret != 0) - return ret; - - /* Make sure the context switch (if one actually happened) - * gets wrapped up and finished rather than hanging around - * and confusing things later. */ - if (ring->outstanding_lazy_request) { - ret = i915_add_request_no_flush(ring); - if (ret) - return ret; - } - } - } + return 0; +} - return ret; +int i915_ppgtt_init_ring(struct drm_i915_gem_request *req) +{ + struct drm_i915_private *dev_priv = req->ring->dev->dev_private; + struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; + + if (!ppgtt) + return 0; + + return ppgtt->switch_mm(ppgtt, req); } + struct i915_hw_ppgtt * i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv) { diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index dd849df..bee3e2a 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -267,7 +267,7 @@ struct i915_hw_ppgtt { int (*enable)(struct i915_hw_ppgtt *ppgtt); int (*switch_mm)(struct i915_hw_ppgtt *ppgtt, - struct intel_engine_cs *ring); + struct drm_i915_gem_request *req); void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m); }; @@ -278,6 +278,7 @@ void i915_global_gtt_cleanup(struct drm_device *dev); int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt); int i915_ppgtt_init_hw(struct drm_device *dev); +int i915_ppgtt_init_ring(struct drm_i915_gem_request *req); void i915_ppgtt_release(struct kref *kref); struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv); diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index aba39c3..0e0c23fe 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -152,8 +152,9 @@ int i915_gem_render_state_prepare(struct intel_engine_cs *ring, return 0; } -int i915_gem_render_state_init(struct intel_engine_cs *ring) +int i915_gem_render_state_init(struct drm_i915_gem_request *req) { + struct intel_engine_cs *ring = i915_gem_request_get_ring(req); struct render_state so; int ret; @@ -164,16 +165,17 @@ int i915_gem_render_state_init(struct intel_engine_cs *ring) if (so.rodata == NULL) return 0; - ret = ring->dispatch_execbuffer(ring, + ret = ring->dispatch_execbuffer(req, so.ggtt_offset, 
so.rodata->batch_items * 4, I915_DISPATCH_SECURE); if (ret) goto out; - i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring); + i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req); - ret = __i915_add_request(ring, NULL, so.obj, true); +// ret = __i915_add_request(req, NULL, so.obj, true); + req->batch_obj = so.obj; /* __i915_add_request moves object to inactive if it fails */ out: i915_gem_render_state_fini(&so); diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.h b/drivers/gpu/drm/i915/i915_gem_render_state.h index c44961e..7aa7372 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.h +++ b/drivers/gpu/drm/i915/i915_gem_render_state.h @@ -39,7 +39,7 @@ struct render_state { int gen; }; -int i915_gem_render_state_init(struct intel_engine_cs *ring); +int i915_gem_render_state_init(struct drm_i915_gem_request *req); void i915_gem_render_state_fini(struct render_state *so); int i915_gem_render_state_prepare(struct intel_engine_cs *ring, struct render_state *so); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f0cf421..c0b0e37 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9089,14 +9089,15 @@ static int intel_gen2_queue_flip(struct drm_device *dev, struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_i915_gem_object *obj, - struct intel_engine_cs *ring, + struct drm_i915_gem_request *req, uint32_t flags) { + struct intel_engine_cs *ring = req->ring; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); u32 flip_mask; int ret; - ret = intel_ring_begin(ring, 6); + ret = intel_ring_begin(req, 6); if (ret) return ret; @@ -9116,7 +9117,7 @@ static int intel_gen2_queue_flip(struct drm_device *dev, intel_ring_emit(ring, 0); /* aux display base address, unused */ intel_mark_page_flip_active(intel_crtc); - i915_add_request_no_flush(ring); + i915_add_request_no_flush(req); return 0; } @@ -9124,14 +9125,15 @@ static int intel_gen3_queue_flip(struct drm_device *dev, struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_i915_gem_object *obj, - struct intel_engine_cs *ring, + struct drm_i915_gem_request *req, uint32_t flags) { + struct intel_engine_cs *ring = req->ring; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); u32 flip_mask; int ret; - ret = intel_ring_begin(ring, 6); + ret = intel_ring_begin(req, 6); if (ret) return ret; @@ -9148,7 +9150,7 @@ static int intel_gen3_queue_flip(struct drm_device *dev, intel_ring_emit(ring, MI_NOOP); intel_mark_page_flip_active(intel_crtc); - i915_add_request_no_flush(ring); + i915_add_request_no_flush(req); return 0; } @@ -9156,15 +9158,16 @@ static int intel_gen4_queue_flip(struct drm_device *dev, struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_i915_gem_object *obj, - struct intel_engine_cs *ring, + struct drm_i915_gem_request *req, uint32_t flags) { + struct intel_engine_cs *ring = req->ring; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); uint32_t pf, pipesrc; int ret; - ret = intel_ring_begin(ring, 4); + ret = intel_ring_begin(req, 4); if (ret) return ret; @@ -9187,7 +9190,7 @@ static int intel_gen4_queue_flip(struct drm_device *dev, intel_ring_emit(ring, pf | pipesrc); intel_mark_page_flip_active(intel_crtc); - i915_add_request_no_flush(ring); + i915_add_request_no_flush(req); return 0; } @@ -9195,15 +9198,16 @@ static int intel_gen6_queue_flip(struct drm_device *dev, struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_i915_gem_object *obj, - 
struct intel_engine_cs *ring, + struct drm_i915_gem_request *req, uint32_t flags) { + struct intel_engine_cs *ring = req->ring; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); uint32_t pf, pipesrc; int ret; - ret = intel_ring_begin(ring, 4); + ret = intel_ring_begin(req, 4); if (ret) return ret; @@ -9223,7 +9227,7 @@ static int intel_gen6_queue_flip(struct drm_device *dev, intel_ring_emit(ring, pf | pipesrc); intel_mark_page_flip_active(intel_crtc); - i915_add_request_no_flush(ring); + i915_add_request_no_flush(req); return 0; } @@ -9231,9 +9235,10 @@ static int intel_gen7_queue_flip(struct drm_device *dev, struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_i915_gem_object *obj, - struct intel_engine_cs *ring, + struct drm_i915_gem_request *req, uint32_t flags) { + struct intel_engine_cs *ring = req->ring; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); uint32_t plane_bit = 0; int len, ret; @@ -9275,11 +9280,11 @@ static int intel_gen7_queue_flip(struct drm_device *dev, * then do the cacheline alignment, and finally emit the * MI_DISPLAY_FLIP. */ - ret = intel_ring_cacheline_align(ring); + ret = intel_ring_cacheline_align(req); if (ret) return ret; - ret = intel_ring_begin(ring, len); + ret = intel_ring_begin(req, len); if (ret) return ret; @@ -9318,7 +9323,7 @@ static int intel_gen7_queue_flip(struct drm_device *dev, intel_ring_emit(ring, (MI_NOOP)); intel_mark_page_flip_active(intel_crtc); - i915_add_request_no_flush(ring); + i915_add_request_no_flush(req); return 0; } @@ -9474,9 +9479,10 @@ static int intel_gen9_queue_flip(struct drm_device *dev, struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_i915_gem_object *obj, - struct intel_engine_cs *ring, + struct drm_i915_gem_request *req, uint32_t flags) { + struct intel_engine_cs *ring = req->ring; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); uint32_t plane = 0, stride; int ret; @@ -9508,7 +9514,7 @@ static int intel_gen9_queue_flip(struct drm_device *dev, return -ENODEV; } - ret = intel_ring_begin(ring, 10); + ret = intel_ring_begin(req, 10); if (ret) return ret; @@ -9528,7 +9534,7 @@ static int intel_gen9_queue_flip(struct drm_device *dev, intel_ring_emit(ring, intel_crtc->unpin_work->gtt_offset); intel_mark_page_flip_active(intel_crtc); - i915_add_request_no_flush(ring); + i915_add_request_no_flush(req); return 0; } @@ -9537,7 +9543,7 @@ static int intel_default_queue_flip(struct drm_device *dev, struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_i915_gem_object *obj, - struct intel_engine_cs *ring, + struct drm_i915_gem_request *req, uint32_t flags) { return -ENODEV; @@ -9729,22 +9735,18 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, i915_gem_request_assign(&work->flip_queued_req, obj->last_write_req); } else { - ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, ring, + struct drm_i915_gem_request *req; + + ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req); + if (ret) + return ret; + + i915_gem_request_assign(&work->flip_queued_req, req); + + ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, req, page_flip_flags); if (ret) goto cleanup_unpin; - - /* Borked: need to get the seqno for the request submitted in - * 'queue_flip()' above. However, either the request has been - * posted already and the seqno is gone (q_f calls add_request), - * or the request never gets posted and is merged into whatever - * render comes along next (q_f calls ring_advance). 
- * - * On the other hand, seqnos are going away soon anyway! So - * hopefully the problem will disappear... - */ - i915_gem_request_assign(&work->flip_queued_req, - ring->outstanding_lazy_request ? intel_ring_get_request(ring) : NULL); } work->flip_queued_vblank = drm_vblank_count(dev, intel_crtc->pipe); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 80cb87e..5077a77 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -203,6 +203,10 @@ enum { }; #define GEN8_CTX_ID_SHIFT 32 +static int intel_logical_ring_begin(struct drm_i915_gem_request *req, + int num_dwords); +static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req, + struct intel_context *ctx); static int intel_lr_context_pin(struct intel_engine_cs *ring, struct intel_context *ctx); @@ -587,9 +591,9 @@ static int execlists_context_queue(struct intel_engine_cs *ring, return 0; } -static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf) +static int logical_ring_invalidate_all_caches(struct drm_i915_gem_request *req) { - struct intel_engine_cs *ring = ringbuf->ring; + struct intel_engine_cs *ring = req->ring; uint32_t flush_domains; int ret; @@ -597,7 +601,7 @@ static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf) if (ring->gpu_caches_dirty) flush_domains = I915_GEM_GPU_DOMAINS; - ret = ring->emit_flush(ringbuf, I915_GEM_GPU_DOMAINS, flush_domains); + ret = ring->emit_flush(req, I915_GEM_GPU_DOMAINS, flush_domains); if (ret) return ret; @@ -605,10 +609,9 @@ static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf) return 0; } -static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf, +static int execlists_move_to_gpu(struct drm_i915_gem_request *req, struct list_head *vmas) { - struct intel_engine_cs *ring = ringbuf->ring; struct i915_vma *vma; uint32_t flush_domains = 0; bool flush_chipset = false; @@ -617,7 +620,7 @@ static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf, list_for_each_entry(vma, vmas, exec_list) { struct drm_i915_gem_object *obj = vma->obj; - ret = i915_gem_object_sync(obj, ring, true); + ret = i915_gem_object_sync(obj, req); if (ret) return ret; @@ -657,7 +660,6 @@ int intel_execlists_submission(struct i915_execbuffer_params *params, struct drm_device *dev = params->dev; struct intel_engine_cs *ring = params->ring; struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_ringbuffer *ringbuf = params->ctx->engine[ring->id].ringbuf; int ret; params->instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK; @@ -706,11 +708,11 @@ int intel_execlists_submission(struct i915_execbuffer_params *params, return -EINVAL; } - ret = execlists_move_to_gpu(ringbuf, vmas); + ret = execlists_move_to_gpu(params->request, vmas); if (ret) return ret; - i915_gem_execbuffer_move_to_active(vmas, ring); + i915_gem_execbuffer_move_to_active(vmas, params->request); ret = dev_priv->gt.do_execfinal(params); if (ret) @@ -742,13 +744,13 @@ int intel_execlists_submission_final(struct i915_execbuffer_params *params) /* Unconditionally invalidate gpu caches and ensure that we do flush * any residual writes from the previous batch. 
*/ - ret = logical_ring_invalidate_all_caches(ringbuf); + ret = logical_ring_invalidate_all_caches(params->request); if (ret) return ret; if (ring == &dev_priv->ring[RCS] && params->instp_mode != dev_priv->relative_constants_mode) { - ret = intel_logical_ring_begin(ringbuf, 4); + ret = intel_logical_ring_begin(params->request, 4); if (ret) return ret; @@ -764,13 +766,14 @@ int intel_execlists_submission_final(struct i915_execbuffer_params *params) exec_start = params->batch_obj_vm_offset + params->args_batch_start_offset; - ret = ring->emit_bb_start(ringbuf, exec_start, params->dispatch_flags); + ret = ring->emit_bb_start(params->request, exec_start, params->dispatch_flags); if (ret) return ret; - trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), params->dispatch_flags); + trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags); - i915_gem_execbuffer_retire_commands(params->dev, params->file, ring, params->batch_obj); + i915_gem_execbuffer_retire_commands(params->dev, params->file, + params->request, params->batch_obj); return 0; } @@ -827,15 +830,15 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring) I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING)); } -int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf) +int logical_ring_flush_all_caches(struct drm_i915_gem_request *req) { - struct intel_engine_cs *ring = ringbuf->ring; + struct intel_engine_cs *ring = req->ring; int ret; if (!ring->gpu_caches_dirty) return 0; - ret = ring->emit_flush(ringbuf, 0, I915_GEM_GPU_DOMAINS); + ret = ring->emit_flush(req, 0, I915_GEM_GPU_DOMAINS); if (ret) return ret; @@ -910,13 +913,17 @@ void intel_lr_context_unpin(struct intel_engine_cs *ring, } int intel_logical_ring_alloc_request(struct intel_engine_cs *ring, - struct intel_context *ctx) + struct intel_context *ctx, + struct drm_i915_gem_request **req_out) { struct drm_i915_gem_request *request; struct drm_i915_private *dev_private = ring->dev->dev_private; int ret; - if (ring->outstanding_lazy_request) + if (!req_out) + return -EINVAL; + + if ((*req_out = ring->outstanding_lazy_request) != NULL) return 0; request = kzalloc(sizeof(*request), GFP_KERNEL); @@ -953,7 +960,7 @@ int intel_logical_ring_alloc_request(struct intel_engine_cs *ring, i915_gem_context_reference(request->ctx); request->ringbuf = ctx->engine[ring->id].ringbuf; - ring->outstanding_lazy_request = request; + *req_out = ring->outstanding_lazy_request = request; return 0; } @@ -1090,7 +1097,7 @@ static int logical_ring_prepare(struct intel_ringbuffer *ringbuf, int bytes) /** * intel_logical_ring_begin() - prepare the logical ringbuffer to accept some commands * - * @ringbuf: Logical ringbuffer. + * @request: The request to start some new work for * @num_dwords: number of DWORDs that we plan to write to the ringbuffer. * * The ringbuffer might not be ready to accept the commands right away (maybe it needs to @@ -1100,8 +1107,9 @@ static int logical_ring_prepare(struct intel_ringbuffer *ringbuf, int bytes) * * Return: non-zero if the ringbuffer is not ready to be written to. 
*/ -int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords) +static int intel_logical_ring_begin(struct drm_i915_gem_request *req, int num_dwords) { + struct intel_ringbuffer *ringbuf = req->ringbuf; struct intel_engine_cs *ring = ringbuf->ring; struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; @@ -1116,38 +1124,28 @@ int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords) if (ret) return ret; - if(!ring->outstanding_lazy_request) { - printk(KERN_INFO "%s:%d> \x1B[31;1mring->outstanding_lazy_request = 0x%p\x1B[0m\n", __func__, __LINE__, ring->outstanding_lazy_request); - dump_stack(); - } - - /* Preallocate the olr before touching the ring */ - ret = intel_logical_ring_alloc_request(ring, ringbuf->FIXME_lrc_ctx); - if (ret) - return ret; - ringbuf->space -= num_dwords * sizeof(uint32_t); return 0; } -static int intel_logical_ring_workarounds_emit(struct intel_engine_cs *ring, +static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req, struct intel_context *ctx) { int ret, i; - struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf; - struct drm_device *dev = ring->dev; + struct intel_ringbuffer *ringbuf = req->ringbuf; + struct drm_device *dev = req->ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct i915_workarounds *w = &dev_priv->workarounds; if (WARN_ON(w->count == 0)) return 0; - ring->gpu_caches_dirty = true; - ret = logical_ring_flush_all_caches(ringbuf); + req->ring->gpu_caches_dirty = true; + ret = logical_ring_flush_all_caches(req); if (ret) return ret; - ret = intel_logical_ring_begin(ringbuf, w->count * 2 + 2); + ret = intel_logical_ring_begin(req, w->count * 2 + 2); if (ret) return ret; @@ -1160,8 +1158,8 @@ static int intel_logical_ring_workarounds_emit(struct intel_engine_cs *ring, intel_logical_ring_advance(ringbuf); - ring->gpu_caches_dirty = true; - ret = logical_ring_flush_all_caches(ringbuf); + req->ring->gpu_caches_dirty = true; + ret = logical_ring_flush_all_caches(req); if (ret) return ret; @@ -1210,13 +1208,14 @@ static int gen8_init_render_ring(struct intel_engine_cs *ring) return init_workarounds_ring(ring); } -static int gen8_emit_bb_start(struct intel_ringbuffer *ringbuf, +static int gen8_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, unsigned flags) { + struct intel_ringbuffer *ringbuf = req->ringbuf; bool ppgtt = !(flags & I915_DISPATCH_SECURE); int ret; - ret = intel_logical_ring_begin(ringbuf, 4); + ret = intel_logical_ring_begin(req, 4); if (ret) return ret; @@ -1263,17 +1262,18 @@ static void gen8_logical_ring_put_irq(struct intel_engine_cs *ring) spin_unlock_irqrestore(&dev_priv->irq_lock, flags); } -static int gen8_emit_flush(struct intel_ringbuffer *ringbuf, +static int gen8_emit_flush(struct drm_i915_gem_request *req, u32 invalidate_domains, u32 unused) { + struct intel_ringbuffer *ringbuf = req->ringbuf; struct intel_engine_cs *ring = ringbuf->ring; struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; uint32_t cmd; int ret; - ret = intel_logical_ring_begin(ringbuf, 4); + ret = intel_logical_ring_begin(req, 4); if (ret) return ret; @@ -1301,10 +1301,11 @@ static int gen8_emit_flush(struct intel_ringbuffer *ringbuf, return 0; } -static int gen8_emit_flush_render(struct intel_ringbuffer *ringbuf, +static int gen8_emit_flush_render(struct drm_i915_gem_request *req, u32 invalidate_domains, u32 flush_domains) { + struct intel_ringbuffer *ringbuf = req->ringbuf; struct 
intel_engine_cs *ring = ringbuf->ring; u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; u32 flags = 0; @@ -1328,7 +1329,7 @@ static int gen8_emit_flush_render(struct intel_ringbuffer *ringbuf, flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; } - ret = intel_logical_ring_begin(ringbuf, 6); + ret = intel_logical_ring_begin(req, 6); if (ret) return ret; @@ -1353,13 +1354,14 @@ static void gen8_set_seqno(struct intel_engine_cs *ring, u32 seqno) intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno); } -static int gen8_emit_request(struct intel_ringbuffer *ringbuf) +static int gen8_emit_request(struct drm_i915_gem_request *req) { + struct intel_ringbuffer *ringbuf = req->ringbuf; struct intel_engine_cs *ring = ringbuf->ring; u32 cmd; int ret; - ret = intel_logical_ring_begin(ringbuf, 6); + ret = intel_logical_ring_begin(req, 6); if (ret) return ret; @@ -1371,8 +1373,7 @@ static int gen8_emit_request(struct intel_ringbuffer *ringbuf) (ring->status_page.gfx_addr + (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT))); intel_logical_ring_emit(ringbuf, 0); - intel_logical_ring_emit(ringbuf, - i915_gem_request_get_seqno(ring->outstanding_lazy_request)); + intel_logical_ring_emit(ringbuf, i915_gem_request_get_seqno(req)); intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT); intel_logical_ring_emit(ringbuf, MI_NOOP); intel_logical_ring_advance_and_submit(ringbuf); @@ -1380,16 +1381,20 @@ static int gen8_emit_request(struct intel_ringbuffer *ringbuf) return 0; } -static int gen8_init_rcs_context(struct intel_engine_cs *ring, - struct intel_context *ctx) +static int gen8_init_rcs_context(struct drm_i915_gem_request *req, + struct intel_context *ctx) { int ret; - ret = intel_logical_ring_workarounds_emit(ring, ctx); + ret = intel_logical_ring_workarounds_emit(req, ctx); if (ret) return ret; - return intel_lr_context_render_state_init(ring, ctx); + ret = intel_lr_context_render_state_init(req, ctx); + if (ret) + return ret; + + return 0; } /** @@ -1409,6 +1414,7 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *ring) intel_logical_ring_stop(ring); WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0); + WARN_ON(ring->outstanding_lazy_request); i915_gem_request_assign(&ring->outstanding_lazy_request, NULL); if (ring->cleanup) @@ -1648,10 +1654,10 @@ cleanup_render_ring: return ret; } -int intel_lr_context_render_state_init(struct intel_engine_cs *ring, - struct intel_context *ctx) +static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req, + struct intel_context *ctx) { - struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf; + struct intel_engine_cs *ring = i915_gem_request_get_ring(req); struct render_state so; struct drm_i915_file_private *file_priv = ctx->file_priv; struct drm_file *file = file_priv ? 
file_priv->file : NULL; @@ -1664,15 +1670,13 @@ int intel_lr_context_render_state_init(struct intel_engine_cs *ring, if (so.rodata == NULL) return 0; - ret = ring->emit_bb_start(ringbuf, - so.ggtt_offset, - I915_DISPATCH_SECURE); + ret = ring->emit_bb_start(req, so.ggtt_offset, I915_DISPATCH_SECURE); if (ret) goto out; - i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring); + i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req); - ret = __i915_add_request(ring, file, so.obj, true); + ret = __i915_add_request(req, file, so.obj, true); /* intel_logical_ring_add_request moves object to inactive if it * fails */ out: @@ -1883,6 +1887,7 @@ static void lrc_setup_hardware_status_page(struct intel_engine_cs *ring, int intel_lr_context_deferred_create(struct intel_context *ctx, struct intel_engine_cs *ring) { + struct drm_i915_private *dev_priv = ring->dev->dev_private; const bool is_global_default_ctx = (ctx == ring->default_context); struct drm_device *dev = ring->dev; struct drm_i915_gem_object *ctx_obj; @@ -1964,13 +1969,27 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, lrc_setup_hardware_status_page(ring, ctx_obj); else if (ring->id == RCS && !ctx->rcs_initialized) { if (ring->init_context) { - ret = ring->init_context(ring, ctx); + struct drm_i915_gem_request *req; + + ret = dev_priv->gt.alloc_request(ring, ctx, &req); + if (ret) + return ret; + + ret = ring->init_context(req, ctx); if (ret) { DRM_ERROR("ring init context: %d\n", ret); + i915_gem_request_unreference(req); ctx->engine[ring->id].ringbuf = NULL; ctx->engine[ring->id].state = NULL; goto error; } + + ret = i915_add_request_no_flush(req); + if (ret) { + DRM_ERROR("ring init context: %d\n", ret); + i915_gem_request_unreference(req); + goto error; + } } ctx->rcs_initialized = true; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index ea083d9..a2981ba 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -35,12 +35,13 @@ /* Logical Rings */ int __must_check intel_logical_ring_alloc_request(struct intel_engine_cs *ring, - struct intel_context *ctx); + struct intel_context *ctx, + struct drm_i915_gem_request **req_out); void intel_logical_ring_stop(struct intel_engine_cs *ring); void intel_logical_ring_cleanup(struct intel_engine_cs *ring); int intel_logical_rings_init(struct drm_device *dev); -int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf); +int logical_ring_flush_all_caches(struct drm_i915_gem_request *req); void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf); /** * intel_logical_ring_advance() - advance the ringbuffer tail @@ -63,11 +64,8 @@ static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf, iowrite32(data, ringbuf->virtual_start + ringbuf->tail); ringbuf->tail += 4; } -int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords); /* Logical Ring Contexts */ -int intel_lr_context_render_state_init(struct intel_engine_cs *ring, - struct intel_context *ctx); void intel_lr_context_free(struct intel_context *ctx); int intel_lr_context_deferred_create(struct intel_context *ctx, struct intel_engine_cs *ring); diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 973c9de..2d2ce59 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -209,17 +209,15 @@ static void intel_overlay_unmap_regs(struct intel_overlay *overlay, } static int intel_overlay_do_wait_request(struct 
intel_overlay *overlay, + struct drm_i915_gem_request *req, void (*tail)(struct intel_overlay *)) { struct drm_device *dev = overlay->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_engine_cs *ring = &dev_priv->ring[RCS]; int ret; BUG_ON(overlay->last_flip_req); - i915_gem_request_assign(&overlay->last_flip_req, - ring->outstanding_lazy_request); - ret = i915_add_request(ring); + i915_gem_request_assign(&overlay->last_flip_req, req); + ret = i915_add_request(overlay->last_flip_req); if (ret) return ret; @@ -239,6 +237,7 @@ static int intel_overlay_on(struct intel_overlay *overlay) struct drm_device *dev = overlay->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *ring = &dev_priv->ring[RCS]; + struct drm_i915_gem_request *req; int ret; BUG_ON(overlay->active); @@ -246,17 +245,21 @@ static int intel_overlay_on(struct intel_overlay *overlay) WARN_ON(IS_I830(dev) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE)); - ret = intel_ring_begin(ring, 4); + ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req); if (ret) return ret; - intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_ON); - intel_ring_emit(ring, overlay->flip_addr | OFC_UPDATE); - intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + ret = intel_ring_begin(req, 4); + if (ret) + return ret; + + intel_ring_emit(req->ring, MI_OVERLAY_FLIP | MI_OVERLAY_ON); + intel_ring_emit(req->ring, overlay->flip_addr | OFC_UPDATE); + intel_ring_emit(req->ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); + intel_ring_emit(req->ring, MI_NOOP); + intel_ring_advance(req->ring); - return intel_overlay_do_wait_request(overlay, NULL); + return intel_overlay_do_wait_request(overlay, req, NULL); } /* overlay needs to be enabled in OCMD reg */ @@ -266,6 +269,7 @@ static int intel_overlay_continue(struct intel_overlay *overlay, struct drm_device *dev = overlay->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *ring = &dev_priv->ring[RCS]; + struct drm_i915_gem_request *req; u32 flip_addr = overlay->flip_addr; u32 tmp; int ret; @@ -280,7 +284,11 @@ static int intel_overlay_continue(struct intel_overlay *overlay, if (tmp & (1 << 17)) DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp); - ret = intel_ring_begin(ring, 2); + ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req); + if (ret) + return ret; + + ret = intel_ring_begin(req, 2); if (ret) return ret; @@ -289,9 +297,8 @@ static int intel_overlay_continue(struct intel_overlay *overlay, intel_ring_advance(ring); WARN_ON(overlay->last_flip_req); - i915_gem_request_assign(&overlay->last_flip_req, - ring->outstanding_lazy_request); - return i915_add_request(ring); + i915_gem_request_assign(&overlay->last_flip_req, req); + return i915_add_request(req); } static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay) @@ -326,6 +333,7 @@ static int intel_overlay_off(struct intel_overlay *overlay) struct drm_device *dev = overlay->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *ring = &dev_priv->ring[RCS]; + struct drm_i915_gem_request *req; u32 flip_addr = overlay->flip_addr; int ret; @@ -337,7 +345,11 @@ static int intel_overlay_off(struct intel_overlay *overlay) * of the hw. 
Do it in both cases */ flip_addr |= OFC_UPDATE; - ret = intel_ring_begin(ring, 6); + ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req); + if (ret) + return ret; + + ret = intel_ring_begin(req, 6); if (ret) return ret; @@ -359,7 +371,7 @@ static int intel_overlay_off(struct intel_overlay *overlay) } intel_ring_advance(ring); - return intel_overlay_do_wait_request(overlay, intel_overlay_off_tail); + return intel_overlay_do_wait_request(overlay, req, intel_overlay_off_tail); } /* recover from an interruption due to a signal @@ -404,7 +416,13 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay) if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) { /* synchronous slowpath */ - ret = intel_ring_begin(ring, 2); + struct drm_i915_gem_request *req; + + ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req); + if (ret) + return ret; + + ret = intel_ring_begin(req, 2); if (ret) return ret; @@ -412,7 +430,7 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay) intel_ring_emit(ring, MI_NOOP); intel_ring_advance(ring); - ret = intel_overlay_do_wait_request(overlay, + ret = intel_overlay_do_wait_request(overlay, req, intel_overlay_release_old_vid_tail); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 78911e2..5905fa5 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5506,6 +5506,7 @@ static void ironlake_enable_rc6(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *ring = &dev_priv->ring[RCS]; + struct drm_i915_gem_request *req = NULL; bool was_interruptible; int ret; @@ -5524,16 +5525,17 @@ static void ironlake_enable_rc6(struct drm_device *dev) was_interruptible = dev_priv->mm.interruptible; dev_priv->mm.interruptible = false; + ret = dev_priv->gt.alloc_request(ring, NULL, &req); + if (ret) + goto err; + /* * GPU can automatically power down the render unit if given a page * to save state. */ - ret = intel_ring_begin(ring, 6); - if (ret) { - ironlake_teardown_rc6(dev); - dev_priv->mm.interruptible = was_interruptible; - return; - } + ret = intel_ring_begin(req, 6); + if (ret) + goto err; intel_ring_emit(ring, MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN); intel_ring_emit(ring, MI_SET_CONTEXT); @@ -5547,6 +5549,11 @@ static void ironlake_enable_rc6(struct drm_device *dev) intel_ring_emit(ring, MI_FLUSH); intel_ring_advance(ring); + ret = i915_add_request_no_flush(req); + if (ret) + goto err; + req = NULL; + /* * Wait for the command parser to advance past MI_SET_CONTEXT. 
The HW * does an implicit flush, combined with MI_FLUSH above, it should be @@ -5554,16 +5561,20 @@ static void ironlake_enable_rc6(struct drm_device *dev) */ ret = intel_ring_idle(ring); dev_priv->mm.interruptible = was_interruptible; - if (ret) { - DRM_ERROR("failed to enable ironlake power savings\n"); - ironlake_teardown_rc6(dev); - return; - } + if (ret) + goto err; I915_WRITE(PWRCTXA, i915_gem_obj_ggtt_offset(dev_priv->ips.pwrctx) | PWRCTX_EN); I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT); intel_print_rc6_info(dev, GEN6_RC_CTL_RC6_ENABLE); + +err: + DRM_ERROR("failed to enable ironlake power savings\n"); + ironlake_teardown_rc6(dev); + dev_priv->mm.interruptible = was_interruptible; + if (req) + i915_gem_request_unreference(req); } static unsigned long intel_pxfreq(u32 vidfreq) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index b60e59b..e6e7bb5 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -91,10 +91,11 @@ void __intel_ring_advance(struct intel_engine_cs *ring) } static int -gen2_render_ring_flush(struct intel_engine_cs *ring, +gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 invalidate_domains, u32 flush_domains) { + struct intel_engine_cs *ring = req->ring; u32 cmd; int ret; @@ -105,7 +106,7 @@ gen2_render_ring_flush(struct intel_engine_cs *ring, if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER) cmd |= MI_READ_FLUSH; - ret = intel_ring_begin(ring, 2); + ret = intel_ring_begin(req, 2); if (ret) return ret; @@ -117,10 +118,11 @@ gen2_render_ring_flush(struct intel_engine_cs *ring, } static int -gen4_render_ring_flush(struct intel_engine_cs *ring, +gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 invalidate_domains, u32 flush_domains) { + struct intel_engine_cs *ring = req->ring; struct drm_device *dev = ring->dev; u32 cmd; int ret; @@ -163,7 +165,7 @@ gen4_render_ring_flush(struct intel_engine_cs *ring, (IS_G4X(dev) || IS_GEN5(dev))) cmd |= MI_INVALIDATE_ISP; - ret = intel_ring_begin(ring, 2); + ret = intel_ring_begin(req, 2); if (ret) return ret; @@ -212,12 +214,13 @@ gen4_render_ring_flush(struct intel_engine_cs *ring, * really our business. That leaves only stall at scoreboard. 
*/ static int -intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring) +intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req) { + struct intel_engine_cs *ring = req->ring; u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; int ret; - ret = intel_ring_begin(ring, 6); + ret = intel_ring_begin(req, 6); if (ret) return ret; @@ -230,7 +233,7 @@ intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring) intel_ring_emit(ring, MI_NOOP); intel_ring_advance(ring); - ret = intel_ring_begin(ring, 6); + ret = intel_ring_begin(req, 6); if (ret) return ret; @@ -246,15 +249,16 @@ intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring) } static int -gen6_render_ring_flush(struct intel_engine_cs *ring, - u32 invalidate_domains, u32 flush_domains) +gen6_render_ring_flush(struct drm_i915_gem_request *req, + u32 invalidate_domains, u32 flush_domains) { + struct intel_engine_cs *ring = req->ring; u32 flags = 0; u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; int ret; /* Force SNB workarounds for PIPE_CONTROL flushes */ - ret = intel_emit_post_sync_nonzero_flush(ring); + ret = intel_emit_post_sync_nonzero_flush(req); if (ret) return ret; @@ -284,7 +288,7 @@ gen6_render_ring_flush(struct intel_engine_cs *ring, flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; } - ret = intel_ring_begin(ring, 4); + ret = intel_ring_begin(req, 4); if (ret) return ret; @@ -298,11 +302,12 @@ gen6_render_ring_flush(struct intel_engine_cs *ring, } static int -gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring) +gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req) { + struct intel_engine_cs *ring = req->ring; int ret; - ret = intel_ring_begin(ring, 4); + ret = intel_ring_begin(req, 4); if (ret) return ret; @@ -316,14 +321,15 @@ gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring) return 0; } -static int gen7_ring_fbc_flush(struct intel_engine_cs *ring, u32 value) +static int gen7_ring_fbc_flush(struct drm_i915_gem_request *req, u32 value) { + struct intel_engine_cs *ring = req->ring; int ret; if (!ring->fbc_dirty) return 0; - ret = intel_ring_begin(ring, 6); + ret = intel_ring_begin(req, 6); if (ret) return ret; /* WaFbcNukeOn3DBlt:ivb/hsw */ @@ -340,9 +346,10 @@ static int gen7_ring_fbc_flush(struct intel_engine_cs *ring, u32 value) } static int -gen7_render_ring_flush(struct intel_engine_cs *ring, +gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 invalidate_domains, u32 flush_domains) { + struct intel_engine_cs *ring = req->ring; u32 flags = 0; u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; int ret; @@ -381,10 +388,10 @@ gen7_render_ring_flush(struct intel_engine_cs *ring, /* Workaround: we must issue a pipe_control with CS-stall bit * set before a pipe_control command that has the state cache * invalidate bit set. 
*/ - gen7_render_ring_cs_stall_wa(ring); + gen7_render_ring_cs_stall_wa(req); } - ret = intel_ring_begin(ring, 4); + ret = intel_ring_begin(req, 4); if (ret) return ret; @@ -395,18 +402,19 @@ gen7_render_ring_flush(struct intel_engine_cs *ring, intel_ring_advance(ring); if (!invalidate_domains && flush_domains) - return gen7_ring_fbc_flush(ring, FBC_REND_NUKE); + return gen7_ring_fbc_flush(req, FBC_REND_NUKE); return 0; } static int -gen8_emit_pipe_control(struct intel_engine_cs *ring, +gen8_emit_pipe_control(struct drm_i915_gem_request *req, u32 flags, u32 scratch_addr) { + struct intel_engine_cs *ring = req->ring; int ret; - ret = intel_ring_begin(ring, 6); + ret = intel_ring_begin(req, 6); if (ret) return ret; @@ -422,11 +430,11 @@ gen8_emit_pipe_control(struct intel_engine_cs *ring, } static int -gen8_render_ring_flush(struct intel_engine_cs *ring, +gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 invalidate_domains, u32 flush_domains) { u32 flags = 0; - u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; + u32 scratch_addr = req->ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; int ret; flags |= PIPE_CONTROL_CS_STALL; @@ -446,7 +454,7 @@ gen8_render_ring_flush(struct intel_engine_cs *ring, flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; /* WaCsStallBeforeStateCacheInvalidate:bdw,chv */ - ret = gen8_emit_pipe_control(ring, + ret = gen8_emit_pipe_control(req, PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD, 0); @@ -454,12 +462,12 @@ gen8_render_ring_flush(struct intel_engine_cs *ring, return ret; } - ret = gen8_emit_pipe_control(ring, flags, scratch_addr); + ret = gen8_emit_pipe_control(req, flags, scratch_addr); if (ret) return ret; if (!invalidate_domains && flush_domains) - return gen7_ring_fbc_flush(ring, FBC_REND_NUKE); + return gen7_ring_fbc_flush(req, FBC_REND_NUKE); return 0; } @@ -670,9 +678,10 @@ err: return ret; } -static int intel_ring_workarounds_emit(struct intel_engine_cs *ring, +static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req, struct intel_context *ctx) { + struct intel_engine_cs *ring = req->ring; int ret, i; struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; @@ -682,11 +691,11 @@ static int intel_ring_workarounds_emit(struct intel_engine_cs *ring, return 0; ring->gpu_caches_dirty = true; - ret = intel_ring_flush_all_caches(ring); + ret = intel_ring_flush_all_caches(req); if (ret) return ret; - ret = intel_ring_begin(ring, (w->count * 2 + 2)); + ret = intel_ring_begin(req, (w->count * 2 + 2)); if (ret) return ret; @@ -700,7 +709,7 @@ static int intel_ring_workarounds_emit(struct intel_engine_cs *ring, intel_ring_advance(ring); ring->gpu_caches_dirty = true; - ret = intel_ring_flush_all_caches(ring); + ret = intel_ring_flush_all_caches(req); if (ret) return ret; @@ -898,10 +907,11 @@ static void render_ring_cleanup(struct intel_engine_cs *ring) intel_fini_pipe_control(ring); } -static int gen8_rcs_signal(struct intel_engine_cs *signaller, +static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req, unsigned int num_dwords) { #define MBOX_UPDATE_DWORDS 8 + struct intel_engine_cs *signaller = signaller_req->ring; struct drm_device *dev = signaller->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *waiter; @@ -911,7 +921,7 @@ static int gen8_rcs_signal(struct intel_engine_cs *signaller, num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS; #undef MBOX_UPDATE_DWORDS - ret = intel_ring_begin(signaller, num_dwords); + ret = 
intel_ring_begin(signaller_req, num_dwords); if (ret) return ret; @@ -921,8 +931,7 @@ static int gen8_rcs_signal(struct intel_engine_cs *signaller, if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) continue; - seqno = i915_gem_request_get_seqno( - signaller->outstanding_lazy_request); + seqno = i915_gem_request_get_seqno(signaller_req); intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6)); intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE | @@ -939,10 +948,11 @@ static int gen8_rcs_signal(struct intel_engine_cs *signaller, return 0; } -static int gen8_xcs_signal(struct intel_engine_cs *signaller, +static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req, unsigned int num_dwords) { #define MBOX_UPDATE_DWORDS 6 + struct intel_engine_cs *signaller = signaller_req->ring; struct drm_device *dev = signaller->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *waiter; @@ -952,7 +962,7 @@ static int gen8_xcs_signal(struct intel_engine_cs *signaller, num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS; #undef MBOX_UPDATE_DWORDS - ret = intel_ring_begin(signaller, num_dwords); + ret = intel_ring_begin(signaller_req, num_dwords); if (ret) return ret; @@ -962,8 +972,7 @@ static int gen8_xcs_signal(struct intel_engine_cs *signaller, if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) continue; - seqno = i915_gem_request_get_seqno( - signaller->outstanding_lazy_request); + seqno = i915_gem_request_get_seqno(signaller_req); intel_ring_emit(signaller, (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW); intel_ring_emit(signaller, lower_32_bits(gtt_offset) | @@ -978,9 +987,10 @@ static int gen8_xcs_signal(struct intel_engine_cs *signaller, return 0; } -static int gen6_signal(struct intel_engine_cs *signaller, +static int gen6_signal(struct drm_i915_gem_request *signaller_req, unsigned int num_dwords) { + struct intel_engine_cs *signaller = signaller_req->ring; struct drm_device *dev = signaller->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *useless; @@ -991,15 +1001,14 @@ static int gen6_signal(struct intel_engine_cs *signaller, num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2); #undef MBOX_UPDATE_DWORDS - ret = intel_ring_begin(signaller, num_dwords); + ret = intel_ring_begin(signaller_req, num_dwords); if (ret) return ret; for_each_ring(useless, dev_priv, i) { u32 mbox_reg = signaller->semaphore.mbox.signal[i]; if (mbox_reg != GEN6_NOSYNC) { - u32 seqno = i915_gem_request_get_seqno( - signaller->outstanding_lazy_request); + u32 seqno = i915_gem_request_get_seqno(signaller_req); intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1)); intel_ring_emit(signaller, mbox_reg); intel_ring_emit(signaller, seqno); @@ -1016,29 +1025,28 @@ static int gen6_signal(struct intel_engine_cs *signaller, /** * gen6_add_request - Update the semaphore mailbox registers * - * @ring - ring that is adding a request - * @seqno - return seqno stuck into the ring + * @request - request to write to the ring * * Update the mailbox registers in the *other* rings with the current seqno. * This acts like a signal in the canonical semaphore. 
*/ static int -gen6_add_request(struct intel_engine_cs *ring) +gen6_add_request(struct drm_i915_gem_request *req) { + struct intel_engine_cs *ring = req->ring; int ret; if (ring->semaphore.signal) - ret = ring->semaphore.signal(ring, 4); + ret = ring->semaphore.signal(req, 4); else - ret = intel_ring_begin(ring, 4); + ret = intel_ring_begin(req, 4); if (ret) return ret; intel_ring_emit(ring, MI_STORE_DWORD_INDEX); intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); - intel_ring_emit(ring, - i915_gem_request_get_seqno(ring->outstanding_lazy_request)); + intel_ring_emit(ring, i915_gem_request_get_seqno(req)); intel_ring_emit(ring, MI_USER_INTERRUPT); __intel_ring_advance(ring); @@ -1061,14 +1069,15 @@ static inline bool i915_gem_has_seqno_wrapped(struct drm_device *dev, */ static int -gen8_ring_sync(struct intel_engine_cs *waiter, +gen8_ring_sync(struct drm_i915_gem_request *waiter_req, struct intel_engine_cs *signaller, u32 seqno) { + struct intel_engine_cs *waiter = waiter_req->ring; struct drm_i915_private *dev_priv = waiter->dev->dev_private; int ret; - ret = intel_ring_begin(waiter, 4); + ret = intel_ring_begin(waiter_req, 4); if (ret) return ret; @@ -1086,10 +1095,11 @@ gen8_ring_sync(struct intel_engine_cs *waiter, } static int -gen6_ring_sync(struct intel_engine_cs *waiter, +gen6_ring_sync(struct drm_i915_gem_request *waiter_req, struct intel_engine_cs *signaller, u32 seqno) { + struct intel_engine_cs *waiter = waiter_req->ring; u32 dw1 = MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE | MI_SEMAPHORE_REGISTER; @@ -1104,7 +1114,7 @@ gen6_ring_sync(struct intel_engine_cs *waiter, WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID); - ret = intel_ring_begin(waiter, 4); + ret = intel_ring_begin(waiter_req, 4); if (ret) return ret; @@ -1135,8 +1145,9 @@ do { \ } while (0) static int -pc_render_add_request(struct intel_engine_cs *ring) +pc_render_add_request(struct drm_i915_gem_request *req) { + struct intel_engine_cs *ring = req->ring; u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; int ret; @@ -1148,7 +1159,7 @@ pc_render_add_request(struct intel_engine_cs *ring) * incoherence by flushing the 6 PIPE_NOTIFY buffers out to * memory before requesting an interrupt. 
*/ - ret = intel_ring_begin(ring, 32); + ret = intel_ring_begin(req, 32); if (ret) return ret; @@ -1156,8 +1167,7 @@ pc_render_add_request(struct intel_engine_cs *ring) PIPE_CONTROL_WRITE_FLUSH | PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT); - intel_ring_emit(ring, - i915_gem_request_get_seqno(ring->outstanding_lazy_request)); + intel_ring_emit(ring, i915_gem_request_get_seqno(req)); intel_ring_emit(ring, 0); PIPE_CONTROL_FLUSH(ring, scratch_addr); scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */ @@ -1176,8 +1186,7 @@ pc_render_add_request(struct intel_engine_cs *ring) PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | PIPE_CONTROL_NOTIFY); intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT); - intel_ring_emit(ring, - i915_gem_request_get_seqno(ring->outstanding_lazy_request)); + intel_ring_emit(ring, i915_gem_request_get_seqno(req)); intel_ring_emit(ring, 0); __intel_ring_advance(ring); @@ -1390,13 +1399,14 @@ void intel_ring_setup_status_page(struct intel_engine_cs *ring) } static int -bsd_ring_flush(struct intel_engine_cs *ring, +bsd_ring_flush(struct drm_i915_gem_request *req, u32 invalidate_domains, u32 flush_domains) { + struct intel_engine_cs *ring = req->ring; int ret; - ret = intel_ring_begin(ring, 2); + ret = intel_ring_begin(req, 2); if (ret) return ret; @@ -1407,18 +1417,18 @@ bsd_ring_flush(struct intel_engine_cs *ring, } static int -i9xx_add_request(struct intel_engine_cs *ring) +i9xx_add_request(struct drm_i915_gem_request *req) { + struct intel_engine_cs *ring = req->ring; int ret; - ret = intel_ring_begin(ring, 4); + ret = intel_ring_begin(req, 4); if (ret) return ret; intel_ring_emit(ring, MI_STORE_DWORD_INDEX); intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); - intel_ring_emit(ring, - i915_gem_request_get_seqno(ring->outstanding_lazy_request)); + intel_ring_emit(ring, i915_gem_request_get_seqno(req)); intel_ring_emit(ring, MI_USER_INTERRUPT); __intel_ring_advance(ring); @@ -1550,13 +1560,14 @@ gen8_ring_put_irq(struct intel_engine_cs *ring) } static int -i965_dispatch_execbuffer(struct intel_engine_cs *ring, +i965_dispatch_execbuffer(struct drm_i915_gem_request *req, u64 offset, u32 length, unsigned flags) { + struct intel_engine_cs *ring = req->ring; int ret; - ret = intel_ring_begin(ring, 2); + ret = intel_ring_begin(req, 2); if (ret) return ret; @@ -1575,14 +1586,15 @@ i965_dispatch_execbuffer(struct intel_engine_cs *ring, #define I830_TLB_ENTRIES (2) #define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT) static int -i830_dispatch_execbuffer(struct intel_engine_cs *ring, - u64 offset, u32 len, - unsigned flags) +i830_dispatch_execbuffer(struct drm_i915_gem_request *req, + u64 offset, u32 len, + unsigned flags) { + struct intel_engine_cs *ring = req->ring; u32 cs_offset = ring->scratch.gtt_offset; int ret; - ret = intel_ring_begin(ring, 6); + ret = intel_ring_begin(req, 6); if (ret) return ret; @@ -1599,7 +1611,7 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring, if (len > I830_BATCH_LIMIT) return -ENOSPC; - ret = intel_ring_begin(ring, 6 + 2); + ret = intel_ring_begin(req, 6 + 2); if (ret) return ret; @@ -1622,7 +1634,7 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring, offset = cs_offset; } - ret = intel_ring_begin(ring, 4); + ret = intel_ring_begin(req, 4); if (ret) return ret; @@ -1636,13 +1648,14 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring, } static int -i915_dispatch_execbuffer(struct intel_engine_cs 
*ring, +i915_dispatch_execbuffer(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned flags) { + struct intel_engine_cs *ring = req->ring; int ret; - ret = intel_ring_begin(ring, 2); + ret = intel_ring_begin(req, 2); if (ret) return ret; @@ -1885,6 +1898,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring) intel_unpin_ringbuffer_obj(ringbuf); intel_destroy_ringbuffer_obj(ringbuf); + WARN_ON(ring->outstanding_lazy_request); i915_gem_request_assign(&ring->outstanding_lazy_request, NULL); if (ring->cleanup) @@ -2007,8 +2021,9 @@ int intel_ring_idle(struct intel_engine_cs *ring) int ret; /* We need to add any requests required to flush the objects and ring */ + WARN_ON(ring->outstanding_lazy_request); if (ring->outstanding_lazy_request) { - ret = i915_add_request(ring); + ret = i915_add_request(ring->outstanding_lazy_request); if (ret) return ret; } @@ -2025,13 +2040,18 @@ int intel_ring_idle(struct intel_engine_cs *ring) } int -intel_ring_alloc_request(struct intel_engine_cs *ring, struct intel_context *ctx) +intel_ring_alloc_request(struct intel_engine_cs *ring, + struct intel_context *ctx, + struct drm_i915_gem_request **req_out) { int ret; struct drm_i915_gem_request *request; struct drm_i915_private *dev_private = ring->dev->dev_private; - if (ring->outstanding_lazy_request) + if (!req_out) + return -EINVAL; + + if ((*req_out = ring->outstanding_lazy_request) != NULL) return 0; request = kzalloc(sizeof(*request), GFP_KERNEL); @@ -2053,7 +2073,7 @@ intel_ring_alloc_request(struct intel_engine_cs *ring, struct intel_context *ctx spewThisReq(request, "\x1B[32mCreated: %d:%d, ref => %d\x1B[0m", request->uniq, request->seqno, request->ref.refcount.counter); //printk(KERN_INFO "%s:%d> <%s> OLR = 0x%p, uniq = %d, seqno = %d\n", __func__, __LINE__, ring->name, request, request->uniq, request->seqno); - ring->outstanding_lazy_request = request; + *req_out = ring->outstanding_lazy_request = request; return 0; } @@ -2078,9 +2098,10 @@ static int __intel_ring_prepare(struct intel_engine_cs *ring, return 0; } -int intel_ring_begin(struct intel_engine_cs *ring, +int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) { + struct intel_engine_cs *ring = req->ring; struct drm_i915_private *dev_priv = ring->dev->dev_private; int ret; @@ -2093,18 +2114,14 @@ int intel_ring_begin(struct intel_engine_cs *ring, if (ret) return ret; - /* Preallocate the olr before touching the ring */ - ret = intel_ring_alloc_request(ring, NULL); - if (ret) - return ret; - ring->buffer->space -= num_dwords * sizeof(uint32_t); return 0; } /* Align the ring tail to a cacheline boundary */ -int intel_ring_cacheline_align(struct intel_engine_cs *ring) +int intel_ring_cacheline_align(struct drm_i915_gem_request *req) { + struct intel_engine_cs *ring = req->ring; int num_dwords = (ring->buffer->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); int ret; @@ -2112,7 +2129,7 @@ int intel_ring_cacheline_align(struct intel_engine_cs *ring) return 0; num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords; - ret = intel_ring_begin(ring, num_dwords); + ret = intel_ring_begin(req, num_dwords); if (ret) return ret; @@ -2176,13 +2193,14 @@ static void gen6_bsd_ring_write_tail(struct intel_engine_cs *ring, _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE)); } -static int gen6_bsd_ring_flush(struct intel_engine_cs *ring, +static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 invalidate, u32 flush) { + struct intel_engine_cs *ring = req->ring; uint32_t cmd; int ret; - ret = 
intel_ring_begin(ring, 4); + ret = intel_ring_begin(req, 4); if (ret) return ret; @@ -2212,14 +2230,15 @@ static int gen6_bsd_ring_flush(struct intel_engine_cs *ring, } static int -gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring, +gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned flags) { + struct intel_engine_cs *ring = req->ring; bool ppgtt = USES_PPGTT(ring->dev) && !(flags & I915_DISPATCH_SECURE); int ret; - ret = intel_ring_begin(ring, 4); + ret = intel_ring_begin(req, 4); if (ret) return ret; @@ -2234,13 +2253,14 @@ gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring, } static int -hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring, +hsw_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned flags) { + struct intel_engine_cs *ring = req->ring; int ret; - ret = intel_ring_begin(ring, 2); + ret = intel_ring_begin(req, 2); if (ret) return ret; @@ -2256,13 +2276,14 @@ hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring, } static int -gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring, +gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned flags) { + struct intel_engine_cs *ring = req->ring; int ret; - ret = intel_ring_begin(ring, 2); + ret = intel_ring_begin(req, 2); if (ret) return ret; @@ -2278,15 +2299,16 @@ gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring, /* Blitter support (SandyBridge+) */ -static int gen6_ring_flush(struct intel_engine_cs *ring, +static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 invalidate, u32 flush) { + struct intel_engine_cs *ring = req->ring; struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; uint32_t cmd; int ret; - ret = intel_ring_begin(ring, 4); + ret = intel_ring_begin(req, 4); if (ret) return ret; @@ -2315,7 +2337,7 @@ static int gen6_ring_flush(struct intel_engine_cs *ring, if (!invalidate && flush) { if (IS_GEN7(dev)) - return gen7_ring_fbc_flush(ring, FBC_REND_CACHE_CLEAN); + return gen7_ring_fbc_flush(req, FBC_REND_CACHE_CLEAN); else if (IS_BROADWELL(dev)) dev_priv->fbc.need_sw_cache_clean = true; } @@ -2696,14 +2718,15 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev) } int -intel_ring_flush_all_caches(struct intel_engine_cs *ring) +intel_ring_flush_all_caches(struct drm_i915_gem_request *req) { + struct intel_engine_cs *ring = req->ring; int ret; if (!ring->gpu_caches_dirty) return 0; - ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS); + ret = ring->flush(req, 0, I915_GEM_GPU_DOMAINS); if (ret) return ret; @@ -2714,8 +2737,9 @@ intel_ring_flush_all_caches(struct intel_engine_cs *ring) } int -intel_ring_invalidate_all_caches(struct intel_engine_cs *ring) +intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req) { + struct intel_engine_cs *ring = req->ring; uint32_t flush_domains; int ret; @@ -2723,7 +2747,7 @@ intel_ring_invalidate_all_caches(struct intel_engine_cs *ring) if (ring->gpu_caches_dirty) flush_domains = I915_GEM_GPU_DOMAINS; - ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains); + ret = ring->flush(req, I915_GEM_GPU_DOMAINS, flush_domains); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 48cbb00..a7e47ad 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -154,15 +154,15 @@ struct intel_engine_cs { int (*init_hw)(struct intel_engine_cs *ring); - int 
(*init_context)(struct intel_engine_cs *ring, + int (*init_context)(struct drm_i915_gem_request *req, struct intel_context *ctx); void (*write_tail)(struct intel_engine_cs *ring, u32 value); - int __must_check (*flush)(struct intel_engine_cs *ring, + int __must_check (*flush)(struct drm_i915_gem_request *req, u32 invalidate_domains, u32 flush_domains); - int (*add_request)(struct intel_engine_cs *ring); + int (*add_request)(struct drm_i915_gem_request *req); /* Some chipsets are not quite as coherent as advertised and need * an expensive kick to force a true read of the up-to-date seqno. * However, the up-to-date seqno is not always required and the last @@ -173,7 +173,7 @@ struct intel_engine_cs { bool lazy_coherency); void (*set_seqno)(struct intel_engine_cs *ring, u32 seqno); - int (*dispatch_execbuffer)(struct intel_engine_cs *ring, + int (*dispatch_execbuffer)(struct drm_i915_gem_request *req, u64 offset, u32 length, unsigned dispatch_flags); #define I915_DISPATCH_SECURE 0x1 @@ -231,10 +231,10 @@ struct intel_engine_cs { }; /* AKA wait() */ - int (*sync_to)(struct intel_engine_cs *ring, - struct intel_engine_cs *to, + int (*sync_to)(struct drm_i915_gem_request *to_req, + struct intel_engine_cs *from, u32 seqno); - int (*signal)(struct intel_engine_cs *signaller, + int (*signal)(struct drm_i915_gem_request *signaller_req, /* num_dwords needed by caller */ unsigned int num_dwords); } semaphore; @@ -245,11 +245,11 @@ struct intel_engine_cs { struct list_head execlist_retired_req_list; u8 next_context_status_buffer; u32 irq_keep_mask; /* bitmask for interrupts that should not be masked */ - int (*emit_request)(struct intel_ringbuffer *ringbuf); - int (*emit_flush)(struct intel_ringbuffer *ringbuf, + int (*emit_request)(struct drm_i915_gem_request *req); + int (*emit_flush)(struct drm_i915_gem_request *req, u32 invalidate_domains, u32 flush_domains); - int (*emit_bb_start)(struct intel_ringbuffer *ringbuf, + int (*emit_bb_start)(struct drm_i915_gem_request *req, u64 offset, unsigned flags); /** @@ -433,10 +433,11 @@ int intel_alloc_ringbuffer_obj(struct drm_device *dev, void intel_stop_ring_buffer(struct intel_engine_cs *ring); void intel_cleanup_ring_buffer(struct intel_engine_cs *ring); -int __must_check intel_ring_begin(struct intel_engine_cs *ring, int n); -int __must_check intel_ring_cacheline_align(struct intel_engine_cs *ring); +int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n); +int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req); int __must_check intel_ring_alloc_request(struct intel_engine_cs *ring, - struct intel_context *ctx); + struct intel_context *ctx, + struct drm_i915_gem_request **req_out); static inline void intel_ring_emit(struct intel_engine_cs *ring, u32 data) { @@ -457,8 +458,8 @@ void __intel_ring_advance(struct intel_engine_cs *ring); int __must_check intel_ring_idle(struct intel_engine_cs *ring); void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno); -int intel_ring_flush_all_caches(struct intel_engine_cs *ring); -int intel_ring_invalidate_all_caches(struct intel_engine_cs *ring); +int intel_ring_flush_all_caches(struct drm_i915_gem_request *req); +int intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req); void intel_fini_pipe_control(struct intel_engine_cs *ring); int intel_init_pipe_control(struct intel_engine_cs *ring); @@ -479,11 +480,4 @@ static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf) return ringbuf->tail; } -static inline struct drm_i915_gem_request * 
-intel_ring_get_request(struct intel_engine_cs *ring) -{ - BUG_ON(ring->outstanding_lazy_request == NULL); - return ring->outstanding_lazy_request; -} - #endif /* _INTEL_RINGBUFFER_H_ */
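For reference, every call site converted above ends up following the same request-centric pattern: explicitly allocate a request, reserve ring space against that request, emit the commands, and submit that same request. The fragment below is only an illustrative sketch of that pattern, not part of the patch; emit_two_noops() is a made-up helper, and the calls use the signatures as modified by this series (gt.alloc_request() returning the new request through req_out, intel_ring_begin() and i915_add_request_no_flush() taking the request rather than the ring). As in the converted overlay code, error-path cleanup of the request is elided.

    static int emit_two_noops(struct drm_i915_private *dev_priv,
                              struct intel_engine_cs *ring)
    {
            struct drm_i915_gem_request *req;
            int ret;

            /* The submitter now owns an explicit request rather than
             * relying on the ring's outstanding lazy request. */
            ret = dev_priv->gt.alloc_request(ring, ring->default_context, &req);
            if (ret)
                    return ret;

            /* Ring space is reserved against the request... */
            ret = intel_ring_begin(req, 2);
            if (ret)
                    return ret;

            /* ...commands are emitted as before... */
            intel_ring_emit(req->ring, MI_NOOP);
            intel_ring_emit(req->ring, MI_NOOP);
            intel_ring_advance(req->ring);

            /* ...and the very same request is what gets submitted and
             * tracked, so the work never floats around unowned. */
            return i915_add_request_no_flush(req);
    }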