Message ID | 20210203124740.9354-1-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/i915: Prevent waiting inside ring construction for critical sections | expand |
Hi Chris, Thank you for the patch! Yet something to improve: [auto build test ERROR on drm-intel/for-linux-next] [also build test ERROR on drm-tip/drm-tip v5.11-rc6 next-20210125] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Chris-Wilson/drm-i915-Prevent-waiting-inside-ring-construction-for-critical-sections/20210203-204914 base: git://anongit.freedesktop.org/drm-intel for-linux-next config: x86_64-rhel (attached as .config) compiler: gcc-9 (Debian 9.3.0-15) 9.3.0 reproduce (this is a W=1 build): # https://github.com/0day-ci/linux/commit/7930da83ebb0a7bdfaba6f8f2fc96e3c2ec34a78 git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Chris-Wilson/drm-i915-Prevent-waiting-inside-ring-construction-for-critical-sections/20210203-204914 git checkout 7930da83ebb0a7bdfaba6f8f2fc96e3c2ec34a78 # save the attached .config to linux build tree make W=1 ARCH=x86_64 If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> All errors (new ones prefixed by >>): drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c: In function 'i915_gem_do_execbuffer': >> drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:3298:10: error: 'struct drm_file' has no member named 'f_flags' 3298 | if (file->f_flags & O_NONBLOCK) | ^~ -- drivers/gpu/drm/i915/i915_request.c: In function '__i915_request_create': >> drivers/gpu/drm/i915/i915_request.c:854:7: error: 'flags' undeclared (first use in this function) 854 | if ((flags & __GFP_RECLAIM) == 0) | ^~~~~ drivers/gpu/drm/i915/i915_request.c:854:7: note: each undeclared identifier is reported only once for each function it appears in vim +3298 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 3240 3241 if (args->flags & I915_EXEC_FENCE_OUT) { 3242 out_fence_fd = get_unused_fd_flags(O_CLOEXEC); 3243 if (out_fence_fd < 0) { 3244 err = out_fence_fd; 3245 goto err_in_fence; 3246 } 3247 } 3248 3249 err = eb_create(&eb); 3250 if (err) 3251 goto err_out_fence; 3252 3253 GEM_BUG_ON(!eb.lut_size); 3254 3255 err = eb_select_context(&eb); 3256 if (unlikely(err)) 3257 goto err_destroy; 3258 3259 err = eb_select_engine(&eb); 3260 if (unlikely(err)) 3261 goto err_context; 3262 3263 err = eb_lookup_vmas(&eb); 3264 if (err) { 3265 eb_release_vmas(&eb, true); 3266 goto err_engine; 3267 } 3268 3269 i915_gem_ww_ctx_init(&eb.ww, true); 3270 3271 err = eb_relocate_parse(&eb); 3272 if (err) { 3273 /* 3274 * If the user expects the execobject.offset and 3275 * reloc.presumed_offset to be an exact match, 3276 * as for using NO_RELOC, then we cannot update 3277 * the execobject.offset until we have completed 3278 * relocation. 3279 */ 3280 args->flags &= ~__EXEC_HAS_RELOC; 3281 goto err_vma; 3282 } 3283 3284 ww_acquire_done(&eb.ww.ctx); 3285 3286 batch = eb.batch->vma; 3287 3288 /* All GPU relocation batches must be submitted prior to the user rq */ 3289 GEM_BUG_ON(eb.reloc_cache.rq); 3290 3291 /* Allocate a request for this batch buffer nice and early. */ 3292 eb.request = i915_request_create(eb.context); 3293 if (IS_ERR(eb.request)) { 3294 err = PTR_ERR(eb.request); 3295 goto err_vma; 3296 } 3297 > 3298 if (file->f_flags & O_NONBLOCK) 3299 i915_request_set_nowait(eb.request); 3300 3301 if (in_fence) { 3302 if (args->flags & I915_EXEC_FENCE_SUBMIT) 3303 err = i915_request_await_execution(eb.request, 3304 in_fence, 3305 eb.engine->bond_execute); 3306 else 3307 err = i915_request_await_dma_fence(eb.request, 3308 in_fence); 3309 if (err < 0) 3310 goto err_request; 3311 } 3312 3313 if (eb.fences) { 3314 err = await_fence_array(&eb); 3315 if (err) 3316 goto err_request; 3317 } 3318 3319 if (out_fence_fd != -1) { 3320 out_fence = sync_file_create(&eb.request->fence); 3321 if (!out_fence) { 3322 err = -ENOMEM; 3323 goto err_request; 3324 } 3325 } 3326 3327 /* 3328 * Whilst this request exists, batch_obj will be on the 3329 * active_list, and so will hold the active reference. Only when this 3330 * request is retired will the the batch_obj be moved onto the 3331 * inactive_list and lose its active reference. Hence we do not need 3332 * to explicitly hold another reference here. 3333 */ 3334 eb.request->batch = batch; 3335 if (eb.batch_pool) 3336 intel_gt_buffer_pool_mark_active(eb.batch_pool, eb.request); 3337 3338 trace_i915_request_queue(eb.request, eb.batch_flags); 3339 err = eb_submit(&eb, batch); 3340 err_request: 3341 i915_request_get(eb.request); 3342 err = eb_request_add(&eb, err); 3343 3344 if (eb.fences) 3345 signal_fence_array(&eb); 3346 3347 if (out_fence) { 3348 if (err == 0) { 3349 fd_install(out_fence_fd, out_fence->file); 3350 args->rsvd2 &= GENMASK_ULL(31, 0); /* keep in-fence */ 3351 args->rsvd2 |= (u64)out_fence_fd << 32; 3352 out_fence_fd = -1; 3353 } else { 3354 fput(out_fence->file); 3355 } 3356 } 3357 i915_request_put(eb.request); 3358 3359 err_vma: 3360 eb_release_vmas(&eb, true); 3361 if (eb.trampoline) 3362 i915_vma_unpin(eb.trampoline); 3363 WARN_ON(err == -EDEADLK); 3364 i915_gem_ww_ctx_fini(&eb.ww); 3365 3366 if (eb.batch_pool) 3367 intel_gt_buffer_pool_put(eb.batch_pool); 3368 if (eb.reloc_pool) 3369 intel_gt_buffer_pool_put(eb.reloc_pool); 3370 if (eb.reloc_context) 3371 intel_context_put(eb.reloc_context); 3372 err_engine: 3373 eb_put_engine(&eb); 3374 err_context: 3375 i915_gem_context_put(eb.gem_context); 3376 err_destroy: 3377 eb_destroy(&eb); 3378 err_out_fence: 3379 if (out_fence_fd != -1) 3380 put_unused_fd(out_fence_fd); 3381 err_in_fence: 3382 dma_fence_put(in_fence); 3383 err_ext: 3384 put_fence_array(eb.fences, eb.num_fences); 3385 return err; 3386 } 3387 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index fe170186dd42..903eccad7ae2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -3289,6 +3289,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, goto err_vma; } + if (file->f_flags & O_NONBLOCK) + i915_request_set_nowait(eb.request); + if (in_fence) { if (args->flags & I915_EXEC_FENCE_SUBMIT) err = i915_request_await_execution(eb.request, diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c index aee0a77c77e0..9f149fdc8416 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.c +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -184,9 +184,10 @@ void intel_ring_free(struct kref *ref) static noinline int wait_for_space(struct intel_ring *ring, - struct intel_timeline *tl, + struct i915_request *rq, unsigned int bytes) { + struct intel_timeline *tl = i915_request_timeline(rq); struct i915_request *target; long timeout; @@ -207,11 +208,13 @@ wait_for_space(struct intel_ring *ring, if (GEM_WARN_ON(&target->link == &tl->requests)) return -ENOSPC; - timeout = i915_request_wait(target, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); - if (timeout < 0) - return timeout; + timeout = MAX_SCHEDULE_TIMEOUT; + if (i915_request_nowait(rq)) + timeout = 0; + + timeout = i915_request_wait(target, I915_WAIT_INTERRUPTIBLE, timeout); + if (unlikely(timeout < 0)) + return i915_request_nowait(rq) ? -EWOULDBLOCK : timeout; i915_request_retire_upto(target); @@ -271,9 +274,7 @@ u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords) */ GEM_BUG_ON(!rq->reserved_space); - ret = wait_for_space(ring, - i915_request_timeline(rq), - total_bytes); + ret = wait_for_space(ring, rq, total_bytes); if (unlikely(ret)) return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index a336d6c40d8b..44b4c2a9f454 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -857,6 +857,8 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) kref_init(&rq->fence.refcount); rq->fence.flags = 0; + if ((flags & __GFP_RECLAIM) == 0) + __set_bit(I915_FENCE_FLAG_NOWAIT, &rq->fence.flags); rq->fence.error = 0; INIT_LIST_HEAD(&rq->fence.cb_list); diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index c0bd4cb8786a..4cecfecc82e5 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -137,6 +137,18 @@ enum { * the GPU. Here we track such boost requests on a per-request basis. */ I915_FENCE_FLAG_BOOST, + + /* + * I915_FENCE_FLAG_NOWAIT - avoid waits while constructing the request + * + * We may wish to construct a request from some contexts where + * we do not want to wait, and sometimes the client would prefer + * to have a nonblocking interface. We may have to wait in a few place + * during request construction (e.g. waiting for space in the + * ringbuffer), this flag allows us to opt out of those waits and + * return -EAGAIN instead. + */ + I915_FENCE_FLAG_NOWAIT, }; /** @@ -558,6 +570,16 @@ static inline void i915_request_mark_complete(struct i915_request *rq) (u32 *)&rq->fence.seqno); } +static inline bool i915_request_nowait(const struct i915_request *rq) +{ + return test_bit(I915_FENCE_FLAG_NOWAIT, &rq->fence.flags); +} + +static inline void i915_request_set_nowait(struct i915_request *rq) +{ + set_bit(I915_FENCE_FLAG_NOWAIT, &rq->fence.flags); +} + static inline bool i915_request_has_waitboost(const struct i915_request *rq) { return test_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags);
From some contexts, we may not be allowed to wait during request construction. For example, in the powermanagement handler that should never block (as the engine was idle) and the driver would be crippled if we did. Similarly, the user may request that the execbuf does not block, and so would prefer to handle an EWOULDBLOCK error instead. In both cases we need to propagate the flag to various blocking wait points, the first and usually hit is intel_ring::wait_for_space(). Testcase: igt/gem_ctx_ringsize/spin Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 3 +++ drivers/gpu/drm/i915/gt/intel_ring.c | 19 ++++++++-------- drivers/gpu/drm/i915/i915_request.c | 2 ++ drivers/gpu/drm/i915/i915_request.h | 22 +++++++++++++++++++ 4 files changed, 37 insertions(+), 9 deletions(-)