Message ID | 20210506191451.77768-56-matthew.brost@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Basic GuC submission support in the i915 | expand |
On 06/05/2021 20:14, Matthew Brost wrote: > When running the GuC the GPU can't be considered idle if the GuC still > has contexts pinned. As such, a call has been added in > intel_gt_wait_for_idle to idle the UC and in turn the GuC by waiting for > the number of unpinned contexts to go to zero. > > Cc: John Harrison <john.c.harrison@intel.com> > Signed-off-by: Matthew Brost <matthew.brost@intel.com> > --- > drivers/gpu/drm/i915/gem/i915_gem_mman.c | 3 +- > drivers/gpu/drm/i915/gt/intel_gt.c | 18 ++++ > drivers/gpu/drm/i915/gt/intel_gt.h | 2 + > drivers/gpu/drm/i915/gt/intel_gt_requests.c | 22 ++--- > drivers/gpu/drm/i915/gt/intel_gt_requests.h | 7 +- > drivers/gpu/drm/i915/gt/uc/intel_guc.h | 4 + > drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 1 + > drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h | 4 + > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 91 ++++++++++++++++++- > drivers/gpu/drm/i915/gt/uc/intel_uc.h | 5 + > drivers/gpu/drm/i915/i915_debugfs.c | 1 + > drivers/gpu/drm/i915/i915_gem_evict.c | 1 + > .../gpu/drm/i915/selftests/igt_live_test.c | 2 +- > .../gpu/drm/i915/selftests/mock_gem_device.c | 3 +- > 14 files changed, 137 insertions(+), 27 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c > index 8598a1c78a4c..2f5295c9408d 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c > @@ -634,7 +634,8 @@ mmap_offset_attach(struct drm_i915_gem_object *obj, > goto insert; > > /* Attempt to reap some mmap space from dead objects */ > - err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT); > + err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT, > + NULL); > if (err) > goto err; > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c > index 8d77dcbad059..1742a8561f69 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gt.c > +++ b/drivers/gpu/drm/i915/gt/intel_gt.c > @@ -574,6 +574,24 @@ static 
void __intel_gt_disable(struct intel_gt *gt) > GEM_BUG_ON(intel_gt_pm_is_awake(gt)); > } > > +int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) > +{ > + long rtimeout; > + > + /* If the device is asleep, we have no requests outstanding */ > + if (!intel_gt_pm_is_awake(gt)) > + return 0; > + > + while ((timeout = intel_gt_retire_requests_timeout(gt, timeout, > + &rtimeout)) > 0) { > + cond_resched(); > + if (signal_pending(current)) > + return -EINTR; > + } > + > + return timeout ? timeout : intel_uc_wait_for_idle(&gt->uc, rtimeout); > +} > + > int intel_gt_init(struct intel_gt *gt) > { > int err; > diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h > index 7ec395cace69..c775043334bf 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gt.h > +++ b/drivers/gpu/drm/i915/gt/intel_gt.h > @@ -48,6 +48,8 @@ void intel_gt_driver_release(struct intel_gt *gt); > > void intel_gt_driver_late_release(struct intel_gt *gt); > > +int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); > + > void intel_gt_check_and_clear_faults(struct intel_gt *gt); > void intel_gt_clear_error_registers(struct intel_gt *gt, > intel_engine_mask_t engine_mask); > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c > index 647eca9d867a..c6c702f236fa 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c > +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c > @@ -13,6 +13,7 @@ > #include "intel_gt_pm.h" > #include "intel_gt_requests.h" > #include "intel_timeline.h" > +#include "uc/intel_uc.h" > > static bool retire_requests(struct intel_timeline *tl) > { > @@ -130,7 +131,8 @@ void intel_engine_fini_retire(struct intel_engine_cs *engine) > GEM_BUG_ON(engine->retire); > } > > -long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) > +long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout, > + long *rtimeout) What is 'rtimeout', I know remaining, but it can be more
self-descriptive to start with. It feels a bit churny for what it is. How plausible would be alternatives to either change existing timeout to in/out, or measure sleep internally in this function, or just risk sleeping twice as long by passing the original timeout to uc idle as well? > { > struct intel_gt_timelines *timelines = &gt->timelines; > struct intel_timeline *tl, *tn; > @@ -195,22 +197,10 @@ out_active: spin_lock(&timelines->lock); > if (flush_submission(gt, timeout)) /* Wait, there's more! */ > active_count++; > > - return active_count ? timeout : 0; > -} > - > -int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) > -{ > - /* If the device is asleep, we have no requests outstanding */ > - if (!intel_gt_pm_is_awake(gt)) > - return 0; > - > - while ((timeout = intel_gt_retire_requests_timeout(gt, timeout)) > 0) { > - cond_resched(); > - if (signal_pending(current)) > - return -EINTR; > - } > + if (rtimeout) > + *rtimeout = timeout; > > - return timeout; > + return active_count ? 
timeout : 0; > } > > static void retire_work_handler(struct work_struct *work) > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h > index fcc30a6e4fe9..4419787124e2 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.h > +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h > @@ -10,10 +10,11 @@ struct intel_engine_cs; > struct intel_gt; > struct intel_timeline; > > -long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout); > +long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout, > + long *rtimeout); > static inline void intel_gt_retire_requests(struct intel_gt *gt) > { > - intel_gt_retire_requests_timeout(gt, 0); > + intel_gt_retire_requests_timeout(gt, 0, NULL); > } > > void intel_engine_init_retire(struct intel_engine_cs *engine); > @@ -21,8 +22,6 @@ void intel_engine_add_retire(struct intel_engine_cs *engine, > struct intel_timeline *tl); > void intel_engine_fini_retire(struct intel_engine_cs *engine); > > -int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); > - > void intel_gt_init_requests(struct intel_gt *gt); > void intel_gt_park_requests(struct intel_gt *gt); > void intel_gt_unpark_requests(struct intel_gt *gt); > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h > index 485e98f3f304..47eaa69809e8 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h > @@ -38,6 +38,8 @@ struct intel_guc { > spinlock_t irq_lock; > unsigned int msg_enabled_mask; > > + atomic_t outstanding_submission_g2h; > + > struct { > bool enabled; > void (*reset)(struct intel_guc *guc); > @@ -239,6 +241,8 @@ static inline void intel_guc_disable_msg(struct intel_guc *guc, u32 mask) > spin_unlock_irq(&guc->irq_lock); > } > > +int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout); > + > int intel_guc_reset_engine(struct intel_guc *guc, > struct intel_engine_cs *engine); > > diff --git 
a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > index f1893030ca88..cf701056fa14 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > @@ -111,6 +111,7 @@ void intel_guc_ct_init_early(struct intel_guc_ct *ct) > INIT_LIST_HEAD(&ct->requests.incoming); > INIT_WORK(&ct->requests.worker, ct_incoming_request_worker_func); > tasklet_init(&ct->receive_tasklet, ct_receive_tasklet_func, (unsigned long)ct); > + init_waitqueue_head(&ct->wq); > } > > static inline const char *guc_ct_buffer_type_to_str(u32 type) > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h > index 660bf37238e2..ab1b79ab960b 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h > @@ -10,6 +10,7 @@ > #include <linux/spinlock.h> > #include <linux/workqueue.h> > #include <linux/ktime.h> > +#include <linux/wait.h> > > #include "intel_guc_fwif.h" > > @@ -68,6 +69,9 @@ struct intel_guc_ct { > > struct tasklet_struct receive_tasklet; > > + /** @wq: wait queue for g2h chanenl */ > + wait_queue_head_t wq; > + > struct { > u16 last_fence; /* last fence used to send request */ > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > index ae0b386467e3..0ff7dd6d337d 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > @@ -253,6 +253,74 @@ static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id, > xa_store_irq(&guc->context_lookup, id, ce, GFP_ATOMIC); > } > > +static int guc_submission_busy_loop(struct intel_guc* guc, > + const u32 *action, > + u32 len, > + u32 g2h_len_dw, > + bool loop) > +{ > + int err; > + > + err = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); > + > + if (!err && g2h_len_dw) > + atomic_inc(&guc->outstanding_submission_g2h); > + > + 
return err; > +} > + > +static int guc_wait_for_pending_msg(struct intel_guc *guc, > + atomic_t *wait_var, > + bool interruptible, > + long timeout) > +{ > + const int state = interruptible ? > + TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; > + DEFINE_WAIT(wait); > + > + might_sleep(); > + GEM_BUG_ON(timeout < 0); > + > + if (!atomic_read(wait_var)) > + return 0; > + > + if (!timeout) > + return -ETIME; > + > + for (;;) { > + prepare_to_wait(&guc->ct.wq, &wait, state); > + > + if (!atomic_read(wait_var)) > + break; > + > + if (signal_pending_state(state, current)) { > + timeout = -ERESTARTSYS; > + break; > + } > + > + if (!timeout) { > + timeout = -ETIME; > + break; > + } > + > + timeout = io_schedule_timeout(timeout); > + } > + finish_wait(&guc->ct.wq, &wait); > + > + return (timeout < 0) ? timeout : 0; > +} See if it is possible to simplify all this with wait_var_event and wake_up_var. > + > +int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout) > +{ > + bool interruptible = true; > + > + if (unlikely(timeout < 0)) > + timeout = -timeout, interruptible = false; > + > + return guc_wait_for_pending_msg(guc, &guc->outstanding_submission_g2h, > + interruptible, timeout); > +} > + > static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) > { > int err; > @@ -279,6 +347,7 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) > > err = intel_guc_send_nb(guc, action, len, g2h_len_dw); > if (!enabled && !err) { > + atomic_inc(&guc->outstanding_submission_g2h); > set_context_enabled(ce); > } else if (!enabled) { > clr_context_pending_enable(ce); > @@ -734,7 +803,7 @@ static int __guc_action_register_context(struct intel_guc *guc, > offset, > }; > > - return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); > + return guc_submission_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); > } > > static int register_context(struct intel_context *ce) > @@ -754,7 +823,7 @@ static int 
__guc_action_deregister_context(struct intel_guc *guc, > guc_id, > }; > > - return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), > + return guc_submission_busy_loop(guc, action, ARRAY_SIZE(action), > G2H_LEN_DW_DEREGISTER_CONTEXT, true); > } > > @@ -871,7 +940,9 @@ static int guc_context_pin(struct intel_context *ce, void *vaddr) > > static void guc_context_unpin(struct intel_context *ce) > { > - unpin_guc_id(ce_to_guc(ce), ce); > + struct intel_guc *guc = ce_to_guc(ce); > + > + unpin_guc_id(guc, ce); > lrc_unpin(ce); > } > > @@ -894,7 +965,7 @@ static void __guc_context_sched_disable(struct intel_guc *guc, > > intel_context_get(ce); > > - intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), > + guc_submission_busy_loop(guc, action, ARRAY_SIZE(action), > G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); > } > > @@ -1437,6 +1508,15 @@ g2h_context_lookup(struct intel_guc *guc, u32 desc_idx) > return ce; > } > > +static void decr_outstanding_submission_g2h(struct intel_guc *guc) > +{ > + if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) { > + smp_mb(); > + if (waitqueue_active(&guc->ct.wq)) > + wake_up_all(&guc->ct.wq); I keep pointing out this pattern is racy and at least needs comment why it is safe. 
Regards, Tvrtko > + } > +} > + > int intel_guc_deregister_done_process_msg(struct intel_guc *guc, > const u32 *msg, > u32 len) > @@ -1472,6 +1552,8 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc, > lrc_destroy(&ce->ref); > } > > + decr_outstanding_submission_g2h(guc); > + > return 0; > } > > @@ -1520,6 +1602,7 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc, > spin_unlock_irqrestore(&ce->guc_state.lock, flags); > } > > + decr_outstanding_submission_g2h(guc); > intel_context_put(ce); > > return 0; > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h > index 9c954c589edf..c4cef885e984 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h > +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h > @@ -81,6 +81,11 @@ uc_state_checkers(guc, guc_submission); > #undef uc_state_checkers > #undef __uc_state_checker > > +static inline int intel_uc_wait_for_idle(struct intel_uc *uc, long timeout) > +{ > + return intel_guc_wait_for_idle(&uc->guc, timeout); > +} > + > #define intel_uc_ops_function(_NAME, _OPS, _TYPE, _RET) \ > static inline _TYPE intel_uc_##_NAME(struct intel_uc *uc) \ > { \ > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c > index 8dd374691102..bb29838d1cd7 100644 > --- a/drivers/gpu/drm/i915/i915_debugfs.c > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > @@ -36,6 +36,7 @@ > #include "gt/intel_gt_clock_utils.h" > #include "gt/intel_gt.h" > #include "gt/intel_gt_pm.h" > +#include "gt/intel_gt.h" > #include "gt/intel_gt_requests.h" > #include "gt/intel_reset.h" > #include "gt/intel_rc6.h" > diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c > index 4d2d59a9942b..2b73ddb11c66 100644 > --- a/drivers/gpu/drm/i915/i915_gem_evict.c > +++ b/drivers/gpu/drm/i915/i915_gem_evict.c > @@ -27,6 +27,7 @@ > */ > > #include "gem/i915_gem_context.h" > +#include "gt/intel_gt.h" > #include "gt/intel_gt_requests.h" > > #include "i915_drv.h" > 
diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c > index c130010a7033..1c721542e277 100644 > --- a/drivers/gpu/drm/i915/selftests/igt_live_test.c > +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c > @@ -5,7 +5,7 @@ > */ > > #include "i915_drv.h" > -#include "gt/intel_gt_requests.h" > +#include "gt/intel_gt.h" > > #include "../i915_selftest.h" > #include "igt_flush_test.h" > diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c > index cf40004bc92a..6c06816e2b99 100644 > --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c > +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c > @@ -51,7 +51,8 @@ void mock_device_flush(struct drm_i915_private *i915) > do { > for_each_engine(engine, gt, id) > mock_engine_flush(engine); > - } while (intel_gt_retire_requests_timeout(gt, MAX_SCHEDULE_TIMEOUT)); > + } while (intel_gt_retire_requests_timeout(gt, MAX_SCHEDULE_TIMEOUT, > + NULL)); > } > > static void mock_device_release(struct drm_device *dev) >
On Tue, May 25, 2021 at 11:06:00AM +0100, Tvrtko Ursulin wrote: > > On 06/05/2021 20:14, Matthew Brost wrote: > > When running the GuC the GPU can't be considered idle if the GuC still > > has contexts pinned. As such, a call has been added in > > intel_gt_wait_for_idle to idle the UC and in turn the GuC by waiting for > > the number of unpinned contexts to go to zero. > > > > Cc: John Harrison <john.c.harrison@intel.com> > > Signed-off-by: Matthew Brost <matthew.brost@intel.com> > > --- > > drivers/gpu/drm/i915/gem/i915_gem_mman.c | 3 +- > > drivers/gpu/drm/i915/gt/intel_gt.c | 18 ++++ > > drivers/gpu/drm/i915/gt/intel_gt.h | 2 + > > drivers/gpu/drm/i915/gt/intel_gt_requests.c | 22 ++--- > > drivers/gpu/drm/i915/gt/intel_gt_requests.h | 7 +- > > drivers/gpu/drm/i915/gt/uc/intel_guc.h | 4 + > > drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 1 + > > drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h | 4 + > > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 91 ++++++++++++++++++- > > drivers/gpu/drm/i915/gt/uc/intel_uc.h | 5 + > > drivers/gpu/drm/i915/i915_debugfs.c | 1 + > > drivers/gpu/drm/i915/i915_gem_evict.c | 1 + > > .../gpu/drm/i915/selftests/igt_live_test.c | 2 +- > > .../gpu/drm/i915/selftests/mock_gem_device.c | 3 +- > > 14 files changed, 137 insertions(+), 27 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c > > index 8598a1c78a4c..2f5295c9408d 100644 > > --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c > > @@ -634,7 +634,8 @@ mmap_offset_attach(struct drm_i915_gem_object *obj, > > goto insert; > > /* Attempt to reap some mmap space from dead objects */ > > - err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT); > > + err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT, > > + NULL); > > if (err) > > goto err; > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c > > index 
8d77dcbad059..1742a8561f69 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_gt.c > > +++ b/drivers/gpu/drm/i915/gt/intel_gt.c > > @@ -574,6 +574,24 @@ static void __intel_gt_disable(struct intel_gt *gt) > > GEM_BUG_ON(intel_gt_pm_is_awake(gt)); > > } > > +int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) > > +{ > > + long rtimeout; > > + > > + /* If the device is asleep, we have no requests outstanding */ > > + if (!intel_gt_pm_is_awake(gt)) > > + return 0; > > + > > + while ((timeout = intel_gt_retire_requests_timeout(gt, timeout, > > + &rtimeout)) > 0) { > > + cond_resched(); > > + if (signal_pending(current)) > > + return -EINTR; > > + } > > + > > + return timeout ? timeout : intel_uc_wait_for_idle(&gt->uc, rtimeout); > > +} > > + > > int intel_gt_init(struct intel_gt *gt) > > { > > int err; > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h > > index 7ec395cace69..c775043334bf 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_gt.h > > +++ b/drivers/gpu/drm/i915/gt/intel_gt.h > > @@ -48,6 +48,8 @@ void intel_gt_driver_release(struct intel_gt *gt); > > void intel_gt_driver_late_release(struct intel_gt *gt); > > +int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); > > + > > void intel_gt_check_and_clear_faults(struct intel_gt *gt); > > void intel_gt_clear_error_registers(struct intel_gt *gt, > > intel_engine_mask_t engine_mask); > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c > > index 647eca9d867a..c6c702f236fa 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c > > +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c > > @@ -13,6 +13,7 @@ > > #include "intel_gt_pm.h" > > #include "intel_gt_requests.h" > > #include "intel_timeline.h" > > +#include "uc/intel_uc.h" > > static bool retire_requests(struct intel_timeline *tl) > > { > > @@ -130,7 +131,8 @@ void intel_engine_fini_retire(struct intel_engine_cs *engine) > > GEM_BUG_ON(engine->retire); > > 
} > > -long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) > > +long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout, > > + long *rtimeout) > > What is 'rtimeout', I know remaining, but it can be more self-descriptive to > start with. > 'remaining_timeout' it is. > >> It feels a bit churny for what it is. How plausible would be alternatives to > either change existing timeout to in/out, or measure sleep internally in > this function, or just risk sleeping twice as long by passing the original > timeout to uc idle as well? > Originally had it just passing in the same value, got review feedback saying I should pass in the adjusted value. Hard to make everyone happy. > > { > > struct intel_gt_timelines *timelines = &gt->timelines; > > struct intel_timeline *tl, *tn; > > @@ -195,22 +197,10 @@ out_active: spin_lock(&timelines->lock); > > if (flush_submission(gt, timeout)) /* Wait, there's more! */ > > active_count++; > > - return active_count ? timeout : 0; > > -} > > - > > -int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) > > -{ > > - /* If the device is asleep, we have no requests outstanding */ > > - if (!intel_gt_pm_is_awake(gt)) > > - return 0; > > - > > - while ((timeout = intel_gt_retire_requests_timeout(gt, timeout)) > 0) { > > - cond_resched(); > > - if (signal_pending(current)) > > - return -EINTR; > > - } > > + if (rtimeout) > > + *rtimeout = timeout; > > - return timeout; > > + return active_count ? 
timeout : 0; > > } > > static void retire_work_handler(struct work_struct *work) > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h > > index fcc30a6e4fe9..4419787124e2 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.h > > +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h > > @@ -10,10 +10,11 @@ struct intel_engine_cs; > > struct intel_gt; > > struct intel_timeline; > > -long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout); > > +long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout, > > + long *rtimeout); > > static inline void intel_gt_retire_requests(struct intel_gt *gt) > > { > > - intel_gt_retire_requests_timeout(gt, 0); > > + intel_gt_retire_requests_timeout(gt, 0, NULL); > > } > > void intel_engine_init_retire(struct intel_engine_cs *engine); > > @@ -21,8 +22,6 @@ void intel_engine_add_retire(struct intel_engine_cs *engine, > > struct intel_timeline *tl); > > void intel_engine_fini_retire(struct intel_engine_cs *engine); > > -int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); > > - > > void intel_gt_init_requests(struct intel_gt *gt); > > void intel_gt_park_requests(struct intel_gt *gt); > > void intel_gt_unpark_requests(struct intel_gt *gt); > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h > > index 485e98f3f304..47eaa69809e8 100644 > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h > > @@ -38,6 +38,8 @@ struct intel_guc { > > spinlock_t irq_lock; > > unsigned int msg_enabled_mask; > > + atomic_t outstanding_submission_g2h; > > + > > struct { > > bool enabled; > > void (*reset)(struct intel_guc *guc); > > @@ -239,6 +241,8 @@ static inline void intel_guc_disable_msg(struct intel_guc *guc, u32 mask) > > spin_unlock_irq(&guc->irq_lock); > > } > > +int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout); > > + > > int 
intel_guc_reset_engine(struct intel_guc *guc, > > struct intel_engine_cs *engine); > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > > index f1893030ca88..cf701056fa14 100644 > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > > @@ -111,6 +111,7 @@ void intel_guc_ct_init_early(struct intel_guc_ct *ct) > > INIT_LIST_HEAD(&ct->requests.incoming); > > INIT_WORK(&ct->requests.worker, ct_incoming_request_worker_func); > > tasklet_init(&ct->receive_tasklet, ct_receive_tasklet_func, (unsigned long)ct); > > + init_waitqueue_head(&ct->wq); > > } > > static inline const char *guc_ct_buffer_type_to_str(u32 type) > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h > > index 660bf37238e2..ab1b79ab960b 100644 > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h > > @@ -10,6 +10,7 @@ > > #include <linux/spinlock.h> > > #include <linux/workqueue.h> > > #include <linux/ktime.h> > > +#include <linux/wait.h> > > #include "intel_guc_fwif.h" > > @@ -68,6 +69,9 @@ struct intel_guc_ct { > > struct tasklet_struct receive_tasklet; > > + /** @wq: wait queue for g2h chanenl */ > > + wait_queue_head_t wq; > > + > > struct { > > u16 last_fence; /* last fence used to send request */ > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > index ae0b386467e3..0ff7dd6d337d 100644 > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > @@ -253,6 +253,74 @@ static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id, > > xa_store_irq(&guc->context_lookup, id, ce, GFP_ATOMIC); > > } > > +static int guc_submission_busy_loop(struct intel_guc* guc, > > + const u32 *action, > > + u32 len, > > + u32 g2h_len_dw, > > + bool loop) > > +{ > > + int err; > > 
+ > > + err = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); > > + > > + if (!err && g2h_len_dw) > > + atomic_inc(&guc->outstanding_submission_g2h); > > + > > + return err; > > +} > > + > > +static int guc_wait_for_pending_msg(struct intel_guc *guc, > > + atomic_t *wait_var, > > + bool interruptible, > > + long timeout) > > +{ > > + const int state = interruptible ? > > + TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; > > + DEFINE_WAIT(wait); > > + > > + might_sleep(); > > + GEM_BUG_ON(timeout < 0); > > + > > + if (!atomic_read(wait_var)) > > + return 0; > > + > > + if (!timeout) > > + return -ETIME; > > + > > + for (;;) { > > + prepare_to_wait(&guc->ct.wq, &wait, state); > > + > > + if (!atomic_read(wait_var)) > > + break; > > + > > + if (signal_pending_state(state, current)) { > > + timeout = -ERESTARTSYS; > > + break; > > + } > > + > > + if (!timeout) { > > + timeout = -ETIME; > > + break; > > + } > > + > > + timeout = io_schedule_timeout(timeout); > > + } > > + finish_wait(&guc->ct.wq, &wait); > > + > > + return (timeout < 0) ? timeout : 0; > > +} > > See if it is possible to simplify all this with wait_var_event and > wake_up_var. > Let me check on that. 
> > + > > +int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout) > > +{ > > + bool interruptible = true; > > + > > + if (unlikely(timeout < 0)) > > + timeout = -timeout, interruptible = false; > > + > > + return guc_wait_for_pending_msg(guc, &guc->outstanding_submission_g2h, > > + interruptible, timeout); > > +} > > + > > static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) > > { > > int err; > > @@ -279,6 +347,7 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) > > err = intel_guc_send_nb(guc, action, len, g2h_len_dw); > > if (!enabled && !err) { > > + atomic_inc(&guc->outstanding_submission_g2h); > > set_context_enabled(ce); > > } else if (!enabled) { > > clr_context_pending_enable(ce); > > @@ -734,7 +803,7 @@ static int __guc_action_register_context(struct intel_guc *guc, > > offset, > > }; > > - return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); > > + return guc_submission_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); > > } > > static int register_context(struct intel_context *ce) > > @@ -754,7 +823,7 @@ static int __guc_action_deregister_context(struct intel_guc *guc, > > guc_id, > > }; > > - return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), > > + return guc_submission_busy_loop(guc, action, ARRAY_SIZE(action), > > G2H_LEN_DW_DEREGISTER_CONTEXT, true); > > } > > @@ -871,7 +940,9 @@ static int guc_context_pin(struct intel_context *ce, void *vaddr) > > static void guc_context_unpin(struct intel_context *ce) > > { > > - unpin_guc_id(ce_to_guc(ce), ce); > > + struct intel_guc *guc = ce_to_guc(ce); > > + > > + unpin_guc_id(guc, ce); > > lrc_unpin(ce); > > } > > @@ -894,7 +965,7 @@ static void __guc_context_sched_disable(struct intel_guc *guc, > > intel_context_get(ce); > > - intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), > > + guc_submission_busy_loop(guc, action, ARRAY_SIZE(action), > > G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); > > } > > @@ -1437,6 
+1508,15 @@ g2h_context_lookup(struct intel_guc *guc, u32 desc_idx) > > return ce; > > } > > +static void decr_outstanding_submission_g2h(struct intel_guc *guc) > > +{ > > + if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) { > > + smp_mb(); > > + if (waitqueue_active(&guc->ct.wq)) > > + wake_up_all(&guc->ct.wq); > > I keep pointing out this pattern is racy and at least needs comment why it > is safe. > There is a comment in wake queue code header saying why this is safe. I don't think we need to repeat this here. Matt > Regards, > > Tvrtko > > > + } > > +} > > + > > int intel_guc_deregister_done_process_msg(struct intel_guc *guc, > > const u32 *msg, > > u32 len) > > @@ -1472,6 +1552,8 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc, > > lrc_destroy(&ce->ref); > > } > > + decr_outstanding_submission_g2h(guc); > > + > > return 0; > > } > > @@ -1520,6 +1602,7 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc, > > spin_unlock_irqrestore(&ce->guc_state.lock, flags); > > } > > + decr_outstanding_submission_g2h(guc); > > intel_context_put(ce); > > return 0; > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h > > index 9c954c589edf..c4cef885e984 100644 > > --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h > > @@ -81,6 +81,11 @@ uc_state_checkers(guc, guc_submission); > > #undef uc_state_checkers > > #undef __uc_state_checker > > +static inline int intel_uc_wait_for_idle(struct intel_uc *uc, long timeout) > > +{ > > + return intel_guc_wait_for_idle(&uc->guc, timeout); > > +} > > + > > #define intel_uc_ops_function(_NAME, _OPS, _TYPE, _RET) \ > > static inline _TYPE intel_uc_##_NAME(struct intel_uc *uc) \ > > { \ > > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c > > index 8dd374691102..bb29838d1cd7 100644 > > --- a/drivers/gpu/drm/i915/i915_debugfs.c > > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > > @@ -36,6 
+36,7 @@ > > #include "gt/intel_gt_clock_utils.h" > > #include "gt/intel_gt.h" > > #include "gt/intel_gt_pm.h" > > +#include "gt/intel_gt.h" > > #include "gt/intel_gt_requests.h" > > #include "gt/intel_reset.h" > > #include "gt/intel_rc6.h" > > diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c > > index 4d2d59a9942b..2b73ddb11c66 100644 > > --- a/drivers/gpu/drm/i915/i915_gem_evict.c > > +++ b/drivers/gpu/drm/i915/i915_gem_evict.c > > @@ -27,6 +27,7 @@ > > */ > > #include "gem/i915_gem_context.h" > > +#include "gt/intel_gt.h" > > #include "gt/intel_gt_requests.h" > > #include "i915_drv.h" > > diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c > > index c130010a7033..1c721542e277 100644 > > --- a/drivers/gpu/drm/i915/selftests/igt_live_test.c > > +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c > > @@ -5,7 +5,7 @@ > > */ > > #include "i915_drv.h" > > -#include "gt/intel_gt_requests.h" > > +#include "gt/intel_gt.h" > > #include "../i915_selftest.h" > > #include "igt_flush_test.h" > > diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c > > index cf40004bc92a..6c06816e2b99 100644 > > --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c > > +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c > > @@ -51,7 +51,8 @@ void mock_device_flush(struct drm_i915_private *i915) > > do { > > for_each_engine(engine, gt, id) > > mock_engine_flush(engine); > > - } while (intel_gt_retire_requests_timeout(gt, MAX_SCHEDULE_TIMEOUT)); > > + } while (intel_gt_retire_requests_timeout(gt, MAX_SCHEDULE_TIMEOUT, > > + NULL)); > > } > > static void mock_device_release(struct drm_device *dev) > >
On 25/05/2021 18:07, Matthew Brost wrote: > On Tue, May 25, 2021 at 11:06:00AM +0100, Tvrtko Ursulin wrote: >> >> On 06/05/2021 20:14, Matthew Brost wrote: >>> When running the GuC the GPU can't be considered idle if the GuC still >>> has contexts pinned. As such, a call has been added in >>> intel_gt_wait_for_idle to idle the UC and in turn the GuC by waiting for >>> the number of unpinned contexts to go to zero. >>> >>> Cc: John Harrison <john.c.harrison@intel.com> >>> Signed-off-by: Matthew Brost <matthew.brost@intel.com> >>> --- >>> drivers/gpu/drm/i915/gem/i915_gem_mman.c | 3 +- >>> drivers/gpu/drm/i915/gt/intel_gt.c | 18 ++++ >>> drivers/gpu/drm/i915/gt/intel_gt.h | 2 + >>> drivers/gpu/drm/i915/gt/intel_gt_requests.c | 22 ++--- >>> drivers/gpu/drm/i915/gt/intel_gt_requests.h | 7 +- >>> drivers/gpu/drm/i915/gt/uc/intel_guc.h | 4 + >>> drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 1 + >>> drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h | 4 + >>> .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 91 ++++++++++++++++++- >>> drivers/gpu/drm/i915/gt/uc/intel_uc.h | 5 + >>> drivers/gpu/drm/i915/i915_debugfs.c | 1 + >>> drivers/gpu/drm/i915/i915_gem_evict.c | 1 + >>> .../gpu/drm/i915/selftests/igt_live_test.c | 2 +- >>> .../gpu/drm/i915/selftests/mock_gem_device.c | 3 +- >>> 14 files changed, 137 insertions(+), 27 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c >>> index 8598a1c78a4c..2f5295c9408d 100644 >>> --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c >>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c >>> @@ -634,7 +634,8 @@ mmap_offset_attach(struct drm_i915_gem_object *obj, >>> goto insert; >>> /* Attempt to reap some mmap space from dead objects */ >>> - err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT); >>> + err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT, >>> + NULL); >>> if (err) >>> goto err; >>> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c >>> index 8d77dcbad059..1742a8561f69 100644 >>> --- a/drivers/gpu/drm/i915/gt/intel_gt.c >>> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c >>> @@ -574,6 +574,24 @@ static void __intel_gt_disable(struct intel_gt *gt) >>> GEM_BUG_ON(intel_gt_pm_is_awake(gt)); >>> } >>> +int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) >>> +{ >>> + long rtimeout; >>> + >>> + /* If the device is asleep, we have no requests outstanding */ >>> + if (!intel_gt_pm_is_awake(gt)) >>> + return 0; >>> + >>> + while ((timeout = intel_gt_retire_requests_timeout(gt, timeout, >>> + &rtimeout)) > 0) { >>> + cond_resched(); >>> + if (signal_pending(current)) >>> + return -EINTR; >>> + } >>> + >>> + return timeout ? timeout : intel_uc_wait_for_idle(>->uc, rtimeout); >>> +} >>> + >>> int intel_gt_init(struct intel_gt *gt) >>> { >>> int err; >>> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h >>> index 7ec395cace69..c775043334bf 100644 >>> --- a/drivers/gpu/drm/i915/gt/intel_gt.h >>> +++ b/drivers/gpu/drm/i915/gt/intel_gt.h >>> @@ -48,6 +48,8 @@ void intel_gt_driver_release(struct intel_gt *gt); >>> void intel_gt_driver_late_release(struct intel_gt *gt); >>> +int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); >>> + >>> void intel_gt_check_and_clear_faults(struct intel_gt *gt); >>> void intel_gt_clear_error_registers(struct intel_gt *gt, >>> intel_engine_mask_t engine_mask); >>> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c >>> index 647eca9d867a..c6c702f236fa 100644 >>> --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c >>> +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c >>> @@ -13,6 +13,7 @@ >>> #include "intel_gt_pm.h" >>> #include "intel_gt_requests.h" >>> #include "intel_timeline.h" >>> +#include "uc/intel_uc.h" >>> static bool retire_requests(struct intel_timeline *tl) >>> { >>> @@ -130,7 +131,8 @@ void intel_engine_fini_retire(struct 
intel_engine_cs *engine) >>> GEM_BUG_ON(engine->retire); >>> } >>> -long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) >>> +long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout, >>> + long *rtimeout) >> >> What is 'rtimeout', I know remaining, but it can be more self-descriptive to >> start with. >> > > 'remaining_timeout' it is. > >> It feels a bit churny for what it is. How plausible would be alternatives to >> either change existing timeout to in/out, or measure sleep internally in >> this function, or just risk sleeping twice as long by passing the original >> timeout to uc idle as well? >> > > Originally had it just passing in the same value, got review feedback > saying I should pass in the adjusted value. Hard to make everyone happy. Ok. > >>> { >>> struct intel_gt_timelines *timelines = >->timelines; >>> struct intel_timeline *tl, *tn; >>> @@ -195,22 +197,10 @@ out_active: spin_lock(&timelines->lock); >>> if (flush_submission(gt, timeout)) /* Wait, there's more! */ >>> active_count++; >>> - return active_count ? timeout : 0; >>> -} >>> - >>> -int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) >>> -{ >>> - /* If the device is asleep, we have no requests outstanding */ >>> - if (!intel_gt_pm_is_awake(gt)) >>> - return 0; >>> - >>> - while ((timeout = intel_gt_retire_requests_timeout(gt, timeout)) > 0) { >>> - cond_resched(); >>> - if (signal_pending(current)) >>> - return -EINTR; >>> - } >>> + if (rtimeout) >>> + *rtimeout = timeout; >>> - return timeout; >>> + return active_count ? 
timeout : 0; >>> } >>> static void retire_work_handler(struct work_struct *work) >>> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h >>> index fcc30a6e4fe9..4419787124e2 100644 >>> --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.h >>> +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h >>> @@ -10,10 +10,11 @@ struct intel_engine_cs; >>> struct intel_gt; >>> struct intel_timeline; >>> -long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout); >>> +long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout, >>> + long *rtimeout); >>> static inline void intel_gt_retire_requests(struct intel_gt *gt) >>> { >>> - intel_gt_retire_requests_timeout(gt, 0); >>> + intel_gt_retire_requests_timeout(gt, 0, NULL); >>> } >>> void intel_engine_init_retire(struct intel_engine_cs *engine); >>> @@ -21,8 +22,6 @@ void intel_engine_add_retire(struct intel_engine_cs *engine, >>> struct intel_timeline *tl); >>> void intel_engine_fini_retire(struct intel_engine_cs *engine); >>> -int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); >>> - >>> void intel_gt_init_requests(struct intel_gt *gt); >>> void intel_gt_park_requests(struct intel_gt *gt); >>> void intel_gt_unpark_requests(struct intel_gt *gt); >>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h >>> index 485e98f3f304..47eaa69809e8 100644 >>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h >>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h >>> @@ -38,6 +38,8 @@ struct intel_guc { >>> spinlock_t irq_lock; >>> unsigned int msg_enabled_mask; >>> + atomic_t outstanding_submission_g2h; >>> + >>> struct { >>> bool enabled; >>> void (*reset)(struct intel_guc *guc); >>> @@ -239,6 +241,8 @@ static inline void intel_guc_disable_msg(struct intel_guc *guc, u32 mask) >>> spin_unlock_irq(&guc->irq_lock); >>> } >>> +int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout); >>> + >>> int 
intel_guc_reset_engine(struct intel_guc *guc, >>> struct intel_engine_cs *engine); >>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c >>> index f1893030ca88..cf701056fa14 100644 >>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c >>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c >>> @@ -111,6 +111,7 @@ void intel_guc_ct_init_early(struct intel_guc_ct *ct) >>> INIT_LIST_HEAD(&ct->requests.incoming); >>> INIT_WORK(&ct->requests.worker, ct_incoming_request_worker_func); >>> tasklet_init(&ct->receive_tasklet, ct_receive_tasklet_func, (unsigned long)ct); >>> + init_waitqueue_head(&ct->wq); >>> } >>> static inline const char *guc_ct_buffer_type_to_str(u32 type) >>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h >>> index 660bf37238e2..ab1b79ab960b 100644 >>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h >>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h >>> @@ -10,6 +10,7 @@ >>> #include <linux/spinlock.h> >>> #include <linux/workqueue.h> >>> #include <linux/ktime.h> >>> +#include <linux/wait.h> >>> #include "intel_guc_fwif.h" >>> @@ -68,6 +69,9 @@ struct intel_guc_ct { >>> struct tasklet_struct receive_tasklet; >>> + /** @wq: wait queue for g2h chanenl */ >>> + wait_queue_head_t wq; >>> + >>> struct { >>> u16 last_fence; /* last fence used to send request */ >>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c >>> index ae0b386467e3..0ff7dd6d337d 100644 >>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c >>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c >>> @@ -253,6 +253,74 @@ static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id, >>> xa_store_irq(&guc->context_lookup, id, ce, GFP_ATOMIC); >>> } >>> +static int guc_submission_busy_loop(struct intel_guc* guc, >>> + const u32 *action, >>> + u32 len, >>> + u32 g2h_len_dw, >>> + bool loop) >>> +{ >>> + int err; >>> 
+ >>> + err = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); >>> + >>> + if (!err && g2h_len_dw) >>> + atomic_inc(&guc->outstanding_submission_g2h); >>> + >>> + return err; >>> +} >>> + >>> +static int guc_wait_for_pending_msg(struct intel_guc *guc, >>> + atomic_t *wait_var, >>> + bool interruptible, >>> + long timeout) >>> +{ >>> + const int state = interruptible ? >>> + TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; >>> + DEFINE_WAIT(wait); >>> + >>> + might_sleep(); >>> + GEM_BUG_ON(timeout < 0); >>> + >>> + if (!atomic_read(wait_var)) >>> + return 0; >>> + >>> + if (!timeout) >>> + return -ETIME; >>> + >>> + for (;;) { >>> + prepare_to_wait(&guc->ct.wq, &wait, state); >>> + >>> + if (!atomic_read(wait_var)) >>> + break; >>> + >>> + if (signal_pending_state(state, current)) { >>> + timeout = -ERESTARTSYS; >>> + break; >>> + } >>> + >>> + if (!timeout) { >>> + timeout = -ETIME; >>> + break; >>> + } >>> + >>> + timeout = io_schedule_timeout(timeout); >>> + } >>> + finish_wait(&guc->ct.wq, &wait); >>> + >>> + return (timeout < 0) ? timeout : 0; >>> +} >> >> See if it is possible to simplify all this with wait_var_event and >> wake_up_var. >> > > Let me check on that. 
> >>> + >>> +int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout) >>> +{ >>> + bool interruptible = true; >>> + >>> + if (unlikely(timeout < 0)) >>> + timeout = -timeout, interruptible = false; >>> + >>> + return guc_wait_for_pending_msg(guc, &guc->outstanding_submission_g2h, >>> + interruptible, timeout); >>> +} >>> + >>> static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) >>> { >>> int err; >>> @@ -279,6 +347,7 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) >>> err = intel_guc_send_nb(guc, action, len, g2h_len_dw); >>> if (!enabled && !err) { >>> + atomic_inc(&guc->outstanding_submission_g2h); >>> set_context_enabled(ce); >>> } else if (!enabled) { >>> clr_context_pending_enable(ce); >>> @@ -734,7 +803,7 @@ static int __guc_action_register_context(struct intel_guc *guc, >>> offset, >>> }; >>> - return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); >>> + return guc_submission_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); >>> } >>> static int register_context(struct intel_context *ce) >>> @@ -754,7 +823,7 @@ static int __guc_action_deregister_context(struct intel_guc *guc, >>> guc_id, >>> }; >>> - return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), >>> + return guc_submission_busy_loop(guc, action, ARRAY_SIZE(action), >>> G2H_LEN_DW_DEREGISTER_CONTEXT, true); >>> } >>> @@ -871,7 +940,9 @@ static int guc_context_pin(struct intel_context *ce, void *vaddr) >>> static void guc_context_unpin(struct intel_context *ce) >>> { >>> - unpin_guc_id(ce_to_guc(ce), ce); >>> + struct intel_guc *guc = ce_to_guc(ce); >>> + >>> + unpin_guc_id(guc, ce); >>> lrc_unpin(ce); >>> } >>> @@ -894,7 +965,7 @@ static void __guc_context_sched_disable(struct intel_guc *guc, >>> intel_context_get(ce); >>> - intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), >>> + guc_submission_busy_loop(guc, action, ARRAY_SIZE(action), >>> G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); >>> } >>> @@ 
-1437,6 +1508,15 @@ g2h_context_lookup(struct intel_guc *guc, u32 desc_idx) >>> return ce; >>> } >>> +static void decr_outstanding_submission_g2h(struct intel_guc *guc) >>> +{ >>> + if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) { >>> + smp_mb(); >>> + if (waitqueue_active(&guc->ct.wq)) >>> + wake_up_all(&guc->ct.wq); >> >> I keep pointing out this pattern is racy and at least needs comment why it >> is safe. >> > > There is a comment in wake queue code header saying why this is safe. I > don't think we need to repeat this here. Yeah, _describing how to make it safe_, after it starts with: * NOTE: this function is lockless and requires care, incorrect usage _will_ * lead to sporadic and non-obvious failure. Then it also says: * Also note that this 'optimization' trades a spin_lock() for an smp_mb(), * which (when the lock is uncontended) are of roughly equal cost. I question the need to optimize this path since it means the reader has to figure out if it is safe while a simple wake_up_all after atomic_dec_and_test would have done it. Is the case of no waiters a predominant one? It at least deserves a comment explaining why the optimisation is important. 
Regards, Tvrtko > > Matt > >> Regards, >> >> Tvrtko >> >>> + } >>> +} >>> + >>> int intel_guc_deregister_done_process_msg(struct intel_guc *guc, >>> const u32 *msg, >>> u32 len) >>> @@ -1472,6 +1552,8 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc, >>> lrc_destroy(&ce->ref); >>> } >>> + decr_outstanding_submission_g2h(guc); >>> + >>> return 0; >>> } >>> @@ -1520,6 +1602,7 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc, >>> spin_unlock_irqrestore(&ce->guc_state.lock, flags); >>> } >>> + decr_outstanding_submission_g2h(guc); >>> intel_context_put(ce); >>> return 0; >>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h >>> index 9c954c589edf..c4cef885e984 100644 >>> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h >>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h >>> @@ -81,6 +81,11 @@ uc_state_checkers(guc, guc_submission); >>> #undef uc_state_checkers >>> #undef __uc_state_checker >>> +static inline int intel_uc_wait_for_idle(struct intel_uc *uc, long timeout) >>> +{ >>> + return intel_guc_wait_for_idle(&uc->guc, timeout); >>> +} >>> + >>> #define intel_uc_ops_function(_NAME, _OPS, _TYPE, _RET) \ >>> static inline _TYPE intel_uc_##_NAME(struct intel_uc *uc) \ >>> { \ >>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c >>> index 8dd374691102..bb29838d1cd7 100644 >>> --- a/drivers/gpu/drm/i915/i915_debugfs.c >>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c >>> @@ -36,6 +36,7 @@ >>> #include "gt/intel_gt_clock_utils.h" >>> #include "gt/intel_gt.h" >>> #include "gt/intel_gt_pm.h" >>> +#include "gt/intel_gt.h" >>> #include "gt/intel_gt_requests.h" >>> #include "gt/intel_reset.h" >>> #include "gt/intel_rc6.h" >>> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c >>> index 4d2d59a9942b..2b73ddb11c66 100644 >>> --- a/drivers/gpu/drm/i915/i915_gem_evict.c >>> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c >>> @@ -27,6 +27,7 @@ >>> */ 
>>> #include "gem/i915_gem_context.h" >>> +#include "gt/intel_gt.h" >>> #include "gt/intel_gt_requests.h" >>> #include "i915_drv.h" >>> diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c >>> index c130010a7033..1c721542e277 100644 >>> --- a/drivers/gpu/drm/i915/selftests/igt_live_test.c >>> +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c >>> @@ -5,7 +5,7 @@ >>> */ >>> #include "i915_drv.h" >>> -#include "gt/intel_gt_requests.h" >>> +#include "gt/intel_gt.h" >>> #include "../i915_selftest.h" >>> #include "igt_flush_test.h" >>> diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c >>> index cf40004bc92a..6c06816e2b99 100644 >>> --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c >>> +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c >>> @@ -51,7 +51,8 @@ void mock_device_flush(struct drm_i915_private *i915) >>> do { >>> for_each_engine(engine, gt, id) >>> mock_engine_flush(engine); >>> - } while (intel_gt_retire_requests_timeout(gt, MAX_SCHEDULE_TIMEOUT)); >>> + } while (intel_gt_retire_requests_timeout(gt, MAX_SCHEDULE_TIMEOUT, >>> + NULL)); >>> } >>> static void mock_device_release(struct drm_device *dev) >>>
On Wed, May 26, 2021 at 10:21:05AM +0100, Tvrtko Ursulin wrote: > > On 25/05/2021 18:07, Matthew Brost wrote: > > On Tue, May 25, 2021 at 11:06:00AM +0100, Tvrtko Ursulin wrote: > > > > > > On 06/05/2021 20:14, Matthew Brost wrote: > > > > When running the GuC the GPU can't be considered idle if the GuC still > > > > has contexts pinned. As such, a call has been added in > > > > intel_gt_wait_for_idle to idle the UC and in turn the GuC by waiting for > > > > the number of unpinned contexts to go to zero. > > > > > > > > Cc: John Harrison <john.c.harrison@intel.com> > > > > Signed-off-by: Matthew Brost <matthew.brost@intel.com> > > > > --- > > > > drivers/gpu/drm/i915/gem/i915_gem_mman.c | 3 +- > > > > drivers/gpu/drm/i915/gt/intel_gt.c | 18 ++++ > > > > drivers/gpu/drm/i915/gt/intel_gt.h | 2 + > > > > drivers/gpu/drm/i915/gt/intel_gt_requests.c | 22 ++--- > > > > drivers/gpu/drm/i915/gt/intel_gt_requests.h | 7 +- > > > > drivers/gpu/drm/i915/gt/uc/intel_guc.h | 4 + > > > > drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 1 + > > > > drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h | 4 + > > > > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 91 ++++++++++++++++++- > > > > drivers/gpu/drm/i915/gt/uc/intel_uc.h | 5 + > > > > drivers/gpu/drm/i915/i915_debugfs.c | 1 + > > > > drivers/gpu/drm/i915/i915_gem_evict.c | 1 + > > > > .../gpu/drm/i915/selftests/igt_live_test.c | 2 +- > > > > .../gpu/drm/i915/selftests/mock_gem_device.c | 3 +- > > > > 14 files changed, 137 insertions(+), 27 deletions(-) > > > > > > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c > > > > index 8598a1c78a4c..2f5295c9408d 100644 > > > > --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c > > > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c > > > > @@ -634,7 +634,8 @@ mmap_offset_attach(struct drm_i915_gem_object *obj, > > > > goto insert; > > > > /* Attempt to reap some mmap space from dead objects */ > > > > - err = intel_gt_retire_requests_timeout(&i915->gt, 
MAX_SCHEDULE_TIMEOUT); > > > > + err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT, > > > > + NULL); > > > > if (err) > > > > goto err; > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c > > > > index 8d77dcbad059..1742a8561f69 100644 > > > > --- a/drivers/gpu/drm/i915/gt/intel_gt.c > > > > +++ b/drivers/gpu/drm/i915/gt/intel_gt.c > > > > @@ -574,6 +574,24 @@ static void __intel_gt_disable(struct intel_gt *gt) > > > > GEM_BUG_ON(intel_gt_pm_is_awake(gt)); > > > > } > > > > +int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) > > > > +{ > > > > + long rtimeout; > > > > + > > > > + /* If the device is asleep, we have no requests outstanding */ > > > > + if (!intel_gt_pm_is_awake(gt)) > > > > + return 0; > > > > + > > > > + while ((timeout = intel_gt_retire_requests_timeout(gt, timeout, > > > > + &rtimeout)) > 0) { > > > > + cond_resched(); > > > > + if (signal_pending(current)) > > > > + return -EINTR; > > > > + } > > > > + > > > > + return timeout ? 
timeout : intel_uc_wait_for_idle(>->uc, rtimeout); > > > > +} > > > > + > > > > int intel_gt_init(struct intel_gt *gt) > > > > { > > > > int err; > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h > > > > index 7ec395cace69..c775043334bf 100644 > > > > --- a/drivers/gpu/drm/i915/gt/intel_gt.h > > > > +++ b/drivers/gpu/drm/i915/gt/intel_gt.h > > > > @@ -48,6 +48,8 @@ void intel_gt_driver_release(struct intel_gt *gt); > > > > void intel_gt_driver_late_release(struct intel_gt *gt); > > > > +int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); > > > > + > > > > void intel_gt_check_and_clear_faults(struct intel_gt *gt); > > > > void intel_gt_clear_error_registers(struct intel_gt *gt, > > > > intel_engine_mask_t engine_mask); > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c > > > > index 647eca9d867a..c6c702f236fa 100644 > > > > --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c > > > > +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c > > > > @@ -13,6 +13,7 @@ > > > > #include "intel_gt_pm.h" > > > > #include "intel_gt_requests.h" > > > > #include "intel_timeline.h" > > > > +#include "uc/intel_uc.h" > > > > static bool retire_requests(struct intel_timeline *tl) > > > > { > > > > @@ -130,7 +131,8 @@ void intel_engine_fini_retire(struct intel_engine_cs *engine) > > > > GEM_BUG_ON(engine->retire); > > > > } > > > > -long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) > > > > +long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout, > > > > + long *rtimeout) > > > > > > What is 'rtimeout', I know remaining, but it can be more self-descriptive to > > > start with. > > > > > > > 'remaining_timeout' it is. > > > > > It feels a bit churny for what it is. 
How plausible would be alternatives to > > > either change existing timeout to in/out, or measure sleep internally in > > > this function, or just risk sleeping twice as long by passing the original > > > timeout to uc idle as well? > > > > > > > Originally had it just passing in the same value, got review feedback > > saying I should pass in the adjusted value. Hard to make everyone happy. > > Ok. > > > > > { > > > > struct intel_gt_timelines *timelines = >->timelines; > > > > struct intel_timeline *tl, *tn; > > > > @@ -195,22 +197,10 @@ out_active: spin_lock(&timelines->lock); > > > > if (flush_submission(gt, timeout)) /* Wait, there's more! */ > > > > active_count++; > > > > - return active_count ? timeout : 0; > > > > -} > > > > - > > > > -int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) > > > > -{ > > > > - /* If the device is asleep, we have no requests outstanding */ > > > > - if (!intel_gt_pm_is_awake(gt)) > > > > - return 0; > > > > - > > > > - while ((timeout = intel_gt_retire_requests_timeout(gt, timeout)) > 0) { > > > > - cond_resched(); > > > > - if (signal_pending(current)) > > > > - return -EINTR; > > > > - } > > > > + if (rtimeout) > > > > + *rtimeout = timeout; > > > > - return timeout; > > > > + return active_count ? 
timeout : 0; > > > > } > > > > static void retire_work_handler(struct work_struct *work) > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h > > > > index fcc30a6e4fe9..4419787124e2 100644 > > > > --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.h > > > > +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h > > > > @@ -10,10 +10,11 @@ struct intel_engine_cs; > > > > struct intel_gt; > > > > struct intel_timeline; > > > > -long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout); > > > > +long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout, > > > > + long *rtimeout); > > > > static inline void intel_gt_retire_requests(struct intel_gt *gt) > > > > { > > > > - intel_gt_retire_requests_timeout(gt, 0); > > > > + intel_gt_retire_requests_timeout(gt, 0, NULL); > > > > } > > > > void intel_engine_init_retire(struct intel_engine_cs *engine); > > > > @@ -21,8 +22,6 @@ void intel_engine_add_retire(struct intel_engine_cs *engine, > > > > struct intel_timeline *tl); > > > > void intel_engine_fini_retire(struct intel_engine_cs *engine); > > > > -int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); > > > > - > > > > void intel_gt_init_requests(struct intel_gt *gt); > > > > void intel_gt_park_requests(struct intel_gt *gt); > > > > void intel_gt_unpark_requests(struct intel_gt *gt); > > > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h > > > > index 485e98f3f304..47eaa69809e8 100644 > > > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h > > > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h > > > > @@ -38,6 +38,8 @@ struct intel_guc { > > > > spinlock_t irq_lock; > > > > unsigned int msg_enabled_mask; > > > > + atomic_t outstanding_submission_g2h; > > > > + > > > > struct { > > > > bool enabled; > > > > void (*reset)(struct intel_guc *guc); > > > > @@ -239,6 +241,8 @@ static inline void intel_guc_disable_msg(struct intel_guc *guc, u32 mask) 
> > > > spin_unlock_irq(&guc->irq_lock); > > > > } > > > > +int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout); > > > > + > > > > int intel_guc_reset_engine(struct intel_guc *guc, > > > > struct intel_engine_cs *engine); > > > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > > > > index f1893030ca88..cf701056fa14 100644 > > > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > > > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > > > > @@ -111,6 +111,7 @@ void intel_guc_ct_init_early(struct intel_guc_ct *ct) > > > > INIT_LIST_HEAD(&ct->requests.incoming); > > > > INIT_WORK(&ct->requests.worker, ct_incoming_request_worker_func); > > > > tasklet_init(&ct->receive_tasklet, ct_receive_tasklet_func, (unsigned long)ct); > > > > + init_waitqueue_head(&ct->wq); > > > > } > > > > static inline const char *guc_ct_buffer_type_to_str(u32 type) > > > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h > > > > index 660bf37238e2..ab1b79ab960b 100644 > > > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h > > > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h > > > > @@ -10,6 +10,7 @@ > > > > #include <linux/spinlock.h> > > > > #include <linux/workqueue.h> > > > > #include <linux/ktime.h> > > > > +#include <linux/wait.h> > > > > #include "intel_guc_fwif.h" > > > > @@ -68,6 +69,9 @@ struct intel_guc_ct { > > > > struct tasklet_struct receive_tasklet; > > > > + /** @wq: wait queue for g2h chanenl */ > > > > + wait_queue_head_t wq; > > > > + > > > > struct { > > > > u16 last_fence; /* last fence used to send request */ > > > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > > > index ae0b386467e3..0ff7dd6d337d 100644 > > > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > > > @@ -253,6 +253,74 @@ static inline void 
set_lrc_desc_registered(struct intel_guc *guc, u32 id, > > > > xa_store_irq(&guc->context_lookup, id, ce, GFP_ATOMIC); > > > > } > > > > +static int guc_submission_busy_loop(struct intel_guc* guc, > > > > + const u32 *action, > > > > + u32 len, > > > > + u32 g2h_len_dw, > > > > + bool loop) > > > > +{ > > > > + int err; > > > > + > > > > + err = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); > > > > + > > > > + if (!err && g2h_len_dw) > > > > + atomic_inc(&guc->outstanding_submission_g2h); > > > > + > > > > + return err; > > > > +} > > > > + > > > > +static int guc_wait_for_pending_msg(struct intel_guc *guc, > > > > + atomic_t *wait_var, > > > > + bool interruptible, > > > > + long timeout) > > > > +{ > > > > + const int state = interruptible ? > > > > + TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; > > > > + DEFINE_WAIT(wait); > > > > + > > > > + might_sleep(); > > > > + GEM_BUG_ON(timeout < 0); > > > > + > > > > + if (!atomic_read(wait_var)) > > > > + return 0; > > > > + > > > > + if (!timeout) > > > > + return -ETIME; > > > > + > > > > + for (;;) { > > > > + prepare_to_wait(&guc->ct.wq, &wait, state); > > > > + > > > > + if (!atomic_read(wait_var)) > > > > + break; > > > > + > > > > + if (signal_pending_state(state, current)) { > > > > + timeout = -ERESTARTSYS; > > > > + break; > > > > + } > > > > + > > > > + if (!timeout) { > > > > + timeout = -ETIME; > > > > + break; > > > > + } > > > > + > > > > + timeout = io_schedule_timeout(timeout); > > > > + } > > > > + finish_wait(&guc->ct.wq, &wait); > > > > + > > > > + return (timeout < 0) ? timeout : 0; > > > > +} > > > > > > See if it is possible to simplify all this with wait_var_event and > > > wake_up_var. > > > > > > > Let me check on that. 
> > > > + > > > > +int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout) > > > > +{ > > > > + bool interruptible = true; > > > > + > > > > + if (unlikely(timeout < 0)) > > > > + timeout = -timeout, interruptible = false; > > > > + > > > > + return guc_wait_for_pending_msg(guc, &guc->outstanding_submission_g2h, > > > > + interruptible, timeout); > > > > +} > > > > + > > > > static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) > > > > { > > > > int err; > > > > @@ -279,6 +347,7 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) > > > > err = intel_guc_send_nb(guc, action, len, g2h_len_dw); > > > > if (!enabled && !err) { > > > > + atomic_inc(&guc->outstanding_submission_g2h); > > > > set_context_enabled(ce); > > > > } else if (!enabled) { > > > > clr_context_pending_enable(ce); > > > > @@ -734,7 +803,7 @@ static int __guc_action_register_context(struct intel_guc *guc, > > > > offset, > > > > }; > > > > - return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); > > > > + return guc_submission_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); > > > > } > > > > static int register_context(struct intel_context *ce) > > > > @@ -754,7 +823,7 @@ static int __guc_action_deregister_context(struct intel_guc *guc, > > > > guc_id, > > > > }; > > > > - return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), > > > > + return guc_submission_busy_loop(guc, action, ARRAY_SIZE(action), > > > > G2H_LEN_DW_DEREGISTER_CONTEXT, true); > > > > } > > > > @@ -871,7 +940,9 @@ static int guc_context_pin(struct intel_context *ce, void *vaddr) > > > > static void guc_context_unpin(struct intel_context *ce) > > > > { > > > > - unpin_guc_id(ce_to_guc(ce), ce); > > > > + struct intel_guc *guc = ce_to_guc(ce); > > > > + > > > > + unpin_guc_id(guc, ce); > > > > lrc_unpin(ce); > > > > } > > > > @@ -894,7 +965,7 @@ static void __guc_context_sched_disable(struct intel_guc *guc, > > > > intel_context_get(ce); > > > 
> - intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), > > > > + guc_submission_busy_loop(guc, action, ARRAY_SIZE(action), > > > > G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); > > > > } > > > > @@ -1437,6 +1508,15 @@ g2h_context_lookup(struct intel_guc *guc, u32 desc_idx) > > > > return ce; > > > > } > > > > +static void decr_outstanding_submission_g2h(struct intel_guc *guc) > > > > +{ > > > > + if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) { > > > > + smp_mb(); > > > > + if (waitqueue_active(&guc->ct.wq)) > > > > + wake_up_all(&guc->ct.wq); > > > > > > I keep pointing out this pattern is racy and at least needs comment why it > > > is safe. > > > > > > > There is a comment in wake queue code header saying why this is safe. I > > don't think we need to repeat this here. > > Yeah, _describing how to make it safe_, after it starts with: > > * NOTE: this function is lockless and requires care, incorrect usage _will_ > * lead to sporadic and non-obvious failure. > > Then it also says: > > * Also note that this 'optimization' trades a spin_lock() for an smp_mb(), > * which (when the lock is uncontended) are of roughly equal cost. > > I question the need to optimize this path since it means reader has to figure out if it is safe while a simple wake_up_all after atomic_dec_and_test would have done it. > > Is the case of no waiters a predominant one? It at least deserves a comment explaining why the optimisation is important. > I just didn't want to add a spin_lock if there is a known working code path without one and our code fits into that path. I can add a comment, but I don't really think it's necessary. 
Matt > Regards, > > Tvrtko > > > > > Matt > > > > > Regards, > > > > > > Tvrtko > > > > > > > + } > > > > +} > > > > + > > > > int intel_guc_deregister_done_process_msg(struct intel_guc *guc, > > > > const u32 *msg, > > > > u32 len) > > > > @@ -1472,6 +1552,8 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc, > > > > lrc_destroy(&ce->ref); > > > > } > > > > + decr_outstanding_submission_g2h(guc); > > > > + > > > > return 0; > > > > } > > > > @@ -1520,6 +1602,7 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc, > > > > spin_unlock_irqrestore(&ce->guc_state.lock, flags); > > > > } > > > > + decr_outstanding_submission_g2h(guc); > > > > intel_context_put(ce); > > > > return 0; > > > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h > > > > index 9c954c589edf..c4cef885e984 100644 > > > > --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h > > > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h > > > > @@ -81,6 +81,11 @@ uc_state_checkers(guc, guc_submission); > > > > #undef uc_state_checkers > > > > #undef __uc_state_checker > > > > +static inline int intel_uc_wait_for_idle(struct intel_uc *uc, long timeout) > > > > +{ > > > > + return intel_guc_wait_for_idle(&uc->guc, timeout); > > > > +} > > > > + > > > > #define intel_uc_ops_function(_NAME, _OPS, _TYPE, _RET) \ > > > > static inline _TYPE intel_uc_##_NAME(struct intel_uc *uc) \ > > > > { \ > > > > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c > > > > index 8dd374691102..bb29838d1cd7 100644 > > > > --- a/drivers/gpu/drm/i915/i915_debugfs.c > > > > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > > > > @@ -36,6 +36,7 @@ > > > > #include "gt/intel_gt_clock_utils.h" > > > > #include "gt/intel_gt.h" > > > > #include "gt/intel_gt_pm.h" > > > > +#include "gt/intel_gt.h" > > > > #include "gt/intel_gt_requests.h" > > > > #include "gt/intel_reset.h" > > > > #include "gt/intel_rc6.h" > > > > diff --git 
a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c > > > > index 4d2d59a9942b..2b73ddb11c66 100644 > > > > --- a/drivers/gpu/drm/i915/i915_gem_evict.c > > > > +++ b/drivers/gpu/drm/i915/i915_gem_evict.c > > > > @@ -27,6 +27,7 @@ > > > > */ > > > > #include "gem/i915_gem_context.h" > > > > +#include "gt/intel_gt.h" > > > > #include "gt/intel_gt_requests.h" > > > > #include "i915_drv.h" > > > > diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c > > > > index c130010a7033..1c721542e277 100644 > > > > --- a/drivers/gpu/drm/i915/selftests/igt_live_test.c > > > > +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c > > > > @@ -5,7 +5,7 @@ > > > > */ > > > > #include "i915_drv.h" > > > > -#include "gt/intel_gt_requests.h" > > > > +#include "gt/intel_gt.h" > > > > #include "../i915_selftest.h" > > > > #include "igt_flush_test.h" > > > > diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c > > > > index cf40004bc92a..6c06816e2b99 100644 > > > > --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c > > > > +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c > > > > @@ -51,7 +51,8 @@ void mock_device_flush(struct drm_i915_private *i915) > > > > do { > > > > for_each_engine(engine, gt, id) > > > > mock_engine_flush(engine); > > > > - } while (intel_gt_retire_requests_timeout(gt, MAX_SCHEDULE_TIMEOUT)); > > > > + } while (intel_gt_retire_requests_timeout(gt, MAX_SCHEDULE_TIMEOUT, > > > > + NULL)); > > > > } > > > > static void mock_device_release(struct drm_device *dev) > > > >
On 26/05/2021 19:18, Matthew Brost wrote: > On Wed, May 26, 2021 at 10:21:05AM +0100, Tvrtko Ursulin wrote: >> >> On 25/05/2021 18:07, Matthew Brost wrote: >>> On Tue, May 25, 2021 at 11:06:00AM +0100, Tvrtko Ursulin wrote: >>>> >>>> On 06/05/2021 20:14, Matthew Brost wrote: >>>>> When running the GuC the GPU can't be considered idle if the GuC still >>>>> has contexts pinned. As such, a call has been added in >>>>> intel_gt_wait_for_idle to idle the UC and in turn the GuC by waiting for >>>>> the number of unpinned contexts to go to zero. >>>>> >>>>> Cc: John Harrison <john.c.harrison@intel.com> >>>>> Signed-off-by: Matthew Brost <matthew.brost@intel.com> >>>>> --- >>>>> drivers/gpu/drm/i915/gem/i915_gem_mman.c | 3 +- >>>>> drivers/gpu/drm/i915/gt/intel_gt.c | 18 ++++ >>>>> drivers/gpu/drm/i915/gt/intel_gt.h | 2 + >>>>> drivers/gpu/drm/i915/gt/intel_gt_requests.c | 22 ++--- >>>>> drivers/gpu/drm/i915/gt/intel_gt_requests.h | 7 +- >>>>> drivers/gpu/drm/i915/gt/uc/intel_guc.h | 4 + >>>>> drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 1 + >>>>> drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h | 4 + >>>>> .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 91 ++++++++++++++++++- >>>>> drivers/gpu/drm/i915/gt/uc/intel_uc.h | 5 + >>>>> drivers/gpu/drm/i915/i915_debugfs.c | 1 + >>>>> drivers/gpu/drm/i915/i915_gem_evict.c | 1 + >>>>> .../gpu/drm/i915/selftests/igt_live_test.c | 2 +- >>>>> .../gpu/drm/i915/selftests/mock_gem_device.c | 3 +- >>>>> 14 files changed, 137 insertions(+), 27 deletions(-) >>>>> >>>>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c >>>>> index 8598a1c78a4c..2f5295c9408d 100644 >>>>> --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c >>>>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c >>>>> @@ -634,7 +634,8 @@ mmap_offset_attach(struct drm_i915_gem_object *obj, >>>>> goto insert; >>>>> /* Attempt to reap some mmap space from dead objects */ >>>>> - err = intel_gt_retire_requests_timeout(&i915->gt, 
MAX_SCHEDULE_TIMEOUT); >>>>> + err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT, >>>>> + NULL); >>>>> if (err) >>>>> goto err; >>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c >>>>> index 8d77dcbad059..1742a8561f69 100644 >>>>> --- a/drivers/gpu/drm/i915/gt/intel_gt.c >>>>> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c >>>>> @@ -574,6 +574,24 @@ static void __intel_gt_disable(struct intel_gt *gt) >>>>> GEM_BUG_ON(intel_gt_pm_is_awake(gt)); >>>>> } >>>>> +int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) >>>>> +{ >>>>> + long rtimeout; >>>>> + >>>>> + /* If the device is asleep, we have no requests outstanding */ >>>>> + if (!intel_gt_pm_is_awake(gt)) >>>>> + return 0; >>>>> + >>>>> + while ((timeout = intel_gt_retire_requests_timeout(gt, timeout, >>>>> + &rtimeout)) > 0) { >>>>> + cond_resched(); >>>>> + if (signal_pending(current)) >>>>> + return -EINTR; >>>>> + } >>>>> + >>>>> + return timeout ? timeout : intel_uc_wait_for_idle(>->uc, rtimeout); >>>>> +} >>>>> + >>>>> int intel_gt_init(struct intel_gt *gt) >>>>> { >>>>> int err; >>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h >>>>> index 7ec395cace69..c775043334bf 100644 >>>>> --- a/drivers/gpu/drm/i915/gt/intel_gt.h >>>>> +++ b/drivers/gpu/drm/i915/gt/intel_gt.h >>>>> @@ -48,6 +48,8 @@ void intel_gt_driver_release(struct intel_gt *gt); >>>>> void intel_gt_driver_late_release(struct intel_gt *gt); >>>>> +int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); >>>>> + >>>>> void intel_gt_check_and_clear_faults(struct intel_gt *gt); >>>>> void intel_gt_clear_error_registers(struct intel_gt *gt, >>>>> intel_engine_mask_t engine_mask); >>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c >>>>> index 647eca9d867a..c6c702f236fa 100644 >>>>> --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c >>>>> +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c 
>>>>> @@ -13,6 +13,7 @@ >>>>> #include "intel_gt_pm.h" >>>>> #include "intel_gt_requests.h" >>>>> #include "intel_timeline.h" >>>>> +#include "uc/intel_uc.h" >>>>> static bool retire_requests(struct intel_timeline *tl) >>>>> { >>>>> @@ -130,7 +131,8 @@ void intel_engine_fini_retire(struct intel_engine_cs *engine) >>>>> GEM_BUG_ON(engine->retire); >>>>> } >>>>> -long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) >>>>> +long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout, >>>>> + long *rtimeout) >>>> >>>> What is 'rtimeout', I know remaining, but it can be more self-descriptive to >>>> start with. >>>> >>> >>> 'remaining_timeout' it is. >>> >>>> It feels a bit churny for what it is. How plausible would be alternatives to >>>> either change existing timeout to in/out, or measure sleep internally in >>>> this function, or just risk sleeping twice as long by passing the original >>>> timeout to uc idle as well? >>>> >>> >>> Originally had it just passing in the same value, got review feedback >>> saying I should pass in the adjusted value. Hard to make everyone happy. >> >> Ok. >> >>>>> { >>>>> struct intel_gt_timelines *timelines = >->timelines; >>>>> struct intel_timeline *tl, *tn; >>>>> @@ -195,22 +197,10 @@ out_active: spin_lock(&timelines->lock); >>>>> if (flush_submission(gt, timeout)) /* Wait, there's more! */ >>>>> active_count++; >>>>> - return active_count ? timeout : 0; >>>>> -} >>>>> - >>>>> -int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) >>>>> -{ >>>>> - /* If the device is asleep, we have no requests outstanding */ >>>>> - if (!intel_gt_pm_is_awake(gt)) >>>>> - return 0; >>>>> - >>>>> - while ((timeout = intel_gt_retire_requests_timeout(gt, timeout)) > 0) { >>>>> - cond_resched(); >>>>> - if (signal_pending(current)) >>>>> - return -EINTR; >>>>> - } >>>>> + if (rtimeout) >>>>> + *rtimeout = timeout; >>>>> - return timeout; >>>>> + return active_count ? 
timeout : 0; >>>>> } >>>>> static void retire_work_handler(struct work_struct *work) >>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h >>>>> index fcc30a6e4fe9..4419787124e2 100644 >>>>> --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.h >>>>> +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h >>>>> @@ -10,10 +10,11 @@ struct intel_engine_cs; >>>>> struct intel_gt; >>>>> struct intel_timeline; >>>>> -long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout); >>>>> +long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout, >>>>> + long *rtimeout); >>>>> static inline void intel_gt_retire_requests(struct intel_gt *gt) >>>>> { >>>>> - intel_gt_retire_requests_timeout(gt, 0); >>>>> + intel_gt_retire_requests_timeout(gt, 0, NULL); >>>>> } >>>>> void intel_engine_init_retire(struct intel_engine_cs *engine); >>>>> @@ -21,8 +22,6 @@ void intel_engine_add_retire(struct intel_engine_cs *engine, >>>>> struct intel_timeline *tl); >>>>> void intel_engine_fini_retire(struct intel_engine_cs *engine); >>>>> -int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); >>>>> - >>>>> void intel_gt_init_requests(struct intel_gt *gt); >>>>> void intel_gt_park_requests(struct intel_gt *gt); >>>>> void intel_gt_unpark_requests(struct intel_gt *gt); >>>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h >>>>> index 485e98f3f304..47eaa69809e8 100644 >>>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h >>>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h >>>>> @@ -38,6 +38,8 @@ struct intel_guc { >>>>> spinlock_t irq_lock; >>>>> unsigned int msg_enabled_mask; >>>>> + atomic_t outstanding_submission_g2h; >>>>> + >>>>> struct { >>>>> bool enabled; >>>>> void (*reset)(struct intel_guc *guc); >>>>> @@ -239,6 +241,8 @@ static inline void intel_guc_disable_msg(struct intel_guc *guc, u32 mask) >>>>> spin_unlock_irq(&guc->irq_lock); >>>>> } >>>>> +int 
intel_guc_wait_for_idle(struct intel_guc *guc, long timeout); >>>>> + >>>>> int intel_guc_reset_engine(struct intel_guc *guc, >>>>> struct intel_engine_cs *engine); >>>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c >>>>> index f1893030ca88..cf701056fa14 100644 >>>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c >>>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c >>>>> @@ -111,6 +111,7 @@ void intel_guc_ct_init_early(struct intel_guc_ct *ct) >>>>> INIT_LIST_HEAD(&ct->requests.incoming); >>>>> INIT_WORK(&ct->requests.worker, ct_incoming_request_worker_func); >>>>> tasklet_init(&ct->receive_tasklet, ct_receive_tasklet_func, (unsigned long)ct); >>>>> + init_waitqueue_head(&ct->wq); >>>>> } >>>>> static inline const char *guc_ct_buffer_type_to_str(u32 type) >>>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h >>>>> index 660bf37238e2..ab1b79ab960b 100644 >>>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h >>>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h >>>>> @@ -10,6 +10,7 @@ >>>>> #include <linux/spinlock.h> >>>>> #include <linux/workqueue.h> >>>>> #include <linux/ktime.h> >>>>> +#include <linux/wait.h> >>>>> #include "intel_guc_fwif.h" >>>>> @@ -68,6 +69,9 @@ struct intel_guc_ct { >>>>> struct tasklet_struct receive_tasklet; >>>>> + /** @wq: wait queue for g2h chanenl */ >>>>> + wait_queue_head_t wq; >>>>> + >>>>> struct { >>>>> u16 last_fence; /* last fence used to send request */ >>>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c >>>>> index ae0b386467e3..0ff7dd6d337d 100644 >>>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c >>>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c >>>>> @@ -253,6 +253,74 @@ static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id, >>>>> xa_store_irq(&guc->context_lookup, id, ce, GFP_ATOMIC); >>>>> } >>>>> +static 
int guc_submission_busy_loop(struct intel_guc* guc, >>>>> + const u32 *action, >>>>> + u32 len, >>>>> + u32 g2h_len_dw, >>>>> + bool loop) >>>>> +{ >>>>> + int err; >>>>> + >>>>> + err = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); >>>>> + >>>>> + if (!err && g2h_len_dw) >>>>> + atomic_inc(&guc->outstanding_submission_g2h); >>>>> + >>>>> + return err; >>>>> +} >>>>> + >>>>> +static int guc_wait_for_pending_msg(struct intel_guc *guc, >>>>> + atomic_t *wait_var, >>>>> + bool interruptible, >>>>> + long timeout) >>>>> +{ >>>>> + const int state = interruptible ? >>>>> + TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; >>>>> + DEFINE_WAIT(wait); >>>>> + >>>>> + might_sleep(); >>>>> + GEM_BUG_ON(timeout < 0); >>>>> + >>>>> + if (!atomic_read(wait_var)) >>>>> + return 0; >>>>> + >>>>> + if (!timeout) >>>>> + return -ETIME; >>>>> + >>>>> + for (;;) { >>>>> + prepare_to_wait(&guc->ct.wq, &wait, state); >>>>> + >>>>> + if (!atomic_read(wait_var)) >>>>> + break; >>>>> + >>>>> + if (signal_pending_state(state, current)) { >>>>> + timeout = -ERESTARTSYS; >>>>> + break; >>>>> + } >>>>> + >>>>> + if (!timeout) { >>>>> + timeout = -ETIME; >>>>> + break; >>>>> + } >>>>> + >>>>> + timeout = io_schedule_timeout(timeout); >>>>> + } >>>>> + finish_wait(&guc->ct.wq, &wait); >>>>> + >>>>> + return (timeout < 0) ? timeout : 0; >>>>> +} >>>> >>>> See if it is possible to simplify all this with wait_var_event and >>>> wake_up_var. >>>> >>> >>> Let me check on that. 
>>>>> + >>>>> +int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout) >>>>> +{ >>>>> + bool interruptible = true; >>>>> + >>>>> + if (unlikely(timeout < 0)) >>>>> + timeout = -timeout, interruptible = false; >>>>> + >>>>> + return guc_wait_for_pending_msg(guc, &guc->outstanding_submission_g2h, >>>>> + interruptible, timeout); >>>>> +} >>>>> + >>>>> static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) >>>>> { >>>>> int err; >>>>> @@ -279,6 +347,7 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) >>>>> err = intel_guc_send_nb(guc, action, len, g2h_len_dw); >>>>> if (!enabled && !err) { >>>>> + atomic_inc(&guc->outstanding_submission_g2h); >>>>> set_context_enabled(ce); >>>>> } else if (!enabled) { >>>>> clr_context_pending_enable(ce); >>>>> @@ -734,7 +803,7 @@ static int __guc_action_register_context(struct intel_guc *guc, >>>>> offset, >>>>> }; >>>>> - return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); >>>>> + return guc_submission_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); >>>>> } >>>>> static int register_context(struct intel_context *ce) >>>>> @@ -754,7 +823,7 @@ static int __guc_action_deregister_context(struct intel_guc *guc, >>>>> guc_id, >>>>> }; >>>>> - return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), >>>>> + return guc_submission_busy_loop(guc, action, ARRAY_SIZE(action), >>>>> G2H_LEN_DW_DEREGISTER_CONTEXT, true); >>>>> } >>>>> @@ -871,7 +940,9 @@ static int guc_context_pin(struct intel_context *ce, void *vaddr) >>>>> static void guc_context_unpin(struct intel_context *ce) >>>>> { >>>>> - unpin_guc_id(ce_to_guc(ce), ce); >>>>> + struct intel_guc *guc = ce_to_guc(ce); >>>>> + >>>>> + unpin_guc_id(guc, ce); >>>>> lrc_unpin(ce); >>>>> } >>>>> @@ -894,7 +965,7 @@ static void __guc_context_sched_disable(struct intel_guc *guc, >>>>> intel_context_get(ce); >>>>> - intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), >>>>> + 
guc_submission_busy_loop(guc, action, ARRAY_SIZE(action), >>>>> G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); >>>>> } >>>>> @@ -1437,6 +1508,15 @@ g2h_context_lookup(struct intel_guc *guc, u32 desc_idx) >>>>> return ce; >>>>> } >>>>> +static void decr_outstanding_submission_g2h(struct intel_guc *guc) >>>>> +{ >>>>> + if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) { >>>>> + smp_mb(); >>>>> + if (waitqueue_active(&guc->ct.wq)) >>>>> + wake_up_all(&guc->ct.wq); >>>> >>>> I keep pointing out this pattern is racy and at least needs comment why it >>>> is safe. >>>> >>> >>> There is a comment in wake queue code header saying why this is safe. I >>> don't think we need to repeat this here. >> >> Yeah, _describing how to make it safe_, after it starts with: >> >> * NOTE: this function is lockless and requires care, incorrect usage _will_ >> * lead to sporadic and non-obvious failure. >> >> Then it also says: >> >> * Also note that this 'optimization' trades a spin_lock() for an smp_mb(), >> * which (when the lock is uncontended) are of roughly equal cost. >> >> I question the need to optimize this path since it means reader has to figure out if it is safe while a simple wake_up_all after atomic_dec_and_test would have done it. >> >> Is the case of no waiters a predominant one? It at least deserves a comment explaining why the optimisation is important. >> > > I just didn't want to add a spin_lock if there is known working code > path without one and our code fits into that path. I can add a comment > but I don't really think it necessary. Lock already exists in the wake_up_all, it is not about adding your own. 
As premature optimisations are usually best avoided it is simply about how you justify a): +static void decr_outstanding_submission_g2h(struct intel_guc *guc) +{ + if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) { + smp_mb(); + if (waitqueue_active(&guc->ct.wq)) + wake_up_all(&guc->ct.wq); When the easy alternative (easy to read, easy to review, easy to maintain) is b): +static void decr_outstanding_submission_g2h(struct intel_guc *guc) +{ + if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) + wake_up_all(&guc->ct.wq); For me as an external reader the question seems to be, I will say it again, is the case of no waiters a common one and is this a hot path to justify avoiding a function call by adding the mental complexity explained in the waitqueue_active comment? Here and in the other places in the GuC backend waitqueue_active is used. Regards, Tvrtko
On Thu, May 27, 2021 at 10:02:55AM +0100, Tvrtko Ursulin wrote: > > On 26/05/2021 19:18, Matthew Brost wrote: > > On Wed, May 26, 2021 at 10:21:05AM +0100, Tvrtko Ursulin wrote: > > > > > > On 25/05/2021 18:07, Matthew Brost wrote: > > > > On Tue, May 25, 2021 at 11:06:00AM +0100, Tvrtko Ursulin wrote: > > > > > > > > > > On 06/05/2021 20:14, Matthew Brost wrote: > > > > > > When running the GuC the GPU can't be considered idle if the GuC still > > > > > > has contexts pinned. As such, a call has been added in > > > > > > intel_gt_wait_for_idle to idle the UC and in turn the GuC by waiting for > > > > > > the number of unpinned contexts to go to zero. > > > > > > > > > > > > Cc: John Harrison <john.c.harrison@intel.com> > > > > > > Signed-off-by: Matthew Brost <matthew.brost@intel.com> > > > > > > --- > > > > > > drivers/gpu/drm/i915/gem/i915_gem_mman.c | 3 +- > > > > > > drivers/gpu/drm/i915/gt/intel_gt.c | 18 ++++ > > > > > > drivers/gpu/drm/i915/gt/intel_gt.h | 2 + > > > > > > drivers/gpu/drm/i915/gt/intel_gt_requests.c | 22 ++--- > > > > > > drivers/gpu/drm/i915/gt/intel_gt_requests.h | 7 +- > > > > > > drivers/gpu/drm/i915/gt/uc/intel_guc.h | 4 + > > > > > > drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 1 + > > > > > > drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h | 4 + > > > > > > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 91 ++++++++++++++++++- > > > > > > drivers/gpu/drm/i915/gt/uc/intel_uc.h | 5 + > > > > > > drivers/gpu/drm/i915/i915_debugfs.c | 1 + > > > > > > drivers/gpu/drm/i915/i915_gem_evict.c | 1 + > > > > > > .../gpu/drm/i915/selftests/igt_live_test.c | 2 +- > > > > > > .../gpu/drm/i915/selftests/mock_gem_device.c | 3 +- > > > > > > 14 files changed, 137 insertions(+), 27 deletions(-) > > > > > > > > > > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c > > > > > > index 8598a1c78a4c..2f5295c9408d 100644 > > > > > > --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c > > > > > > +++ 
b/drivers/gpu/drm/i915/gem/i915_gem_mman.c > > > > > > @@ -634,7 +634,8 @@ mmap_offset_attach(struct drm_i915_gem_object *obj, > > > > > > goto insert; > > > > > > /* Attempt to reap some mmap space from dead objects */ > > > > > > - err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT); > > > > > > + err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT, > > > > > > + NULL); > > > > > > if (err) > > > > > > goto err; > > > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c > > > > > > index 8d77dcbad059..1742a8561f69 100644 > > > > > > --- a/drivers/gpu/drm/i915/gt/intel_gt.c > > > > > > +++ b/drivers/gpu/drm/i915/gt/intel_gt.c > > > > > > @@ -574,6 +574,24 @@ static void __intel_gt_disable(struct intel_gt *gt) > > > > > > GEM_BUG_ON(intel_gt_pm_is_awake(gt)); > > > > > > } > > > > > > +int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) > > > > > > +{ > > > > > > + long rtimeout; > > > > > > + > > > > > > + /* If the device is asleep, we have no requests outstanding */ > > > > > > + if (!intel_gt_pm_is_awake(gt)) > > > > > > + return 0; > > > > > > + > > > > > > + while ((timeout = intel_gt_retire_requests_timeout(gt, timeout, > > > > > > + &rtimeout)) > 0) { > > > > > > + cond_resched(); > > > > > > + if (signal_pending(current)) > > > > > > + return -EINTR; > > > > > > + } > > > > > > + > > > > > > + return timeout ? 
timeout : intel_uc_wait_for_idle(>->uc, rtimeout); > > > > > > +} > > > > > > + > > > > > > int intel_gt_init(struct intel_gt *gt) > > > > > > { > > > > > > int err; > > > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h > > > > > > index 7ec395cace69..c775043334bf 100644 > > > > > > --- a/drivers/gpu/drm/i915/gt/intel_gt.h > > > > > > +++ b/drivers/gpu/drm/i915/gt/intel_gt.h > > > > > > @@ -48,6 +48,8 @@ void intel_gt_driver_release(struct intel_gt *gt); > > > > > > void intel_gt_driver_late_release(struct intel_gt *gt); > > > > > > +int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); > > > > > > + > > > > > > void intel_gt_check_and_clear_faults(struct intel_gt *gt); > > > > > > void intel_gt_clear_error_registers(struct intel_gt *gt, > > > > > > intel_engine_mask_t engine_mask); > > > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c > > > > > > index 647eca9d867a..c6c702f236fa 100644 > > > > > > --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c > > > > > > +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c > > > > > > @@ -13,6 +13,7 @@ > > > > > > #include "intel_gt_pm.h" > > > > > > #include "intel_gt_requests.h" > > > > > > #include "intel_timeline.h" > > > > > > +#include "uc/intel_uc.h" > > > > > > static bool retire_requests(struct intel_timeline *tl) > > > > > > { > > > > > > @@ -130,7 +131,8 @@ void intel_engine_fini_retire(struct intel_engine_cs *engine) > > > > > > GEM_BUG_ON(engine->retire); > > > > > > } > > > > > > -long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) > > > > > > +long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout, > > > > > > + long *rtimeout) > > > > > > > > > > What is 'rtimeout', I know remaining, but it can be more self-descriptive to > > > > > start with. > > > > > > > > > > > > > 'remaining_timeout' it is. > > > > > > > > > It feels a bit churny for what it is. 
How plausible would be alternatives to > > > > > either change existing timeout to in/out, or measure sleep internally in > > > > > this function, or just risk sleeping twice as long by passing the original > > > > > timeout to uc idle as well? > > > > > > > > > > > > > Originally had it just passing in the same value, got review feedback > > > > saying I should pass in the adjusted value. Hard to make everyone happy. > > > > > > Ok. > > > > > > > > > { > > > > > > struct intel_gt_timelines *timelines = >->timelines; > > > > > > struct intel_timeline *tl, *tn; > > > > > > @@ -195,22 +197,10 @@ out_active: spin_lock(&timelines->lock); > > > > > > if (flush_submission(gt, timeout)) /* Wait, there's more! */ > > > > > > active_count++; > > > > > > - return active_count ? timeout : 0; > > > > > > -} > > > > > > - > > > > > > -int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) > > > > > > -{ > > > > > > - /* If the device is asleep, we have no requests outstanding */ > > > > > > - if (!intel_gt_pm_is_awake(gt)) > > > > > > - return 0; > > > > > > - > > > > > > - while ((timeout = intel_gt_retire_requests_timeout(gt, timeout)) > 0) { > > > > > > - cond_resched(); > > > > > > - if (signal_pending(current)) > > > > > > - return -EINTR; > > > > > > - } > > > > > > + if (rtimeout) > > > > > > + *rtimeout = timeout; > > > > > > - return timeout; > > > > > > + return active_count ? 
timeout : 0; > > > > > > } > > > > > > static void retire_work_handler(struct work_struct *work) > > > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h > > > > > > index fcc30a6e4fe9..4419787124e2 100644 > > > > > > --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.h > > > > > > +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h > > > > > > @@ -10,10 +10,11 @@ struct intel_engine_cs; > > > > > > struct intel_gt; > > > > > > struct intel_timeline; > > > > > > -long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout); > > > > > > +long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout, > > > > > > + long *rtimeout); > > > > > > static inline void intel_gt_retire_requests(struct intel_gt *gt) > > > > > > { > > > > > > - intel_gt_retire_requests_timeout(gt, 0); > > > > > > + intel_gt_retire_requests_timeout(gt, 0, NULL); > > > > > > } > > > > > > void intel_engine_init_retire(struct intel_engine_cs *engine); > > > > > > @@ -21,8 +22,6 @@ void intel_engine_add_retire(struct intel_engine_cs *engine, > > > > > > struct intel_timeline *tl); > > > > > > void intel_engine_fini_retire(struct intel_engine_cs *engine); > > > > > > -int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); > > > > > > - > > > > > > void intel_gt_init_requests(struct intel_gt *gt); > > > > > > void intel_gt_park_requests(struct intel_gt *gt); > > > > > > void intel_gt_unpark_requests(struct intel_gt *gt); > > > > > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h > > > > > > index 485e98f3f304..47eaa69809e8 100644 > > > > > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h > > > > > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h > > > > > > @@ -38,6 +38,8 @@ struct intel_guc { > > > > > > spinlock_t irq_lock; > > > > > > unsigned int msg_enabled_mask; > > > > > > + atomic_t outstanding_submission_g2h; > > > > > > + > > > > > > struct { > > > > > > bool enabled; 
> > > > > > void (*reset)(struct intel_guc *guc); > > > > > > @@ -239,6 +241,8 @@ static inline void intel_guc_disable_msg(struct intel_guc *guc, u32 mask) > > > > > > spin_unlock_irq(&guc->irq_lock); > > > > > > } > > > > > > +int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout); > > > > > > + > > > > > > int intel_guc_reset_engine(struct intel_guc *guc, > > > > > > struct intel_engine_cs *engine); > > > > > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > > > > > > index f1893030ca88..cf701056fa14 100644 > > > > > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > > > > > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c > > > > > > @@ -111,6 +111,7 @@ void intel_guc_ct_init_early(struct intel_guc_ct *ct) > > > > > > INIT_LIST_HEAD(&ct->requests.incoming); > > > > > > INIT_WORK(&ct->requests.worker, ct_incoming_request_worker_func); > > > > > > tasklet_init(&ct->receive_tasklet, ct_receive_tasklet_func, (unsigned long)ct); > > > > > > + init_waitqueue_head(&ct->wq); > > > > > > } > > > > > > static inline const char *guc_ct_buffer_type_to_str(u32 type) > > > > > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h > > > > > > index 660bf37238e2..ab1b79ab960b 100644 > > > > > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h > > > > > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h > > > > > > @@ -10,6 +10,7 @@ > > > > > > #include <linux/spinlock.h> > > > > > > #include <linux/workqueue.h> > > > > > > #include <linux/ktime.h> > > > > > > +#include <linux/wait.h> > > > > > > #include "intel_guc_fwif.h" > > > > > > @@ -68,6 +69,9 @@ struct intel_guc_ct { > > > > > > struct tasklet_struct receive_tasklet; > > > > > > + /** @wq: wait queue for g2h chanenl */ > > > > > > + wait_queue_head_t wq; > > > > > > + > > > > > > struct { > > > > > > u16 last_fence; /* last fence used to send request */ > > > > > > diff --git 
a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > > > > > index ae0b386467e3..0ff7dd6d337d 100644 > > > > > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > > > > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > > > > > @@ -253,6 +253,74 @@ static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id, > > > > > > xa_store_irq(&guc->context_lookup, id, ce, GFP_ATOMIC); > > > > > > } > > > > > > +static int guc_submission_busy_loop(struct intel_guc* guc, > > > > > > + const u32 *action, > > > > > > + u32 len, > > > > > > + u32 g2h_len_dw, > > > > > > + bool loop) > > > > > > +{ > > > > > > + int err; > > > > > > + > > > > > > + err = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); > > > > > > + > > > > > > + if (!err && g2h_len_dw) > > > > > > + atomic_inc(&guc->outstanding_submission_g2h); > > > > > > + > > > > > > + return err; > > > > > > +} > > > > > > + > > > > > > +static int guc_wait_for_pending_msg(struct intel_guc *guc, > > > > > > + atomic_t *wait_var, > > > > > > + bool interruptible, > > > > > > + long timeout) > > > > > > +{ > > > > > > + const int state = interruptible ? 
> > > > > > + TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; > > > > > > + DEFINE_WAIT(wait); > > > > > > + > > > > > > + might_sleep(); > > > > > > + GEM_BUG_ON(timeout < 0); > > > > > > + > > > > > > + if (!atomic_read(wait_var)) > > > > > > + return 0; > > > > > > + > > > > > > + if (!timeout) > > > > > > + return -ETIME; > > > > > > + > > > > > > + for (;;) { > > > > > > + prepare_to_wait(&guc->ct.wq, &wait, state); > > > > > > + > > > > > > + if (!atomic_read(wait_var)) > > > > > > + break; > > > > > > + > > > > > > + if (signal_pending_state(state, current)) { > > > > > > + timeout = -ERESTARTSYS; > > > > > > + break; > > > > > > + } > > > > > > + > > > > > > + if (!timeout) { > > > > > > + timeout = -ETIME; > > > > > > + break; > > > > > > + } > > > > > > + > > > > > > + timeout = io_schedule_timeout(timeout); > > > > > > + } > > > > > > + finish_wait(&guc->ct.wq, &wait); > > > > > > + > > > > > > + return (timeout < 0) ? timeout : 0; > > > > > > +} > > > > > > > > > > See if it is possible to simplify all this with wait_var_event and > > > > > wake_up_var. > > > > > > > > > > > > > Let me check on that. 
> > > > > > + > > > > > > +int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout) > > > > > > +{ > > > > > > + bool interruptible = true; > > > > > > + > > > > > > + if (unlikely(timeout < 0)) > > > > > > + timeout = -timeout, interruptible = false; > > > > > > + > > > > > > + return guc_wait_for_pending_msg(guc, &guc->outstanding_submission_g2h, > > > > > > + interruptible, timeout); > > > > > > +} > > > > > > + > > > > > > static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) > > > > > > { > > > > > > int err; > > > > > > @@ -279,6 +347,7 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) > > > > > > err = intel_guc_send_nb(guc, action, len, g2h_len_dw); > > > > > > if (!enabled && !err) { > > > > > > + atomic_inc(&guc->outstanding_submission_g2h); > > > > > > set_context_enabled(ce); > > > > > > } else if (!enabled) { > > > > > > clr_context_pending_enable(ce); > > > > > > @@ -734,7 +803,7 @@ static int __guc_action_register_context(struct intel_guc *guc, > > > > > > offset, > > > > > > }; > > > > > > - return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); > > > > > > + return guc_submission_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); > > > > > > } > > > > > > static int register_context(struct intel_context *ce) > > > > > > @@ -754,7 +823,7 @@ static int __guc_action_deregister_context(struct intel_guc *guc, > > > > > > guc_id, > > > > > > }; > > > > > > - return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), > > > > > > + return guc_submission_busy_loop(guc, action, ARRAY_SIZE(action), > > > > > > G2H_LEN_DW_DEREGISTER_CONTEXT, true); > > > > > > } > > > > > > @@ -871,7 +940,9 @@ static int guc_context_pin(struct intel_context *ce, void *vaddr) > > > > > > static void guc_context_unpin(struct intel_context *ce) > > > > > > { > > > > > > - unpin_guc_id(ce_to_guc(ce), ce); > > > > > > + struct intel_guc *guc = ce_to_guc(ce); > > > > > > + > > > > > > + 
unpin_guc_id(guc, ce); > > > > > > lrc_unpin(ce); > > > > > > } > > > > > > @@ -894,7 +965,7 @@ static void __guc_context_sched_disable(struct intel_guc *guc, > > > > > > intel_context_get(ce); > > > > > > - intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), > > > > > > + guc_submission_busy_loop(guc, action, ARRAY_SIZE(action), > > > > > > G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); > > > > > > } > > > > > > @@ -1437,6 +1508,15 @@ g2h_context_lookup(struct intel_guc *guc, u32 desc_idx) > > > > > > return ce; > > > > > > } > > > > > > +static void decr_outstanding_submission_g2h(struct intel_guc *guc) > > > > > > +{ > > > > > > + if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) { > > > > > > + smp_mb(); > > > > > > + if (waitqueue_active(&guc->ct.wq)) > > > > > > + wake_up_all(&guc->ct.wq); > > > > > > > > > > I keep pointing out this pattern is racy and at least needs comment why it > > > > > is safe. > > > > > > > > > > > > > There is a comment in wake queue code header saying why this is safe. I > > > > don't think we need to repeat this here. > > > > > > Yeah, _describing how to make it safe_, after it starts with: > > > > > > * NOTE: this function is lockless and requires care, incorrect usage _will_ > > > * lead to sporadic and non-obvious failure. > > > > > > Then it also says: > > > > > > * Also note that this 'optimization' trades a spin_lock() for an smp_mb(), > > > * which (when the lock is uncontended) are of roughly equal cost. > > > > > > I question the need to optimize this path since it means reader has to figure out if it is safe while a simple wake_up_all after atomic_dec_and_test would have done it. > > > > > > Is the case of no waiters a predominant one? It at least deserves a comment explaining why the optimisation is important. > > > > > > > I just didn't want to add a spin_lock if there is known working code > > path without one and our code fits into that path. I can add a comment > > but I don't really think it necessary. 
> > Lock already exists in the wake_up_all, it is not about adding your own. > > As premature optimisations are usually best avoided it is simply about how > do you justify a): > > +static void decr_outstanding_submission_g2h(struct intel_guc *guc) > +{ > + if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) { > + smp_mb(); > + if (waitqueue_active(&guc->ct.wq)) > + wake_up_all(&guc->ct.wq); > > When the easy alternative (easy to read, easy to review, easy to maintain) > is b): > > +static void decr_outstanding_submission_g2h(struct intel_guc *guc) > +{ > + if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) > + wake_up_all(&guc->ct.wq); > I'll go with option B. Matt > For me as external reader the question seems to be, I will say it again, is > the case of no waiters a common one and is this a hot path to justify > avoiding a function call by adding the mental complexity explained in the > waitqueue_active comment? Here and in the other places in the GuC backend > waitqueue_active is used. > > Regards, > > Tvrtko
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 8598a1c78a4c..2f5295c9408d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -634,7 +634,8 @@ mmap_offset_attach(struct drm_i915_gem_object *obj, goto insert; /* Attempt to reap some mmap space from dead objects */ - err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT); + err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT, + NULL); if (err) goto err; diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 8d77dcbad059..1742a8561f69 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -574,6 +574,24 @@ static void __intel_gt_disable(struct intel_gt *gt) GEM_BUG_ON(intel_gt_pm_is_awake(gt)); } +int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) +{ + long rtimeout; + + /* If the device is asleep, we have no requests outstanding */ + if (!intel_gt_pm_is_awake(gt)) + return 0; + + while ((timeout = intel_gt_retire_requests_timeout(gt, timeout, + &rtimeout)) > 0) { + cond_resched(); + if (signal_pending(current)) + return -EINTR; + } + + return timeout ? 
timeout : intel_uc_wait_for_idle(&gt->uc, rtimeout); +} + int intel_gt_init(struct intel_gt *gt) { int err; diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 7ec395cace69..c775043334bf 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -48,6 +48,8 @@ void intel_gt_driver_release(struct intel_gt *gt); void intel_gt_driver_late_release(struct intel_gt *gt); +int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); + void intel_gt_check_and_clear_faults(struct intel_gt *gt); void intel_gt_clear_error_registers(struct intel_gt *gt, intel_engine_mask_t engine_mask); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index 647eca9d867a..c6c702f236fa 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -13,6 +13,7 @@ #include "intel_gt_pm.h" #include "intel_gt_requests.h" #include "intel_timeline.h" +#include "uc/intel_uc.h" static bool retire_requests(struct intel_timeline *tl) { @@ -130,7 +131,8 @@ void intel_engine_fini_retire(struct intel_engine_cs *engine) GEM_BUG_ON(engine->retire); } -long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) +long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout, + long *rtimeout) { struct intel_gt_timelines *timelines = &gt->timelines; struct intel_timeline *tl, *tn; @@ -195,22 +197,10 @@ out_active: spin_lock(&timelines->lock); if (flush_submission(gt, timeout)) /* Wait, there's more! */ active_count++; - return active_count ? 
timeout : 0; -} - -int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) -{ - /* If the device is asleep, we have no requests outstanding */ - if (!intel_gt_pm_is_awake(gt)) - return 0; - - while ((timeout = intel_gt_retire_requests_timeout(gt, timeout)) > 0) { - cond_resched(); - if (signal_pending(current)) - return -EINTR; - } + if (rtimeout) + *rtimeout = timeout; - return timeout; + return active_count ? timeout : 0; } static void retire_work_handler(struct work_struct *work) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h index fcc30a6e4fe9..4419787124e2 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h @@ -10,10 +10,11 @@ struct intel_engine_cs; struct intel_gt; struct intel_timeline; -long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout); +long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout, + long *rtimeout); static inline void intel_gt_retire_requests(struct intel_gt *gt) { - intel_gt_retire_requests_timeout(gt, 0); + intel_gt_retire_requests_timeout(gt, 0, NULL); } void intel_engine_init_retire(struct intel_engine_cs *engine); @@ -21,8 +22,6 @@ void intel_engine_add_retire(struct intel_engine_cs *engine, struct intel_timeline *tl); void intel_engine_fini_retire(struct intel_engine_cs *engine); -int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); - void intel_gt_init_requests(struct intel_gt *gt); void intel_gt_park_requests(struct intel_gt *gt); void intel_gt_unpark_requests(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 485e98f3f304..47eaa69809e8 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -38,6 +38,8 @@ struct intel_guc { spinlock_t irq_lock; unsigned int msg_enabled_mask; + atomic_t outstanding_submission_g2h; + struct { bool enabled; void (*reset)(struct 
intel_guc *guc); @@ -239,6 +241,8 @@ static inline void intel_guc_disable_msg(struct intel_guc *guc, u32 mask) spin_unlock_irq(&guc->irq_lock); } +int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout); + int intel_guc_reset_engine(struct intel_guc *guc, struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index f1893030ca88..cf701056fa14 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -111,6 +111,7 @@ void intel_guc_ct_init_early(struct intel_guc_ct *ct) INIT_LIST_HEAD(&ct->requests.incoming); INIT_WORK(&ct->requests.worker, ct_incoming_request_worker_func); tasklet_init(&ct->receive_tasklet, ct_receive_tasklet_func, (unsigned long)ct); + init_waitqueue_head(&ct->wq); } static inline const char *guc_ct_buffer_type_to_str(u32 type) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h index 660bf37238e2..ab1b79ab960b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h @@ -10,6 +10,7 @@ #include <linux/spinlock.h> #include <linux/workqueue.h> #include <linux/ktime.h> +#include <linux/wait.h> #include "intel_guc_fwif.h" @@ -68,6 +69,9 @@ struct intel_guc_ct { struct tasklet_struct receive_tasklet; + /** @wq: wait queue for g2h channel */ + wait_queue_head_t wq; + struct { u16 last_fence; /* last fence used to send request */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index ae0b386467e3..0ff7dd6d337d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -253,6 +253,74 @@ static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id, xa_store_irq(&guc->context_lookup, id, ce, GFP_ATOMIC); } +static int guc_submission_busy_loop(struct intel_guc* guc, + const u32 *action, + u32 len, + 
u32 g2h_len_dw, + bool loop) +{ + int err; + + err = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); + + if (!err && g2h_len_dw) + atomic_inc(&guc->outstanding_submission_g2h); + + return err; +} + +static int guc_wait_for_pending_msg(struct intel_guc *guc, + atomic_t *wait_var, + bool interruptible, + long timeout) +{ + const int state = interruptible ? + TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; + DEFINE_WAIT(wait); + + might_sleep(); + GEM_BUG_ON(timeout < 0); + + if (!atomic_read(wait_var)) + return 0; + + if (!timeout) + return -ETIME; + + for (;;) { + prepare_to_wait(&guc->ct.wq, &wait, state); + + if (!atomic_read(wait_var)) + break; + + if (signal_pending_state(state, current)) { + timeout = -ERESTARTSYS; + break; + } + + if (!timeout) { + timeout = -ETIME; + break; + } + + timeout = io_schedule_timeout(timeout); + } + finish_wait(&guc->ct.wq, &wait); + + return (timeout < 0) ? timeout : 0; +} + +int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout) +{ + bool interruptible = true; + + if (unlikely(timeout < 0)) + timeout = -timeout, interruptible = false; + + return guc_wait_for_pending_msg(guc, &guc->outstanding_submission_g2h, + interruptible, timeout); +} + static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) { int err; @@ -279,6 +347,7 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) err = intel_guc_send_nb(guc, action, len, g2h_len_dw); if (!enabled && !err) { + atomic_inc(&guc->outstanding_submission_g2h); set_context_enabled(ce); } else if (!enabled) { clr_context_pending_enable(ce); @@ -734,7 +803,7 @@ static int __guc_action_register_context(struct intel_guc *guc, offset, }; - return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); + return guc_submission_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); } static int register_context(struct intel_context *ce) @@ -754,7 +823,7 @@ static int __guc_action_deregister_context(struct intel_guc *guc, guc_id, 
}; - return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), + return guc_submission_busy_loop(guc, action, ARRAY_SIZE(action), G2H_LEN_DW_DEREGISTER_CONTEXT, true); } @@ -871,7 +940,9 @@ static int guc_context_pin(struct intel_context *ce, void *vaddr) static void guc_context_unpin(struct intel_context *ce) { - unpin_guc_id(ce_to_guc(ce), ce); + struct intel_guc *guc = ce_to_guc(ce); + + unpin_guc_id(guc, ce); lrc_unpin(ce); } @@ -894,7 +965,7 @@ static void __guc_context_sched_disable(struct intel_guc *guc, intel_context_get(ce); - intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), + guc_submission_busy_loop(guc, action, ARRAY_SIZE(action), G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); } @@ -1437,6 +1508,15 @@ g2h_context_lookup(struct intel_guc *guc, u32 desc_idx) return ce; } +static void decr_outstanding_submission_g2h(struct intel_guc *guc) +{ + if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) { + smp_mb(); + if (waitqueue_active(&guc->ct.wq)) + wake_up_all(&guc->ct.wq); + } +} + int intel_guc_deregister_done_process_msg(struct intel_guc *guc, const u32 *msg, u32 len) @@ -1472,6 +1552,8 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc, lrc_destroy(&ce->ref); } + decr_outstanding_submission_g2h(guc); + return 0; } @@ -1520,6 +1602,7 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc, spin_unlock_irqrestore(&ce->guc_state.lock, flags); } + decr_outstanding_submission_g2h(guc); intel_context_put(ce); return 0; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h index 9c954c589edf..c4cef885e984 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h @@ -81,6 +81,11 @@ uc_state_checkers(guc, guc_submission); #undef uc_state_checkers #undef __uc_state_checker +static inline int intel_uc_wait_for_idle(struct intel_uc *uc, long timeout) +{ + return intel_guc_wait_for_idle(&uc->guc, timeout); +} + #define intel_uc_ops_function(_NAME, _OPS, 
_TYPE, _RET) \ static inline _TYPE intel_uc_##_NAME(struct intel_uc *uc) \ { \ diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 8dd374691102..bb29838d1cd7 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -36,6 +36,7 @@ #include "gt/intel_gt_clock_utils.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" +#include "gt/intel_gt.h" #include "gt/intel_gt_requests.h" #include "gt/intel_reset.h" #include "gt/intel_rc6.h" diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 4d2d59a9942b..2b73ddb11c66 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -27,6 +27,7 @@ */ #include "gem/i915_gem_context.h" +#include "gt/intel_gt.h" #include "gt/intel_gt_requests.h" #include "i915_drv.h" diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c index c130010a7033..1c721542e277 100644 --- a/drivers/gpu/drm/i915/selftests/igt_live_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c @@ -5,7 +5,7 @@ */ #include "i915_drv.h" -#include "gt/intel_gt_requests.h" +#include "gt/intel_gt.h" #include "../i915_selftest.h" #include "igt_flush_test.h" diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index cf40004bc92a..6c06816e2b99 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -51,7 +51,8 @@ void mock_device_flush(struct drm_i915_private *i915) do { for_each_engine(engine, gt, id) mock_engine_flush(engine); - } while (intel_gt_retire_requests_timeout(gt, MAX_SCHEDULE_TIMEOUT)); + } while (intel_gt_retire_requests_timeout(gt, MAX_SCHEDULE_TIMEOUT, + NULL)); } static void mock_device_release(struct drm_device *dev)
When running the GuC the GPU can't be considered idle if the GuC still has contexts pinned. As such, a call has been added in intel_gt_wait_for_idle to idle the UC and in turn the GuC by waiting for the number of unpinned contexts to go to zero. Cc: John Harrison <john.c.harrison@intel.com> Signed-off-by: Matthew Brost <matthew.brost@intel.com> --- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 3 +- drivers/gpu/drm/i915/gt/intel_gt.c | 18 ++++ drivers/gpu/drm/i915/gt/intel_gt.h | 2 + drivers/gpu/drm/i915/gt/intel_gt_requests.c | 22 ++--- drivers/gpu/drm/i915/gt/intel_gt_requests.h | 7 +- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 4 + drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 1 + drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h | 4 + .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 91 ++++++++++++++++++- drivers/gpu/drm/i915/gt/uc/intel_uc.h | 5 + drivers/gpu/drm/i915/i915_debugfs.c | 1 + drivers/gpu/drm/i915/i915_gem_evict.c | 1 + .../gpu/drm/i915/selftests/igt_live_test.c | 2 +- .../gpu/drm/i915/selftests/mock_gem_device.c | 3 +- 14 files changed, 137 insertions(+), 27 deletions(-)