Message ID | 20220819123904.913750-1-matthew.auld@intel.com (mailing list archive)
---|---
State | New, archived
Series | Revert "drm/i915/guc: Add delay to disable scheduling after pin count goes to zero"
On 8/19/2022 05:39, Matthew Auld wrote:
> This reverts commit 6a079903847cce1dd06345127d2a32f26d2cd9c6.
>
> Everything in CI using GuC is now timing out[1], and killing the machine
> with this change (perhaps a deadlock?). CI was recently on fire due to
> some changes coming in from -rc1, so likely the pre-merge CI results for
> this series were invalid? For now just revert, unless GuC experts
> already have a fix in mind.
>
> [1] https://intel-gfx-ci.01.org/tree/drm-tip/index.html?
>
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Matthew Brost <matthew.brost@intel.com>
> Cc: Alan Previn <alan.previn.teres.alexis@intel.com>
> Cc: John Harrison <John.C.Harrison@Intel.com>

Reviewed-by: John Harrison <John.C.Harrison@Intel.com>

Given that CI was claiming a pass for the original patch set, no we
don't have a fix in mind. It is most frustrating when CI says all green
if the entire universe is so broken that no tests were even running :(.

John.

> ---
>  drivers/gpu/drm/i915/gem/i915_gem_context.c   |   2 +-
>  drivers/gpu/drm/i915/gt/intel_context.h       |   8 -
>  drivers/gpu/drm/i915/gt/intel_context_types.h |   7 -
>  drivers/gpu/drm/i915/gt/uc/intel_guc.h        |  17 +-
>  .../gpu/drm/i915/gt/uc/intel_guc_debugfs.c    |  60 -------
>  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 154 +++---------------
>  drivers/gpu/drm/i915/i915_selftest.h          |   2 -
>  7 files changed, 27 insertions(+), 223 deletions(-)

[remainder of the quoted patch snipped - the full diff appears with the patch below]
Will look into this - apologies for the trouble Matt.
...alan

-----Original Message-----
From: Harrison, John C <john.c.harrison@intel.com>
Sent: Friday, August 19, 2022 8:46 AM
To: Auld, Matthew <matthew.auld@intel.com>; intel-gfx@lists.freedesktop.org
Cc: Brost, Matthew <matthew.brost@intel.com>; Teres Alexis, Alan Previn <alan.previn.teres.alexis@intel.com>
Subject: Re: [PATCH] Revert "drm/i915/guc: Add delay to disable scheduling after pin count goes to zero"

[quoted reply and patch snipped - see John's message above]
I think I found the problem - will trybot next before reposting a new
rev. It was a terribly careless typo when rebasing from internal for
drm-tip: guc_context_sched_disable called do_sched_disable, which was
supposed to call __guc_context_sched_disable (if the context was really
meant to be finally disabled) but instead called
guc_context_sched_disable (no underscores) again, causing an infinite
loop whenever a context actually reached the point of being disabled.
...alan

On Fri, 2022-08-19 at 16:47 +0000, Teres Alexis, Alan Previn wrote:
> Will look into this - apologies for the trouble Matt.
> ...alan
>
> -----Original Message-----
> From: Harrison, John C <john.c.harrison@intel.com>
> Sent: Friday, August 19, 2022 8:46 AM
> To: Auld, Matthew <matthew.auld@intel.com>; intel-gfx@lists.freedesktop.org
> Cc: Brost, Matthew <matthew.brost@intel.com>; Teres Alexis, Alan Previn <alan.previn.teres.alexis@intel.com>
> Subject: Re: [PATCH] Revert "drm/i915/guc: Add delay to disable scheduling after pin count goes to zero"
>
> [earlier thread and quoted patch snipped]
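[Editor's note: a minimal, self-contained C sketch of the recursion Alan describes. The function names mirror the i915 ones, but the bodies are hypothetical stand-ins, not the real driver code.]

/*
 * Sketch of the rebase typo: do_sched_disable() was meant to call the
 * double-underscore helper that actually issues the schedule-disable,
 * but instead re-entered its own caller.
 */
#include <stdbool.h>

static bool should_really_disable = true;	/* stand-in for the bypass checks */

static void __guc_context_sched_disable(void)
{
	/* the real helper sends the schedule-disable H2G to the GuC */
}

static void guc_context_sched_disable(void);

static void do_sched_disable(void)
{
	if (!should_really_disable)
		return;
	guc_context_sched_disable();	/* the typo: re-enters the caller... */
	/* intended: __guc_context_sched_disable(); */
}

static void guc_context_sched_disable(void)
{
	do_sched_disable();		/* ...which loops back here, unbounded */
}

int main(void)
{
	guc_context_sched_disable();	/* never returns: stack exhaustion */
	return 0;
}

Compiled as-is this never returns, which matches the "killing the machine (perhaps a deadlock?)" symptom in the revert's commit message.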
This reverts commit 6a079903847cce1dd06345127d2a32f26d2cd9c6.

Everything in CI using GuC is now timing out[1], and killing the machine
with this change (perhaps a deadlock?). CI was recently on fire due to
some changes coming in from -rc1, so likely the pre-merge CI results for
this series were invalid? For now just revert, unless GuC experts
already have a fix in mind.

[1] https://intel-gfx-ci.01.org/tree/drm-tip/index.html?

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Alan Previn <alan.previn.teres.alexis@intel.com>
Cc: John Harrison <John.C.Harrison@Intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |   2 +-
 drivers/gpu/drm/i915/gt/intel_context.h       |   8 -
 drivers/gpu/drm/i915/gt/intel_context_types.h |   7 -
 drivers/gpu/drm/i915/gt/uc/intel_guc.h        |  17 +-
 .../gpu/drm/i915/gt/uc/intel_guc_debugfs.c    |  60 -------
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 154 +++---------------
 drivers/gpu/drm/i915/i915_selftest.h          |   2 -
 7 files changed, 27 insertions(+), 223 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index df7fd1b019ec..dabdfe09f5e5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1454,7 +1454,7 @@ static void engines_idle_release(struct i915_gem_context *ctx,
 		int err;
 
 		/* serialises with execbuf */
-		intel_context_close(ce);
+		set_bit(CONTEXT_CLOSED_BIT, &ce->flags);
 		if (!intel_context_pin_if_active(ce))
 			continue;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index f96420f0b5bb..8e2d70630c49 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -276,14 +276,6 @@ static inline bool intel_context_is_barrier(const struct intel_context *ce)
 	return test_bit(CONTEXT_BARRIER_BIT, &ce->flags);
 }
 
-static inline void intel_context_close(struct intel_context *ce)
-{
-	set_bit(CONTEXT_CLOSED_BIT, &ce->flags);
-
-	if (ce->ops->close)
-		ce->ops->close(ce);
-}
-
 static inline bool intel_context_is_closed(const struct intel_context *ce)
 {
 	return test_bit(CONTEXT_CLOSED_BIT, &ce->flags);
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 86ac84e2edb9..04eacae1aca5 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -43,8 +43,6 @@ struct intel_context_ops {
 	void (*revoke)(struct intel_context *ce, struct i915_request *rq,
 		       unsigned int preempt_timeout_ms);
 
-	void (*close)(struct intel_context *ce);
-
 	int (*pre_pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr);
 	int (*pin)(struct intel_context *ce, void *vaddr);
 	void (*unpin)(struct intel_context *ce);
@@ -210,11 +208,6 @@ struct intel_context {
 			 * each priority bucket
 			 */
 			u32 prio_count[GUC_CLIENT_PRIORITY_NUM];
-			/**
-			 * @sched_disable_delay: worker to disable scheduling on this
-			 * context
-			 */
-			struct delayed_work sched_disable_delay;
 		} guc_state;
 
 	struct {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 944b549b8797..804133df1ac9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -112,10 +112,6 @@ struct intel_guc {
		 * refs
		 */
		struct list_head guc_id_list;
-		/**
-		 * @guc_ids_in_use: Number single-lrc guc_ids in use
-		 */
-		u16 guc_ids_in_use;
		/**
		 * @destroyed_contexts: list of contexts waiting to be destroyed
		 * (deregistered with the GuC)
@@ -136,16 +132,6 @@ struct intel_guc {
		 * @reset_fail_mask: mask of engines that failed to reset
		 */
		intel_engine_mask_t reset_fail_mask;
-		/**
-		 * @sched_disable_delay_ms: schedule disable delay, in ms, for
-		 * contexts
-		 */
-		u64 sched_disable_delay_ms;
-		/**
-		 * @sched_disable_gucid_threshold: threshold of min remaining available
-		 * guc_ids before we start bypassing the schedule disable delay
-		 */
-		int sched_disable_gucid_threshold;
	} submission_state;

	/**
@@ -475,10 +461,9 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc);
 void intel_guc_submission_cancel_requests(struct intel_guc *guc);
 
 void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p);
-void intel_guc_dump_time_info(struct intel_guc *guc, struct drm_printer *p);
 
 void intel_guc_write_barrier(struct intel_guc *guc);
 
-int intel_guc_sched_disable_gucid_threshold_max(struct intel_guc *guc);
+void intel_guc_dump_time_info(struct intel_guc *guc, struct drm_printer *p);
 
 #endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c
index c91b150bb7ac..25f09a420561 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c
@@ -71,72 +71,12 @@ static bool intel_eval_slpc_support(void *data)
 	return intel_guc_slpc_is_used(guc);
 }
 
-static int guc_sched_disable_delay_ms_get(void *data, u64 *val)
-{
-	struct intel_guc *guc = data;
-
-	if (!intel_guc_submission_is_used(guc))
-		return -ENODEV;
-
-	*val = guc->submission_state.sched_disable_delay_ms;
-
-	return 0;
-}
-
-static int guc_sched_disable_delay_ms_set(void *data, u64 val)
-{
-	struct intel_guc *guc = data;
-
-	if (!intel_guc_submission_is_used(guc))
-		return -ENODEV;
-
-	guc->submission_state.sched_disable_delay_ms = val;
-
-	return 0;
-}
-DEFINE_SIMPLE_ATTRIBUTE(guc_sched_disable_delay_ms_fops,
-			guc_sched_disable_delay_ms_get,
-			guc_sched_disable_delay_ms_set, "%lld\n");
-
-static int guc_sched_disable_gucid_threshold_get(void *data, u64 *val)
-{
-	struct intel_guc *guc = data;
-
-	if (!intel_guc_submission_is_used(guc))
-		return -ENODEV;
-
-	*val = guc->submission_state.sched_disable_gucid_threshold;
-	return 0;
-}
-
-static int guc_sched_disable_gucid_threshold_set(void *data, u64 val)
-{
-	struct intel_guc *guc = data;
-
-	if (!intel_guc_submission_is_used(guc))
-		return -ENODEV;
-
-	if (val > intel_guc_sched_disable_gucid_threshold_max(guc))
-		guc->submission_state.sched_disable_gucid_threshold =
-			intel_guc_sched_disable_gucid_threshold_max(guc);
-	else
-		guc->submission_state.sched_disable_gucid_threshold = val;
-
-	return 0;
-}
-DEFINE_SIMPLE_ATTRIBUTE(guc_sched_disable_gucid_threshold_fops,
-			guc_sched_disable_gucid_threshold_get,
-			guc_sched_disable_gucid_threshold_set, "%lld\n");
-
 void intel_guc_debugfs_register(struct intel_guc *guc, struct dentry *root)
 {
 	static const struct intel_gt_debugfs_file files[] = {
 		{ "guc_info", &guc_info_fops, NULL },
 		{ "guc_registered_contexts", &guc_registered_contexts_fops, NULL },
 		{ "guc_slpc_info", &guc_slpc_info_fops, &intel_eval_slpc_support},
-		{ "guc_sched_disable_delay_ms", &guc_sched_disable_delay_ms_fops, NULL },
-		{ "guc_sched_disable_gucid_threshold", &guc_sched_disable_gucid_threshold_fops,
-		  NULL },
 	};
 
 	if (!intel_guc_is_supported(guc))
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index a0cebb4590e9..0d56b615bf78 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -65,13 +65,7 @@
  * corresponding G2H returns indicating the scheduling disable operation has
  * completed it is safe to unpin the context. While a disable is in flight it
  * isn't safe to resubmit the context so a fence is used to stall all future
- * requests of that context until the G2H is returned. Because this interaction
- * with the GuC takes a non-zero amount of time we delay the disabling of
- * scheduling after the pin count goes to zero by a configurable period of time
- * (see SCHED_DISABLE_DELAY_MS). The thought is this gives the user a window of
- * time to resubmit something on the context before doing this costly operation.
- * This delay is only done if the context isn't closed and the guc_id usage is
- * less than a threshold (see NUM_SCHED_DISABLE_GUC_IDS_THRESHOLD).
+ * requests of that context until the G2H is returned.
  *
  * Context deregistration:
  * Before a context can be destroyed or if we steal its guc_id we must
@@ -1995,9 +1989,6 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
 	if (unlikely(ret < 0))
 		return ret;
 
-	if (!intel_context_is_parent(ce))
-		++guc->submission_state.guc_ids_in_use;
-
 	ce->guc_id.id = ret;
 	return 0;
 }
@@ -2007,16 +1998,14 @@ static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce)
 	GEM_BUG_ON(intel_context_is_child(ce));
 
 	if (!context_guc_id_invalid(ce)) {
-		if (intel_context_is_parent(ce)) {
+		if (intel_context_is_parent(ce))
 			bitmap_release_region(guc->submission_state.guc_ids_bitmap,
 					      ce->guc_id.id,
 					      order_base_2(ce->parallel.number_children
							   + 1));
-		} else {
-			--guc->submission_state.guc_ids_in_use;
+		else
 			ida_simple_remove(&guc->submission_state.guc_ids,
					  ce->guc_id.id);
-		}
 		clr_ctx_id_mapping(guc, ce->guc_id.id);
 		set_context_guc_id_invalid(ce);
 	}
@@ -3004,98 +2993,41 @@ guc_context_revoke(struct intel_context *ce, struct i915_request *rq,
 	}
 }
 
-static void guc_context_sched_disable(struct intel_context *ce);
-
-static void do_sched_disable(struct intel_guc *guc, struct intel_context *ce,
-			     unsigned long flags)
-	__releases(ce->guc_state.lock)
+static void guc_context_sched_disable(struct intel_context *ce)
 {
+	struct intel_guc *guc = ce_to_guc(ce);
+	unsigned long flags;
 	struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm;
 	intel_wakeref_t wakeref;
+	u16 guc_id;
 
-	lockdep_assert_held(&ce->guc_state.lock);
-
-	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
-
-	with_intel_runtime_pm(runtime_pm, wakeref)
-		guc_context_sched_disable(ce);
-}
-
-static bool bypass_sched_disable(struct intel_guc *guc,
-				 struct intel_context *ce)
-{
-	lockdep_assert_held(&ce->guc_state.lock);
 	GEM_BUG_ON(intel_context_is_child(ce));
 
-	if (submission_disabled(guc) || context_guc_id_invalid(ce) ||
-	    !ctx_id_mapped(guc, ce->guc_id.id)) {
-		clr_context_enabled(ce);
-		return true;
-	}
-
-	return !context_enabled(ce);
-}
-
-static void __delay_sched_disable(struct work_struct *wrk)
-{
-	struct intel_context *ce =
-		container_of(wrk, typeof(*ce), guc_state.sched_disable_delay.work);
-	struct intel_guc *guc = ce_to_guc(ce);
-	unsigned long flags;
-
 	spin_lock_irqsave(&ce->guc_state.lock, flags);
 
-	if (bypass_sched_disable(guc, ce)) {
-		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
-		intel_context_sched_disable_unpin(ce);
-	} else {
-		do_sched_disable(guc, ce, flags);
-	}
-}
-
-static bool guc_id_pressure(struct intel_guc *guc, struct intel_context *ce)
-{
-	/*
-	 * parent contexts are perma-pinned, if we are unpinning do schedule
-	 * disable immediately.
-	 */
-	if (intel_context_is_parent(ce))
-		return true;
-
 	/*
-	 * If we are beyond the threshold for avail guc_ids, do schedule disable immediately.
+	 * We have to check if the context has been disabled by another thread,
+	 * check if submssion has been disabled to seal a race with reset and
+	 * finally check if any more requests have been committed to the
+	 * context ensursing that a request doesn't slip through the
+	 * 'context_pending_disable' fence.
	 */
-	return guc->submission_state.guc_ids_in_use >
-		guc->submission_state.sched_disable_gucid_threshold;
-}
-
-static void guc_context_sched_disable(struct intel_context *ce)
-{
-	struct intel_guc *guc = ce_to_guc(ce);
-	u64 delay = guc->submission_state.sched_disable_delay_ms;
-	unsigned long flags;
-
-	spin_lock_irqsave(&ce->guc_state.lock, flags);
-
-	if (bypass_sched_disable(guc, ce)) {
-		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
-		intel_context_sched_disable_unpin(ce);
-	} else if (!intel_context_is_closed(ce) && !guc_id_pressure(guc, ce) &&
-		   delay) {
+	if (unlikely(!context_enabled(ce) || submission_disabled(guc) ||
+		     context_has_committed_requests(ce))) {
+		clr_context_enabled(ce);
 		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
-		mod_delayed_work(system_unbound_wq,
-				 &ce->guc_state.sched_disable_delay,
-				 msecs_to_jiffies(delay));
-	} else {
-		do_sched_disable(guc, ce, flags);
+		goto unpin;
 	}
-}
+	guc_id = prep_context_pending_disable(ce);
 
-static void guc_context_close(struct intel_context *ce)
-{
-	if (test_bit(CONTEXT_GUC_INIT, &ce->flags) &&
-	    cancel_delayed_work(&ce->guc_state.sched_disable_delay))
-		__delay_sched_disable(&ce->guc_state.sched_disable_delay.work);
+	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+
+	with_intel_runtime_pm(runtime_pm, wakeref)
+		__guc_context_sched_disable(guc, ce, guc_id);
+
+	return;
+unpin:
+	intel_context_sched_disable_unpin(ce);
 }
 
 static inline void guc_lrc_desc_unpin(struct intel_context *ce)
@@ -3414,8 +3346,6 @@ static void remove_from_context(struct i915_request *rq)
 static const struct intel_context_ops guc_context_ops = {
 	.alloc = guc_context_alloc,
 
-	.close = guc_context_close,
-
 	.pre_pin = guc_context_pre_pin,
 	.pin = guc_context_pin,
 	.unpin = guc_context_unpin,
@@ -3498,10 +3428,6 @@ static void guc_context_init(struct intel_context *ce)
 	rcu_read_unlock();
 
 	ce->guc_state.prio = map_i915_prio_to_guc_prio(prio);
-
-	INIT_DELAYED_WORK(&ce->guc_state.sched_disable_delay,
-			  __delay_sched_disable);
-
 	set_bit(CONTEXT_GUC_INIT, &ce->flags);
 }
 
@@ -3539,9 +3465,6 @@ static int guc_request_alloc(struct i915_request *rq)
 	if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags)))
 		guc_context_init(ce);
 
-	if (cancel_delayed_work(&ce->guc_state.sched_disable_delay))
-		intel_context_sched_disable_unpin(ce);
-
 	/*
 	 * Call pin_guc_id here rather than in the pinning step as with
 	 * dma_resv, contexts can be repeatedly pinned / unpinned trashing the
@@ -3672,8 +3595,6 @@ static int guc_virtual_context_alloc(struct intel_context *ce)
 static const struct intel_context_ops virtual_guc_context_ops = {
 	.alloc = guc_virtual_context_alloc,
 
-	.close = guc_context_close,
-
 	.pre_pin = guc_virtual_context_pre_pin,
 	.pin = guc_virtual_context_pin,
 	.unpin = guc_virtual_context_unpin,
@@ -3763,8 +3684,6 @@ static void guc_child_context_destroy(struct kref *kref)
 static const struct intel_context_ops virtual_parent_context_ops = {
 	.alloc = guc_virtual_context_alloc,
 
-	.close = guc_context_close,
-
 	.pre_pin = guc_context_pre_pin,
 	.pin = guc_parent_context_pin,
 	.unpin = guc_parent_context_unpin,
@@ -4295,26 +4214,6 @@ static bool __guc_submission_selected(struct intel_guc *guc)
 	return i915->params.enable_guc & ENABLE_GUC_SUBMISSION;
 }
 
-int intel_guc_sched_disable_gucid_threshold_max(struct intel_guc *guc)
-{
-	return guc->submission_state.num_guc_ids - NUMBER_MULTI_LRC_GUC_ID(guc);
-}
-
-/*
- * This default value of 33 milisecs (+1 milisec round up) ensures 30fps or higher
- * workloads are able to enjoy the latency reduction when delaying the schedule-disable
- * operation. This matches the 30fps game-render + encode (real world) workload this
- * knob was tested against.
- */
-#define SCHED_DISABLE_DELAY_MS	34
-
-/*
- * A threshold of 75% is a reasonable starting point considering that real world apps
- * generally don't get anywhere near this.
- */
-#define NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(__guc) \
-	(((intel_guc_sched_disable_gucid_threshold_max(guc)) * 3) / 4)
-
 void intel_guc_submission_init_early(struct intel_guc *guc)
 {
 	xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ);
@@ -4331,10 +4230,7 @@ void intel_guc_submission_init_early(struct intel_guc *guc)
 	spin_lock_init(&guc->timestamp.lock);
 	INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
 
-	guc->submission_state.sched_disable_delay_ms = SCHED_DISABLE_DELAY_MS;
 	guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID;
-	guc->submission_state.sched_disable_gucid_threshold =
-		NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(guc);
 	guc->submission_supported = __guc_submission_supported(guc);
 	guc->submission_selected = __guc_submission_selected(guc);
 }
diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h
index bdf3e22c0a34..f54de0499be7 100644
--- a/drivers/gpu/drm/i915/i915_selftest.h
+++ b/drivers/gpu/drm/i915/i915_selftest.h
@@ -92,14 +92,12 @@ int __i915_subtests(const char *caller,
 			T, ARRAY_SIZE(T), data)
 #define i915_live_subtests(T, data) ({ \
 	typecheck(struct drm_i915_private *, data); \
-	(data)->gt[0]->uc.guc.submission_state.sched_disable_delay_ms = 0; \
 	__i915_subtests(__func__, \
 			__i915_live_setup, __i915_live_teardown, \
 			T, ARRAY_SIZE(T), data); \
 })
 #define intel_gt_live_subtests(T, data) ({ \
 	typecheck(struct intel_gt *, data); \
-	(data)->uc.guc.submission_state.sched_disable_delay_ms = 0; \
 	__i915_subtests(__func__, \
 			__intel_gt_live_setup, __intel_gt_live_teardown, \
 			T, ARRAY_SIZE(T), data); \
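[Editor's note: for context on what the revert removes, the original commit deferred the expensive schedule-disable H2G by parking it on a delayed work item and cancelling the work if the context was re-used within the window. Below is a condensed, hypothetical sketch of that kernel delayed-work pattern, with the i915 specifics stripped out; "my_ctx" and the helper names are illustrative, not the driver's.]

#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include <linux/printk.h>

#define MY_SCHED_DISABLE_DELAY_MS 34	/* ~one 30fps frame, per the reverted patch */

struct my_ctx {
	struct delayed_work sched_disable_delay;
};

static void my_sched_disable_worker(struct work_struct *wrk)
{
	struct my_ctx *ctx =
		container_of(wrk, struct my_ctx, sched_disable_delay.work);

	/* the costly schedule-disable would be issued here, once the
	 * context has stayed idle for the whole delay window */
	(void)ctx;
}

static void my_ctx_init(struct my_ctx *ctx)
{
	INIT_DELAYED_WORK(&ctx->sched_disable_delay, my_sched_disable_worker);
}

/* pin count hit zero: defer the disable instead of doing it immediately */
static void my_ctx_last_unpin(struct my_ctx *ctx)
{
	mod_delayed_work(system_unbound_wq, &ctx->sched_disable_delay,
			 msecs_to_jiffies(MY_SCHED_DISABLE_DELAY_MS));
}

/* context re-used within the window: cancel the pending disable */
static void my_ctx_reuse(struct my_ctx *ctx)
{
	/* if the disable was still pending it never ran, so the deferred
	 * unpin the worker would have consumed must be undone here */
	if (cancel_delayed_work(&ctx->sched_disable_delay))
		pr_debug("pending sched-disable cancelled\n");
}

With the revert applied, guc_context_sched_disable() goes back to issuing the disable synchronously on the final unpin, and the debugfs knobs (guc_sched_disable_delay_ms, guc_sched_disable_gucid_threshold) and the guc_id pressure threshold disappear along with the worker.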