Message ID | 20210826032327.18078-3-matthew.brost@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Clean up GuC CI failures, simplify locking, and kernel DOC | expand |
On 8/25/2021 8:23 PM, Matthew Brost wrote: > A small race that could result in incorrect accounting of the number > of outstanding G2H. Basically prior to this patch we did not increment > the number of outstanding G2H if we encoutered a GT reset while sending > a H2G. This was incorrect as the context state had already been updated > to anticipate a G2H response thus the counter should be incremented. > > Also always use helper when decrementing this value. > > Fixes: f4eb1f3fe946 ("drm/i915/guc: Ensure G2H response has space in buffer") > Signed-off-by: Matthew Brost <matthew.brost@intel.com> > Cc: <stable@vger.kernel.org> > --- > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 23 ++++++++++--------- > 1 file changed, 12 insertions(+), 11 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > index 69faa39da178..03a86da6011e 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > @@ -352,6 +352,12 @@ static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id, > xa_unlock_irqrestore(&guc->context_lookup, flags); > } > > +static void decr_outstanding_submission_g2h(struct intel_guc *guc) > +{ > + if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) > + wake_up_all(&guc->ct.wq); > +} > + > static int guc_submission_send_busy_loop(struct intel_guc *guc, > const u32 *action, > u32 len, > @@ -360,11 +366,12 @@ static int guc_submission_send_busy_loop(struct intel_guc *guc, > { > int err; > > - err = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); > - > - if (!err && g2h_len_dw) > + if (g2h_len_dw) > atomic_inc(&guc->outstanding_submission_g2h); > > + err = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); > + GEM_BUG_ON(g2h_len_dw && err == -EBUSY); AFAICS having a return g2h is not tied to not returning EBUSY, the only way to avoid EBUSY seems to be for loop to be true. maybe have instead: GEM_BUG_ON(g2h_len_dw && !loop); earlier on? Daniele > + > return err; > } > > @@ -616,7 +623,7 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) > init_sched_state(ce); > > if (pending_enable || destroyed || deregister) { > - atomic_dec(&guc->outstanding_submission_g2h); > + decr_outstanding_submission_g2h(guc); > if (deregister) > guc_signal_context_fence(ce); > if (destroyed) { > @@ -635,7 +642,7 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) > intel_engine_signal_breadcrumbs(ce->engine); > } > intel_context_sched_disable_unpin(ce); > - atomic_dec(&guc->outstanding_submission_g2h); > + decr_outstanding_submission_g2h(guc); > spin_lock_irqsave(&ce->guc_state.lock, flags); > guc_blocked_fence_complete(ce); > spin_unlock_irqrestore(&ce->guc_state.lock, flags); > @@ -2583,12 +2590,6 @@ g2h_context_lookup(struct intel_guc *guc, u32 desc_idx) > return ce; > } > > -static void decr_outstanding_submission_g2h(struct intel_guc *guc) > -{ > - if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) > - wake_up_all(&guc->ct.wq); > -} > - > int intel_guc_deregister_done_process_msg(struct intel_guc *guc, > const u32 *msg, > u32 len)
On Thu, Aug 26, 2021 at 04:09:59PM -0700, Daniele Ceraolo Spurio wrote: > > > On 8/25/2021 8:23 PM, Matthew Brost wrote: > > A small race that could result in incorrect accounting of the number > > of outstanding G2H. Basically prior to this patch we did not increment > > the number of outstanding G2H if we encoutered a GT reset while sending > > a H2G. This was incorrect as the context state had already been updated > > to anticipate a G2H response thus the counter should be incremented. > > > > Also always use helper when decrementing this value. > > > > Fixes: f4eb1f3fe946 ("drm/i915/guc: Ensure G2H response has space in buffer") > > Signed-off-by: Matthew Brost <matthew.brost@intel.com> > > Cc: <stable@vger.kernel.org> > > --- > > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 23 ++++++++++--------- > > 1 file changed, 12 insertions(+), 11 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > index 69faa39da178..03a86da6011e 100644 > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > @@ -352,6 +352,12 @@ static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id, > > xa_unlock_irqrestore(&guc->context_lookup, flags); > > } > > +static void decr_outstanding_submission_g2h(struct intel_guc *guc) > > +{ > > + if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) > > + wake_up_all(&guc->ct.wq); > > +} > > + > > static int guc_submission_send_busy_loop(struct intel_guc *guc, > > const u32 *action, > > u32 len, > > @@ -360,11 +366,12 @@ static int guc_submission_send_busy_loop(struct intel_guc *guc, > > { > > int err; > > - err = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); > > - > > - if (!err && g2h_len_dw) > > + if (g2h_len_dw) > > atomic_inc(&guc->outstanding_submission_g2h); > > + err = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); > > + GEM_BUG_ON(g2h_len_dw && err == -EBUSY); > > AFAICS having a return g2h is not tied to not returning EBUSY, the only way > to avoid EBUSY seems to be for loop to be true. maybe have instead: > > GEM_BUG_ON(g2h_len_dw && !loop); > > earlier on? > Yep, that is better. Can you respin this for me while I'm out? Matt > Daniele > > > + > > return err; > > } > > @@ -616,7 +623,7 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) > > init_sched_state(ce); > > if (pending_enable || destroyed || deregister) { > > - atomic_dec(&guc->outstanding_submission_g2h); > > + decr_outstanding_submission_g2h(guc); > > if (deregister) > > guc_signal_context_fence(ce); > > if (destroyed) { > > @@ -635,7 +642,7 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) > > intel_engine_signal_breadcrumbs(ce->engine); > > } > > intel_context_sched_disable_unpin(ce); > > - atomic_dec(&guc->outstanding_submission_g2h); > > + decr_outstanding_submission_g2h(guc); > > spin_lock_irqsave(&ce->guc_state.lock, flags); > > guc_blocked_fence_complete(ce); > > spin_unlock_irqrestore(&ce->guc_state.lock, flags); > > @@ -2583,12 +2590,6 @@ g2h_context_lookup(struct intel_guc *guc, u32 desc_idx) > > return ce; > > } > > -static void decr_outstanding_submission_g2h(struct intel_guc *guc) > > -{ > > - if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) > > - wake_up_all(&guc->ct.wq); > > -} > > - > > int intel_guc_deregister_done_process_msg(struct intel_guc *guc, > > const u32 *msg, > > u32 len) >
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 69faa39da178..03a86da6011e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -352,6 +352,12 @@ static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id, xa_unlock_irqrestore(&guc->context_lookup, flags); } +static void decr_outstanding_submission_g2h(struct intel_guc *guc) +{ + if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) + wake_up_all(&guc->ct.wq); +} + static int guc_submission_send_busy_loop(struct intel_guc *guc, const u32 *action, u32 len, @@ -360,11 +366,12 @@ static int guc_submission_send_busy_loop(struct intel_guc *guc, { int err; - err = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); - - if (!err && g2h_len_dw) + if (g2h_len_dw) atomic_inc(&guc->outstanding_submission_g2h); + err = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); + GEM_BUG_ON(g2h_len_dw && err == -EBUSY); + return err; } @@ -616,7 +623,7 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) init_sched_state(ce); if (pending_enable || destroyed || deregister) { - atomic_dec(&guc->outstanding_submission_g2h); + decr_outstanding_submission_g2h(guc); if (deregister) guc_signal_context_fence(ce); if (destroyed) { @@ -635,7 +642,7 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) intel_engine_signal_breadcrumbs(ce->engine); } intel_context_sched_disable_unpin(ce); - atomic_dec(&guc->outstanding_submission_g2h); + decr_outstanding_submission_g2h(guc); spin_lock_irqsave(&ce->guc_state.lock, flags); guc_blocked_fence_complete(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); @@ -2583,12 +2590,6 @@ g2h_context_lookup(struct intel_guc *guc, u32 desc_idx) return ce; } -static void decr_outstanding_submission_g2h(struct intel_guc *guc) -{ - if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) - wake_up_all(&guc->ct.wq); -} - int intel_guc_deregister_done_process_msg(struct intel_guc *guc, const u32 *msg, u32 len)
A small race that could result in incorrect accounting of the number of outstanding G2H. Basically prior to this patch we did not increment the number of outstanding G2H if we encoutered a GT reset while sending a H2G. This was incorrect as the context state had already been updated to anticipate a G2H response thus the counter should be incremented. Also always use helper when decrementing this value. Fixes: f4eb1f3fe946 ("drm/i915/guc: Ensure G2H response has space in buffer") Signed-off-by: Matthew Brost <matthew.brost@intel.com> Cc: <stable@vger.kernel.org> --- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-)