Message ID | 20220515060506.22084-1-vinay.belgaumkar@intel.com (mailing list archive) |
---|---|
State | New, archived |
Series | drm/i915/guc/slpc: Use non-blocking H2G for waitboost |
On Sat, 14 May 2022, Vinay Belgaumkar <vinay.belgaumkar@intel.com> wrote: > SLPC min/max frequency updates require H2G calls. We are seeing > timeouts when GuC channel is backed up and it is unable to respond > in a timely fashion causing warnings and affecting CI. > > This is seen when waitboosting happens during a stress test. > this patch updates the waitboost path to use a non-blocking > H2G call instead, which returns as soon as the message is > successfully transmitted. > > v2: Use drm_notice to report any errors that might occur while > sending the waitboost H2G request (Tvrtko) > > Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com> > --- > drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 44 +++++++++++++++++---- > 1 file changed, 36 insertions(+), 8 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c > index 1db833da42df..e5e869c96262 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c > @@ -98,6 +98,30 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc) > return data->header.global_state; > } > > +static int guc_action_slpc_set_param_nb(struct intel_guc *guc, u8 id, u32 value) > +{ > + u32 request[] = { static const > + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, > + SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2), > + id, > + value, > + }; > + int ret; > + > + ret = intel_guc_send_nb(guc, request, ARRAY_SIZE(request), 0); > + > + return ret > 0 ? -EPROTO : ret; > +} > + > +static int slpc_set_param_nb(struct intel_guc_slpc *slpc, u8 id, u32 value) > +{ > + struct intel_guc *guc = slpc_to_guc(slpc); > + > + GEM_BUG_ON(id >= SLPC_MAX_PARAM); > + > + return guc_action_slpc_set_param_nb(guc, id, value); > +} > + > static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value) > { > u32 request[] = { Ditto here, and the whole gt/uc directory seems to have tons of these u32 action/request array variables on stack, with the required initialization, that could be in rodata. Please fix all of them. BR, Jani. > @@ -208,12 +232,10 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq) > */ > > with_intel_runtime_pm(&i915->runtime_pm, wakeref) { > - ret = slpc_set_param(slpc, > - SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, > - freq); > - if (ret) > - i915_probe_error(i915, "Unable to force min freq to %u: %d", > - freq, ret); > + /* Non-blocking request will avoid stalls */ > + ret = slpc_set_param_nb(slpc, > + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, > + freq); > } > > return ret; > @@ -222,6 +244,8 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq) > static void slpc_boost_work(struct work_struct *work) > { > struct intel_guc_slpc *slpc = container_of(work, typeof(*slpc), boost_work); > + struct drm_i915_private *i915 = slpc_to_i915(slpc); > + int err; > > /* > * Raise min freq to boost. It's possible that > @@ -231,8 +255,12 @@ static void slpc_boost_work(struct work_struct *work) > */ > mutex_lock(&slpc->lock); > if (atomic_read(&slpc->num_waiters)) { > - slpc_force_min_freq(slpc, slpc->boost_freq); > - slpc->num_boosts++; > + err = slpc_force_min_freq(slpc, slpc->boost_freq); > + if (!err) > + slpc->num_boosts++; > + else > + drm_notice(&i915->drm, "Failed to send waitboost request (%d)\n", > + err); > } > mutex_unlock(&slpc->lock); > }
On Mon, 16 May 2022, Jani Nikula <jani.nikula@linux.intel.com> wrote: > On Sat, 14 May 2022, Vinay Belgaumkar <vinay.belgaumkar@intel.com> wrote: >> SLPC min/max frequency updates require H2G calls. We are seeing >> timeouts when GuC channel is backed up and it is unable to respond >> in a timely fashion causing warnings and affecting CI. >> >> This is seen when waitboosting happens during a stress test. >> this patch updates the waitboost path to use a non-blocking >> H2G call instead, which returns as soon as the message is >> successfully transmitted. >> >> v2: Use drm_notice to report any errors that might occur while >> sending the waitboost H2G request (Tvrtko) >> >> Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com> >> --- >> drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 44 +++++++++++++++++---- >> 1 file changed, 36 insertions(+), 8 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c >> index 1db833da42df..e5e869c96262 100644 >> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c >> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c >> @@ -98,6 +98,30 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc) >> return data->header.global_state; >> } >> >> +static int guc_action_slpc_set_param_nb(struct intel_guc *guc, u8 id, u32 value) >> +{ >> + u32 request[] = { > > static const *sigh* -ENOCOFFEE, please ignore the mail. BR, Jani. > >> + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, >> + SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2), >> + id, >> + value, >> + }; >> + int ret; >> + >> + ret = intel_guc_send_nb(guc, request, ARRAY_SIZE(request), 0); >> + >> + return ret > 0 ? -EPROTO : ret; >> +} >> + >> +static int slpc_set_param_nb(struct intel_guc_slpc *slpc, u8 id, u32 value) >> +{ >> + struct intel_guc *guc = slpc_to_guc(slpc); >> + >> + GEM_BUG_ON(id >= SLPC_MAX_PARAM); >> + >> + return guc_action_slpc_set_param_nb(guc, id, value); >> +} >> + >> static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value) >> { >> u32 request[] = { > > Ditto here, and the whole gt/uc directory seems to have tons of these > u32 action/request array variables on stack, with the required > initialization, that could be in rodata. > > Please fix all of them. > > BR, > Jani. > >> @@ -208,12 +232,10 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq) >> */ >> >> with_intel_runtime_pm(&i915->runtime_pm, wakeref) { >> - ret = slpc_set_param(slpc, >> - SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, >> - freq); >> - if (ret) >> - i915_probe_error(i915, "Unable to force min freq to %u: %d", >> - freq, ret); >> + /* Non-blocking request will avoid stalls */ >> + ret = slpc_set_param_nb(slpc, >> + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, >> + freq); >> } >> >> return ret; >> @@ -222,6 +244,8 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq) >> static void slpc_boost_work(struct work_struct *work) >> { >> struct intel_guc_slpc *slpc = container_of(work, typeof(*slpc), boost_work); >> + struct drm_i915_private *i915 = slpc_to_i915(slpc); >> + int err; >> >> /* >> * Raise min freq to boost. 
It's possible that >> @@ -231,8 +255,12 @@ static void slpc_boost_work(struct work_struct *work) >> */ >> mutex_lock(&slpc->lock); >> if (atomic_read(&slpc->num_waiters)) { >> - slpc_force_min_freq(slpc, slpc->boost_freq); >> - slpc->num_boosts++; >> + err = slpc_force_min_freq(slpc, slpc->boost_freq); >> + if (!err) >> + slpc->num_boosts++; >> + else >> + drm_notice(&i915->drm, "Failed to send waitboost request (%d)\n", >> + err); >> } >> mutex_unlock(&slpc->lock); >> }
On Sat, 14 May 2022 23:05:06 -0700, Vinay Belgaumkar wrote: > > SLPC min/max frequency updates require H2G calls. We are seeing > timeouts when GuC channel is backed up and it is unable to respond > in a timely fashion causing warnings and affecting CI. > > This is seen when waitboosting happens during a stress test. > this patch updates the waitboost path to use a non-blocking > H2G call instead, which returns as soon as the message is > successfully transmitted. Overall I think this patch is trying to paper over problems in the blocking H2G CT interface (specifically the 1 second timeout in wait_for_ct_request_update()). So I think we should address that problem in the interface directly rather than having each client (SLPC and any future client) work around the problem. Following points: 1. This patch seems to assume that it is 'ok' to ignore the return code from FW for a waitboost request (arguing waitboost is best effort so it's ok to 'fire and forget'). But the return code is still useful e.g. in cases where we see performance issues and want to go back and investigate if FW rejected any waitboost requests. 2. We are already seeing that a 1 second timeout is not sufficient. So why not simply increase that timeout? 3. In fact if we are saying that the CT interface is a "reliable" interface (implying no message loss), to ensure reliability that timeout should not simply be increased, it should be made "infinite" (in quotes). 4. Maybe it would have been best to not have a "blocking" H2G interface at all (with the wait in wait_for_ct_request_update()). Just have an asynchronous interface (which mirrors the actual interface between FW and i915) in which clients register callbacks which are invoked when FW responds. If this is too big a change we can probably continue with the current blocking interface after increasing the timeout as mentioned above. 5. Finally, the waitboost request is just the most likely to get stuck at the back of a full CT queue since it happens during normal operation. Actually any request, say one initiated from sysfs, can also get similarly stuck at the back of a full queue. So any solution should also address that situation (where the return code is needed and similarly for a future client of the "blocking" (REQUEST/RESPONSE) interface). Thanks. -- Ashutosh
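As a rough illustration of the asynchronous, callback-based interface suggested in point 4, the shape could be something like the sketch below. All names here are hypothetical (no such API exists in i915 today) and the dword count is arbitrary:

#include <linux/types.h>

struct intel_guc;

/* Invoked from the G2H handler when (and if) the FW responds. */
typedef void (*guc_h2g_done_fn)(void *ctx, int fw_status);

struct guc_h2g_request {
	u32 action[8];		/* H2G action code plus parameters */
	u32 len;		/* number of valid dwords in action[] */
	guc_h2g_done_fn done;	/* completion callback, may be NULL */
	void *ctx;		/* opaque caller context passed to 'done' */
};

/*
 * Queue the request without waiting: return once the message is in
 * the CT buffer; the caller learns the FW verdict via 'done'. This
 * mirrors the message-based interface between FW and i915.
 */
int intel_guc_send_async(struct intel_guc *guc, struct guc_h2g_request *rq);

With such an interface, a client that needs the return code (point 1) registers a callback, while fire-and-forget clients pass a NULL callback.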
On 5/16/2022 00:59, Jani Nikula wrote: > On Sat, 14 May 2022, Vinay Belgaumkar<vinay.belgaumkar@intel.com> wrote: >> SLPC min/max frequency updates require H2G calls. We are seeing >> timeouts when GuC channel is backed up and it is unable to respond >> in a timely fashion causing warnings and affecting CI. >> >> This is seen when waitboosting happens during a stress test. >> this patch updates the waitboost path to use a non-blocking >> H2G call instead, which returns as soon as the message is >> successfully transmitted. >> >> v2: Use drm_notice to report any errors that might occur while >> sending the waitboost H2G request (Tvrtko) >> >> Signed-off-by: Vinay Belgaumkar<vinay.belgaumkar@intel.com> >> --- >> drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 44 +++++++++++++++++---- >> 1 file changed, 36 insertions(+), 8 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c >> index 1db833da42df..e5e869c96262 100644 >> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c >> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c >> @@ -98,6 +98,30 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc) >> return data->header.global_state; >> } >> >> +static int guc_action_slpc_set_param_nb(struct intel_guc *guc, u8 id, u32 value) >> +{ >> + u32 request[] = { > static const > >> + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, >> + SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2), >> + id, >> + value, >> + }; >> + int ret; >> + >> + ret = intel_guc_send_nb(guc, request, ARRAY_SIZE(request), 0); >> + >> + return ret > 0 ? -EPROTO : ret; >> +} >> + >> +static int slpc_set_param_nb(struct intel_guc_slpc *slpc, u8 id, u32 value) >> +{ >> + struct intel_guc *guc = slpc_to_guc(slpc); >> + >> + GEM_BUG_ON(id >= SLPC_MAX_PARAM); >> + >> + return guc_action_slpc_set_param_nb(guc, id, value); >> +} >> + >> static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value) >> { >> + u32 request[] = { > Ditto here, and the whole gt/uc directory seems to have tons of these > u32 action/request array variables on stack, with the required > initialization, that could be in rodata. > > Please fix all of them. > > BR, > Jani. But the only constant is the action code. Everything else is parameters and will be different on each call. You mean something like this?

static const u32 template[] = { action, };
u32 *request = kmalloc_array(sizeof(*request), 4);
memcpy(request, template, sizeof(*request) * 1);
request[1] = param0;
request[2] = param1;
request[3] = param2;
ret = send(request);
kfree(request);
return ret;

Not seeing how that would be an improvement. It's a lot more code, a lot less readable, more prone to bugs due to incorrect structure sizes and/or params in the wrong place. The current version is easy to read and therefore to maintain, almost impossible to get wrong, and only puts a few words on the stack. I think the largest request is in the region of 15 words? I'm not seeing what the problem is. John.
>> @@ -208,12 +232,10 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq) >> */ >> >> with_intel_runtime_pm(&i915->runtime_pm, wakeref) { >> - ret = slpc_set_param(slpc, >> - SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, >> - freq); >> - if (ret) >> - i915_probe_error(i915, "Unable to force min freq to %u: %d", >> - freq, ret); >> + /* Non-blocking request will avoid stalls */ >> + ret = slpc_set_param_nb(slpc, >> + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, >> + freq); >> } >> >> return ret; >> @@ -222,6 +244,8 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq) >> static void slpc_boost_work(struct work_struct *work) >> { >> struct intel_guc_slpc *slpc = container_of(work, typeof(*slpc), boost_work); >> + struct drm_i915_private *i915 = slpc_to_i915(slpc); >> + int err; >> >> /* >> * Raise min freq to boost. It's possible that >> @@ -231,8 +255,12 @@ static void slpc_boost_work(struct work_struct *work) >> */ >> mutex_lock(&slpc->lock); >> if (atomic_read(&slpc->num_waiters)) { >> - slpc_force_min_freq(slpc, slpc->boost_freq); >> - slpc->num_boosts++; >> + err = slpc_force_min_freq(slpc, slpc->boost_freq); >> + if (!err) >> + slpc->num_boosts++; >> + else >> + drm_notice(&i915->drm, "Failed to send waitboost request (%d)\n", >> + err); >> } >> mutex_unlock(&slpc->lock); >> }
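To make the trade-off concrete, John's sketch rendered as compilable code might look roughly as follows. The GFP flag, the -ENOMEM handling and the use of the real send helper are assumptions filled in for illustration (and <linux/slab.h>/<linux/string.h> are assumed available), not part of his mail:

static int guc_action_slpc_set_param_rodata(struct intel_guc *guc,
					    u8 id, u32 value)
{
	/* Only the action code is constant; everything else varies. */
	static const u32 template[] = {
		GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
	};
	u32 *request;
	int ret;

	request = kmalloc_array(4, sizeof(*request), GFP_KERNEL);
	if (!request)
		return -ENOMEM;

	/* Copy the one constant dword, then fill in the parameters. */
	memcpy(request, template, sizeof(template));
	request[1] = SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2);
	request[2] = id;
	request[3] = value;

	ret = intel_guc_send_nb(guc, request, 4, 0);
	kfree(request);

	return ret > 0 ? -EPROTO : ret;
}

This trades a four-dword stack array for a heap allocation, an extra failure path and magic indices, which supports John's point.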
On 6/7/2022 16:02, John Harrison wrote: > On 5/16/2022 00:59, Jani Nikula wrote: >> On Sat, 14 May 2022, Vinay Belgaumkar<vinay.belgaumkar@intel.com> wrote: >>> SLPC min/max frequency updates require H2G calls. We are seeing >>> timeouts when GuC channel is backed up and it is unable to respond >>> in a timely fashion causing warnings and affecting CI. >>> >>> This is seen when waitboosting happens during a stress test. >>> this patch updates the waitboost path to use a non-blocking >>> H2G call instead, which returns as soon as the message is >>> successfully transmitted. >>> >>> v2: Use drm_notice to report any errors that might occur while >>> sending the waitboost H2G request (Tvrtko) >>> >>> Signed-off-by: Vinay Belgaumkar<vinay.belgaumkar@intel.com> >>> --- >>> drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 44 +++++++++++++++++---- >>> 1 file changed, 36 insertions(+), 8 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c >>> index 1db833da42df..e5e869c96262 100644 >>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c >>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c >>> @@ -98,6 +98,30 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc) >>> return data->header.global_state; >>> } >>> >>> +static int guc_action_slpc_set_param_nb(struct intel_guc *guc, u8 id, u32 value) >>> +{ >>> + u32 request[] = { >> static const >> >>> + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, >>> + SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2), >>> + id, >>> + value, >>> + }; >>> + int ret; >>> + >>> + ret = intel_guc_send_nb(guc, request, ARRAY_SIZE(request), 0); >>> + >>> + return ret > 0 ? -EPROTO : ret; >>> +} >>> + >>> +static int slpc_set_param_nb(struct intel_guc_slpc *slpc, u8 id, u32 value) >>> +{ >>> + struct intel_guc *guc = slpc_to_guc(slpc); >>> + >>> + GEM_BUG_ON(id >= SLPC_MAX_PARAM); >>> + >>> + return guc_action_slpc_set_param_nb(guc, id, value); >>> +} >>> + >>> static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value) >>> { >>> u32 request[] = { >> Ditto here, and the whole gt/uc directory seems to have tons of these >> u32 action/request array variables on stack, with the required >> initialization, that could be in rodata. >> >> Please fix all of them. >> >> BR, >> Jani. > But the only constant is the action code. Everything else is > parameters and will be different on each call. > ... Oops. Just saw your follow up message. No worries! John.
On 6/7/2022 15:29, Dixit, Ashutosh wrote: > On Sat, 14 May 2022 23:05:06 -0700, Vinay Belgaumkar wrote: >> SLPC min/max frequency updates require H2G calls. We are seeing >> timeouts when GuC channel is backed up and it is unable to respond >> in a timely fashion causing warnings and affecting CI. >> >> This is seen when waitboosting happens during a stress test. >> this patch updates the waitboost path to use a non-blocking >> H2G call instead, which returns as soon as the message is >> successfully transmitted. > Overall I think this patch is trying to paper over problems in the blocking > H2G CT interface (specifically the 1 second timeout in > wait_for_ct_request_update()). So I think we should address that problem in > the interface directly rather than having each client (SLPC and any future > client) work around the problem. Following points: > > 1. This patch seems to assume that it is 'ok' to ignore the return code > from FW for a waitboost request (arguing waitboost is best effort so > it's ok to 'fire and forget'). But the return code is still useful > e.g. in cases where we see performance issues and want to go back and > investigate if FW rejected any waitboost requests. You still get errors reported in the GuC log. Indeed, some errors (or at least error reasons) are only visible in the log, not in the return code. > > 2. We are already seeing that a 1 second timeout is not sufficient. So why > not simply increase that timeout? > > 3. In fact if we are saying that the CT interface is a "reliable" interface > (implying no message loss), to ensure reliability that timeout should > not simply be increased, it should be made "infinite" (in quotes). > > 4. Maybe it would have been best to not have a "blocking" H2G interface at > all (with the wait in wait_for_ct_request_update()). Just have an > asynchronous interface (which mirrors the actual interface between FW > and i915) in which clients register callbacks which are invoked when FW > responds. If this is too big a change we can probably continue with the > current blocking interface after increasing the timeout as mentioned > above. > > 5. Finally, the waitboost request is just the most likely to get stuck at > the back of a full CT queue since it happens during normal > operation. Actually any request, say one initiated from sysfs, can also > get similarly stuck at the back of a full queue. So any solution should > also address that situation (where the return code is needed and > similarly for a future client of the "blocking" (REQUEST/RESPONSE) > interface). The blocking interface is only intended for init time operations, not runtime. Stuff where the operation is meant to be synchronous and the KMD should not proceed until it has an ack back from the GuC that the update has taken place. All runtime operations are expected to be asynchronous. If a response is required, then it should be sent via an async callback. E.g. context de-registration is a 'fire and forget' H2G call but gets a 'deregistration complete' G2H notification when it is safe for the KMD to free up the associated storage. There is an 'errors only' H2G mechanism. That will not send an ack back in the case of a successful H2G but will send back an error notification in the case of a failure. All async H2Gs should really be using that mechanism. I think Michal W did post a patch for it and I was meant to be reviewing it but it dropped off my radar due to other higher priorities. John. > > Thanks. > -- > Ashutosh
On Tue, 07 Jun 2022, John Harrison <john.c.harrison@intel.com> wrote:
> Oops. Just saw your follow up message. No worries!
Again, sorry for the noise, and for wasting your time!
BR,
Jani.
On Tue, 07 Jun 2022 16:15:19 -0700, John Harrison wrote: > > On 6/7/2022 15:29, Dixit, Ashutosh wrote: > > On Sat, 14 May 2022 23:05:06 -0700, Vinay Belgaumkar wrote: > >> SLPC min/max frequency updates require H2G calls. We are seeing > >> timeouts when GuC channel is backed up and it is unable to respond > >> in a timely fashion causing warnings and affecting CI. > >> > >> This is seen when waitboosting happens during a stress test. > >> this patch updates the waitboost path to use a non-blocking > >> H2G call instead, which returns as soon as the message is > >> successfully transmitted. > > Overall I think this patch is trying to paper over problems in the blocking > > H2G CT interface (specifically the 1 second timeout in > > wait_for_ct_request_update()). So I think we should address that problem in > > the interface directly rather than having each client (SLPC and any future > > client) work around the problem. Following points: > > > > 1. This patch seems to assume that it is 'ok' to ignore the return code > > from FW for a waitboost request (arguing waitboost is best effort so > > it's ok to 'fire and forget'). But the return code is still useful > > e.g. in cases where we see performance issues and want to go back and > > investigate if FW rejected any waitboost requests. > > You still get errors reported in the GuC log. Indeed, some errors (or at > least error reasons) are only visible in the log not in the return code. OK, so we at least have this method for debug available. > > 2. We are already seeing that a 1 second timeout is not sufficient. So why > > not simply increase that timeout? > > > > 3. In fact if we are saying that the CT interface is a "reliable" interface > > (implying no message loss), to ensure reliability that timeout should > > not simply be increased, it should be made "infinite" (in quotes). > > > > 4. Maybe it would have been best to not have a "blocking" H2G interface at > > all (with the wait in wait_for_ct_request_update()). Just have an > > asynchronous interface (which mirrors the actual interface between FW > > and i915) in which clients register callbacks which are invoked when FW > > responds. If this is too big a change we can probably continue with the > > current blocking interface after increasing the timeout as mentioned > > above. > > > > 5. Finally, the waitboost request is just the most likely to get stuck at > > the back of a full CT queue since it happens during normal > > operation. Actually any request, say one initiated from sysfs, can also > > get similarly stuck at the back of a full queue. So any solution should > > also address that situation (where the return code is needed and > > similarly for a future client of the "blocking" (REQUEST/RESPONSE) > > interface). > The blocking interface is only intended for init time operations, not > runtime. In that case we should probably have code to enforce this in i915. > Stuff where the operation is meant to be synchronous and the KMD > should not proceed until it has an ack back from the GuC that the update > has taken place. All runtime operations are expected to be asynchronous. If > a response is required, then it should be sent via an async > callback. E.g. context de-registration is a 'fire and forget' H2G call but > gets a 'deregistration complete' G2H notification when it is safe for the > KMD to free up the associated storage. At present all GuC interactions in intel_guc_slpc.c (in i915) do *not* follow this. 
They use the REQUEST/RESPONSE FW interface which is pushed through the blocking H2G CT interface in i915. If we are serious about this, it needs a GuC FW change to use bi-directional EVENTs in the asynchronous interface (with corresponding changes in intel_guc_slpc.c). > There is an 'errors only' H2G mechanism. That will not send an ack back in > the case of a successful H2G but will send back an error notification in > the case of a failure. All async H2Gs should really be using that > mechanism. I think Michal W did post a patch for it and I was meant to be > reviewing it but it dropped off my radar due to other higher priorities. These I believe are referred to as FAST_REQUESTs in GuC FW. That success is not communicated back to the KMD might be an issue in cases where the KMD needs to know whether a particular operation was successful (such as for operations initiated via sysfs). Thanks. -- Ashutosh
On Sat, 14 May 2022 23:05:06 -0700, Vinay Belgaumkar wrote: > > SLPC min/max frequency updates require H2G calls. We are seeing > timeouts when GuC channel is backed up and it is unable to respond > in a timely fashion causing warnings and affecting CI. > > This is seen when waitboosting happens during a stress test. > this patch updates the waitboost path to use a non-blocking > H2G call instead, which returns as soon as the message is > successfully transmitted. Overall I am ok moving waitboost to use the non-blocking H2G. We can consider increasing the timeout in wait_for_ct_request_update() to be a separate issue for blocking cases and we can handle that separately. Still, there are a couple of issues with this patch, mentioned below. > v2: Use drm_notice to report any errors that might occur while > sending the waitboost H2G request (Tvrtko) > > Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com> > --- > drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 44 +++++++++++++++++---- > 1 file changed, 36 insertions(+), 8 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c > index 1db833da42df..e5e869c96262 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c > @@ -98,6 +98,30 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc) > return data->header.global_state; > } > > +static int guc_action_slpc_set_param_nb(struct intel_guc *guc, u8 id, u32 value) > +{ > + u32 request[] = { > + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, > + SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2), > + id, > + value, > + }; > + int ret; > + > + ret = intel_guc_send_nb(guc, request, ARRAY_SIZE(request), 0); > + > + return ret > 0 ? -EPROTO : ret; > +} > + > +static int slpc_set_param_nb(struct intel_guc_slpc *slpc, u8 id, u32 value) > +{ > + struct intel_guc *guc = slpc_to_guc(slpc); > + > + GEM_BUG_ON(id >= SLPC_MAX_PARAM); > + > + return guc_action_slpc_set_param_nb(guc, id, value); > +} > + > static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value) > { > u32 request[] = { > @@ -208,12 +232,10 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq) > */ > > with_intel_runtime_pm(&i915->runtime_pm, wakeref) { > - ret = slpc_set_param(slpc, > - SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, > - freq); > - if (ret) > - i915_probe_error(i915, "Unable to force min freq to %u: %d", > - freq, ret); > + /* Non-blocking request will avoid stalls */ > + ret = slpc_set_param_nb(slpc, > + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, > + freq); > } > > return ret; > @@ -222,6 +244,8 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq) > static void slpc_boost_work(struct work_struct *work) > { > struct intel_guc_slpc *slpc = container_of(work, typeof(*slpc), boost_work); > + struct drm_i915_private *i915 = slpc_to_i915(slpc); > + int err; > > /* > * Raise min freq to boost. It's possible that > @@ -231,8 +255,12 @@ static void slpc_boost_work(struct work_struct *work) > */ > mutex_lock(&slpc->lock); > if (atomic_read(&slpc->num_waiters)) { > - slpc_force_min_freq(slpc, slpc->boost_freq); > - slpc->num_boosts++; > + err = slpc_force_min_freq(slpc, slpc->boost_freq); > + if (!err) > + slpc->num_boosts++; > + else > + drm_notice(&i915->drm, "Failed to send waitboost request (%d)\n", > + err); The issue I have is what happens when we de-boost (restore min freq to its previous value in intel_guc_slpc_dec_waiters()).
It would seem that that call is fairly important to get the min freq down when there are no pending requests. Therefore what do we do in that case? This is the function: void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc) { mutex_lock(&slpc->lock); if (atomic_dec_and_test(&slpc->num_waiters)) slpc_force_min_freq(slpc, slpc->min_freq_softlimit); mutex_unlock(&slpc->lock); } 1. First it would seem that at the minimum we need a similar drm_notice() in intel_guc_slpc_dec_waiters(). That would mean we need to put the drm_notice() back in slpc_force_min_freq() (replacing i915_probe_error()) rather than in slpc_boost_work() above? 2. Further, if de-boosting is important then maybe as was being discussed in v1 of this patch (see the bottom of https://patchwork.freedesktop.org/patch/485004/?series=103598&rev=1) do we need to use intel_guc_send_busy_loop() in the intel_guc_slpc_dec_waiters() code path? At least we need to do 1. But for 2. we might as well just put intel_guc_send_busy_loop() in guc_action_slpc_set_param_nb()? In both cases (boost and de-boost) intel_guc_send_busy_loop() would be called from a work item so looks doable (the way we were previously doing the blocking call from the two places). Thoughts? Thanks. -- Ashutosh
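A minimal sketch of point 1, with the report moved into slpc_force_min_freq() itself so that both the boost and the de-boost paths log a failure (based on the hunk quoted above; untested):

	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
		/* Non-blocking request will avoid stalls */
		ret = slpc_set_param_nb(slpc,
					SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
					freq);
		if (ret)
			drm_notice(&i915->drm,
				   "Failed to send set param for min freq %u: %d\n",
				   freq, ret);
	}

With this, slpc_boost_work() and intel_guc_slpc_dec_waiters() need no logging of their own.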
On 6/21/2022 5:26 PM, Dixit, Ashutosh wrote: > On Sat, 14 May 2022 23:05:06 -0700, Vinay Belgaumkar wrote: >> SLPC min/max frequency updates require H2G calls. We are seeing >> timeouts when GuC channel is backed up and it is unable to respond >> in a timely fashion causing warnings and affecting CI. >> >> This is seen when waitboosting happens during a stress test. >> this patch updates the waitboost path to use a non-blocking >> H2G call instead, which returns as soon as the message is >> successfully transmitted. > Overall I am ok moving waitboost to use the non-blocking H2G. We can > consider increasing the timeout in wait_for_ct_request_update() to be a > separate issue for blocking cases and we can handle that separately. > > Still there a couple of issues with this patch mentioned below. > >> v2: Use drm_notice to report any errors that might occur while >> sending the waitboost H2G request (Tvrtko) >> >> Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com> >> --- >> drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 44 +++++++++++++++++---- >> 1 file changed, 36 insertions(+), 8 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c >> index 1db833da42df..e5e869c96262 100644 >> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c >> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c >> @@ -98,6 +98,30 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc) >> return data->header.global_state; >> } >> >> +static int guc_action_slpc_set_param_nb(struct intel_guc *guc, u8 id, u32 value) >> +{ >> + u32 request[] = { >> + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, >> + SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2), >> + id, >> + value, >> + }; >> + int ret; >> + >> + ret = intel_guc_send_nb(guc, request, ARRAY_SIZE(request), 0); >> + >> + return ret > 0 ? -EPROTO : ret; >> +} >> + >> +static int slpc_set_param_nb(struct intel_guc_slpc *slpc, u8 id, u32 value) >> +{ >> + struct intel_guc *guc = slpc_to_guc(slpc); >> + >> + GEM_BUG_ON(id >= SLPC_MAX_PARAM); >> + >> + return guc_action_slpc_set_param_nb(guc, id, value); >> +} >> + >> static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value) >> { >> u32 request[] = { >> @@ -208,12 +232,10 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq) >> */ >> >> with_intel_runtime_pm(&i915->runtime_pm, wakeref) { >> - ret = slpc_set_param(slpc, >> - SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, >> - freq); >> - if (ret) >> - i915_probe_error(i915, "Unable to force min freq to %u: %d", >> - freq, ret); >> + /* Non-blocking request will avoid stalls */ >> + ret = slpc_set_param_nb(slpc, >> + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, >> + freq); >> } >> >> return ret; >> @@ -222,6 +244,8 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq) >> static void slpc_boost_work(struct work_struct *work) >> { >> struct intel_guc_slpc *slpc = container_of(work, typeof(*slpc), boost_work); >> + struct drm_i915_private *i915 = slpc_to_i915(slpc); >> + int err; >> >> /* >> * Raise min freq to boost. 
It's possible that >> @@ -231,8 +255,12 @@ static void slpc_boost_work(struct work_struct *work) >> */ >> mutex_lock(&slpc->lock); >> if (atomic_read(&slpc->num_waiters)) { >> - slpc_force_min_freq(slpc, slpc->boost_freq); >> - slpc->num_boosts++; >> + err = slpc_force_min_freq(slpc, slpc->boost_freq); >> + if (!err) >> + slpc->num_boosts++; >> + else >> + drm_notice(&i915->drm, "Failed to send waitboost request (%d)\n", >> + err); > The issue I have is what happens when we de-boost (restore min freq to its > previous value in intel_guc_slpc_dec_waiters()). It would seem that that > call is fairly important to get the min freq down when there are no pending > requests. Therefore what do we do in that case? > > This is the function: > > void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc) > { > mutex_lock(&slpc->lock); > if (atomic_dec_and_test(&slpc->num_waiters)) > slpc_force_min_freq(slpc, slpc->min_freq_softlimit); > mutex_unlock(&slpc->lock); > } > > > 1. First it would seem that at the minimum we need a similar drm_notice() > in intel_guc_slpc_dec_waiters(). That would mean we need to put the > drm_notice() back in slpc_force_min_freq() (replacing > i915_probe_error()) rather than in slpc_boost_work() above? Sure. > > 2. Further, if de-boosting is important then maybe as was being discussed > in v1 of this patch (see the bottom of > https://patchwork.freedesktop.org/patch/485004/?series=103598&rev=1) do > we need to use intel_guc_send_busy_loop() in the > intel_guc_slpc_dec_waiters() code path? Using a busy_loop here would essentially be the same as blocking, right? And it could still fail/timeout with blocking as well (which is the problem we are trying to solve here). De-boosting is important, but in the worst case scenario, let's say this request was not processed by GuC. This would happen only if the system were really busy, which would mean there is a high likelihood we would boost/de-boost again anyway and it would probably go through at that point. Thanks, Vinay. > > At least we need to do 1. But for 2. we might as well just put > intel_guc_send_busy_loop() in guc_action_slpc_set_param_nb()? In both cases > (boost and de-boost) intel_guc_send_busy_loop() would be called from a work > item so looks doable (the way we were previously doing the blocking call > from the two places). Thoughts? > > Thanks. > -- > Ashutosh
On Wed, 22 Jun 2022 13:30:23 -0700, Belgaumkar, Vinay wrote: > On 6/21/2022 5:26 PM, Dixit, Ashutosh wrote: > > On Sat, 14 May 2022 23:05:06 -0700, Vinay Belgaumkar wrote: > > The issue I have is what happens when we de-boost (restore min freq to its > > previous value in intel_guc_slpc_dec_waiters()). It would seem that that > > call is fairly important to get the min freq down when there are no pending > > requests. Therefore what do we do in that case? > > > > This is the function: > > > > void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc) > > { > > mutex_lock(&slpc->lock); > > if (atomic_dec_and_test(&slpc->num_waiters)) > > slpc_force_min_freq(slpc, slpc->min_freq_softlimit); > > mutex_unlock(&slpc->lock); > > } > > > > > > 1. First it would seem that at the minimum we need a similar drm_notice() > > in intel_guc_slpc_dec_waiters(). That would mean we need to put the > > drm_notice() back in slpc_force_min_freq() (replacing > > i915_probe_error()) rather than in slpc_boost_work() above? > Sure. > > > > 2. Further, if de-boosting is important then maybe as was being discussed > > in v1 of this patch (see the bottom of > > https://patchwork.freedesktop.org/patch/485004/?series=103598&rev=1) do > > we need to use intel_guc_send_busy_loop() in the > > intel_guc_slpc_dec_waiters() code path? > > Using a busy_loop here would essentially be the same as blocking, right? Well, blocking waits for a response from GuC (so all previous requests need to be processed by GuC) whereas busy_loop() just waits for space to be available at the back of the queue (so only a few requests, or maybe just one, have to be processed by GuC). > And it could still fail/timeout with blocking as well (which is the problem > we are trying to solve here). intel_guc_send_busy_loop() has an infinite wait without a drm_err()!! :) > De-boosting is important, but in the worst case scenario, let's say this > request was not processed by GuC. This would happen only if the system > were really busy, which would mean there is a high likelihood we would > boost/de-boost again anyway and it would probably go through at that > point. Not sure of this. The system was busy but now might have gone idle which is why we are trying to de-boost. But the GuC queue might still be full so we may drop the de-boost request. Or if the system has gone really idle there will be space in the GuC queue. Also the problem with intel_guc_send_busy_loop() is that it just has a sleep in it, so others might be adding requests in the GuC queue while busy_loop() was sleeping (to avoid such situations we'd need a SW queue in front of the real GuC queue). So I am ok if we don't want to add intel_guc_send_busy_loop() for now and "wait and watch". Unless John suggests otherwise, since I don't have any idea how likely this is to happen. If we change drm_notice to drm_err the CI will quickly tell us if this is happening. Anyway, so at least let's move drm_notice (or drm_err) into slpc_force_min_freq() and I can ok the patch. Thanks. > > At least we need to do 1. But for 2. we might as well just put > > intel_guc_send_busy_loop() in guc_action_slpc_set_param_nb()? In both cases > > (boost and de-boost) intel_guc_send_busy_loop() would be called from a work > > item so looks doable (the way we were previously doing the blocking call > > from the two places). Thoughts? > > > > Thanks. > > -- > > Ashutosh
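For reference, a bounded variant of the busy-loop idea could look like the sketch below. The retry count and sleep interval are arbitrary assumptions, as is the premise that a full CT buffer surfaces as -EBUSY from the non-blocking send (<linux/delay.h> is assumed for msleep()):

static int slpc_set_param_nb_retry(struct intel_guc_slpc *slpc,
				   u8 id, u32 value)
{
	int tries = 5;
	int ret;

	do {
		ret = slpc_set_param_nb(slpc, id, value);
		if (ret != -EBUSY)	/* queued, or a hard failure */
			break;
		msleep(10);		/* CT buffer full; let GuC drain it */
	} while (--tries);

	return ret;
}

Unlike intel_guc_send_busy_loop() this gives up eventually, so the drm_notice() (or drm_err()) discussed above would still fire if GuC never drains the queue.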
On 22/06/2022 22:28, Dixit, Ashutosh wrote: > On Wed, 22 Jun 2022 13:30:23 -0700, Belgaumkar, Vinay wrote: >> On 6/21/2022 5:26 PM, Dixit, Ashutosh wrote: >>> On Sat, 14 May 2022 23:05:06 -0700, Vinay Belgaumkar wrote: >>> The issue I have is what happens when we de-boost (restore min freq to its >>> previous value in intel_guc_slpc_dec_waiters()). It would seem that that >>> call is fairly important to get the min freq down when there are no pending >>> requests. Therefore what do we do in that case? >>> >>> This is the function: >>> >>> void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc) >>> { >>> mutex_lock(&slpc->lock); >>> if (atomic_dec_and_test(&slpc->num_waiters)) >>> slpc_force_min_freq(slpc, slpc->min_freq_softlimit); >>> mutex_unlock(&slpc->lock); >>> } >>> >>> >>> 1. First it would seem that at the minimum we need a similar drm_notice() >>> in intel_guc_slpc_dec_waiters(). That would mean we need to put the >>> drm_notice() back in slpc_force_min_freq() (replacing >>> i915_probe_error()) rather than in slpc_boost_work() above? >> Sure. >>> >>> 2. Further, if de-boosting is important then maybe as was being discussed >>> in v1 of this patch (see the bottom of >>> https://patchwork.freedesktop.org/patch/485004/?series=103598&rev=1) do >>> we need to use intel_guc_send_busy_loop() in the >>> intel_guc_slpc_dec_waiters() code path? >> >> Using a busy_loop here would essentially be the same as blocking, right? > > Well blocking waits for a response from GuC (so all previous requests need > to be processed by GuC) whereas busy_loop() just waits for space to be > available at the back of the queue (so just a few, or maybe just one, > request have to be processed by GuC). > >> And it could still fail/timeout with blocking as well (which is the problem >> we are trying to solve here). > > intel_guc_send_busy_loop() has an infinite wait without a drm_err()!! :) > >> De-boosting is important, but in the worst case scenario, lets say this >> request was not processed by GuC. This would happen only if the system >> were really busy, which would mean there is a high likelihood we would >> boost/de-boost again anyways and it would probably go through at that >> point. > > Not sure of this. The system was busy but now might have gone idle which is > why we are trying to de-boost. But GuC queue might still be full so we may > drop the de-boost request. Or if the system has gone really idle there will > be space in the GuC queue. > > Also the problem with intel_guc_send_busy_loop() is that it just has a > sleep in it, so others might be adding requests in the GuC queue while > busy_loop() was sleeping (to avoid such situations we'd need a SW queue in > front of the real GuC queue). > > So I am ok if we don't want to add intel_guc_send_busy_loop() for now and > "wait and watch". Unless John suggests otherwise since I don't have any > idea how likely is this to happen. If we change drm_notice to drm_err the > CI will quick tell us if this happening. > > Anyway, so at least let's move drm_notice (or drm_err) into > slpc_force_min_freq() and I can ok the patch. Thanks. I got a bit lost but I thought I suggested notice level? Is it the same log message you are discussing here? If so, I don't think it is an error strictly speaking but just an unexpected condition which should be noted (claim being it should never ever happen outside IGT). Maybe warning if you think notice is too low level? Regards, Tvrtko > >>> At least we need to do 1. But for 2. 
we might as well just put >>> intel_guc_send_busy_loop() in guc_action_slpc_set_param_nb()? In both cases >>> (boost and de-boost) intel_guc_send_busy_loop() would be called from a work >>> item so looks doable (the way we were previously doing the blocking call >>> from the two places). Thoughts? >>> >>> Thanks. >>> -- >>> Ashutosh
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index 1db833da42df..e5e869c96262 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -98,6 +98,30 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc)
 	return data->header.global_state;
 }
 
+static int guc_action_slpc_set_param_nb(struct intel_guc *guc, u8 id, u32 value)
+{
+	u32 request[] = {
+		GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
+		SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2),
+		id,
+		value,
+	};
+	int ret;
+
+	ret = intel_guc_send_nb(guc, request, ARRAY_SIZE(request), 0);
+
+	return ret > 0 ? -EPROTO : ret;
+}
+
+static int slpc_set_param_nb(struct intel_guc_slpc *slpc, u8 id, u32 value)
+{
+	struct intel_guc *guc = slpc_to_guc(slpc);
+
+	GEM_BUG_ON(id >= SLPC_MAX_PARAM);
+
+	return guc_action_slpc_set_param_nb(guc, id, value);
+}
+
 static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value)
 {
 	u32 request[] = {
@@ -208,12 +232,10 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
 	 */
 
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
-		ret = slpc_set_param(slpc,
-				     SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
-				     freq);
-		if (ret)
-			i915_probe_error(i915, "Unable to force min freq to %u: %d",
-					 freq, ret);
+		/* Non-blocking request will avoid stalls */
+		ret = slpc_set_param_nb(slpc,
+					SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
+					freq);
 	}
 
 	return ret;
@@ -222,6 +244,8 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
 static void slpc_boost_work(struct work_struct *work)
 {
 	struct intel_guc_slpc *slpc = container_of(work, typeof(*slpc), boost_work);
+	struct drm_i915_private *i915 = slpc_to_i915(slpc);
+	int err;
 
 	/*
 	 * Raise min freq to boost. It's possible that
@@ -231,8 +255,12 @@ static void slpc_boost_work(struct work_struct *work)
 	 */
 	mutex_lock(&slpc->lock);
 	if (atomic_read(&slpc->num_waiters)) {
-		slpc_force_min_freq(slpc, slpc->boost_freq);
-		slpc->num_boosts++;
+		err = slpc_force_min_freq(slpc, slpc->boost_freq);
+		if (!err)
+			slpc->num_boosts++;
+		else
+			drm_notice(&i915->drm, "Failed to send waitboost request (%d)\n",
+				   err);
 	}
 	mutex_unlock(&slpc->lock);
 }
SLPC min/max frequency updates require H2G calls. We are seeing
timeouts when the GuC channel is backed up and it is unable to respond
in a timely fashion, causing warnings and affecting CI.

This is seen when waitboosting happens during a stress test.
This patch updates the waitboost path to use a non-blocking
H2G call instead, which returns as soon as the message is
successfully transmitted.

v2: Use drm_notice to report any errors that might occur while
sending the waitboost H2G request (Tvrtko)

Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
---
 drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 44 +++++++++++++++++----
 1 file changed, 36 insertions(+), 8 deletions(-)