Message ID | 20180509080644.GA76874@joelaf.mtv.corp.google.com (mailing list archive) |
---|---
State | RFC, archived |
On Wed, May 9, 2018 at 10:06 AM, Joel Fernandes <joel@joelfernandes.org> wrote:
> On Wed, May 09, 2018 at 08:45:30AM +0200, Juri Lelli wrote:
>> On 08/05/18 21:54, Joel Fernandes wrote:
>>
>> [...]
>>
>> > Just for discussion's sake, is there any need for work_in_progress? If
>> > we can queue multiple works (say, kthread_queue_work can handle it),
>> > then just queuing works whenever they are available should be OK, and
>> > the kthread loop can handle them. __cpufreq_driver_target is also
>> > protected by the work lock if there is any concern that it can have
>> > races... the only remaining thing is rate-limiting of the requests, but
>> > we do rate limiting already, just not for the "DL increased utilization"
>> > type of requests (which I don't think we are doing at the moment for
>> > urgent DL requests anyway).
>> >
>> > Following is an untested diff to show the idea. What do you think?
>> >
>> > thanks,
>> >
>> > - Joel
>> >
>> > ----8<---
>> > diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
>> > index d2c6083304b4..862634ff4bf3 100644
>> > --- a/kernel/sched/cpufreq_schedutil.c
>> > +++ b/kernel/sched/cpufreq_schedutil.c
>> > @@ -38,7 +38,6 @@ struct sugov_policy {
>> >          struct mutex work_lock;
>> >          struct kthread_worker worker;
>> >          struct task_struct *thread;
>> > -        bool work_in_progress;
>> >
>> >          bool need_freq_update;
>> >  };
>> > @@ -92,16 +91,8 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
>> >              !cpufreq_can_do_remote_dvfs(sg_policy->policy))
>> >                  return false;
>> >
>> > -        if (sg_policy->work_in_progress)
>> > -                return false;
>> > -
>> >          if (unlikely(sg_policy->need_freq_update)) {
>> >                  sg_policy->need_freq_update = false;
>> > -                /*
>> > -                 * This happens when limits change, so forget the previous
>> > -                 * next_freq value and force an update.
>> > -                 */
>> > -                sg_policy->next_freq = UINT_MAX;
>> >                  return true;
>> >          }
>> >
>> > @@ -129,7 +120,6 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
>> >                  policy->cur = next_freq;
>> >                  trace_cpu_frequency(next_freq, smp_processor_id());
>> >          } else {
>> > -                sg_policy->work_in_progress = true;
>> >                  irq_work_queue(&sg_policy->irq_work);
>>
>> Isn't this potentially introducing unneeded irq pressure (and doing the
>> whole wakeup-the-kthread thing), while the already active kthread could
>> simply handle multiple back-to-back requests before going to sleep?
>
> How about this? It will use the latest request, and also doesn't do an
> unnecessary irq_work_queue:
>
> (untested)
> -----8<--------
> diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
> index d2c6083304b4..6a3e42b01f52 100644
> --- a/kernel/sched/cpufreq_schedutil.c
> +++ b/kernel/sched/cpufreq_schedutil.c
> @@ -38,7 +38,7 @@ struct sugov_policy {
>          struct mutex work_lock;
>          struct kthread_worker worker;
>          struct task_struct *thread;
> -        bool work_in_progress;
> +        bool work_in_progress; /* Has kthread been kicked */
>
>          bool need_freq_update;
>  };
> @@ -92,9 +92,6 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
>              !cpufreq_can_do_remote_dvfs(sg_policy->policy))
>                  return false;
>
> -        if (sg_policy->work_in_progress)
> -                return false;
> -

Why this change?

Doing the below is rather pointless if work_in_progress is set, isn't it?

You'll drop the results of it on the floor going forward anyway then AFAICS.

>          if (unlikely(sg_policy->need_freq_update)) {
>                  sg_policy->need_freq_update = false;
>                  /*
> @@ -129,8 +126,11 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
>                  policy->cur = next_freq;
>                  trace_cpu_frequency(next_freq, smp_processor_id());
>          } else {
> -                sg_policy->work_in_progress = true;
> -                irq_work_queue(&sg_policy->irq_work);
> +                /* work_in_progress helps us not queue unnecessarily */
> +                if (!sg_policy->work_in_progress) {
> +                        sg_policy->work_in_progress = true;
> +                        irq_work_queue(&sg_policy->irq_work);
> +                }
>          }
>  }
>
> @@ -381,13 +381,23 @@ sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags)
>  static void sugov_work(struct kthread_work *work)
>  {
>          struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work);
> +        unsigned int freq;
> +
> +        /*
> +         * Hold sg_policy->update_lock just enough to handle the case where:
> +         * if sg_policy->next_freq is updated before work_in_progress is set to
> +         * false, we may miss queueing the new update request since
> +         * work_in_progress would appear to be true.
> +         */
> +        raw_spin_lock(&sg_policy->update_lock);
> +        freq = sg_policy->next_freq;
> +        sg_policy->work_in_progress = false;
> +        raw_spin_unlock(&sg_policy->update_lock);
>
>          mutex_lock(&sg_policy->work_lock);
> -        __cpufreq_driver_target(sg_policy->policy, sg_policy->next_freq,
> +        __cpufreq_driver_target(sg_policy->policy, freq,
>                                  CPUFREQ_RELATION_L);
>          mutex_unlock(&sg_policy->work_lock);
> -
> -        sg_policy->work_in_progress = false;
>  }
>
>  static void sugov_irq_work(struct irq_work *irq_work)
On 09-05-18, 10:30, Rafael J. Wysocki wrote:
> On Wed, May 9, 2018 at 10:06 AM, Joel Fernandes <joel@joelfernandes.org> wrote:
> > How about this? It will use the latest request, and also doesn't do an
> > unnecessary irq_work_queue:

I almost wrote the same stuff before I went for lunch :)

> > (untested)
> > -----8<--------
> >
> > [...]
> >
> > @@ -92,9 +92,6 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
> >              !cpufreq_can_do_remote_dvfs(sg_policy->policy))
> >                  return false;
> >
> > -        if (sg_policy->work_in_progress)
> > -                return false;
> > -
>
> Why this change?
>
> Doing the below is rather pointless if work_in_progress is set, isn't it?
>
> You'll drop the results of it on the floor going forward anyway then AFAICS.
>
> >          if (unlikely(sg_policy->need_freq_update)) {
> >                  sg_policy->need_freq_update = false;
> >                  /*
> > @@ -129,8 +126,11 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
> >                  policy->cur = next_freq;
> >                  trace_cpu_frequency(next_freq, smp_processor_id());
> >          } else {
> > -                sg_policy->work_in_progress = true;
> > -                irq_work_queue(&sg_policy->irq_work);
> > +                /* work_in_progress helps us not queue unnecessarily */
> > +                if (!sg_policy->work_in_progress) {
> > +                        sg_policy->work_in_progress = true;
> > +                        irq_work_queue(&sg_policy->irq_work);
> > +                }
> >          }
> >  }

Right, none of the above changes are required now.

> > @@ -381,13 +381,23 @@ sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags)
> >  static void sugov_work(struct kthread_work *work)
> >  {
> >          struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work);
> > +        unsigned int freq;
> > +
> > +        /*
> > +         * Hold sg_policy->update_lock just enough to handle the case where:
> > +         * if sg_policy->next_freq is updated before work_in_progress is set to
> > +         * false, we may miss queueing the new update request since
> > +         * work_in_progress would appear to be true.
> > +         */
> > +        raw_spin_lock(&sg_policy->update_lock);
> > +        freq = sg_policy->next_freq;
> > +        sg_policy->work_in_progress = false;
> > +        raw_spin_unlock(&sg_policy->update_lock);

One problem we still have is that sg_policy->update_lock is only used in the
shared policy case and not in the single CPU per policy case, so the race
isn't solved there yet.

> >          mutex_lock(&sg_policy->work_lock);
> > -        __cpufreq_driver_target(sg_policy->policy, sg_policy->next_freq,
> > +        __cpufreq_driver_target(sg_policy->policy, freq,
> >                                  CPUFREQ_RELATION_L);
> >          mutex_unlock(&sg_policy->work_lock);
> > -
> > -        sg_policy->work_in_progress = false;
> >  }
> >
> >  static void sugov_irq_work(struct irq_work *irq_work)
On Wed, May 09, 2018 at 10:30:37AM +0200, Rafael J. Wysocki wrote:
> On Wed, May 9, 2018 at 10:06 AM, Joel Fernandes <joel@joelfernandes.org> wrote:
> > On Wed, May 09, 2018 at 08:45:30AM +0200, Juri Lelli wrote:
> >> On 08/05/18 21:54, Joel Fernandes wrote:
> >>
> >> [...]
> >>
> >> Isn't this potentially introducing unneeded irq pressure (and doing the
> >> whole wakeup-the-kthread thing), while the already active kthread could
> >> simply handle multiple back-to-back requests before going to sleep?
> >
> > How about this? It will use the latest request, and also doesn't do an
> > unnecessary irq_work_queue:
> >
> > (untested)
> > -----8<--------
> >
> > [...]
> >
> > @@ -92,9 +92,6 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
> >              !cpufreq_can_do_remote_dvfs(sg_policy->policy))
> >                  return false;
> >
> > -        if (sg_policy->work_in_progress)
> > -                return false;
> > -
>
> Why this change?
>
> Doing the below is rather pointless if work_in_progress is set, isn't it?

The issue being discussed is that if a work was already in progress, new
frequency updates would be dropped. So even if DL increased its utilization,
nothing would happen, because with work_in_progress == true and
need_freq_update == true we would skip the update. In this diff I am allowing
the frequency request to be made while work_in_progress is true; in the end
the latest update will be picked.

> You'll drop the results of it on the floor going forward anyway then AFAICS.

Why? If sg_policy->need_freq_update = true, sugov_should_update_freq() will
return true.

thanks,

- Joel

> > [...]
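To spell out the scenario Joel is describing, here is one possible
interleaving with the current mainline code, where the work_in_progress early
return drops an urgent request (an illustrative timeline, not taken from the
thread):

        scheduler util hook                     sugov kthread
        ---------------------------------       ---------------------------------
        sugov_update_commit(F1)
          work_in_progress = true
          irq_work_queue()
        DL utilization rises, wants F2 > F1
        sugov_should_update_freq()
          returns false (work_in_progress)      __cpufreq_driver_target(..., F1)
          /* the F2 request is dropped */       work_in_progress = false

With the diff above, the second request instead overwrites
sg_policy->next_freq, and the kthread applies F2 when it runs.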
On Wed, May 09, 2018 at 02:10:01PM +0530, Viresh Kumar wrote:
> On 09-05-18, 10:30, Rafael J. Wysocki wrote:
> > On Wed, May 9, 2018 at 10:06 AM, Joel Fernandes <joel@joelfernandes.org> wrote:
> > > How about this? It will use the latest request, and also doesn't do an
> > > unnecessary irq_work_queue:
>
> I almost wrote the same stuff before I went for lunch :)

Oh :)

> [...]
>
> Right, none of the above changes are required now.

I didn't follow what you mean by the changes not being required. I was
developing against Linus' mainline. Also, I replied to Rafael's comment in
the other thread.

> > > @@ -381,13 +381,23 @@ sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags)
> > >  static void sugov_work(struct kthread_work *work)
> > >  {
> > >          struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work);
> > > +        unsigned int freq;
> > >
> > > [...]
> > >
> > > +        raw_spin_lock(&sg_policy->update_lock);
> > > +        freq = sg_policy->next_freq;
> > > +        sg_policy->work_in_progress = false;
> > > +        raw_spin_unlock(&sg_policy->update_lock);
>
> One problem we still have is that sg_policy->update_lock is only used in the
> shared policy case and not in the single CPU per policy case, so the race
> isn't solved there yet.

True. I can make the single CPU case acquire the update_lock very briefly
around the sugov_update_commit() call in sugov_update_single().

Also, I think the lock acquisition from sugov_work(), which runs in kthread
context, should be a raw_spin_lock_irqsave.

thanks,

- Joel
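For reference, a sketch of the irqsave variant Joel mentions, applied to
sugov_work() from the diff above (untested and purely illustrative):

static void sugov_work(struct kthread_work *work)
{
        struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work);
        unsigned int freq;
        unsigned long flags;

        /*
         * The util-update hooks can run with interrupts disabled, so the
         * kthread side saves and disables interrupts while holding
         * update_lock instead of taking the plain raw_spin_lock().
         */
        raw_spin_lock_irqsave(&sg_policy->update_lock, flags);
        freq = sg_policy->next_freq;
        sg_policy->work_in_progress = false;
        raw_spin_unlock_irqrestore(&sg_policy->update_lock, flags);

        mutex_lock(&sg_policy->work_lock);
        __cpufreq_driver_target(sg_policy->policy, freq, CPUFREQ_RELATION_L);
        mutex_unlock(&sg_policy->work_lock);
}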
On Wed, May 9, 2018 at 10:51 AM, Joel Fernandes <joel@joelfernandes.org> wrote:
> On Wed, May 09, 2018 at 10:30:37AM +0200, Rafael J. Wysocki wrote:
>> On Wed, May 9, 2018 at 10:06 AM, Joel Fernandes <joel@joelfernandes.org> wrote:
>> > On Wed, May 09, 2018 at 08:45:30AM +0200, Juri Lelli wrote:
>> >> On 08/05/18 21:54, Joel Fernandes wrote:
>> >>
>> >> [...]
>> >>
>> >> Isn't this potentially introducing unneeded irq pressure (and doing the
>> >> whole wakeup-the-kthread thing), while the already active kthread could
>> >> simply handle multiple back-to-back requests before going to sleep?
>> >
>> > How about this? It will use the latest request, and also doesn't do an
>> > unnecessary irq_work_queue:
>> >
>> > (untested)
>> > -----8<--------
>> >
>> > [...]
>> >
>> > -        if (sg_policy->work_in_progress)
>> > -                return false;
>> > -
>>
>> Why this change?
>>
>> Doing the below is rather pointless if work_in_progress is set, isn't it?
>
> The issue being discussed is that if a work was already in progress, new
> frequency updates would be dropped. So even if DL increased its utilization,
> nothing would happen, because with work_in_progress == true and
> need_freq_update == true we would skip the update. In this diff I am allowing
> the frequency request to be made while work_in_progress is true; in the end
> the latest update will be picked.

I'm not sure if taking new requests with the irq_work in flight is a good idea.

>> You'll drop the results of it on the floor going forward anyway then AFAICS.
>
> Why?

Because you cannot queue up a new irq_work before the previous one is complete?
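As an aside on that last point: irq_work_queue() itself already refuses to
double-queue, returning false while the previous work is still pending, so
the explicit work_in_progress check mainly saves the cost of the call rather
than preventing a double enqueue. A hedged sketch of a call site that made
the coalescing visible (illustrative, not proposed code):

        /*
         * irq_work_queue() only enqueues if the work is not already
         * pending; false here means the previous kick has not run yet
         * and this request is coalesced with it.
         */
        if (!irq_work_queue(&sg_policy->irq_work))
                pr_debug("schedutil: irq_work pending, request coalesced\n");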
On 09-05-18, 02:02, Joel Fernandes wrote:
> On Wed, May 09, 2018 at 02:10:01PM +0530, Viresh Kumar wrote:
> > Right, none of the above changes are required now.
>
> I didn't follow what you mean by the changes not being required. I was
> developing against Linus' mainline. Also, I replied to Rafael's comment in
> the other thread.

At least for the shared policy case, the entire sequence of
sugov_should_update_freq() followed by sugov_update_commit() is executed
from within a spinlock-protected region, and you are using the same lock
below. So either those two routines or the kthread routine below will
execute at any given point in time.

So in case the kthread has started doing the update and acquired the lock,
the util update handler will wait until work_in_progress is set to false;
that's not the problem we are trying to solve here.

And if the kthread hasn't acquired the lock yet and the util handler has
started executing sugov_should_update_freq() ....

And ^^^ this is where I understood that your earlier change is actually
required, so that we accumulate the latest updated next_freq value.

And with all that, we wouldn't require a while loop in the kthread code.

> > > > @@ -381,13 +381,23 @@ sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags)
> > > >  static void sugov_work(struct kthread_work *work)
> > > >  {
> > > >
> > > > [...]
> > > >
> > > > +        raw_spin_lock(&sg_policy->update_lock);
> > > > +        freq = sg_policy->next_freq;
> > > > +        sg_policy->work_in_progress = false;
> > > > +        raw_spin_unlock(&sg_policy->update_lock);
> >
> > One problem we still have is that sg_policy->update_lock is only used in the
> > shared policy case and not in the single CPU per policy case, so the race
> > isn't solved there yet.
>
> True. I can make the single CPU case acquire the update_lock very briefly
> around the sugov_update_commit() call in sugov_update_single().

Rafael was very clear from the beginning that he wouldn't allow a spin
lock in the un-shared policy case :)
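To make the interleaving Viresh describes concrete, one possible timeline for
the shared-policy case with Joel's diff applied (illustrative only):

        util hook (under update_lock)           sugov_work() kthread
        ---------------------------------       ---------------------------------
        sugov_should_update_freq()
        sugov_update_commit():
          next_freq = F2                        raw_spin_lock(&update_lock) /* spins */
          work_in_progress already true,
            so no new irq_work is queued
        raw_spin_unlock()                       /* lock acquired */
                                                freq = next_freq /* F2, the latest */
                                                work_in_progress = false
                                                raw_spin_unlock()
                                                __cpufreq_driver_target(policy, F2, ...)

Either ordering of the two critical sections yields a single kthread pass
that applies the newest next_freq, which is why no while loop is needed in
the kthread.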
On Wed, May 09, 2018 at 11:06:24AM +0200, Rafael J. Wysocki wrote:
> On Wed, May 9, 2018 at 10:51 AM, Joel Fernandes <joel@joelfernandes.org> wrote:
> > On Wed, May 09, 2018 at 10:30:37AM +0200, Rafael J. Wysocki wrote:
> >
> > [...]
> >
> >> Doing the below is rather pointless if work_in_progress is set, isn't it?
> >
> > The issue being discussed is that if a work was already in progress, new
> > frequency updates would be dropped. So even if DL increased its utilization,
> > nothing would happen, because with work_in_progress == true and
> > need_freq_update == true we would skip the update. In this diff I am allowing
> > the frequency request to be made while work_in_progress is true; in the end
> > the latest update will be picked.
>
> I'm not sure if taking new requests with the irq_work in flight is a good idea.

That's the point of the original $SUBJECT patch posted by Claudio :) In it you
can see that for an urgent request, work_in_progress isn't checked.

Also, I don't see why we cannot do this with the small tweak in my diff. It
solves a real problem seen with frequency updates done through the
slow-switch path, as we discussed at OSPM. But let me know if I missed your
point or something ;)

> >> You'll drop the results of it on the floor going forward anyway then AFAICS.
> >
> > Why?
>
> Because you cannot queue up a new irq_work before the previous one is complete?

We are not doing that. If you look at my diff, I am not queuing an irq_work if
one was already queued. What we're allowing is an update to next_freq. We
still use work_in_progress, but we don't use it to ban all incoming update
requests as done previously. Instead, we use work_in_progress to make sure
that we don't unnecessarily increase the irq pressure and cause excessive
wakeups (as Juri suggested).

I can clean it up and post it as a patch next week after some testing, in case
that's less confusing. This week I'm actually on vacation, and the diff was
pure vacation hacking ;-)

thanks,

- Joel
On Wed, May 9, 2018 at 11:39 AM, Joel Fernandes <joel@joelfernandes.org> wrote:
> On Wed, May 09, 2018 at 11:06:24AM +0200, Rafael J. Wysocki wrote:
>
> [...]
>
>> I'm not sure if taking new requests with the irq_work in flight is a good idea.
>
> That's the point of the original $SUBJECT patch posted by Claudio :) In it you
> can see that for an urgent request, work_in_progress isn't checked.
>
> Also, I don't see why we cannot do this with the small tweak in my diff. It
> solves a real problem seen with frequency updates done through the
> slow-switch path, as we discussed at OSPM.

OK

> But let me know if I missed your point or something ;)
>
>> >> You'll drop the results of it on the floor going forward anyway then AFAICS.
>> >
>> > Why?
>>
>> Because you cannot queue up a new irq_work before the previous one is complete?
>
> We are not doing that. If you look at my diff, I am not queuing an irq_work if
> one was already queued. What we're allowing is an update to next_freq. We
> still use work_in_progress, but we don't use it to ban all incoming update
> requests as done previously. Instead, we use work_in_progress to make sure
> that we don't unnecessarily increase the irq pressure and cause excessive
> wakeups (as Juri suggested).
>
> I can clean it up and post it as a patch next week after some testing, in case
> that's less confusing.

Yeah, that would help. :-)

> This week I'm actually on vacation, and the diff was pure vacation hacking ;-)

No worries.

Thanks,
Rafael
On Wed, May 09, 2018 at 02:58:23PM +0530, Viresh Kumar wrote:
> On 09-05-18, 02:02, Joel Fernandes wrote:
> > On Wed, May 09, 2018 at 02:10:01PM +0530, Viresh Kumar wrote:
> > > Right, none of the above changes are required now.
> >
> > I didn't follow what you mean by the changes not being required. I was
> > developing against Linus' mainline. Also, I replied to Rafael's comment in
> > the other thread.
>
> At least for the shared policy case, the entire sequence of
> sugov_should_update_freq() followed by sugov_update_commit() is executed
> from within a spinlock-protected region, and you are using the same lock
> below. So either those two routines or the kthread routine below will
> execute at any given point in time.
>
> So in case the kthread has started doing the update and acquired the lock,
> the util update handler will wait until work_in_progress is set to false;
> that's not the problem we are trying to solve here.
>
> And if the kthread hasn't acquired the lock yet and the util handler has
> started executing sugov_should_update_freq() ....
>
> And ^^^ this is where I understood that your earlier change is actually
> required, so that we accumulate the latest updated next_freq value.
>
> And with all that, we wouldn't require a while loop in the kthread code.

Oh yeah, totally. So I think we are on the same page now about that.

> > > > > @@ -381,13 +381,23 @@ sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags)
> > > > >  static void sugov_work(struct kthread_work *work)
> > > > >  {
> > > > >
> > > > > [...]
> > >
> > > One problem we still have is that sg_policy->update_lock is only used in the
> > > shared policy case and not in the single CPU per policy case, so the race
> > > isn't solved there yet.
> >
> > True. I can make the single CPU case acquire the update_lock very briefly
> > around the sugov_update_commit() call in sugov_update_single().
>
> Rafael was very clear from the beginning that he wouldn't allow a spin
> lock in the un-shared policy case :)

That's fair. Probably we can just not do this trickery at all for the single
case for now, in case work_in_progress is set. That way we still get the
benefit for the shared case, and the single case isn't changed from what it
is today.

thanks,

- Joel
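One way to realize what Joel suggests, sketched against the diff below, would
be to retain the early bail-out in sugov_should_update_freq() only for
unshared policies (policy_is_shared() is the existing cpufreq helper; this
fragment is illustrative and untested):

        /*
         * Sketch: keep today's behaviour for the single-CPU case by
         * bailing out while the kthread is busy, but let shared
         * policies fall through to the latest-request-wins handling.
         */
        if (sg_policy->work_in_progress &&
            !policy_is_shared(sg_policy->policy))
                return false;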
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index d2c6083304b4..6a3e42b01f52 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -38,7 +38,7 @@ struct sugov_policy {
         struct mutex work_lock;
         struct kthread_worker worker;
         struct task_struct *thread;
-        bool work_in_progress;
+        bool work_in_progress; /* Has kthread been kicked */

         bool need_freq_update;
 };
@@ -92,9 +92,6 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
             !cpufreq_can_do_remote_dvfs(sg_policy->policy))
                 return false;

-        if (sg_policy->work_in_progress)
-                return false;
-
         if (unlikely(sg_policy->need_freq_update)) {
                 sg_policy->need_freq_update = false;
                 /*
@@ -129,8 +126,11 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
                 policy->cur = next_freq;
                 trace_cpu_frequency(next_freq, smp_processor_id());
         } else {
-                sg_policy->work_in_progress = true;
-                irq_work_queue(&sg_policy->irq_work);
+                /* work_in_progress helps us not queue unnecessarily */
+                if (!sg_policy->work_in_progress) {
+                        sg_policy->work_in_progress = true;
+                        irq_work_queue(&sg_policy->irq_work);
+                }
         }
 }

@@ -381,13 +381,23 @@ sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags)
 static void sugov_work(struct kthread_work *work)
 {
         struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work);
+        unsigned int freq;
+
+        /*
+         * Hold sg_policy->update_lock just enough to handle the case where:
+         * if sg_policy->next_freq is updated before work_in_progress is set to
+         * false, we may miss queueing the new update request since
+         * work_in_progress would appear to be true.
+         */
+        raw_spin_lock(&sg_policy->update_lock);
+        freq = sg_policy->next_freq;
+        sg_policy->work_in_progress = false;
+        raw_spin_unlock(&sg_policy->update_lock);

         mutex_lock(&sg_policy->work_lock);
-        __cpufreq_driver_target(sg_policy->policy, sg_policy->next_freq,
+        __cpufreq_driver_target(sg_policy->policy, freq,
                                 CPUFREQ_RELATION_L);
         mutex_unlock(&sg_policy->work_lock);
-
-        sg_policy->work_in_progress = false;
 }

 static void sugov_irq_work(struct irq_work *irq_work)
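Finally, for readers less familiar with the kernel code, here is a minimal,
self-contained userspace analogue of the latest-request-wins pattern in the
diff above. All names are illustrative; a mutex and condition variable stand
in for update_lock and the irq_work kick. Compile with cc -pthread.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t update_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t kick = PTHREAD_COND_INITIALIZER;
static unsigned int next_freq;
static bool work_in_progress;   /* has the worker been kicked? */
static bool stop;

/* Analogue of sugov_update_commit(): record the latest request and
 * kick the worker only if it has not been kicked already. */
static void request_freq(unsigned int freq)
{
        pthread_mutex_lock(&update_lock);
        next_freq = freq;               /* the latest request always wins */
        if (!work_in_progress) {
                work_in_progress = true;
                pthread_cond_signal(&kick);
        }
        pthread_mutex_unlock(&update_lock);
}

/* Analogue of sugov_work(): snapshot the newest value, clear the
 * "kicked" flag so new requests can kick again, then apply it. */
static void *worker(void *arg)
{
        (void)arg;
        pthread_mutex_lock(&update_lock);
        for (;;) {
                while (!work_in_progress && !stop)
                        pthread_cond_wait(&kick, &update_lock);
                if (!work_in_progress && stop)
                        break;          /* drained all pending requests */
                unsigned int freq = next_freq;
                work_in_progress = false;
                pthread_mutex_unlock(&update_lock);

                /* stands in for __cpufreq_driver_target() */
                printf("applying frequency %u\n", freq);

                pthread_mutex_lock(&update_lock);
        }
        pthread_mutex_unlock(&update_lock);
        return NULL;
}

int main(void)
{
        pthread_t tid;

        pthread_create(&tid, NULL, worker, NULL);
        request_freq(1000000);
        request_freq(1400000);  /* may coalesce with the request above */
        request_freq(1800000);  /* the worker applies the newest value */

        pthread_mutex_lock(&update_lock);
        stop = true;
        pthread_cond_signal(&kick);
        pthread_mutex_unlock(&update_lock);
        pthread_join(tid, NULL);
        return 0;
}

Back-to-back requests may coalesce into a single "applying frequency" line,
but the value applied is always the newest one, which is exactly the
behaviour the thread converges on for the shared-policy case.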