@@ -29,4 +29,12 @@ extern int sysctl_numa_balancing_mode;
#define sysctl_numa_balancing_mode 0
#endif
+#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
+extern unsigned int sysctl_sched_place_lag_enabled;
+extern unsigned int sysctl_sched_run_to_parity_enabled;
+#else
+#define sysctl_sched_place_lag_enabled 0
+#define sysctl_sched_run_to_parity_enabled 0
+#endif
+
#endif /* _LINUX_SCHED_SYSCTL_H */
@@ -134,6 +134,19 @@ const_debug unsigned int sysctl_sched_features =
0;
#undef SCHED_FEAT
+#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
+/*
+ * Using the avg_vruntime, do the right thing and preserve lag across
+ * sleep+wake cycles. EEVDF placement strategy #1, #2 if disabled.
+ */
+__read_mostly unsigned int sysctl_sched_place_lag_enabled = 0;
+/*
+ * Inhibit (wakeup) preemption until the current task has either matched the
+ * 0-lag point or until it has exhausted its slice.
+ */
+__read_mostly unsigned int sysctl_sched_run_to_parity_enabled = 0;
+#endif
+
/*
* Print a warning if need_resched is set for the given duration (if
* LATENCY_WARN is enabled).
@@ -925,7 +925,8 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
* Once selected, run a task until it either becomes non-eligible or
* until it gets a new slice. See the HACK in set_next_entity().
*/
- if (sched_feat(RUN_TO_PARITY) && curr && curr->vlag == curr->deadline)
+ if (sysctl_sched_run_to_parity_enabled &&
+ curr && curr->vlag == curr->deadline)
return curr;
/* Pick the leftmost entity if it's eligible */
@@ -5280,7 +5281,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
*
* EEVDF: placement strategy #1 / #2
*/
- if (sched_feat(PLACE_LAG) && cfs_rq->nr_running && se->vlag) {
+ if (sysctl_sched_place_lag_enabled && cfs_rq->nr_running && se->vlag) {
struct sched_entity *curr = cfs_rq->curr;
unsigned long load;
@@ -1,10 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Using the avg_vruntime, do the right thing and preserve lag across
- * sleep+wake cycles. EEVDF placement strategy #1, #2 if disabled.
- */
-SCHED_FEAT(PLACE_LAG, false)
/*
* Give new tasks half a slice to ease into the competition.
*/
@@ -13,11 +8,6 @@ SCHED_FEAT(PLACE_DEADLINE_INITIAL, true)
* Preserve relative virtual deadline on 'migration'.
*/
SCHED_FEAT(PLACE_REL_DEADLINE, true)
-/*
- * Inhibit (wakeup) preemption until the current task has either matched the
- * 0-lag point or until is has exhausted it's slice.
- */
-SCHED_FEAT(RUN_TO_PARITY, false)
/*
* Allow wakeup of tasks with a shorter slice to cancel RUN_TO_PARITY for
* current.
@@ -2029,6 +2029,26 @@ static struct ctl_table kern_table[] = {
.extra2 = SYSCTL_INT_MAX,
},
#endif
+#ifdef CONFIG_SCHED_DEBUG
+ {
+ .procname = "sched_place_lag_enabled",
+ .data = &sysctl_sched_place_lag_enabled,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+		.proc_handler	= proc_douintvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "sched_run_to_parity_enabled",
+ .data = &sysctl_sched_run_to_parity_enabled,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+		.proc_handler	= proc_douintvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+#endif
};
static struct ctl_table vm_table[] = {
These two scheduler features have a high impact on performance for some database workloads. Move them to sysctl as they are likely to be modified and persisted across reboots. Cc: <stable@vger.kernel.org> # 6.6.x Fixes: 86bfbb7ce4f6 ("sched/fair: Add lag based placement") Fixes: 63304558ba5d ("sched/eevdf: Curb wakeup-preemption") Signed-off-by: Cristian Prundeanu <cpru@amazon.com> --- include/linux/sched/sysctl.h | 8 ++++++++ kernel/sched/core.c | 13 +++++++++++++ kernel/sched/fair.c | 5 +++-- kernel/sched/features.h | 10 ---------- kernel/sysctl.c | 20 ++++++++++++++++++++ 5 files changed, 44 insertions(+), 12 deletions(-)