@@ -927,6 +927,15 @@ config SCHED_MC
making when dealing with multi-core CPU chips at a cost of slightly
increased overhead in some places. If unsure say N here.
+config SCHED_ITMT
+ bool "Intel Turbo Boost Max Technology (ITMT) scheduler support"
+ depends on SCHED_MC && CPU_SUP_INTEL && X86_INTEL_PSTATE
+ ---help---
+ ITMT enabled scheduler support improves the CPU scheduler's decision
+ to move tasks to cpu core that can be boosted to a higher frequency
+ than others. It will have better performance at a cost of slightly
+ increased overhead in task migrations. If unsure say N here.
+
source "kernel/Kconfig.preempt"
config UP_LATE_INIT
@@ -150,4 +150,26 @@ int x86_pci_root_bus_node(int bus);
void x86_pci_root_bus_resources(int bus, struct list_head *resources);
extern bool x86_topology_update;
+
+#ifdef CONFIG_SCHED_ITMT
+#include <asm/percpu.h>
+
+DECLARE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
+
+/* Interface to set priority of a cpu */
+void sched_set_itmt_core_prio(int prio, int core_cpu);
+
+/* Interface to notify scheduler that system supports ITMT */
+void sched_set_itmt_support(bool itmt_supported);
+
+#else /* CONFIG_SCHED_ITMT */
+
+static inline void sched_set_itmt_core_prio(int prio, int core_cpu)
+{
+}
+static inline void sched_set_itmt_support(bool itmt_supported)
+{
+}
+#endif /* CONFIG_SCHED_ITMT */
+
#endif /* _ASM_X86_TOPOLOGY_H */
@@ -124,6 +124,7 @@ obj-$(CONFIG_EFI) += sysfb_efi.o
obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
obj-$(CONFIG_TRACING) += tracepoint.o
+obj-$(CONFIG_SCHED_ITMT) += itmt.o
###
# 64 bit specific files
new file mode 100644
@@ -0,0 +1,95 @@
+/*
+ * itmt.c: Support Intel Turbo Boost Max Technology 3.0
+ *
+ * (C) Copyright 2016 Intel Corporation
+ * Author: Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ *
+ * On platforms supporting Intel Turbo Boost Max Technology 3.0, (ITMT),
+ * the maximum turbo frequencies of some cores in a CPU package may be
+ * higher than for the other cores in the same package. In that case,
+ * better performance can be achieved by making the scheduler prefer
+ * to run tasks on the CPUs with higher max turbo frequencies.
+ *
+ * This file provides functions and data structures for enabling the
+ * scheduler to favor scheduling on cores can be boosted to a higher
+ * frequency under ITMT.
+ */
+
+#include <linux/sched.h>
+#include <linux/cpumask.h>
+#include <linux/cpuset.h>
+#include <asm/mutex.h>
+#include <linux/sched.h>
+#include <linux/sysctl.h>
+#include <linux/nodemask.h>
+
+static DEFINE_MUTEX(itmt_update_mutex);
+
+/* Boolean to track if system has ITMT capabilities */
+static bool __read_mostly sched_itmt_capable;
+
+/**
+ * sched_set_itmt_support - Indicate platform support ITMT
+ * @itmt_supported: indicate platform's CPU has ITMT capability
+ *
+ * This function is used by the OS to indicate to scheduler if the platform
+ * is capable of supporting the ITMT feature.
+ *
+ * The current scheme has the pstate driver detects if the system
+ * is ITMT capable and call set_sched_itmt.
+ *
+ * This must be done only after sched_set_itmt_core_prio
+ * has been called to set the cpus' priorities.
+ */
+void sched_set_itmt_support(bool itmt_supported)
+{
+ mutex_lock(&itmt_update_mutex);
+
+ if (itmt_supported != sched_itmt_capable)
+ sched_itmt_capable = itmt_supported;
+
+ mutex_unlock(&itmt_update_mutex);
+}
+
+DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
+int arch_asym_cpu_priority(int cpu)
+{
+ return per_cpu(sched_core_priority, cpu);
+}
+
+/**
+ * sched_set_itmt_core_prio - Set CPU priority based on ITMT
+ * @prio: Priority of cpu core
+ * @core_cpu: The cpu number associated with the core
+ *
+ * The pstate driver will find out the max boost frequency
+ * and call this function to set a priority proportional
+ * to the max boost frequency. CPU with higher boost
+ * frequency will receive higher priority.
+ *
+ * No need to rebuild sched domain after updating
+ * the CPU priorities. The sched domains have no
+ * dependency on CPU priorities.
+ */
+void sched_set_itmt_core_prio(int prio, int core_cpu)
+{
+ int cpu, i = 1;
+
+ for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) {
+ int smt_prio;
+
+ /*
+ * Ensure that the siblings are moved to the end
+ * of the priority chain and only used when
+ * all other high priority cpus are out of capacity.
+ */
+ smt_prio = prio * smp_num_siblings / i;
+ i++;
+ per_cpu(sched_core_priority, cpu) = smt_prio;
+ }
+}