Message ID | 20240806085320.63514-3-yangyicong@huawei.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Support SMT control on arm64 | expand |
On 06/08/2024 10:53, Yicong Yang wrote: > From: Yicong Yang <yangyicong@hisilicon.com> > > On building the topology from the devicetree, we've already > gotten the SMT thread number of each core. Update the largest > SMT thread number to enable the SMT control. Do we have SMT Device Tree (DT) systems out there? But you're right that DT at least supports SMT. > Signed-off-by: Yicong Yang <yangyicong@hisilicon.com> > --- > drivers/base/arch_topology.c | 13 +++++++++++++ > 1 file changed, 13 insertions(+) > > diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c > index 75fcb75d5515..95513abd664f 100644 > --- a/drivers/base/arch_topology.c > +++ b/drivers/base/arch_topology.c > @@ -11,6 +11,7 @@ > #include <linux/cleanup.h> > #include <linux/cpu.h> > #include <linux/cpufreq.h> > +#include <linux/cpu_smt.h> > #include <linux/device.h> > #include <linux/of.h> > #include <linux/slab.h> > @@ -531,6 +532,16 @@ static int __init get_cpu_for_node(struct device_node *node) > return cpu; > } > > +static void __init update_smt_num_threads(unsigned int num_threads) > +{ > + static unsigned int max_smt_thread_num = 1; > + > + if (num_threads > max_smt_thread_num) { > + max_smt_thread_num = num_threads; > + cpu_smt_set_num_threads(max_smt_thread_num, max_smt_thread_num); > + } This could theoretically (unlikely though) call cpu_smt_set_num_threads() multiple times (on heterogeneous systems with different numbers of SMT threads). 
> +} > + > static int __init parse_core(struct device_node *core, int package_id, > int cluster_id, int core_id) > { > @@ -561,6 +572,8 @@ static int __init parse_core(struct device_node *core, int package_id, > i++; > } while (1); > > + update_smt_num_threads(i); > + > cpu = get_cpu_for_node(core); > if (cpu >= 0) { > if (!leaf) { Why not simply do this: -->8-- diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 75fcb75d5515..806537419715 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -30,6 +30,7 @@ static struct cpumask scale_freq_counters_mask; static bool scale_freq_invariant; DEFINE_PER_CPU(unsigned long, capacity_freq_ref) = 1; EXPORT_PER_CPU_SYMBOL_GPL(capacity_freq_ref); +static unsigned int max_smt_thread_num = 1; static bool supports_scale_freq_counters(const struct cpumask *cpus) { @@ -577,6 +578,9 @@ static int __init parse_core(struct device_node *core, int package_id, return -EINVAL; } + if (max_smt_thread_num < i) + max_smt_thread_num = i; + return 0; } @@ -673,6 +677,9 @@ static int __init parse_socket(struct device_node *socket) if (!has_socket) ret = parse_cluster(socket, 0, -1, 0); + if (max_smt_thread_num > 1) + cpu_smt_set_num_threads(max_smt_thread_num, max_smt_thread_num); + return ret; }
On 2024/8/16 23:55, Dietmar Eggemann wrote: > On 06/08/2024 10:53, Yicong Yang wrote: >> From: Yicong Yang <yangyicong@hisilicon.com> >> >> On building the topology from the devicetree, we've already >> gotten the SMT thread number of each core. Update the largest >> SMT thread number to enable the SMT control. > > Do we have SMT Device Tree (DT) systems out there? But you right that DT > at least supports SMT. > My system is based on ACPI. The DT part was emulated and tested in a QEMU VM. >> Signed-off-by: Yicong Yang <yangyicong@hisilicon.com> >> --- >> drivers/base/arch_topology.c | 13 +++++++++++++ >> 1 file changed, 13 insertions(+) >> >> diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c >> index 75fcb75d5515..95513abd664f 100644 >> --- a/drivers/base/arch_topology.c >> +++ b/drivers/base/arch_topology.c >> @@ -11,6 +11,7 @@ >> #include <linux/cleanup.h> >> #include <linux/cpu.h> >> #include <linux/cpufreq.h> >> +#include <linux/cpu_smt.h> >> #include <linux/device.h> >> #include <linux/of.h> >> #include <linux/slab.h> >> @@ -531,6 +532,16 @@ static int __init get_cpu_for_node(struct device_node *node) >> return cpu; >> } >> >> +static void __init update_smt_num_threads(unsigned int num_threads) >> +{ >> + static unsigned int max_smt_thread_num = 1; >> + >> + if (num_threads > max_smt_thread_num) { >> + max_smt_thread_num = num_threads; >> + cpu_smt_set_num_threads(max_smt_thread_num, max_smt_thread_num); >> + } > > This could theoretically (unlikely though) call > cpu_smt_set_num_threads() multiple times (on heterogeneous systems with > different numbers of SMT threads). Yes indeed. I did this on purpose, since I think it does nothing unexpected and only updates the max threads recorded in the framework. 
>> +} >> + >> static int __init parse_core(struct device_node *core, int package_id, >> int cluster_id, int core_id) >> { >> @@ -561,6 +572,8 @@ static int __init parse_core(struct device_node *core, int package_id, >> i++; >> } while (1); >> >> + update_smt_num_threads(i); >> + >> cpu = get_cpu_for_node(core); >> if (cpu >= 0) { >> if (!leaf) { > > Why not simply do this: > > -->8-- > > diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c > index 75fcb75d5515..806537419715 100644 > --- a/drivers/base/arch_topology.c > +++ b/drivers/base/arch_topology.c > @@ -30,6 +30,7 @@ static struct cpumask scale_freq_counters_mask; > static bool scale_freq_invariant; > DEFINE_PER_CPU(unsigned long, capacity_freq_ref) = 1; > EXPORT_PER_CPU_SYMBOL_GPL(capacity_freq_ref); > +static unsigned int max_smt_thread_num = 1; > This is fine with me, and it avoids calling cpu_smt_set_num_threads() multiple times. We can switch to this implementation. Thanks. > static bool supports_scale_freq_counters(const struct cpumask *cpus) > { > @@ -577,6 +578,9 @@ static int __init parse_core(struct device_node *core, int package_id, > return -EINVAL; > } > > + if (max_smt_thread_num < i) > + max_smt_thread_num = i; > + > return 0; > } > > @@ -673,6 +677,9 @@ static int __init parse_socket(struct device_node *socket) > if (!has_socket) > ret = parse_cluster(socket, 0, -1, 0); > > + if (max_smt_thread_num > 1) > + cpu_smt_set_num_threads(max_smt_thread_num, max_smt_thread_num); > + > return ret; > } > > > . >
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 75fcb75d5515..95513abd664f 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -11,6 +11,7 @@ #include <linux/cleanup.h> #include <linux/cpu.h> #include <linux/cpufreq.h> +#include <linux/cpu_smt.h> #include <linux/device.h> #include <linux/of.h> #include <linux/slab.h> @@ -531,6 +532,16 @@ static int __init get_cpu_for_node(struct device_node *node) return cpu; } +static void __init update_smt_num_threads(unsigned int num_threads) +{ + static unsigned int max_smt_thread_num = 1; + + if (num_threads > max_smt_thread_num) { + max_smt_thread_num = num_threads; + cpu_smt_set_num_threads(max_smt_thread_num, max_smt_thread_num); + } +} + static int __init parse_core(struct device_node *core, int package_id, int cluster_id, int core_id) { @@ -561,6 +572,8 @@ static int __init parse_core(struct device_node *core, int package_id, i++; } while (1); + update_smt_num_threads(i); + cpu = get_cpu_for_node(core); if (cpu >= 0) { if (!leaf) {