diff mbox series

[v5,2/4] arch_topology: Support SMT control for OF based system

Message ID 20240806085320.63514-3-yangyicong@huawei.com (mailing list archive)
State New, archived
Headers show
Series Support SMT control on arm64 | expand

Commit Message

Yicong Yang Aug. 6, 2024, 8:53 a.m. UTC
From: Yicong Yang <yangyicong@hisilicon.com>

On building the topology from the devicetree, we've already
gotten the SMT thread number of each core. Update the largest
SMT thread number to enable the SMT control.

Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
---
 drivers/base/arch_topology.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

Comments

Dietmar Eggemann Aug. 16, 2024, 3:55 p.m. UTC | #1
On 06/08/2024 10:53, Yicong Yang wrote:
> From: Yicong Yang <yangyicong@hisilicon.com>
> 
> On building the topology from the devicetree, we've already
> gotten the SMT thread number of each core. Update the largest
> SMT thread number to enable the SMT control.

Do we have SMT Device Tree (DT) systems out there? But you're right that DT
at least supports SMT.

> Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
> ---
>  drivers/base/arch_topology.c | 13 +++++++++++++
>  1 file changed, 13 insertions(+)
> 
> diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
> index 75fcb75d5515..95513abd664f 100644
> --- a/drivers/base/arch_topology.c
> +++ b/drivers/base/arch_topology.c
> @@ -11,6 +11,7 @@
>  #include <linux/cleanup.h>
>  #include <linux/cpu.h>
>  #include <linux/cpufreq.h>
> +#include <linux/cpu_smt.h>
>  #include <linux/device.h>
>  #include <linux/of.h>
>  #include <linux/slab.h>
> @@ -531,6 +532,16 @@ static int __init get_cpu_for_node(struct device_node *node)
>  	return cpu;
>  }
>  
> +static void __init update_smt_num_threads(unsigned int num_threads)
> +{
> +	static unsigned int max_smt_thread_num = 1;
> +
> +	if (num_threads > max_smt_thread_num) {
> +		max_smt_thread_num = num_threads;
> +		cpu_smt_set_num_threads(max_smt_thread_num, max_smt_thread_num);
> +	}

This could theoretically (unlikely though) call
cpu_smt_set_num_threads() multiple times (on heterogeneous systems with
different numbers of SMT threads).
> +}
> +
>  static int __init parse_core(struct device_node *core, int package_id,
>  			     int cluster_id, int core_id)
>  {
> @@ -561,6 +572,8 @@ static int __init parse_core(struct device_node *core, int package_id,
>  		i++;
>  	} while (1);
>  
> +	update_smt_num_threads(i);
> +
>  	cpu = get_cpu_for_node(core);
>  	if (cpu >= 0) {
>  		if (!leaf) {

Why not simply do this:

-->8--

diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 75fcb75d5515..806537419715 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -30,6 +30,7 @@ static struct cpumask scale_freq_counters_mask;
 static bool scale_freq_invariant;
 DEFINE_PER_CPU(unsigned long, capacity_freq_ref) = 1;
 EXPORT_PER_CPU_SYMBOL_GPL(capacity_freq_ref);
+static unsigned int max_smt_thread_num = 1;
 
 static bool supports_scale_freq_counters(const struct cpumask *cpus)
 {
@@ -577,6 +578,9 @@ static int __init parse_core(struct device_node *core, int package_id,
 		return -EINVAL;
 	}
 
+	if (max_smt_thread_num < i)
+		max_smt_thread_num = i;
+
 	return 0;
 }
 
@@ -673,6 +677,9 @@ static int __init parse_socket(struct device_node *socket)
 	if (!has_socket)
 		ret = parse_cluster(socket, 0, -1, 0);
 
+	if (max_smt_thread_num > 1)
+		cpu_smt_set_num_threads(max_smt_thread_num, max_smt_thread_num);
+
 	return ret;
 }
Yicong Yang Aug. 19, 2024, 7:18 a.m. UTC | #2
On 2024/8/16 23:55, Dietmar Eggemann wrote:
> On 06/08/2024 10:53, Yicong Yang wrote:
>> From: Yicong Yang <yangyicong@hisilicon.com>
>>
>> On building the topology from the devicetree, we've already
>> gotten the SMT thread number of each core. Update the largest
>> SMT thread number to enable the SMT control.
> 
> Do we have SMT Device Tree (DT) systems out there? But you right that DT
> at least supports SMT.
> 

My system is based on ACPI. The DT part was emulated and tested in a QEMU VM.

>> Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
>> ---
>>  drivers/base/arch_topology.c | 13 +++++++++++++
>>  1 file changed, 13 insertions(+)
>>
>> diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
>> index 75fcb75d5515..95513abd664f 100644
>> --- a/drivers/base/arch_topology.c
>> +++ b/drivers/base/arch_topology.c
>> @@ -11,6 +11,7 @@
>>  #include <linux/cleanup.h>
>>  #include <linux/cpu.h>
>>  #include <linux/cpufreq.h>
>> +#include <linux/cpu_smt.h>
>>  #include <linux/device.h>
>>  #include <linux/of.h>
>>  #include <linux/slab.h>
>> @@ -531,6 +532,16 @@ static int __init get_cpu_for_node(struct device_node *node)
>>  	return cpu;
>>  }
>>  
>> +static void __init update_smt_num_threads(unsigned int num_threads)
>> +{
>> +	static unsigned int max_smt_thread_num = 1;
>> +
>> +	if (num_threads > max_smt_thread_num) {
>> +		max_smt_thread_num = num_threads;
>> +		cpu_smt_set_num_threads(max_smt_thread_num, max_smt_thread_num);
>> +	}
> 
> This could theoretically (unlikely though) call
> cpu_smt_set_num_threads() multiple times (on heterogeneous systems with
> different numbers of SMT threads).

Yes, indeed. I did this on purpose, since I think it does nothing unexpected; it
only updates the max threads recorded in the framework.

>> +}
>> +
>>  static int __init parse_core(struct device_node *core, int package_id,
>>  			     int cluster_id, int core_id)
>>  {
>> @@ -561,6 +572,8 @@ static int __init parse_core(struct device_node *core, int package_id,
>>  		i++;
>>  	} while (1);
>>  
>> +	update_smt_num_threads(i);
>> +
>>  	cpu = get_cpu_for_node(core);
>>  	if (cpu >= 0) {
>>  		if (!leaf) {
> 
> Why not simply do this:
> 
> -->8--
> 
> diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
> index 75fcb75d5515..806537419715 100644
> --- a/drivers/base/arch_topology.c
> +++ b/drivers/base/arch_topology.c
> @@ -30,6 +30,7 @@ static struct cpumask scale_freq_counters_mask;
>  static bool scale_freq_invariant;
>  DEFINE_PER_CPU(unsigned long, capacity_freq_ref) = 1;
>  EXPORT_PER_CPU_SYMBOL_GPL(capacity_freq_ref);
> +static unsigned int max_smt_thread_num = 1;
>  

This is fine with me, and it avoids calling cpu_smt_set_num_threads() multiple
times. We can switch to this implementation.

Thanks.

>  static bool supports_scale_freq_counters(const struct cpumask *cpus)
>  {
> @@ -577,6 +578,9 @@ static int __init parse_core(struct device_node *core, int package_id,
>  		return -EINVAL;
>  	}
>  
> +	if (max_smt_thread_num < i)
> +		max_smt_thread_num = i;
> +
>  	return 0;
>  }
>  
> @@ -673,6 +677,9 @@ static int __init parse_socket(struct device_node *socket)
>  	if (!has_socket)
>  		ret = parse_cluster(socket, 0, -1, 0);
>  
> +	if (max_smt_thread_num > 1)
> +		cpu_smt_set_num_threads(max_smt_thread_num, max_smt_thread_num);
> +
>  	return ret;
>  }
> 
> 
> .
>
diff mbox series

Patch

diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 75fcb75d5515..95513abd664f 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -11,6 +11,7 @@ 
 #include <linux/cleanup.h>
 #include <linux/cpu.h>
 #include <linux/cpufreq.h>
+#include <linux/cpu_smt.h>
 #include <linux/device.h>
 #include <linux/of.h>
 #include <linux/slab.h>
@@ -531,6 +532,16 @@  static int __init get_cpu_for_node(struct device_node *node)
 	return cpu;
 }
 
+static void __init update_smt_num_threads(unsigned int num_threads)
+{
+	static unsigned int max_smt_thread_num = 1;
+
+	if (num_threads > max_smt_thread_num) {
+		max_smt_thread_num = num_threads;
+		cpu_smt_set_num_threads(max_smt_thread_num, max_smt_thread_num);
+	}
+}
+
 static int __init parse_core(struct device_node *core, int package_id,
 			     int cluster_id, int core_id)
 {
@@ -561,6 +572,8 @@  static int __init parse_core(struct device_node *core, int package_id,
 		i++;
 	} while (1);
 
+	update_smt_num_threads(i);
+
 	cpu = get_cpu_for_node(core);
 	if (cpu >= 0) {
 		if (!leaf) {