diff mbox series

[v2,2/2] cpufreq: amd-pstate: change cpu freq transition delay for some models

Message ID b2c8fb2da41f9fb21f095f67d99cbdbd0aa34091.1716356681.git.Xiaojian.Du@amd.com (mailing list archive)
State Superseded, archived
Headers show
Series [v2,1/2] x86/cpufeatures: Add AMD FAST CPPC feature flag | expand

Commit Message

Du, Xiaojian May 22, 2024, 5:50 a.m. UTC
Some AMD ZEN4 APUs/CPUs support adjusting the CPU core
clock more quickly and precisely according to the CPU workload.
This is advertised by the Fast CPPC x86 feature.
This change is only effective in the *passive mode* of the
AMD pstate driver. Based on test results with different
transition delay values, 600us was chosen to strike a balance
between performance and power consumption.

Some test results on AMD Ryzen 7840HS(Phoenix) APU:

1. Tbench
(Energy less is better, Throughput more is better,
PPW--Performance per Watt more is better)
============= =================== ============== =============== ============== =============== ============== =============== ===============
 Trans Delay   Tbench              governor:schedutil, 3-iterations average
============= =================== ============== =============== ============== =============== ============== =============== ===============
 1000us        Clients             1              2               4              8              12             16              32
               Energy/Joules       2010           2804            8768           17171          16170          15132           15027
               Throughput/(MB/s)   114            259             1041           3010           3135           4851            4605
               PPW                 0.0567         0.0923          0.1187         0.1752         0.1938         0.3205          0.3064
 600us         Clients             1              2               4              8              12             16              32
               Energy/Joules       2115  (5.22%)  2388  (-14.84%) 10700(22.03%)  16716 (-2.65%) 15939 (-1.43%) 15053 (-0.52%)  15083 (0.37% )
               Throughput/(MB/s)   122   (7.02%)  234   (-9.65% ) 1188 (14.12%)  3003  (-0.23%) 3143  (0.26% ) 4842  (-0.19%)  4603  (-0.04%)
               PPW                 0.0576(1.59%)  0.0979(6.07%  ) 0.111(-6.49%)  0.1796(2.51% ) 0.1971(1.70% ) 0.3216(0.34% )  0.3051(-0.42%)
============= =================== ============== ================ ============= =============== ============== =============== ===============

2.Dbench
(Energy less is better, Throughput more is better,
PPW--Performance per Watt more is better)
============= =================== ============== =============== ============== =============== ============== =============== ===============
 Trans Delay   Dbench              governor:schedutil, 3-iterations average
============= =================== ============== =============== ============== =============== ============== =============== ===============
 1000us        Clients             1             2               4              8               12             16              32
               Energy/Joules       4890          3779            3567           5157            5611           6500            8163
               Throughput/(MB/s)   327           167             220            577             775            938             1397
               PPW                 0.0668        0.0441          0.0616         0.1118          0.1381         0.1443          0.1711
 600us         Clients             1             2               4              8               12             16              32
               Energy/Joules       4915  (0.51%) 4912  (29.98%)  3506  (-1.71%) 4907  (-4.85% ) 5011 (-10.69%) 5672  (-12.74%) 8141  (-0.27%)
               Throughput/(MB/s)   348   (6.42%) 284   (70.06%)  220   (0.00% ) 518   (-10.23%) 712  (-8.13% ) 854   (-8.96% ) 1475  (5.58% )
               PPW                 0.0708(5.99%) 0.0578(31.07%)  0.0627(1.79% ) 0.1055(-5.64% ) 0.142(2.82%  ) 0.1505(4.30%  ) 0.1811(5.84% )
============= =================== ============== =============== ============== =============== ============== =============== ===============

3.Hackbench(less time is better)
============= =========================== ==========================
  hackbench     governor:schedutil
============= =========================== ==========================
  Trans Delay   Process Mode Ave time(s)  Thread Mode Ave time(s)
  1000us        14.484                      14.484
  600us         14.418(-0.46%)              15.41(+6.39%)
============= =========================== ==========================

4.Perf_sched_bench(less time is better)
============= =================== ============== ============== ============== =============== =============== =============
 Trans Delay  perf_sched_bench    governor:schedutil
============= =================== ============== ============== ============== =============== =============== =============
  1000us        Groups             1             2              4              8               12              24
                AveTime(s)        1.64          2.851          5.878          11.636          16.093          26.395
  600us         Groups             1             2              4              8               12              24
                AveTime(s)        1.69(3.05%)   2.845(-0.21%)  5.843(-0.60%)  11.576(-0.52%)  16.092(-0.01%)  26.32(-0.28%)
============= ================== ============== ============== ============== =============== =============== ==============

5.Sysbench(higher is better)
============= ================== ============== ================= ============== ================ =============== =================
  Sysbench    governor:schedutil
============= ================== ============== ================= ============== ================ =============== =================
  1000us      Thread             1               2                4              8                12               24
              Ave events         6020.98         12273.39         24119.82       46171.57         47074.37         47831.72
  600us       Thread             1               2                4              8                12               24
              Ave events         6154.82(2.22%)  12271.63(-0.01%) 24392.5(1.13%) 46117.64(-0.12%) 46852.19(-0.47%) 47678.92(-0.32%)
============= ================== ============== ================= ============== ================ =============== =================

In conclusion, a shorter CPU clock transition delay
noticeably improves PPW on the Dbench test,
while keeping performance stable on Tbench,
Hackbench, Perf_sched_bench and Sysbench.

Signed-off-by: Xiaojian Du <Xiaojian.Du@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
Reviewed-by: Perry Yuan <perry.yuan@amd.com>
---
 drivers/cpufreq/amd-pstate.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

Comments

kernel test robot May 22, 2024, 6:11 p.m. UTC | #1
Hi Xiaojian,

kernel test robot noticed the following build warnings:

[auto build test WARNING on tip/master]
[also build test WARNING on rafael-pm/linux-next rafael-pm/bleeding-edge linus/master next-20240522]
[cannot apply to tip/x86/core tip/auto-latest v6.9]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Xiaojian-Du/cpufreq-amd-pstate-change-cpu-freq-transition-delay-for-some-models/20240522-135241
base:   tip/master
patch link:    https://lore.kernel.org/r/b2c8fb2da41f9fb21f095f67d99cbdbd0aa34091.1716356681.git.Xiaojian.Du%40amd.com
patch subject: [PATCH v2 2/2] cpufreq: amd-pstate: change cpu freq transition delay for some models
config: x86_64-defconfig (https://download.01.org/0day-ci/archive/20240523/202405230126.Ww25ogId-lkp@intel.com/config)
compiler: gcc-13 (Ubuntu 13.2.0-4ubuntu3) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240523/202405230126.Ww25ogId-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202405230126.Ww25ogId-lkp@intel.com/

All warnings (new ones prefixed by >>):

   drivers/cpufreq/amd-pstate.c: In function 'amd_pstate_get_transition_delay_us':
>> drivers/cpufreq/amd-pstate.c:821:12: warning: suggest explicit braces to avoid ambiguous 'else' [-Wdangling-else]
     821 |         if (transition_delay_ns == CPUFREQ_ETERNAL)
         |            ^


vim +/else +821 drivers/cpufreq/amd-pstate.c

e571a5e2068ef5 Meng Li     2024-01-19  811  
5131a3ca3518d7 Perry Yuan  2024-04-30  812  /*
069a2bb8c48c43 Perry Yuan  2024-04-25  813   * Get pstate transition delay time from ACPI tables that firmware set
069a2bb8c48c43 Perry Yuan  2024-04-25  814   * instead of using hardcode value directly.
069a2bb8c48c43 Perry Yuan  2024-04-25  815   */
069a2bb8c48c43 Perry Yuan  2024-04-25  816  static u32 amd_pstate_get_transition_delay_us(unsigned int cpu)
069a2bb8c48c43 Perry Yuan  2024-04-25  817  {
069a2bb8c48c43 Perry Yuan  2024-04-25  818  	u32 transition_delay_ns;
069a2bb8c48c43 Perry Yuan  2024-04-25  819  
069a2bb8c48c43 Perry Yuan  2024-04-25  820  	transition_delay_ns = cppc_get_transition_latency(cpu);
069a2bb8c48c43 Perry Yuan  2024-04-25 @821  	if (transition_delay_ns == CPUFREQ_ETERNAL)
be020c2c1f8622 Xiaojian Du 2024-05-22  822  		if (cpu_feature_enabled(X86_FEATURE_FAST_CPPC))
be020c2c1f8622 Xiaojian Du 2024-05-22  823  			return AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY;
be020c2c1f8622 Xiaojian Du 2024-05-22  824  		else
069a2bb8c48c43 Perry Yuan  2024-04-25  825  			return AMD_PSTATE_TRANSITION_DELAY;
069a2bb8c48c43 Perry Yuan  2024-04-25  826  
069a2bb8c48c43 Perry Yuan  2024-04-25  827  	return transition_delay_ns / NSEC_PER_USEC;
069a2bb8c48c43 Perry Yuan  2024-04-25  828  }
069a2bb8c48c43 Perry Yuan  2024-04-25  829
kernel test robot May 22, 2024, 7:45 p.m. UTC | #2
Hi Xiaojian,

kernel test robot noticed the following build warnings:

[auto build test WARNING on tip/master]
[also build test WARNING on rafael-pm/linux-next rafael-pm/bleeding-edge linus/master next-20240522]
[cannot apply to tip/x86/core tip/auto-latest v6.9]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Xiaojian-Du/cpufreq-amd-pstate-change-cpu-freq-transition-delay-for-some-models/20240522-135241
base:   tip/master
patch link:    https://lore.kernel.org/r/b2c8fb2da41f9fb21f095f67d99cbdbd0aa34091.1716356681.git.Xiaojian.Du%40amd.com
patch subject: [PATCH v2 2/2] cpufreq: amd-pstate: change cpu freq transition delay for some models
config: x86_64-randconfig-006-20240522 (https://download.01.org/0day-ci/archive/20240523/202405230325.UPlOikDm-lkp@intel.com/config)
compiler: clang version 18.1.5 (https://github.com/llvm/llvm-project 617a15a9eac96088ae5e9134248d8236e34b91b1)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240523/202405230325.UPlOikDm-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202405230325.UPlOikDm-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> drivers/cpufreq/amd-pstate.c:824:3: warning: add explicit braces to avoid dangling else [-Wdangling-else]
     824 |                 else
         |                 ^
   1 warning generated.


vim +824 drivers/cpufreq/amd-pstate.c

   811	
   812	/*
   813	 * Get pstate transition delay time from ACPI tables that firmware set
   814	 * instead of using hardcode value directly.
   815	 */
   816	static u32 amd_pstate_get_transition_delay_us(unsigned int cpu)
   817	{
   818		u32 transition_delay_ns;
   819	
   820		transition_delay_ns = cppc_get_transition_latency(cpu);
   821		if (transition_delay_ns == CPUFREQ_ETERNAL)
   822			if (cpu_feature_enabled(X86_FEATURE_FAST_CPPC))
   823				return AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY;
 > 824			else
   825				return AMD_PSTATE_TRANSITION_DELAY;
   826	
   827		return transition_delay_ns / NSEC_PER_USEC;
   828	}
   829
diff mbox series

Patch

diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 6a342b0c0140..572064a7682f 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -50,6 +50,7 @@ 
 
 #define AMD_PSTATE_TRANSITION_LATENCY	20000
 #define AMD_PSTATE_TRANSITION_DELAY	1000
+#define AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY 600
 #define CPPC_HIGHEST_PERF_PERFORMANCE	196
 #define CPPC_HIGHEST_PERF_DEFAULT	166
 
@@ -818,7 +819,10 @@  static u32 amd_pstate_get_transition_delay_us(unsigned int cpu)
 
 	transition_delay_ns = cppc_get_transition_latency(cpu);
 	if (transition_delay_ns == CPUFREQ_ETERNAL)
-		return AMD_PSTATE_TRANSITION_DELAY;
+		if (cpu_feature_enabled(X86_FEATURE_FAST_CPPC))
+			return AMD_PSTATE_FAST_CPPC_TRANSITION_DELAY;
+		else
+			return AMD_PSTATE_TRANSITION_DELAY;
 
 	return transition_delay_ns / NSEC_PER_USEC;
 }