diff mbox series

[RFC] msr: judge the return val of function rdmsrl_on_cpu() by WARN_ON

Message ID 20230629072754.39844-1-lizhe.67@bytedance.com (mailing list archive)
State New, archived
Headers show
Series [RFC] msr: judge the return val of function rdmsrl_on_cpu() by WARN_ON | expand

Commit Message

lizhe.67@bytedance.com June 29, 2023, 7:27 a.m. UTC
From: Li Zhe <lizhe.67@bytedance.com>

There are ten places in the current code that call rdmsrl_on_cpu() without
checking the return value. This may introduce a potential bug. For example,
inj_bank_set() may return -EINVAL, show_base_frequency() may show an
erroneous frequency value, intel_pstate_hwp_set() may write an erroneous
value to the related MSR register, and so on. But rdmsrl_on_cpu() only
rarely returns an error, so it seems that adding a WARN_ON is enough for
debugging.

Signed-off-by: Li Zhe <lizhe.67@bytedance.com>
---
 arch/x86/kernel/cpu/mce/inject.c |  2 +-
 drivers/cpufreq/intel_pstate.c   | 18 +++++++++---------
 2 files changed, 10 insertions(+), 10 deletions(-)

Comments

H. Peter Anvin June 29, 2023, 3:13 p.m. UTC | #1
On June 29, 2023 12:27:54 AM PDT, lizhe.67@bytedance.com wrote:
>From: Li Zhe <lizhe.67@bytedance.com>
>
>There are ten places call rdmsrl_on_cpu() in the current code without
>judging the return value. This may introduce a potential bug. For example,
>inj_bank_set() may return -EINVAL, show_base_frequency() may show an error
>freq value, intel_pstate_hwp_set() may write an error value to the related
>msr register and so on. But rdmsrl_on_cpu() do rarely returns an error, so
>it seems that add a WARN_ON is enough for debugging.
>
>Signed-off-by: Li Zhe <lizhe.67@bytedance.com>
>---
> arch/x86/kernel/cpu/mce/inject.c |  2 +-
> drivers/cpufreq/intel_pstate.c   | 18 +++++++++---------
> 2 files changed, 10 insertions(+), 10 deletions(-)
>
>diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
>index 12cf2e7ca33c..0a34057f4fc6 100644
>--- a/arch/x86/kernel/cpu/mce/inject.c
>+++ b/arch/x86/kernel/cpu/mce/inject.c
>@@ -587,7 +587,7 @@ static int inj_bank_set(void *data, u64 val)
> 	u64 cap;
> 
> 	/* Get bank count on target CPU so we can handle non-uniform values. */
>-	rdmsrl_on_cpu(m->extcpu, MSR_IA32_MCG_CAP, &cap);
>+	WARN_ON(rdmsrl_on_cpu(m->extcpu, MSR_IA32_MCG_CAP, &cap));
> 	n_banks = cap & MCG_BANKCNT_MASK;
> 
> 	if (val >= n_banks) {
>diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
>index 2548ec92faa2..fe2bdb38d6a0 100644
>--- a/drivers/cpufreq/intel_pstate.c
>+++ b/drivers/cpufreq/intel_pstate.c
>@@ -859,7 +859,7 @@ static ssize_t show_base_frequency(struct cpufreq_policy *policy, char *buf)
> 	if (ratio <= 0) {
> 		u64 cap;
> 
>-		rdmsrl_on_cpu(policy->cpu, MSR_HWP_CAPABILITIES, &cap);
>+		WARN_ON(rdmsrl_on_cpu(policy->cpu, MSR_HWP_CAPABILITIES, &cap));
> 		ratio = HWP_GUARANTEED_PERF(cap);
> 	}
> 
>@@ -883,7 +883,7 @@ static void __intel_pstate_get_hwp_cap(struct cpudata *cpu)
> {
> 	u64 cap;
> 
>-	rdmsrl_on_cpu(cpu->cpu, MSR_HWP_CAPABILITIES, &cap);
>+	WARN_ON(rdmsrl_on_cpu(cpu->cpu, MSR_HWP_CAPABILITIES, &cap));
> 	WRITE_ONCE(cpu->hwp_cap_cached, cap);
> 	cpu->pstate.max_pstate = HWP_GUARANTEED_PERF(cap);
> 	cpu->pstate.turbo_pstate = HWP_HIGHEST_PERF(cap);
>@@ -920,7 +920,7 @@ static void intel_pstate_hwp_set(unsigned int cpu)
> 	if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE)
> 		min = max;
> 
>-	rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
>+	WARN_ON(rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value));
> 
> 	value &= ~HWP_MIN_PERF(~0L);
> 	value |= HWP_MIN_PERF(min);
>@@ -1802,7 +1802,7 @@ static int core_get_min_pstate(int cpu)
> {
> 	u64 value;
> 
>-	rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &value);
>+	WARN_ON(rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &value));
> 	return (value >> 40) & 0xFF;
> }
> 
>@@ -1810,7 +1810,7 @@ static int core_get_max_pstate_physical(int cpu)
> {
> 	u64 value;
> 
>-	rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &value);
>+	WARN_ON(rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &value));
> 	return (value >> 8) & 0xFF;
> }
> 
>@@ -1855,7 +1855,7 @@ static int core_get_max_pstate(int cpu)
> 	int tdp_ratio;
> 	int err;
> 
>-	rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &plat_info);
>+	WARN_ON(rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &plat_info));
> 	max_pstate = (plat_info >> 8) & 0xFF;
> 
> 	tdp_ratio = core_get_tdp_ratio(cpu, plat_info);
>@@ -1887,7 +1887,7 @@ static int core_get_turbo_pstate(int cpu)
> 	u64 value;
> 	int nont, ret;
> 
>-	rdmsrl_on_cpu(cpu, MSR_TURBO_RATIO_LIMIT, &value);
>+	WARN_ON(rdmsrl_on_cpu(cpu, MSR_TURBO_RATIO_LIMIT, &value));
> 	nont = core_get_max_pstate(cpu);
> 	ret = (value) & 255;
> 	if (ret <= nont)
>@@ -1921,7 +1921,7 @@ static int knl_get_turbo_pstate(int cpu)
> 	u64 value;
> 	int nont, ret;
> 
>-	rdmsrl_on_cpu(cpu, MSR_TURBO_RATIO_LIMIT, &value);
>+	WARN_ON(rdmsrl_on_cpu(cpu, MSR_TURBO_RATIO_LIMIT, &value));
> 	nont = core_get_max_pstate(cpu);
> 	ret = (((value) >> 8) & 0xFF);
> 	if (ret <= nont)
>@@ -2974,7 +2974,7 @@ static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
> 
> 		intel_pstate_get_hwp_cap(cpu);
> 
>-		rdmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, &value);
>+		WARN_ON(rdmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, &value));
> 		WRITE_ONCE(cpu->hwp_req_cached, value);
> 
> 		cpu->epp_cached = intel_pstate_get_epp(cpu, value);

Be careful here: if a return value of zero is acceptable as an equivalent of no return, the code is correct, as we always return zero if the MSR faults.
Thomas Gleixner June 29, 2023, 11:03 p.m. UTC | #2
Li!

On Thu, Jun 29 2023 at 15:27, lizhe.67@bytedance.com wrote:

> There are ten places call rdmsrl_on_cpu() in the current code without
> judging the return value. This may introduce a potential bug. For example,
> inj_bank_set() may return -EINVAL, show_base_frequency() may show an error
> freq value, intel_pstate_hwp_set() may write an error value to the related
> msr register and so on. But rdmsrl_on_cpu() do rarely returns an error, so
> it seems that add a WARN_ON is enough for debugging.

Can you please structure your changelogs as documented in:

  https://www.kernel.org/doc/html/latest/process/maintainer-tip.html#changelog

instead of providing a big lump of words?

> There are ten places call rdmsrl_on_cpu() in the current code without
> judging the return value.

Return values are not judged. They are either ignored or checked/evaluated.

> This may introduce a potential bug.

Sure. Anything which does not check a return value from a function might
be a bug, but you have to look at each instance whether its a bug or
not.

> For example, inj_bank_set() may return -EINVAL, show_base_frequency()
> may show an error freq value, intel_pstate_hwp_set() may write an
> error value to the related msr register and so on.  But
> rdmsrl_on_cpu() do rarely returns an error, so it seems that add a
> WARN_ON is enough for debugging.

This is hilarious at best.

  1) It does not matter at all whether that function returns an error rarely
     or not.

  2) Adding WARN_ON() without justification at each call site is not
     enough. Neither for debugging nor for real world usage.

You have to come up with individual patches for each callsite to add the
WARN_ON() and in each patch you have to explain why it is justified and
why there is no other solution, e.g. taking an error exit path.

Just slapping WARN_ON()'s into the code without any deeper analysis is
worse than the current state of the code.

If you have identified a real world problem at any of these call sites
then adding a WARN_ON() does not solve it at all.

I'm looking forward to your profound analysis of each of these "problems".

Thanks,

        tglx
lizhe.67@bytedance.com June 30, 2023, 2:42 a.m. UTC | #3
On 29 Jun 2023 08:13:27 -0700, hpa@zytor.com wrote:
>>There are ten places call rdmsrl_on_cpu() in the current code without
>>judging the return value. This may introduce a potential bug. For example,
>>inj_bank_set() may return -EINVAL, show_base_frequency() may show an error
>>freq value, intel_pstate_hwp_set() may write an error value to the related
>>msr register and so on. But rdmsrl_on_cpu() do rarely returns an error, so
>>it seems that add a WARN_ON is enough for debugging.
>>
>>Signed-off-by: Li Zhe <lizhe.67@bytedance.com>
>>---
>> arch/x86/kernel/cpu/mce/inject.c |  2 +-
>> drivers/cpufreq/intel_pstate.c   | 18 +++++++++---------
>> 2 files changed, 10 insertions(+), 10 deletions(-)
>>
>>diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
>>index 12cf2e7ca33c..0a34057f4fc6 100644
>>--- a/arch/x86/kernel/cpu/mce/inject.c
>>+++ b/arch/x86/kernel/cpu/mce/inject.c
>>@@ -587,7 +587,7 @@ static int inj_bank_set(void *data, u64 val)
>> 	u64 cap;
>> 
>> 	/* Get bank count on target CPU so we can handle non-uniform values. */
>>-	rdmsrl_on_cpu(m->extcpu, MSR_IA32_MCG_CAP, &cap);
>>+	WARN_ON(rdmsrl_on_cpu(m->extcpu, MSR_IA32_MCG_CAP, &cap));
>> 	n_banks = cap & MCG_BANKCNT_MASK;
>> 
>> 	if (val >= n_banks) {
>>diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
>>index 2548ec92faa2..fe2bdb38d6a0 100644
>>--- a/drivers/cpufreq/intel_pstate.c
>>+++ b/drivers/cpufreq/intel_pstate.c
>>@@ -859,7 +859,7 @@ static ssize_t show_base_frequency(struct cpufreq_policy *policy, char *buf)
>> 	if (ratio <= 0) {
>> 		u64 cap;
>> 
>>-		rdmsrl_on_cpu(policy->cpu, MSR_HWP_CAPABILITIES, &cap);
>>+		WARN_ON(rdmsrl_on_cpu(policy->cpu, MSR_HWP_CAPABILITIES, &cap));
>> 		ratio = HWP_GUARANTEED_PERF(cap);
>> 	}
>> 
>>@@ -883,7 +883,7 @@ static void __intel_pstate_get_hwp_cap(struct cpudata *cpu)
>> {
>> 	u64 cap;
>> 
>>-	rdmsrl_on_cpu(cpu->cpu, MSR_HWP_CAPABILITIES, &cap);
>>+	WARN_ON(rdmsrl_on_cpu(cpu->cpu, MSR_HWP_CAPABILITIES, &cap));
>> 	WRITE_ONCE(cpu->hwp_cap_cached, cap);
>> 	cpu->pstate.max_pstate = HWP_GUARANTEED_PERF(cap);
>> 	cpu->pstate.turbo_pstate = HWP_HIGHEST_PERF(cap);
>>@@ -920,7 +920,7 @@ static void intel_pstate_hwp_set(unsigned int cpu)
>> 	if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE)
>> 		min = max;
>> 
>>-	rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
>>+	WARN_ON(rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value));
>> 
>> 	value &= ~HWP_MIN_PERF(~0L);
>> 	value |= HWP_MIN_PERF(min);
>>@@ -1802,7 +1802,7 @@ static int core_get_min_pstate(int cpu)
>> {
>> 	u64 value;
>> 
>>-	rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &value);
>>+	WARN_ON(rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &value));
>> 	return (value >> 40) & 0xFF;
>> }
>> 
>>@@ -1810,7 +1810,7 @@ static int core_get_max_pstate_physical(int cpu)
>> {
>> 	u64 value;
>> 
>>-	rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &value);
>>+	WARN_ON(rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &value));
>> 	return (value >> 8) & 0xFF;
>> }
>> 
>>@@ -1855,7 +1855,7 @@ static int core_get_max_pstate(int cpu)
>> 	int tdp_ratio;
>> 	int err;
>> 
>>-	rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &plat_info);
>>+	WARN_ON(rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &plat_info));
>> 	max_pstate = (plat_info >> 8) & 0xFF;
>> 
>> 	tdp_ratio = core_get_tdp_ratio(cpu, plat_info);
>>@@ -1887,7 +1887,7 @@ static int core_get_turbo_pstate(int cpu)
>> 	u64 value;
>> 	int nont, ret;
>> 
>>-	rdmsrl_on_cpu(cpu, MSR_TURBO_RATIO_LIMIT, &value);
>>+	WARN_ON(rdmsrl_on_cpu(cpu, MSR_TURBO_RATIO_LIMIT, &value));
>> 	nont = core_get_max_pstate(cpu);
>> 	ret = (value) & 255;
>> 	if (ret <= nont)
>>@@ -1921,7 +1921,7 @@ static int knl_get_turbo_pstate(int cpu)
>> 	u64 value;
>> 	int nont, ret;
>> 
>>-	rdmsrl_on_cpu(cpu, MSR_TURBO_RATIO_LIMIT, &value);
>>+	WARN_ON(rdmsrl_on_cpu(cpu, MSR_TURBO_RATIO_LIMIT, &value));
>> 	nont = core_get_max_pstate(cpu);
>> 	ret = (((value) >> 8) & 0xFF);
>> 	if (ret <= nont)
>>@@ -2974,7 +2974,7 @@ static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
>> 
>> 		intel_pstate_get_hwp_cap(cpu);
>> 
>>-		rdmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, &value);
>>+		WARN_ON(rdmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, &value));
>> 		WRITE_ONCE(cpu->hwp_req_cached, value);
>> 
>> 		cpu->epp_cached = intel_pstate_get_epp(cpu, value);
>
>Be careful here: if a return value of zero is acceptable as an equivalent of no return, the code is correct, as we always return zero if the MSR faults.

Thanks for your advice. I will take tglx@linutronix.de's advice to analyze each callsite.
lizhe.67@bytedance.com June 30, 2023, 2:45 a.m. UTC | #4
On Fri, 30 Jun 2023 at 01:03:22 +0200, tglx@linutronix.de wrote:
>Li!
>
>On Thu, Jun 29 2023 at 15:27, lizhe.67@bytedance.com wrote:
>
>> There are ten places call rdmsrl_on_cpu() in the current code without
>> judging the return value. This may introduce a potential bug. For example,
>> inj_bank_set() may return -EINVAL, show_base_frequency() may show an error
>> freq value, intel_pstate_hwp_set() may write an error value to the related
>> msr register and so on. But rdmsrl_on_cpu() do rarely returns an error, so
>> it seems that add a WARN_ON is enough for debugging.
>
>Can you please structure your changelogs as documented in:
>
>  https://www.kernel.org/doc/html/latest/process/maintainer-tip.html#changelog
>
>instead of providing a big lump of words?
>
>> There are ten places call rdmsrl_on_cpu() in the current code without
>> judging the return value.
>
>Return values are not judged. They are either ignored or checked/evaluated.
>
>> This may introduce a potential bug.
>
>Sure. Anything which does not check a return value from a function might
>be a bug, but you have to look at each instance whether its a bug or
>not.
>
>> For example, inj_bank_set() may return -EINVAL, show_base_frequency()
>> may show an error freq value, intel_pstate_hwp_set() may write an
>> error value to the related msr register and so on.  But
>> rdmsrl_on_cpu() do rarely returns an error, so it seems that add a
>> WARN_ON is enough for debugging.
>
>This is hillarious at best.
>
>  1) It does not matter at all whether that function returns an error rarely
>     or not.
>
>  2) Adding WARN_ON() without justification at each call site is not
>     enough. Neither for debugging nor for real world usage.
>
>You have to come up with individual patches for each callsite to add the
>WARN_ON() and in each patch you have to explain why it is justified and
>why there is no other solution, e.g. taking an error exit path.
>
>Just slapping WARN_ON()'s into the code without any deeper analysis is
>worse than the current state of the code.
>
>If you have identified a real world problem at any of these call sites
>then adding a WARN_ON() does not solve it at all.
>
>I'm looking forward to your profound anlysis of each of these "problems".
>
>Thanks,
>
>        tglx

Thanks for all your advice. I will analyze each of these "problems".
diff mbox series

Patch

diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
index 12cf2e7ca33c..0a34057f4fc6 100644
--- a/arch/x86/kernel/cpu/mce/inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -587,7 +587,7 @@  static int inj_bank_set(void *data, u64 val)
 	u64 cap;
 
 	/* Get bank count on target CPU so we can handle non-uniform values. */
-	rdmsrl_on_cpu(m->extcpu, MSR_IA32_MCG_CAP, &cap);
+	WARN_ON(rdmsrl_on_cpu(m->extcpu, MSR_IA32_MCG_CAP, &cap));
 	n_banks = cap & MCG_BANKCNT_MASK;
 
 	if (val >= n_banks) {
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 2548ec92faa2..fe2bdb38d6a0 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -859,7 +859,7 @@  static ssize_t show_base_frequency(struct cpufreq_policy *policy, char *buf)
 	if (ratio <= 0) {
 		u64 cap;
 
-		rdmsrl_on_cpu(policy->cpu, MSR_HWP_CAPABILITIES, &cap);
+		WARN_ON(rdmsrl_on_cpu(policy->cpu, MSR_HWP_CAPABILITIES, &cap));
 		ratio = HWP_GUARANTEED_PERF(cap);
 	}
 
@@ -883,7 +883,7 @@  static void __intel_pstate_get_hwp_cap(struct cpudata *cpu)
 {
 	u64 cap;
 
-	rdmsrl_on_cpu(cpu->cpu, MSR_HWP_CAPABILITIES, &cap);
+	WARN_ON(rdmsrl_on_cpu(cpu->cpu, MSR_HWP_CAPABILITIES, &cap));
 	WRITE_ONCE(cpu->hwp_cap_cached, cap);
 	cpu->pstate.max_pstate = HWP_GUARANTEED_PERF(cap);
 	cpu->pstate.turbo_pstate = HWP_HIGHEST_PERF(cap);
@@ -920,7 +920,7 @@  static void intel_pstate_hwp_set(unsigned int cpu)
 	if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE)
 		min = max;
 
-	rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
+	WARN_ON(rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value));
 
 	value &= ~HWP_MIN_PERF(~0L);
 	value |= HWP_MIN_PERF(min);
@@ -1802,7 +1802,7 @@  static int core_get_min_pstate(int cpu)
 {
 	u64 value;
 
-	rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &value);
+	WARN_ON(rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &value));
 	return (value >> 40) & 0xFF;
 }
 
@@ -1810,7 +1810,7 @@  static int core_get_max_pstate_physical(int cpu)
 {
 	u64 value;
 
-	rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &value);
+	WARN_ON(rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &value));
 	return (value >> 8) & 0xFF;
 }
 
@@ -1855,7 +1855,7 @@  static int core_get_max_pstate(int cpu)
 	int tdp_ratio;
 	int err;
 
-	rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &plat_info);
+	WARN_ON(rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &plat_info));
 	max_pstate = (plat_info >> 8) & 0xFF;
 
 	tdp_ratio = core_get_tdp_ratio(cpu, plat_info);
@@ -1887,7 +1887,7 @@  static int core_get_turbo_pstate(int cpu)
 	u64 value;
 	int nont, ret;
 
-	rdmsrl_on_cpu(cpu, MSR_TURBO_RATIO_LIMIT, &value);
+	WARN_ON(rdmsrl_on_cpu(cpu, MSR_TURBO_RATIO_LIMIT, &value));
 	nont = core_get_max_pstate(cpu);
 	ret = (value) & 255;
 	if (ret <= nont)
@@ -1921,7 +1921,7 @@  static int knl_get_turbo_pstate(int cpu)
 	u64 value;
 	int nont, ret;
 
-	rdmsrl_on_cpu(cpu, MSR_TURBO_RATIO_LIMIT, &value);
+	WARN_ON(rdmsrl_on_cpu(cpu, MSR_TURBO_RATIO_LIMIT, &value));
 	nont = core_get_max_pstate(cpu);
 	ret = (((value) >> 8) & 0xFF);
 	if (ret <= nont)
@@ -2974,7 +2974,7 @@  static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
 
 		intel_pstate_get_hwp_cap(cpu);
 
-		rdmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, &value);
+		WARN_ON(rdmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, &value));
 		WRITE_ONCE(cpu->hwp_req_cached, value);
 
 		cpu->epp_cached = intel_pstate_get_epp(cpu, value);