diff mbox series

[v3,06/14,RESEND] xen/x86: Tweak PDC bits when using HWP

Message ID 20230501193034.88575-7-jandryuk@gmail.com (mailing list archive)
State Superseded
Headers show
Series Intel Hardware P-States (HWP) support | expand

Commit Message

Jason Andryuk May 1, 2023, 7:30 p.m. UTC
Qubes testing of HWP support had a report of a laptop, Thinkpad X1
Carbon Gen 4 with a Skylake processor, locking up during boot when HWP
is enabled.  A user found a kernel bug that seems to be the same issue:
https://bugzilla.kernel.org/show_bug.cgi?id=110941.

That bug was fixed by Linux commit a21211672c9a ("ACPI / processor:
Request native thermal interrupt handling via _OSC").  The tl;dr is SMM
crashes when it receives thermal interrupts, so Linux calls the ACPI
_OSC method to take over interrupt handling.

The Linux fix looks at the CPU features to decide whether or not to call
_OSC with bit 12 set to take over native interrupt handling.  Xen needs
some way to communicate HWP to Dom0 for making an equivalent call.

Xen exposes modified PDC bits via the platform_op set_pminfo hypercall.
Expand that to set bit 12 when HWP is present and in use.

Any generated interrupt would be handled by Xen's thermal driver, which
clears the status.

Bit 12 isn't named in the Linux header and is open-coded in Linux's
usage.

This will need a corresponding Linux patch to pick up and apply the PDC
bits.

Signed-off-by: Jason Andryuk <jandryuk@gmail.com>
---
New in v3

 xen/arch/x86/acpi/cpufreq/hwp.c           | 16 +++++++++++-----
 xen/arch/x86/acpi/lib.c                   |  5 +++++
 xen/arch/x86/cpu/mcheck/mce_intel.c       |  6 ++++++
 xen/arch/x86/include/asm/msr-index.h      |  1 +
 xen/include/acpi/cpufreq/processor_perf.h |  1 +
 xen/include/acpi/pdc_intel.h              |  1 +
 6 files changed, 25 insertions(+), 5 deletions(-)

Comments

Jan Beulich May 8, 2023, 9:53 a.m. UTC | #1
On 01.05.2023 21:30, Jason Andryuk wrote:
> --- a/xen/arch/x86/acpi/cpufreq/hwp.c
> +++ b/xen/arch/x86/acpi/cpufreq/hwp.c
> @@ -13,6 +13,8 @@
>  #include <asm/msr.h>
>  #include <acpi/cpufreq/cpufreq.h>
>  
> +static bool hwp_in_use;

__ro_after_init again, please.

> --- a/xen/include/acpi/pdc_intel.h
> +++ b/xen/include/acpi/pdc_intel.h
> @@ -17,6 +17,7 @@
>  #define ACPI_PDC_C_C1_FFH		(0x0100)
>  #define ACPI_PDC_C_C2C3_FFH		(0x0200)
>  #define ACPI_PDC_SMP_P_HWCOORD		(0x0800)
> +#define ACPI_PDC_CPPC_NTV_INT		(0x1000)

I can probably live with NTV (albeit I'd prefer NATIVE), but INT is too
ambiguous for my taste: Can at least that become INTR, please?

With at least the minimal adjustments
Reviewed-by: Jan Beulich <jbeulich@suse.com>

Jan
Jason Andryuk May 10, 2023, 2:08 p.m. UTC | #2
On Mon, May 8, 2023 at 5:53 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 01.05.2023 21:30, Jason Andryuk wrote:
> > --- a/xen/arch/x86/acpi/cpufreq/hwp.c
> > +++ b/xen/arch/x86/acpi/cpufreq/hwp.c
> > @@ -13,6 +13,8 @@
> >  #include <asm/msr.h>
> >  #include <acpi/cpufreq/cpufreq.h>
> >
> > +static bool hwp_in_use;
>
> __ro_after_init again, please.

Of course.  (I'd already made the change locally after the earlier ones.)

> > --- a/xen/include/acpi/pdc_intel.h
> > +++ b/xen/include/acpi/pdc_intel.h
> > @@ -17,6 +17,7 @@
> >  #define ACPI_PDC_C_C1_FFH            (0x0100)
> >  #define ACPI_PDC_C_C2C3_FFH          (0x0200)
> >  #define ACPI_PDC_SMP_P_HWCOORD               (0x0800)
> > +#define ACPI_PDC_CPPC_NTV_INT                (0x1000)
>
> I can probably live with NTV (albeit I'd prefer NATIVE), but INT is too
> ambiguous for my taste: Can at least that become INTR, please?

Sounds good.  I'm switching to ACPI_PDC_CPPC_NATIVE_INTR.

> With at least the minimal adjustments
> Reviewed-by: Jan Beulich <jbeulich@suse.com>

Thank you.

Regards,
Jason
diff mbox series

Patch

diff --git a/xen/arch/x86/acpi/cpufreq/hwp.c b/xen/arch/x86/acpi/cpufreq/hwp.c
index 57f13867d3..f84abe1386 100644
--- a/xen/arch/x86/acpi/cpufreq/hwp.c
+++ b/xen/arch/x86/acpi/cpufreq/hwp.c
@@ -13,6 +13,8 @@ 
 #include <asm/msr.h>
 #include <acpi/cpufreq/cpufreq.h>
 
+static bool hwp_in_use;
+
 static bool feature_hwp;
 static bool feature_hwp_notification;
 static bool feature_hwp_activity_window;
@@ -117,10 +119,14 @@  static int __init cf_check cpufreq_gov_hwp_init(void)
 }
 __initcall(cpufreq_gov_hwp_init);
 
+bool hwp_active(void)
+{
+    return hwp_in_use;
+}
+
 bool __init hwp_available(void)
 {
     unsigned int eax, ecx, unused;
-    bool use_hwp;
 
     if ( boot_cpu_data.cpuid_level < CPUID_PM_LEAF )
     {
@@ -173,13 +179,13 @@  bool __init hwp_available(void)
     hwp_verbose("HWP: HW_FEEDBACK %ssupported\n",
                 (eax & CPUID6_EAX_HW_FEEDBACK) ? "" : "not ");
 
-    use_hwp = feature_hwp && opt_cpufreq_hwp;
-    cpufreq_governor_internal = use_hwp;
+    hwp_in_use = feature_hwp && opt_cpufreq_hwp;
+    cpufreq_governor_internal = hwp_in_use;
 
-    if ( use_hwp )
+    if ( hwp_in_use )
         hwp_info("Using HWP for cpufreq\n");
 
-    return use_hwp;
+    return hwp_in_use;
 }
 
 static void hdc_set_pkg_hdc_ctl(bool val)
diff --git a/xen/arch/x86/acpi/lib.c b/xen/arch/x86/acpi/lib.c
index 43831b92d1..20d6115ba9 100644
--- a/xen/arch/x86/acpi/lib.c
+++ b/xen/arch/x86/acpi/lib.c
@@ -26,6 +26,8 @@ 
 #include <asm/fixmap.h>
 #include <asm/mwait.h>
 
+#include <acpi/cpufreq/processor_perf.h>
+
 u32 __read_mostly acpi_smi_cmd;
 u8 __read_mostly acpi_enable_value;
 u8 __read_mostly acpi_disable_value;
@@ -140,5 +142,8 @@  int arch_acpi_set_pdc_bits(u32 acpi_id, u32 *pdc, u32 mask)
 	    !(ecx & CPUID5_ECX_INTERRUPT_BREAK))
 		pdc[2] &= ~(ACPI_PDC_C_C1_FFH | ACPI_PDC_C_C2C3_FFH);
 
+	if (hwp_active())
+		pdc[2] |= ACPI_PDC_CPPC_NTV_INT;
+
 	return 0;
 }
diff --git a/xen/arch/x86/cpu/mcheck/mce_intel.c b/xen/arch/x86/cpu/mcheck/mce_intel.c
index 2f23f02923..d430342924 100644
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c
@@ -15,6 +15,9 @@ 
 #include <asm/p2m.h>
 #include <asm/mce.h>
 #include <asm/apic.h>
+
+#include <acpi/cpufreq/processor_perf.h>
+
 #include "mce.h"
 #include "x86_mca.h"
 #include "barrier.h"
@@ -64,6 +67,9 @@  static void cf_check intel_thermal_interrupt(struct cpu_user_regs *regs)
 
     ack_APIC_irq();
 
+    if ( hwp_active() )
+        wrmsr_safe(MSR_IA32_HWP_STATUS, 0);
+
     if ( NOW() < per_cpu(next, cpu) )
         return;
 
diff --git a/xen/arch/x86/include/asm/msr-index.h b/xen/arch/x86/include/asm/msr-index.h
index a2a22339e4..f5269022da 100644
--- a/xen/arch/x86/include/asm/msr-index.h
+++ b/xen/arch/x86/include/asm/msr-index.h
@@ -157,6 +157,7 @@ 
 #define MSR_IA32_HWP_CAPABILITIES           0x00000771
 #define MSR_IA32_HWP_INTERRUPT              0x00000773
 #define MSR_IA32_HWP_REQUEST                0x00000774
+#define MSR_IA32_HWP_STATUS                 0x00000777
 
 #define MSR_X2APIC_FIRST                    0x00000800
 #define MSR_X2APIC_LAST                     0x000008ff
diff --git a/xen/include/acpi/cpufreq/processor_perf.h b/xen/include/acpi/cpufreq/processor_perf.h
index b751ca4937..dd8ec36ba7 100644
--- a/xen/include/acpi/cpufreq/processor_perf.h
+++ b/xen/include/acpi/cpufreq/processor_perf.h
@@ -8,6 +8,7 @@ 
 #define XEN_PX_INIT 0x80000000
 
 bool hwp_available(void);
+bool hwp_active(void);
 int hwp_register_driver(void);
 
 int powernow_cpufreq_init(void);
diff --git a/xen/include/acpi/pdc_intel.h b/xen/include/acpi/pdc_intel.h
index 4fb719d6f5..e8332898fc 100644
--- a/xen/include/acpi/pdc_intel.h
+++ b/xen/include/acpi/pdc_intel.h
@@ -17,6 +17,7 @@ 
 #define ACPI_PDC_C_C1_FFH		(0x0100)
 #define ACPI_PDC_C_C2C3_FFH		(0x0200)
 #define ACPI_PDC_SMP_P_HWCOORD		(0x0800)
+#define ACPI_PDC_CPPC_NTV_INT		(0x1000)
 
 #define ACPI_PDC_EST_CAPABILITY_SMP	(ACPI_PDC_SMP_C1PT | \
 					 ACPI_PDC_C_C1_HALT | \