Message ID | 20220520133746.66142-3-roger.pau@citrix.com
---|---
State | New, archived
Series | x86/lbr: handle lack of model-specific LBRs
On 20.05.2022 15:37, Roger Pau Monne wrote:
> --- a/xen/arch/x86/include/asm/msr-index.h
> +++ b/xen/arch/x86/include/asm/msr-index.h
> @@ -139,6 +139,24 @@
>  #define PASID_PASID_MASK 0x000fffff
>  #define PASID_VALID (_AC(1, ULL) << 31)
>
> +#define MSR_ARCH_LBR_CTL 0x000014ce
> +#define ARCH_LBR_CTL_LBREN (_AC(1, ULL) << 0)
> +#define ARCH_LBR_CTL_OS (_AC(1, ULL) << 1)

Bits 2 and 3 also have meaning (USR and CALL_STACK) according to
ISE version 44. If it was intentional that you omitted those
(perhaps you intended to add only the bits you actually use right
away), it would have been nice if you said so in the description.

> --- a/xen/arch/x86/traps.c
> +++ b/xen/arch/x86/traps.c
> @@ -1963,6 +1963,29 @@ void do_device_not_available(struct cpu_user_regs *regs)
>  #endif
>  }
>
> +static bool enable_lbr(void)
> +{
> +    uint64_t debugctl;
> +
> +    wrmsrl(MSR_IA32_DEBUGCTLMSR, IA32_DEBUGCTLMSR_LBR);
> +    rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
> +    if ( !(debugctl & IA32_DEBUGCTLMSR_LBR) )
> +    {
> +        /*
> +         * CPUs with no model-specific LBRs always return DEBUGCTLMSR.LBR
> +         * == 0, attempt to set arch LBR if available.
> +         */
> +        if ( !boot_cpu_has(X86_FEATURE_ARCH_LBR) )
> +            return false;
> +
> +        /* Note that LASTINT{FROMIP,TOIP} matches LER_{FROM_IP,TO_IP} */
> +        wrmsrl(MSR_ARCH_LBR_CTL, ARCH_LBR_CTL_LBREN | ARCH_LBR_CTL_OS |
> +               ARCH_LBR_CTL_RECORD_ALL);
> +    }
> +
> +    return true;
> +}

Would it make sense to try architectural LBRs first? (A sketch of such
an ordering follows after this message.)

> @@ -1997,7 +2020,7 @@ void do_debug(struct cpu_user_regs *regs)
>
>      /* #DB automatically disabled LBR. Reinstate it if debugging Xen. */
>      if ( cpu_has_xen_lbr )
> -        wrmsrl(MSR_IA32_DEBUGCTLMSR, IA32_DEBUGCTLMSR_LBR);
> +        enable_lbr();
>
>      if ( !guest_mode(regs) )
>      {
> @@ -2179,8 +2202,8 @@ void percpu_traps_init(void)
>      if ( !ler_msr && (ler_msr = calc_ler_msr()) )
>          setup_force_cpu_cap(X86_FEATURE_XEN_LBR);
>
> -    if ( cpu_has_xen_lbr )
> -        wrmsrl(MSR_IA32_DEBUGCTLMSR, IA32_DEBUGCTLMSR_LBR);
> +    if ( cpu_has_xen_lbr && !enable_lbr() )
> +        printk(XENLOG_ERR "CPU#%u: failed to enable LBR\n", smp_processor_id());
>  }

Isn't enable_lbr() failing a strong indication that we shouldn't have
set XEN_LBR just before this? IOW doesn't this want re-arranging such
that the feature bit and maybe also ler_msr (albeit some care would
be required there; in fact I think this is broken for the case of
running on non-{Intel,AMD,Hygon} CPUs [or unrecognized models] but
opt_ler being true) remain unset in that case?

As there's no good place to ask the VMX-related question, it needs to
go here: Aiui with this patch in place VMX guests will be run with
Xen's choice of LBR_CTL. That's different from DebugCtl, which - being
part of the VMCS - would be loaded by the CPU. Such a difference, if
intended, would imo again want pointing out in the description.

Jan
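A minimal sketch of the ordering asked about above, i.e. trying architectural LBRs before the model-specific DEBUGCTL path. This is not the submitted patch, only an assumption about how such a variant could look, reusing the helpers and constants already quoted in this thread:

/*
 * Hypothetical variant, for discussion only: prefer architectural LBRs
 * when the CPUID bit is present, and fall back to the model-specific
 * DEBUGCTL path otherwise.
 */
static bool enable_lbr(void)
{
    uint64_t debugctl;

    if ( boot_cpu_has(X86_FEATURE_ARCH_LBR) )
    {
        wrmsrl(MSR_ARCH_LBR_CTL, ARCH_LBR_CTL_LBREN | ARCH_LBR_CTL_OS |
               ARCH_LBR_CTL_RECORD_ALL);
        return true;
    }

    /* Model-specific LBRs: the DEBUGCTL.LBR bit only sticks if implemented. */
    wrmsrl(MSR_IA32_DEBUGCTLMSR, IA32_DEBUGCTLMSR_LBR);
    rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);

    return debugctl & IA32_DEBUGCTLMSR_LBR;
}

As the reply below explains, the posted patch deliberately keeps the model-specific path first, so behaviour doesn't change on platforms exposing both facilities.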
On Mon, May 30, 2022 at 05:31:18PM +0200, Jan Beulich wrote:
> On 20.05.2022 15:37, Roger Pau Monne wrote:
> > --- a/xen/arch/x86/include/asm/msr-index.h
> > +++ b/xen/arch/x86/include/asm/msr-index.h
> > @@ -139,6 +139,24 @@
> >  #define PASID_PASID_MASK 0x000fffff
> >  #define PASID_VALID (_AC(1, ULL) << 31)
> >
> > +#define MSR_ARCH_LBR_CTL 0x000014ce
> > +#define ARCH_LBR_CTL_LBREN (_AC(1, ULL) << 0)
> > +#define ARCH_LBR_CTL_OS (_AC(1, ULL) << 1)
>
> Bits 2 and 3 also have meaning (USR and CALL_STACK) according to
> ISE version 44. If it was intentional that you omitted those
> (perhaps you intended to add only the bits you actually use right
> away), it would have been nice if you said so in the description.

Yes, I've only added the bits used. I could add all if that's better.

> > --- a/xen/arch/x86/traps.c
> > +++ b/xen/arch/x86/traps.c
> > @@ -1963,6 +1963,29 @@ void do_device_not_available(struct cpu_user_regs *regs)
> >  #endif
> >  }
> >
> > +static bool enable_lbr(void)
> > +{
> > +    uint64_t debugctl;
> > +
> > +    wrmsrl(MSR_IA32_DEBUGCTLMSR, IA32_DEBUGCTLMSR_LBR);
> > +    rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
> > +    if ( !(debugctl & IA32_DEBUGCTLMSR_LBR) )
> > +    {
> > +        /*
> > +         * CPUs with no model-specific LBRs always return DEBUGCTLMSR.LBR
> > +         * == 0, attempt to set arch LBR if available.
> > +         */
> > +        if ( !boot_cpu_has(X86_FEATURE_ARCH_LBR) )
> > +            return false;
> > +
> > +        /* Note that LASTINT{FROMIP,TOIP} matches LER_{FROM_IP,TO_IP} */
> > +        wrmsrl(MSR_ARCH_LBR_CTL, ARCH_LBR_CTL_LBREN | ARCH_LBR_CTL_OS |
> > +               ARCH_LBR_CTL_RECORD_ALL);
> > +    }
> > +
> > +    return true;
> > +}
>
> Would it make sense to try architectural LBRs first?

I didn't want to change behavior for existing platforms that have
both architectural and model-specific LBRs.

> > @@ -1997,7 +2020,7 @@ void do_debug(struct cpu_user_regs *regs)
> >
> >      /* #DB automatically disabled LBR. Reinstate it if debugging Xen. */
> >      if ( cpu_has_xen_lbr )
> > -        wrmsrl(MSR_IA32_DEBUGCTLMSR, IA32_DEBUGCTLMSR_LBR);
> > +        enable_lbr();
> >
> >      if ( !guest_mode(regs) )
> >      {
> > @@ -2179,8 +2202,8 @@ void percpu_traps_init(void)
> >      if ( !ler_msr && (ler_msr = calc_ler_msr()) )
> >          setup_force_cpu_cap(X86_FEATURE_XEN_LBR);
> >
> > -    if ( cpu_has_xen_lbr )
> > -        wrmsrl(MSR_IA32_DEBUGCTLMSR, IA32_DEBUGCTLMSR_LBR);
> > +    if ( cpu_has_xen_lbr && !enable_lbr() )
> > +        printk(XENLOG_ERR "CPU#%u: failed to enable LBR\n", smp_processor_id());
> >  }
>
> Isn't enable_lbr() failing a strong indication that we shouldn't have
> set XEN_LBR just before this?

So I've now added extra checks in calc_ler_msr() so that it only
returns != 0 when there's LBR support (either model-specific or
architectural); a sketch of that gating follows after this message.

> IOW doesn't this want re-arranging such
> that the feature bit and maybe also ler_msr (albeit some care would
> be required there; in fact I think this is broken for the case of
> running on non-{Intel,AMD,Hygon} CPUs [or unrecognized models] but
> opt_ler being true) remain unset in that case?

opt_ler will be set to false if calc_ler_msr() returns 0, which is the
case for non-{Intel,AMD,Hygon} or unrecognized models.

> As there's no good place to ask the VMX-related question, it needs to
> go here: Aiui with this patch in place VMX guests will be run with
> Xen's choice of LBR_CTL. That's different from DebugCtl, which - being
> part of the VMCS - would be loaded by the CPU. Such a difference, if
> intended, would imo again want pointing out in the description.

LBR_CTL will only be set by Xen when the CPU only supports
architectural LBRs (no model-specific LBR support at all), and in that
case LBR support won't be exposed to guests, as the ARCH_LBR CPUID bit
is not exposed, nor are guests allowed access to ARCH_LBR_CTL.

Note that in such a scenario setting DebugCtl.LBR also has no effect,
as there's no model-specific LBR support and the hardware will just
ignore the bit. None of the LBR MSRs are exposed to guests either.

I can try to clarify all the above in the commit message.

Thanks, Roger.
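A rough sketch of the calc_ler_msr() gating described in the reply above. The v2 change itself isn't part of this thread, so the helper below (its name, the probe sequence and its placement) is purely an assumption about one way such a check could look:

/*
 * Assumed helper: report whether any LBR facility - model-specific or
 * architectural - is usable.  calc_ler_msr() could then return 0 early
 * when this is false, so X86_FEATURE_XEN_LBR is never forced (and
 * opt_ler effectively cleared) on CPUs without any LBR support.
 */
static bool have_any_lbr(void)
{
    uint64_t debugctl;

    if ( boot_cpu_has(X86_FEATURE_ARCH_LBR) )
        return true;

    /* Model-specific LBRs exist iff DEBUGCTLMSR.LBR can actually be set. */
    wrmsrl(MSR_IA32_DEBUGCTLMSR, IA32_DEBUGCTLMSR_LBR);
    rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
    wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);

    return debugctl & IA32_DEBUGCTLMSR_LBR;
}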
On 01.07.2022 17:39, Roger Pau Monné wrote:
> On Mon, May 30, 2022 at 05:31:18PM +0200, Jan Beulich wrote:
>> On 20.05.2022 15:37, Roger Pau Monne wrote:
>>> --- a/xen/arch/x86/include/asm/msr-index.h
>>> +++ b/xen/arch/x86/include/asm/msr-index.h
>>> @@ -139,6 +139,24 @@
>>>  #define PASID_PASID_MASK 0x000fffff
>>>  #define PASID_VALID (_AC(1, ULL) << 31)
>>>
>>> +#define MSR_ARCH_LBR_CTL 0x000014ce
>>> +#define ARCH_LBR_CTL_LBREN (_AC(1, ULL) << 0)
>>> +#define ARCH_LBR_CTL_OS (_AC(1, ULL) << 1)
>>
>> Bits 2 and 3 also have meaning (USR and CALL_STACK) according to
>> ISE version 44. If it was intentional that you omitted those
>> (perhaps you intended to add only the bits you actually use right
>> away), it would have been nice if you said so in the description.
>
> Yes, I've only added the bits used. I could add all if that's better.

Personally I'd slightly prefer if you added all. But if you don't, which
is also okay, please make this explicit in the description.

>>> --- a/xen/arch/x86/traps.c
>>> +++ b/xen/arch/x86/traps.c
>>> @@ -1963,6 +1963,29 @@ void do_device_not_available(struct cpu_user_regs *regs)
>>>  #endif
>>>  }
>>>
>>> +static bool enable_lbr(void)
>>> +{
>>> +    uint64_t debugctl;
>>> +
>>> +    wrmsrl(MSR_IA32_DEBUGCTLMSR, IA32_DEBUGCTLMSR_LBR);
>>> +    rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
>>> +    if ( !(debugctl & IA32_DEBUGCTLMSR_LBR) )
>>> +    {
>>> +        /*
>>> +         * CPUs with no model-specific LBRs always return DEBUGCTLMSR.LBR
>>> +         * == 0, attempt to set arch LBR if available.
>>> +         */
>>> +        if ( !boot_cpu_has(X86_FEATURE_ARCH_LBR) )
>>> +            return false;
>>> +
>>> +        /* Note that LASTINT{FROMIP,TOIP} matches LER_{FROM_IP,TO_IP} */
>>> +        wrmsrl(MSR_ARCH_LBR_CTL, ARCH_LBR_CTL_LBREN | ARCH_LBR_CTL_OS |
>>> +               ARCH_LBR_CTL_RECORD_ALL);
>>> +    }
>>> +
>>> +    return true;
>>> +}
>>
>> Would it make sense to try architectural LBRs first?
>
> I didn't want to change behavior for existing platforms that have
> both architectural and model-specific LBRs.

Are there such platforms? While it may not be said explicitly, so far
I took it that the LBR format indicator being 0x3f was connected to
arch LBR being available.

>> As there's no good place to ask the VMX-related question, it needs to
>> go here: Aiui with this patch in place VMX guests will be run with
>> Xen's choice of LBR_CTL. That's different from DebugCtl, which - being
>> part of the VMCS - would be loaded by the CPU. Such a difference, if
>> intended, would imo again want pointing out in the description.
>
> LBR_CTL will only be set by Xen when the CPU only supports
> architectural LBRs (no model-specific LBR support at all), and in that
> case LBR support won't be exposed to guests, as the ARCH_LBR CPUID bit
> is not exposed, nor are guests allowed access to ARCH_LBR_CTL.
>
> Note that in such a scenario setting DebugCtl.LBR also has no effect,
> as there's no model-specific LBR support and the hardware will just
> ignore the bit. None of the LBR MSRs are exposed to guests either.

My question wasn't about guest support, but about us (perhaps mistakenly)
running guest code with the Xen setting in place. It cannot be excluded
that running with LBR enabled has a performance impact, after all.

> I can try to clarify all the above in the commit message.

Thanks.

Jan
On Mon, Jul 04, 2022 at 08:15:15AM +0200, Jan Beulich wrote:
> On 01.07.2022 17:39, Roger Pau Monné wrote:
> > On Mon, May 30, 2022 at 05:31:18PM +0200, Jan Beulich wrote:
> >> On 20.05.2022 15:37, Roger Pau Monne wrote:
> >>> --- a/xen/arch/x86/include/asm/msr-index.h
> >>> +++ b/xen/arch/x86/include/asm/msr-index.h
> >>> @@ -139,6 +139,24 @@
> >>>  #define PASID_PASID_MASK 0x000fffff
> >>>  #define PASID_VALID (_AC(1, ULL) << 31)
> >>>
> >>> +#define MSR_ARCH_LBR_CTL 0x000014ce
> >>> +#define ARCH_LBR_CTL_LBREN (_AC(1, ULL) << 0)
> >>> +#define ARCH_LBR_CTL_OS (_AC(1, ULL) << 1)
> >>
> >> Bits 2 and 3 also have meaning (USR and CALL_STACK) according to
> >> ISE version 44. If it was intentional that you omitted those
> >> (perhaps you intended to add only the bits you actually use right
> >> away), it would have been nice if you said so in the description.
> >
> > Yes, I've only added the bits used. I could add all if that's better.
>
> Personally I'd slightly prefer if you added all. But if you don't, which
> is also okay, please make this explicit in the description.
>
> >>> --- a/xen/arch/x86/traps.c
> >>> +++ b/xen/arch/x86/traps.c
> >>> @@ -1963,6 +1963,29 @@ void do_device_not_available(struct cpu_user_regs *regs)
> >>>  #endif
> >>>  }
> >>>
> >>> +static bool enable_lbr(void)
> >>> +{
> >>> +    uint64_t debugctl;
> >>> +
> >>> +    wrmsrl(MSR_IA32_DEBUGCTLMSR, IA32_DEBUGCTLMSR_LBR);
> >>> +    rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
> >>> +    if ( !(debugctl & IA32_DEBUGCTLMSR_LBR) )
> >>> +    {
> >>> +        /*
> >>> +         * CPUs with no model-specific LBRs always return DEBUGCTLMSR.LBR
> >>> +         * == 0, attempt to set arch LBR if available.
> >>> +         */
> >>> +        if ( !boot_cpu_has(X86_FEATURE_ARCH_LBR) )
> >>> +            return false;
> >>> +
> >>> +        /* Note that LASTINT{FROMIP,TOIP} matches LER_{FROM_IP,TO_IP} */
> >>> +        wrmsrl(MSR_ARCH_LBR_CTL, ARCH_LBR_CTL_LBREN | ARCH_LBR_CTL_OS |
> >>> +               ARCH_LBR_CTL_RECORD_ALL);
> >>> +    }
> >>> +
> >>> +    return true;
> >>> +}
> >>
> >> Would it make sense to try architectural LBRs first?
> >
> > I didn't want to change behavior for existing platforms that have
> > both architectural and model-specific LBRs.
>
> Are there such platforms? While it may not be said explicitly, so far
> I took it that the LBR format indicator being 0x3f was connected to
> arch LBR being available.

IIRC Ice Lake has both model-specific and architectural LBRs. While a
format value of 0x3f could indicate the likely presence of arch LBRs,
the CPUID bit needs to be checked (see the detection sketch after this
message).

> >> As there's no good place to ask the VMX-related question, it needs to
> >> go here: Aiui with this patch in place VMX guests will be run with
> >> Xen's choice of LBR_CTL. That's different from DebugCtl, which - being
> >> part of the VMCS - would be loaded by the CPU. Such a difference, if
> >> intended, would imo again want pointing out in the description.
> >
> > LBR_CTL will only be set by Xen when the CPU only supports
> > architectural LBRs (no model-specific LBR support at all), and in that
> > case LBR support won't be exposed to guests, as the ARCH_LBR CPUID bit
> > is not exposed, nor are guests allowed access to ARCH_LBR_CTL.
> >
> > Note that in such a scenario setting DebugCtl.LBR also has no effect,
> > as there's no model-specific LBR support and the hardware will just
> > ignore the bit. None of the LBR MSRs are exposed to guests either.
>
> My question wasn't about guest support, but about us (perhaps mistakenly)
> running guest code with the Xen setting in place. It cannot be excluded
> that running with LBR enabled has a performance impact, after all.

It's possible. The 'ler' option already states that it should be used
for debugging purposes only, so I think it's fine if this results in
slower guest performance, as it's not a general-purpose option after
all.

Thanks, Roger.
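To make the CPUID point above concrete: architectural LBRs are enumerated by CPUID.(EAX=7,ECX=0):EDX[19], the bit the patch's X86_FEATURE_ARCH_LBR (9*32+19) corresponds to, whereas the 0x3f value discussed is the LBR format field (bits 5:0) of IA32_PERF_CAPABILITIES. Below is a small self-contained user-space check of the CPUID bit, an illustration only and not Xen code; the MSR read is omitted since it requires ring 0.

#include <stdbool.h>
#include <stdio.h>
#include <cpuid.h>

static bool cpu_has_arch_lbr(void)
{
    unsigned int eax, ebx, ecx, edx;

    /* CPUID.(EAX=7,ECX=0):EDX[19] enumerates architectural LBRs. */
    if ( !__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx) )
        return false;

    return edx & (1u << 19);
}

int main(void)
{
    printf("architectural LBRs: %ssupported\n",
           cpu_has_arch_lbr() ? "" : "not ");
    return 0;
}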
diff --git a/xen/arch/x86/include/asm/msr-index.h b/xen/arch/x86/include/asm/msr-index.h
index 3e038db618..7b08e1804b 100644
--- a/xen/arch/x86/include/asm/msr-index.h
+++ b/xen/arch/x86/include/asm/msr-index.h
@@ -139,6 +139,24 @@
 #define PASID_PASID_MASK 0x000fffff
 #define PASID_VALID (_AC(1, ULL) << 31)
 
+#define MSR_ARCH_LBR_CTL 0x000014ce
+#define ARCH_LBR_CTL_LBREN (_AC(1, ULL) << 0)
+#define ARCH_LBR_CTL_OS (_AC(1, ULL) << 1)
+#define ARCH_LBR_CTL_COND (_AC(1, ULL) << 16)
+#define ARCH_LBR_CTL_NEAR_REL_JMP (_AC(1, ULL) << 17)
+#define ARCH_LBR_CTL_NEAR_IND_JMP (_AC(1, ULL) << 18)
+#define ARCH_LBR_CTL_NEAR_REL_CALL (_AC(1, ULL) << 19)
+#define ARCH_LBR_CTL_NEAR_IND_CALL (_AC(1, ULL) << 20)
+#define ARCH_LBR_CTL_NEAR_RET (_AC(1, ULL) << 21)
+#define ARCH_LBR_CTL_OTHER_BRANCH (_AC(1, ULL) << 22)
+#define ARCH_LBR_CTL_RECORD_ALL (ARCH_LBR_CTL_COND | \
+                                 ARCH_LBR_CTL_NEAR_REL_JMP | \
+                                 ARCH_LBR_CTL_NEAR_IND_JMP | \
+                                 ARCH_LBR_CTL_NEAR_REL_CALL | \
+                                 ARCH_LBR_CTL_NEAR_IND_CALL | \
+                                 ARCH_LBR_CTL_NEAR_RET | \
+                                 ARCH_LBR_CTL_OTHER_BRANCH)
+
 #define MSR_EFER 0xc0000080 /* Extended Feature Enable Register */
 #define EFER_SCE (_AC(1, ULL) << 0) /* SYSCALL Enable */
 #define EFER_LME (_AC(1, ULL) << 8) /* Long Mode Enable */
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 4c38f6c015..133348d9f9 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1963,6 +1963,29 @@ void do_device_not_available(struct cpu_user_regs *regs)
 #endif
 }
 
+static bool enable_lbr(void)
+{
+    uint64_t debugctl;
+
+    wrmsrl(MSR_IA32_DEBUGCTLMSR, IA32_DEBUGCTLMSR_LBR);
+    rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+    if ( !(debugctl & IA32_DEBUGCTLMSR_LBR) )
+    {
+        /*
+         * CPUs with no model-specific LBRs always return DEBUGCTLMSR.LBR
+         * == 0, attempt to set arch LBR if available.
+         */
+        if ( !boot_cpu_has(X86_FEATURE_ARCH_LBR) )
+            return false;
+
+        /* Note that LASTINT{FROMIP,TOIP} matches LER_{FROM_IP,TO_IP} */
+        wrmsrl(MSR_ARCH_LBR_CTL, ARCH_LBR_CTL_LBREN | ARCH_LBR_CTL_OS |
+               ARCH_LBR_CTL_RECORD_ALL);
+    }
+
+    return true;
+}
+
 void do_debug(struct cpu_user_regs *regs)
 {
     unsigned long dr6;
@@ -1997,7 +2020,7 @@ void do_debug(struct cpu_user_regs *regs)
 
     /* #DB automatically disabled LBR. Reinstate it if debugging Xen. */
     if ( cpu_has_xen_lbr )
-        wrmsrl(MSR_IA32_DEBUGCTLMSR, IA32_DEBUGCTLMSR_LBR);
+        enable_lbr();
 
     if ( !guest_mode(regs) )
     {
@@ -2179,8 +2202,8 @@ void percpu_traps_init(void)
     if ( !ler_msr && (ler_msr = calc_ler_msr()) )
         setup_force_cpu_cap(X86_FEATURE_XEN_LBR);
 
-    if ( cpu_has_xen_lbr )
-        wrmsrl(MSR_IA32_DEBUGCTLMSR, IA32_DEBUGCTLMSR_LBR);
+    if ( cpu_has_xen_lbr && !enable_lbr() )
+        printk(XENLOG_ERR "CPU#%u: failed to enable LBR\n", smp_processor_id());
 }
 
 void __init init_idt_traps(void)
diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h
index 9cee4b439e..cd6409f9f3 100644
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -280,6 +280,7 @@ XEN_CPUFEATURE(RTM_ALWAYS_ABORT, 9*32+11) /*! June 2021 TSX defeaturing in micro
 XEN_CPUFEATURE(TSX_FORCE_ABORT, 9*32+13) /* MSR_TSX_FORCE_ABORT.RTM_ABORT */
 XEN_CPUFEATURE(SERIALIZE, 9*32+14) /*A SERIALIZE insn */
 XEN_CPUFEATURE(TSXLDTRK, 9*32+16) /*a TSX load tracking suspend/resume insns */
+XEN_CPUFEATURE(ARCH_LBR, 9*32+19) /* Intel ARCH LBR */
 XEN_CPUFEATURE(CET_IBT, 9*32+20) /* CET - Indirect Branch Tracking */
 XEN_CPUFEATURE(IBRSB, 9*32+26) /*A IBRS and IBPB support (used by Intel) */
 XEN_CPUFEATURE(STIBP, 9*32+27) /*A STIBP */
CPUs having no model-specific LBRs don't implement DEBUGCTLMSR.LBR, and
on such hardware LBRs can only be enabled if the processor supports
architectural LBRs.

Split the logic to enable LBRs into a separate function and expand it
to also implement support for arch LBRs if model-specific LBRs are not
supported.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
 xen/arch/x86/include/asm/msr-index.h        | 18 +++++++++++++
 xen/arch/x86/traps.c                        | 29 ++++++++++++++++++---
 xen/include/public/arch-x86/cpufeatureset.h |  1 +
 3 files changed, 45 insertions(+), 3 deletions(-)