Message ID | 1545816338-1171-6-git-send-email-wei.w.wang@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Guest LBR Enabling | expand |
On Wed, Dec 26, 2018 at 2:01 AM Wei Wang <wei.w.wang@intel.com> wrote: > > Bits [0, 5] of MSR_IA32_PERF_CAPABILITIES tell about the format of > the addresses stored in the LBR stack. Expose those bits to the guest > when the guest lbr feature is enabled. > > Signed-off-by: Wei Wang <wei.w.wang@intel.com> > Cc: Paolo Bonzini <pbonzini@redhat.com> > Cc: Andi Kleen <ak@linux.intel.com> > --- > arch/x86/include/asm/perf_event.h | 2 ++ > arch/x86/kvm/cpuid.c | 2 +- > arch/x86/kvm/vmx.c | 9 +++++++++ > 3 files changed, 12 insertions(+), 1 deletion(-) > > diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h > index 2f82795..eee09b7 100644 > --- a/arch/x86/include/asm/perf_event.h > +++ b/arch/x86/include/asm/perf_event.h > @@ -87,6 +87,8 @@ > #define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 > #define ARCH_PERFMON_EVENTS_COUNT 7 > > +#define X86_PERF_CAP_MASK_LBR_FMT 0x3f > + > /* > * Intel "Architectural Performance Monitoring" CPUID > * detection/enumeration details: > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c > index 7bcfa61..3b8a57b 100644 > --- a/arch/x86/kvm/cpuid.c > +++ b/arch/x86/kvm/cpuid.c > @@ -365,7 +365,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, > F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ | > 0 /* DS-CPL, VMX, SMX, EST */ | > 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | > - F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ | > + F(FMA) | F(CX16) | 0 /* xTPR Update*/ | F(PDCM) | > F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) | > F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | > 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) | > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index 8d5d984..ee02967 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -4161,6 +4161,13 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) > return 1; > msr_info->data = vcpu->arch.ia32_xss; > break; > + case MSR_IA32_PERF_CAPABILITIES: > + if (!boot_cpu_has(X86_FEATURE_PDCM)) > + return 1; > + msr_info->data = native_read_msr(MSR_IA32_PERF_CAPABILITIES); Since this isn't guarded by vcpu->kvm->arch.lbr_in_guest, it breaks backwards compatibility, doesn't it? > + if (vcpu->kvm->arch.lbr_in_guest) > + msr_info->data &= X86_PERF_CAP_MASK_LBR_FMT; > + break; > case MSR_TSC_AUX: > if (!msr_info->host_initiated && > !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) > @@ -4343,6 +4350,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) > else > clear_atomic_switch_msr(vmx, MSR_IA32_XSS); > break; > + case MSR_IA32_PERF_CAPABILITIES: > + return 1; /* RO MSR */ > case MSR_TSC_AUX: > if (!msr_info->host_initiated && > !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) > -- > 2.7.4 >
On 01/03/2019 07:40 AM, Jim Mattson wrote: > On Wed, Dec 26, 2018 at 2:01 AM Wei Wang <wei.w.wang@intel.com> wrote: >> Bits [0, 5] of MSR_IA32_PERF_CAPABILITIES tell about the format of >> the addresses stored in the LBR stack. Expose those bits to the guest >> when the guest lbr feature is enabled. >> >> Signed-off-by: Wei Wang <wei.w.wang@intel.com> >> Cc: Paolo Bonzini <pbonzini@redhat.com> >> Cc: Andi Kleen <ak@linux.intel.com> >> --- >> arch/x86/include/asm/perf_event.h | 2 ++ >> arch/x86/kvm/cpuid.c | 2 +- >> arch/x86/kvm/vmx.c | 9 +++++++++ >> 3 files changed, 12 insertions(+), 1 deletion(-) >> >> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h >> index 2f82795..eee09b7 100644 >> --- a/arch/x86/include/asm/perf_event.h >> +++ b/arch/x86/include/asm/perf_event.h >> @@ -87,6 +87,8 @@ >> #define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 >> #define ARCH_PERFMON_EVENTS_COUNT 7 >> >> +#define X86_PERF_CAP_MASK_LBR_FMT 0x3f >> + >> /* >> * Intel "Architectural Performance Monitoring" CPUID >> * detection/enumeration details: >> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c >> index 7bcfa61..3b8a57b 100644 >> --- a/arch/x86/kvm/cpuid.c >> +++ b/arch/x86/kvm/cpuid.c >> @@ -365,7 +365,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, >> F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ | >> 0 /* DS-CPL, VMX, SMX, EST */ | >> 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | >> - F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ | >> + F(FMA) | F(CX16) | 0 /* xTPR Update*/ | F(PDCM) | >> F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) | >> F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | >> 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) | >> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c >> index 8d5d984..ee02967 100644 >> --- a/arch/x86/kvm/vmx.c >> +++ b/arch/x86/kvm/vmx.c >> @@ -4161,6 +4161,13 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) >> return 1; >> msr_info->data = vcpu->arch.ia32_xss; >> break; >> + case MSR_IA32_PERF_CAPABILITIES: >> + if (!boot_cpu_has(X86_FEATURE_PDCM)) >> + return 1; >> + msr_info->data = native_read_msr(MSR_IA32_PERF_CAPABILITIES); > Since this isn't guarded by vcpu->kvm->arch.lbr_in_guest, it breaks > backwards compatibility, doesn't it? Right, thanks. Probably better to change it to below: msr_info->data = 0; data = native_read_msr(MSR_IA32_PERF_CAPABILITIES); if (vcpu->kvm->arch.lbr_in_guest) msr_info->data |= (data & X86_PERF_CAP_MASK_LBR_FMT); Best, Wei
On Wed, Jan 2, 2019 at 11:55 PM Wei Wang <wei.w.wang@intel.com> wrote: > Right, thanks. Probably better to change it to below: > > msr_info->data = 0; > data = native_read_msr(MSR_IA32_PERF_CAPABILITIES); > if (vcpu->kvm->arch.lbr_in_guest) > msr_info->data |= (data & X86_PERF_CAP_MASK_LBR_FMT); > This still breaks backwards compatibility. Returning 0 and raising #GP are not the same.
On 01/03/2019 11:25 PM, Jim Mattson wrote: > On Wed, Jan 2, 2019 at 11:55 PM Wei Wang <wei.w.wang@intel.com> wrote: > >> Right, thanks. Probably better to change it to below: >> >> msr_info->data = 0; >> data = native_read_msr(MSR_IA32_PERF_CAPABILITIES); >> if (vcpu->kvm->arch.lbr_in_guest) >> msr_info->data |= (data & X86_PERF_CAP_MASK_LBR_FMT); >> > This still breaks backwards compatibility. Returning 0 and raising #GP > are not the same. I'm not sure about raising GP# in this case. This PERF_CAP msr contains more things than the lbr format. For example, a guest with lbr=false option could read it to get PEBS_FMT, which is PERF_CAP[11:8]. We should offer those bits in this case. When lbr=false, the lbr feature is not usable by the guest, so I think whatever value (0 or other value) of the LBR_FMT bits that we give to the guest might not be important. Best, Wei
On Mon, Jan 7, 2019 at 1:09 AM Wei Wang <wei.w.wang@intel.com> wrote: > > On 01/03/2019 11:25 PM, Jim Mattson wrote: > > On Wed, Jan 2, 2019 at 11:55 PM Wei Wang <wei.w.wang@intel.com> wrote: > > > >> Right, thanks. Probably better to change it to below: > >> > >> msr_info->data = 0; > >> data = native_read_msr(MSR_IA32_PERF_CAPABILITIES); > >> if (vcpu->kvm->arch.lbr_in_guest) > >> msr_info->data |= (data & X86_PERF_CAP_MASK_LBR_FMT); > >> > > This still breaks backwards compatibility. Returning 0 and raising #GP > > are not the same. > > I'm not sure about raising GP# in this case. > > This PERF_CAP msr contains more things than the lbr format. > For example, a guest with lbr=false option could read it to get PEBS_FMT, > which is PERF_CAP[11:8]. We should offer those bits in this case. > > When lbr=false, the lbr feature is not usable by the guest, > so I think whatever value (0 or other value) of the LBR_FMT bits that > we give to the guest might not be important. The issue is compatibility. Prior to your change, reading this MSR from a VM would raise #GP. After your change, it won't. That means that if you have a VM migrating between hosts with kernel versions before and after this change, the results will be inconsistent. In the forward migration path, RDMSR will first raise #GP, and later will return 0. In the backward migration path, RDMSR will first return 0, and later it will raise #GP. To avoid these inconsistencies, the new MSR behavior should be explicitly enabled by an opt-in ioctl from userspace (not necessarily the same ioctl that enables LBR).
> The issue is compatibility. Prior to your change, reading this MSR > from a VM would raise #GP. After your change, it won't. That means > that if you have a VM migrating between hosts with kernel versions > before and after this change, the results will be inconsistent. In the No it will not be. All Linux kernel uses of this MSR are guarded by a CPUID check. -Andi
On Mon, Jan 7, 2019 at 10:20 AM Andi Kleen <ak@linux.intel.com> wrote: > > > The issue is compatibility. Prior to your change, reading this MSR > > from a VM would raise #GP. After your change, it won't. That means > > that if you have a VM migrating between hosts with kernel versions > > before and after this change, the results will be inconsistent. In the > > No it will not be. All Linux kernel uses of this MSR are guarded > by a CPUID check. Linux usage is irrelevant to the architected behavior of the virtual CPU. According to volume 4 of the SDM, this MSR is only supported when CPUID.01H:ECX.PDCM [bit 15] is set. Therefore, kvm should raise #GP whenever a guest tries to read this MSR and the guest's CPUID.01H:ECX.PDCM [bit 15] is clear.
On Mon, Jan 07, 2019 at 10:48:38AM -0800, Jim Mattson wrote: > On Mon, Jan 7, 2019 at 10:20 AM Andi Kleen <ak@linux.intel.com> wrote: > > > > > The issue is compatibility. Prior to your change, reading this MSR > > > from a VM would raise #GP. After your change, it won't. That means > > > that if you have a VM migrating between hosts with kernel versions > > > before and after this change, the results will be inconsistent. In the > > > > No it will not be. All Linux kernel uses of this MSR are guarded > > by a CPUID check. > > Linux usage is irrelevant to the architected behavior of the virtual > CPU. According to volume 4 of the SDM, this MSR is only supported when > CPUID.01H:ECX.PDCM [bit 15] is set. Therefore, kvm should raise #GP > whenever a guest tries to read this MSR and the guest's > CPUID.01H:ECX.PDCM [bit 15] is clear. That's not general practice in KVM. There are lots of MSRs that don't fault even if their CPUID doesn't support them. It's also not generally true for instructions, where it is even impossible. You could argue it should be done for MSRs, but that would be a much larger patch for lots of MSRs. It seems pointless to single out this particular one. In practice I doubt it will make any difference for real software either way. -Andi
On Mon, Jan 7, 2019 at 12:14 PM Andi Kleen <ak@linux.intel.com> wrote: > > On Mon, Jan 07, 2019 at 10:48:38AM -0800, Jim Mattson wrote: > > On Mon, Jan 7, 2019 at 10:20 AM Andi Kleen <ak@linux.intel.com> wrote: > > > > > > > The issue is compatibility. Prior to your change, reading this MSR > > > > from a VM would raise #GP. After your change, it won't. That means > > > > that if you have a VM migrating between hosts with kernel versions > > > > before and after this change, the results will be inconsistent. In the > > > > > > No it will not be. All Linux kernel uses of this MSR are guarded > > > by a CPUID check. > > > > Linux usage is irrelevant to the architected behavior of the virtual > > CPU. According to volume 4 of the SDM, this MSR is only supported when > > CPUID.01H:ECX.PDCM [bit 15] is set. Therefore, kvm should raise #GP > > whenever a guest tries to read this MSR and the guest's > > CPUID.01H:ECX.PDCM [bit 15] is clear. > > That's not general practice in KVM. There are lots of MSRs > that don't fault even if their CPUID doesn't support them. KVM doesn't really have a "general practice" in this regard. True, there are lots of MSRs that don't fault even if unsupported. But there are quite a few that do fault if unsupported. On the Intel side, MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES, MSR_IA32_BNDCFGS, and MSR_TSC_AUX all check for the corresponding capabilities in guest CPUID. In the "common" code, MSR_IA32_TSC_ADJUST, MSR_AMD64_OSVW_ID_LENGTH, and MSR_AMD64_OSVW_STATUS all check for the corresponding capabilities in guest CPUID. > It's also not generally true for instructions, where it is even > impossible. Yes, sometimes it is impossible to #UD for instructions that are supported on the host but not in the guest. However, sometimes it is possible, and kvm sometimes does enforce this. Some examples that come to mind are RDTSCP and INVPCID. In fact, kvm goes so far as to require that the guest CPUID must enumerate PCID support in order to enumerate INVPCID support, and INVPCID will raise #UD unless both capabilities are enumerated! > You could argue it should be done for MSRs, but that would > be a much larger patch for lots of MSRs. It seems pointless > to single out this particular one. Past sloppiness isn't really a compelling argument for continued sloppiness going forward. And there are existing MSRs that do the appropriate checks, so I'm not singling this one out. > In practice I doubt it will make any difference for > real software either way. Perhaps, but when the correct behavior is so easy to implement, why not just do it right in the first place?
On 01/08/2019 02:48 AM, Jim Mattson wrote: > On Mon, Jan 7, 2019 at 10:20 AM Andi Kleen <ak@linux.intel.com> wrote: >>> The issue is compatibility. Prior to your change, reading this MSR >>> from a VM would raise #GP. After your change, it won't. That means >>> that if you have a VM migrating between hosts with kernel versions >>> before and after this change, the results will be inconsistent. In the >> No it will not be. All Linux kernel uses of this MSR are guarded >> by a CPUID check. > Linux usage is irrelevant to the architected behavior of the virtual > CPU. According to volume 4 of the SDM, this MSR is only supported when > CPUID.01H:ECX.PDCM [bit 15] is set. Therefore, kvm should raise #GP > whenever a guest tries to read this MSR and the guest's > CPUID.01H:ECX.PDCM [bit 15] is clear. > Probably one more check would be better: if (!boot_cpu_has(X86_FEATURE_PDCM) || !guest_cpuid_has(vcpu, X86_FEATURE_PDCM)) return 1; (host isn't expected to read this MSR when PDCM is not supported by the guest, so don't have "!msr_info->host_initiate" added to above) Best, Wei
On Mon, Jan 7, 2019 at 11:48 PM Wei Wang <wei.w.wang@intel.com> wrote: > > On 01/08/2019 02:48 AM, Jim Mattson wrote: > > On Mon, Jan 7, 2019 at 10:20 AM Andi Kleen <ak@linux.intel.com> wrote: > >>> The issue is compatibility. Prior to your change, reading this MSR > >>> from a VM would raise #GP. After your change, it won't. That means > >>> that if you have a VM migrating between hosts with kernel versions > >>> before and after this change, the results will be inconsistent. In the > >> No it will not be. All Linux kernel uses of this MSR are guarded > >> by a CPUID check. > > Linux usage is irrelevant to the architected behavior of the virtual > > CPU. According to volume 4 of the SDM, this MSR is only supported when > > CPUID.01H:ECX.PDCM [bit 15] is set. Therefore, kvm should raise #GP > > whenever a guest tries to read this MSR and the guest's > > CPUID.01H:ECX.PDCM [bit 15] is clear. > > > > Probably one more check would be better: > > if (!boot_cpu_has(X86_FEATURE_PDCM) || > !guest_cpuid_has(vcpu, X86_FEATURE_PDCM)) > return 1; Thanks for that change! > (host isn't expected to read this MSR when PDCM is not supported > by the guest, so don't have "!msr_info->host_initiate" added to above) In keeping with commit 44883f01fe6ae ("KVM: x86: ensure all MSRs can always be KVM_GET/SET_MSR'd"), I think the host_initiated check should be added.
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 2f82795..eee09b7 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -87,6 +87,8 @@ #define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 #define ARCH_PERFMON_EVENTS_COUNT 7 +#define X86_PERF_CAP_MASK_LBR_FMT 0x3f + /* * Intel "Architectural Performance Monitoring" CPUID * detection/enumeration details: diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 7bcfa61..3b8a57b 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -365,7 +365,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ | 0 /* DS-CPL, VMX, SMX, EST */ | 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | - F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ | + F(FMA) | F(CX16) | 0 /* xTPR Update*/ | F(PDCM) | F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) | F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) | diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8d5d984..ee02967 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4161,6 +4161,13 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; msr_info->data = vcpu->arch.ia32_xss; break; + case MSR_IA32_PERF_CAPABILITIES: + if (!boot_cpu_has(X86_FEATURE_PDCM)) + return 1; + msr_info->data = native_read_msr(MSR_IA32_PERF_CAPABILITIES); + if (vcpu->kvm->arch.lbr_in_guest) + msr_info->data &= X86_PERF_CAP_MASK_LBR_FMT; + break; case MSR_TSC_AUX: if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) @@ -4343,6 +4350,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) else clear_atomic_switch_msr(vmx, MSR_IA32_XSS); break; + case MSR_IA32_PERF_CAPABILITIES: + return 1; /* RO MSR */ case MSR_TSC_AUX: if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
Bits [0, 5] of MSR_IA32_PERF_CAPABILITIES tell about the format of the addresses stored in the LBR stack. Expose those bits to the guest when the guest lbr feature is enabled. Signed-off-by: Wei Wang <wei.w.wang@intel.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Andi Kleen <ak@linux.intel.com> --- arch/x86/include/asm/perf_event.h | 2 ++ arch/x86/kvm/cpuid.c | 2 +- arch/x86/kvm/vmx.c | 9 +++++++++ 3 files changed, 12 insertions(+), 1 deletion(-)