Message ID | 20170310114713.7571-1-david@redhat.com (mailing list archive) |
---|---|
State | New, archived |
2017-03-10 12:47+0100, David Hildenbrand:
> vmm_exclusive=0 leads to KVM setting X86_CR4_VMXE always and calling
> VMXON only when the vcpu is loaded. X86_CR4_VMXE is used as an
> indication in cpu_emergency_vmxoff() (called on kdump) of whether VMXOFF
> has to be called. This is obviously not the case if the two are used
> independently. Calling VMXOFF without a previous VMXON will result in an
> exception.
>
> In addition, X86_CR4_VMXE is used as a means to test whether VMX is
> already in use by another VMM in hardware_enable(). So there can't really
> be co-existence. If the other VMM is prepared for co-existence and does a
> similar check, only one VMM can exist. If the other VMM is not prepared
> and blindly sets/clears X86_CR4_VMXE, we will get inconsistencies with
> X86_CR4_VMXE.
>
> As we also had bug reports related to clearing of the vmcs with
> vmm_exclusive=0, this seems to be pretty much untested. So let's better
> drop it.

Totally,

Reviewed-by: Radim Krčmář <rkrcmar@redhat.com>

> While at it, directly move setting/clearing X86_CR4_VMXE into
> kvm_cpu_vmxon/off.
>
> Signed-off-by: David Hildenbrand <david@redhat.com>
> [...]
On 14/03/2017 21:30, Radim Krčmář wrote:
> 2017-03-10 12:47+0100, David Hildenbrand:
>> vmm_exclusive=0 leads to KVM setting X86_CR4_VMXE always and calling
>> VMXON only when the vcpu is loaded. [...]
>
> Totally,
>
> Reviewed-by: Radim Krčmář <rkrcmar@redhat.com>

Radim, are you putting this in kvm/queue for 4.12?

Thanks,

Paolo
On 10/03/2017 12:47, David Hildenbrand wrote:
> vmm_exclusive=0 leads to KVM setting X86_CR4_VMXE always and calling
> VMXON only when the vcpu is loaded. [...]
>
> Signed-off-by: David Hildenbrand <david@redhat.com>

Applied, thanks.

Paolo
On Fri, Mar 10, 2017 at 12:47:13PM +0100, David Hildenbrand wrote:
> vmm_exclusive=0 leads to KVM setting X86_CR4_VMXE always and calling
> VMXON only when the vcpu is loaded. [...]
>
> While at it, directly move setting/clearing X86_CR4_VMXE into
> kvm_cpu_vmxon/off.

Oh well, I was using this parameter, as suggested by Alexander, to be
able to use Intel PT on the host on a Broadwell machine, i.e.:

  perf record -e intel_pt// usleep 1
  perf script

would show decoded Intel PT records, no more :-\ But I'm clueless about
KVM internals, so I'm just reporting the change in behaviour for this
very specific use case.

Now I don't know if this is something that would make Intel PT usable on
Broadwell machines but wouldn't be required with newer chips; I will test
with a Kaby Lake i5 7500 when back at my home office...

- Arnaldo
2017-06-21 14:48-0300, Arnaldo Carvalho de Melo:
> On Fri, Mar 10, 2017 at 12:47:13PM +0100, David Hildenbrand wrote:
> > vmm_exclusive=0 leads to KVM setting X86_CR4_VMXE always and calling
> > VMXON only when the vcpu is loaded. [...]
>
> Oh well, I was using this parameter, as suggested by Alexander, to be
> able to use Intel PT on the host on a Broadwell machine, i.e.:
>
>   perf record -e intel_pt// usleep 1
>   perf script

We thought that blacklisting the KVM module was a good solution ...
Were you using KVM virtual machines with vmm_exclusive=0?

> would show decoded Intel PT records, no more :-\ But I'm clueless about
> KVM internals, so I'm just reporting the change in behaviour for this
> very specific use case.
>
> Now I don't know if this is something that would make Intel PT usable on
> Broadwell machines but wouldn't be required with newer chips; I will test
> with a Kaby Lake i5 7500 when back at my home office...

Most likely. SDM 35.2.8.2 says:

  Initial implementations of Intel Processor Trace do not support tracing
  in VMX operation. Such processors indicate this by returning 0 for
  IA32_VMX_MISC[bit 14].

so something akin to vmm_exclusive is about the only option there.

Please check whether Kaby Lake already has a more advanced implementation,
because we might need to disable PT when entering VMX non-root mode (so
that the tracing packets are not written into the guest's memory, just
like with PEBS).

Thanks.
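[Editor's note: for context, IA32_VMX_MISC is MSR 0x485, and bit 14 is the
capability bit referred to above. The following is a minimal user-space
sketch, not part of this thread, showing how one could check whether a CPU
advertises Intel PT support in VMX operation. It assumes the msr kernel
module is loaded and read access to /dev/cpu/0/msr.]

/*
 * Minimal sketch (not from this thread): read IA32_VMX_MISC (MSR 0x485)
 * via the msr driver and test bit 14, which per SDM 35.2.8.2 reports
 * whether Intel PT can be used in VMX operation.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#define MSR_IA32_VMX_MISC 0x485

int main(void)
{
	uint64_t val;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/cpu/0/msr (is the msr module loaded?)");
		return 1;
	}
	if (pread(fd, &val, sizeof(val), MSR_IA32_VMX_MISC) != sizeof(val)) {
		perror("read IA32_VMX_MISC");
		close(fd);
		return 1;
	}
	close(fd);

	printf("Intel PT in VMX operation: %ssupported\n",
	       (val & (1ULL << 14)) ? "" : "not ");
	return 0;
}

On parts where this reports "not supported" (the Broadwell-era machines
discussed above), host-side PT tracing and VMX operation cannot be
combined, which is why vmm_exclusive=0 had looked attractive there.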
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 283aa86..bbbfe12 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -84,9 +84,6 @@ module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO);
 static bool __read_mostly emulate_invalid_guest_state = true;
 module_param(emulate_invalid_guest_state, bool, S_IRUGO);
 
-static bool __read_mostly vmm_exclusive = 1;
-module_param(vmm_exclusive, bool, S_IRUGO);
-
 static bool __read_mostly fasteoi = 1;
 module_param(fasteoi, bool, S_IRUGO);
 
@@ -914,8 +911,6 @@ static void nested_release_page_clean(struct page *page)
 
 static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
 static u64 construct_eptp(unsigned long root_hpa);
-static void kvm_cpu_vmxon(u64 addr);
-static void kvm_cpu_vmxoff(void);
 static bool vmx_xsaves_supported(void);
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
 static void vmx_set_segment(struct kvm_vcpu *vcpu,
@@ -2235,15 +2230,10 @@ static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
 static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
 	bool already_loaded = vmx->loaded_vmcs->cpu == cpu;
 
-	if (!vmm_exclusive)
-		kvm_cpu_vmxon(phys_addr);
-	else if (!already_loaded)
-		loaded_vmcs_clear(vmx->loaded_vmcs);
-
 	if (!already_loaded) {
+		loaded_vmcs_clear(vmx->loaded_vmcs);
 		local_irq_disable();
 		crash_disable_local_vmclear(cpu);
 
@@ -2321,11 +2311,6 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
 	vmx_vcpu_pi_put(vcpu);
 
 	__vmx_load_host_state(to_vmx(vcpu));
-	if (!vmm_exclusive) {
-		__loaded_vmcs_clear(to_vmx(vcpu)->loaded_vmcs);
-		vcpu->cpu = -1;
-		kvm_cpu_vmxoff();
-	}
 }
 
 static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
@@ -3416,6 +3401,7 @@ static __init int vmx_disabled_by_bios(void)
 
 static void kvm_cpu_vmxon(u64 addr)
 {
+	cr4_set_bits(X86_CR4_VMXE);
 	intel_pt_handle_vmx(1);
 
 	asm volatile (ASM_VMX_VMXON_RAX
@@ -3458,12 +3444,8 @@ static int hardware_enable(void)
 		/* enable and lock */
 		wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits);
 	}
-	cr4_set_bits(X86_CR4_VMXE);
-
-	if (vmm_exclusive) {
-		kvm_cpu_vmxon(phys_addr);
-		ept_sync_global();
-	}
+	kvm_cpu_vmxon(phys_addr);
+	ept_sync_global();
 
 	native_store_gdt(this_cpu_ptr(&host_gdt));
 
@@ -3489,15 +3471,13 @@ static void kvm_cpu_vmxoff(void)
 	asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
 
 	intel_pt_handle_vmx(0);
+	cr4_clear_bits(X86_CR4_VMXE);
 }
 
 static void hardware_disable(void)
 {
-	if (vmm_exclusive) {
-		vmclear_local_loaded_vmcss();
-		kvm_cpu_vmxoff();
-	}
-	cr4_clear_bits(X86_CR4_VMXE);
+	vmclear_local_loaded_vmcss();
+	kvm_cpu_vmxoff();
 }
 
 static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
@@ -9228,11 +9208,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	vmx->loaded_vmcs->shadow_vmcs = NULL;
 	if (!vmx->loaded_vmcs->vmcs)
 		goto free_msrs;
-	if (!vmm_exclusive)
-		kvm_cpu_vmxon(__pa(per_cpu(vmxarea, raw_smp_processor_id())));
 	loaded_vmcs_init(vmx->loaded_vmcs);
-	if (!vmm_exclusive)
-		kvm_cpu_vmxoff();
 
 	cpu = get_cpu();
 	vmx_vcpu_load(&vmx->vcpu, cpu);
vmm_exclusive=0 leads to KVM setting X86_CR4_VMXE always and calling
VMXON only when the vcpu is loaded. X86_CR4_VMXE is used as an
indication in cpu_emergency_vmxoff() (called on kdump) of whether VMXOFF
has to be called. This is obviously not the case if the two are used
independently. Calling VMXOFF without a previous VMXON will result in an
exception.

In addition, X86_CR4_VMXE is used as a means to test whether VMX is
already in use by another VMM in hardware_enable(). So there can't really
be co-existence. If the other VMM is prepared for co-existence and does a
similar check, only one VMM can exist. If the other VMM is not prepared
and blindly sets/clears X86_CR4_VMXE, we will get inconsistencies with
X86_CR4_VMXE.

As we also had bug reports related to clearing of the vmcs with
vmm_exclusive=0, this seems to be pretty much untested. So let's better
drop it.

While at it, directly move setting/clearing X86_CR4_VMXE into
kvm_cpu_vmxon/off.

Signed-off-by: David Hildenbrand <david@redhat.com>
---
 arch/x86/kvm/vmx.c | 38 +++++++-------------------------------
 1 file changed, 7 insertions(+), 31 deletions(-)
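[Editor's note: the net effect on the two helpers is easiest to see side by
side. Below is a condensed sketch of the post-patch code from the diff
above, with the inline VMXON/VMXOFF assembly elided. With CR4.VMXE toggled
inside these helpers, the flag is set exactly when VMXON has been executed,
which is what the kdump path relies on when deciding whether to issue
VMXOFF.]

/* Condensed from the diff above: CR4.VMXE now changes together with
 * VMXON/VMXOFF, so a set VMXE bit reliably means "VMXOFF is needed". */
static void kvm_cpu_vmxon(u64 addr)
{
	cr4_set_bits(X86_CR4_VMXE);
	intel_pt_handle_vmx(1);
	/* ... ASM_VMX_VMXON_RAX on addr ... */
}

static void kvm_cpu_vmxoff(void)
{
	/* ... ASM_VMX_VMXOFF ... */
	intel_pt_handle_vmx(0);
	cr4_clear_bits(X86_CR4_VMXE);
}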