diff mbox series

target/i386: do not set unsupported VMX secondary execution controls

Message ID 20200331162752.1209928-1-vkuznets@redhat.com (mailing list archive)
State New, archived
Headers show
Series target/i386: do not set unsupported VMX secondary execution controls | expand

Commit Message

Vitaly Kuznetsov March 31, 2020, 4:27 p.m. UTC
Commit 048c95163b4 ("target/i386: work around KVM_GET_MSRS bug for
secondary execution controls") added a workaround for KVM pre-dating
commit 6defc591846d ("KVM: nVMX: include conditional controls in /dev/kvm
KVM_GET_MSRS") which wasn't setting certain available controls. The
workaround uses generic CPUID feature bits to set missing VMX controls.

It was found that in some cases it is possible to observe hosts which
have certain CPUID features but lack the corresponding VMX control.

In particular, it was reported that Azure VMs have RDSEED but lack
VMX_SECONDARY_EXEC_RDSEED_EXITING; attempts to enable this feature
bit result in QEMU abort.

Resolve the issue but not applying the workaround when we don't have
to. As there is no good way to find out if KVM has the fix itself, use
95c5c7c77c ("KVM: nVMX: list VMX MSRs in KVM_GET_MSR_INDEX_LIST") instead
as these [are supposed to] come together.

Fixes: 048c95163b4 ("target/i386: work around KVM_GET_MSRS bug for secondary execution controls")
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 target/i386/kvm.c | 41 ++++++++++++++++++++++++++---------------
 1 file changed, 26 insertions(+), 15 deletions(-)

Comments

Paolo Bonzini March 31, 2020, 4:32 p.m. UTC | #1
On 31/03/20 18:27, Vitaly Kuznetsov wrote:
> Commit 048c95163b4 ("target/i386: work around KVM_GET_MSRS bug for
> secondary execution controls") added a workaround for KVM pre-dating
> commit 6defc591846d ("KVM: nVMX: include conditional controls in /dev/kvm
> KVM_GET_MSRS") which wasn't setting certain available controls. The
> workaround uses generic CPUID feature bits to set missing VMX controls.
> 
> It was found that in some cases it is possible to observe hosts which
> have certain CPUID features but lack the corresponding VMX control.
> 
> In particular, it was reported that Azure VMs have RDSEED but lack
> VMX_SECONDARY_EXEC_RDSEED_EXITING; attempts to enable this feature
> bit result in QEMU abort.
> 
> Resolve the issue but not applying the workaround when we don't have
> to. As there is no good way to find out if KVM has the fix itself, use
> 95c5c7c77c ("KVM: nVMX: list VMX MSRs in KVM_GET_MSR_INDEX_LIST") instead
> as these [are supposed to] come together.
> 
> Fixes: 048c95163b4 ("target/i386: work around KVM_GET_MSRS bug for secondary execution controls")
> Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>

Queued, thanks.

Paolo

> ---
>  target/i386/kvm.c | 41 ++++++++++++++++++++++++++---------------
>  1 file changed, 26 insertions(+), 15 deletions(-)
> 
> diff --git a/target/i386/kvm.c b/target/i386/kvm.c
> index 69eb43d796e6..4901c6dd747d 100644
> --- a/target/i386/kvm.c
> +++ b/target/i386/kvm.c
> @@ -106,6 +106,7 @@ static bool has_msr_arch_capabs;
>  static bool has_msr_core_capabs;
>  static bool has_msr_vmx_vmfunc;
>  static bool has_msr_ucode_rev;
> +static bool has_msr_vmx_procbased_ctls2;
>  
>  static uint32_t has_architectural_pmu_version;
>  static uint32_t num_architectural_pmu_gp_counters;
> @@ -490,21 +491,28 @@ uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index)
>      value = msr_data.entries[0].data;
>      switch (index) {
>      case MSR_IA32_VMX_PROCBASED_CTLS2:
> -        /* KVM forgot to add these bits for some time, do this ourselves.  */
> -        if (kvm_arch_get_supported_cpuid(s, 0xD, 1, R_ECX) & CPUID_XSAVE_XSAVES) {
> -            value |= (uint64_t)VMX_SECONDARY_EXEC_XSAVES << 32;
> -        }
> -        if (kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX) & CPUID_EXT_RDRAND) {
> -            value |= (uint64_t)VMX_SECONDARY_EXEC_RDRAND_EXITING << 32;
> -        }
> -        if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & CPUID_7_0_EBX_INVPCID) {
> -            value |= (uint64_t)VMX_SECONDARY_EXEC_ENABLE_INVPCID << 32;
> -        }
> -        if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & CPUID_7_0_EBX_RDSEED) {
> -            value |= (uint64_t)VMX_SECONDARY_EXEC_RDSEED_EXITING << 32;
> -        }
> -        if (kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_EDX) & CPUID_EXT2_RDTSCP) {
> -            value |= (uint64_t)VMX_SECONDARY_EXEC_RDTSCP << 32;
> +        if (!has_msr_vmx_procbased_ctls2) {
> +            /* KVM forgot to add these bits for some time, do this ourselves. */
> +            if (kvm_arch_get_supported_cpuid(s, 0xD, 1, R_ECX) &
> +                CPUID_XSAVE_XSAVES) {
> +                value |= (uint64_t)VMX_SECONDARY_EXEC_XSAVES << 32;
> +            }
> +            if (kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX) &
> +                CPUID_EXT_RDRAND) {
> +                value |= (uint64_t)VMX_SECONDARY_EXEC_RDRAND_EXITING << 32;
> +            }
> +            if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) &
> +                CPUID_7_0_EBX_INVPCID) {
> +                value |= (uint64_t)VMX_SECONDARY_EXEC_ENABLE_INVPCID << 32;
> +            }
> +            if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) &
> +                CPUID_7_0_EBX_RDSEED) {
> +                value |= (uint64_t)VMX_SECONDARY_EXEC_RDSEED_EXITING << 32;
> +            }
> +            if (kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_EDX) &
> +                CPUID_EXT2_RDTSCP) {
> +                value |= (uint64_t)VMX_SECONDARY_EXEC_RDTSCP << 32;
> +            }
>          }
>          /* fall through */
>      case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
> @@ -2060,6 +2068,9 @@ static int kvm_get_supported_msrs(KVMState *s)
>              case MSR_IA32_UCODE_REV:
>                  has_msr_ucode_rev = true;
>                  break;
> +            case MSR_IA32_VMX_PROCBASED_CTLS2:
> +                has_msr_vmx_procbased_ctls2 = true;
> +                break;
>              }
>          }
>      }
>
Montes, Julio March 31, 2020, 4:56 p.m. UTC | #2
Hi Vitaly

thanks for raising this, unfortunately this patch didn't work for me, I still get the same error:


my qemu command line:
Montes, Julio March 31, 2020, 4:59 p.m. UTC | #3
Sorry for my last email, it was incomplete

Hi Vitaly

thanks for raising this, unfortunately this patch didn't work for me, I still get the same error:

qemu-system-x86_64: error: failed to set MSR 0x48b to 0x1582e00000000
qemu-system-x86_64: /home/testpmem/go/src/github.com/kata-containers/qemu/target/i386/kvm.c:2695: kvm_buf_set_msrs: Assertion `ret == cpu->kvm_msr_buf->nmsrs

my qemu command line:
/usr/bin/qemu-system-x86_64 -name sandbox-f218abcb05f6e05cc68768f74e9106303066f377a877c03ddc64e1e5e8685633 -uuid 8189ac12-5a5c-4989-bf82-c0218f8a3d33 -machine pc,accel=kvm,kernel_irqchip,nvdimm -cpu host,pmu=off -qmp unix:/run/vc/vm/f218abcb05f6e05cc68768f74e9106303066f377a877c03ddc64e1e5e8685633/qmp.sock,server,nowait -m 2048M,slots=10,maxmem=17041M -device pci-bridge,bus=pci.0,id=pci-bridge-0,chassis_nr=1,shpc=on,addr=2,romfile= -device virtio-serial-pci,disable-modern=true,id=serial0,romfile= -device virtconsole,chardev=charconsole0,id=console0 -chardev socket,id=charconsole0,path=/run/vc/vm/f218abcb05f6e05cc68768f74e9106303066f377a877c03ddc64e1e5e8685633/console.sock,server,nowait -device nvdimm,id=nv0,memdev=mem0 -object memory-backend-file,id=mem0,mem-path=/usr/share/kata-containers/kata-containers-clearlinux-32700-osbuilder-891b61c-agent-73afd1a.img,size=134217728 -device virtio-scsi-pci,id=scsi0,disable-modern=true,romfile= -object rng-random,id=rng0,filename=/dev/urandom -device virtio-rng-pci,rng=rng0,romfile= -device virtserialport,chardev=charch0,id=channel0,name=agent.channel.0 -chardev socket,id=charch0,path=/run/vc/vm/f218abcb05f6e05cc68768f74e9106303066f377a877c03ddc64e1e5e8685633/kata.sock,server,nowait -device virtio-9p-pci,disable-modern=true,fsdev=extra-9p-kataShared,mount_tag=kataShared,romfile= -fsdev local,id=extra-9p-kataShared,path=/run/kata-containers/shared/sandboxes/f218abcb05f6e05cc68768f74e9106303066f377a877c03ddc64e1e5e8685633,security_model=none -netdev tap,id=network-0,vhost=on,vhostfds=3,fds=4 -device driver=virtio-net-pci,netdev=network-0,mac=02:42:ac:11:00:02,disable-modern=true,mq=on,vectors=4,romfile= -global kvm-pit.lost_tick_policy=discard -vga none -no-user-config -nodefaults -nographic -daemonize -object memory-backend-ram,id=dimm1,size=2048M -numa node,memdev=dimm1 -kernel /usr/share/kata-containers/vmlinuz-5.4.15-71 -append tsc=reliable no_timer_check rcupdate.rcu_expedited=1 i8042.direct=1 i8042.dumbkbd=1 i8042.nopnp=1 i8042.noaux=1 noreplace-smp reboot=k console=hvc0 console=hvc1 iommu=off cryptomgr.notests net.ifnames=0 pci=lastbus=0 root=/dev/pmem0p1 rootflags=dax,data=ordered,errors=remount-ro ro rootfstype=ext4 debug systemd.show_status=true systemd.log_level=debug panic=1 nr_cpus=4 agent.use_vsock=false systemd.unit=kata-containers.target systemd.mask=systemd-networkd.service systemd.mask=systemd-networkd.socket agent.log=debug agent.log=debug -pidfile /run/vc/vm/f218abcb05f6e05cc68768f74e9106303066f37
7a877c03ddc64e1e5e8685633/pid -D /run/vc/vm/f218abcb05f6e05cc68768f74e9106303066f377a877c03ddc64e1e5e8685633/qemu.log -smp 1,cores=1,threads=1,sockets=4,maxcpus=4



./vmxcap output:

Basic VMX Information
  Hex: 0x98100000000001
  Revision                                 1
  VMCS size                                4096
  VMCS restricted to 32 bit addresses      no
  Dual-monitor support                     no
  VMCS memory type                         6
  INS/OUTS instruction information         no
  IA32_VMX_TRUE_*_CTLS support             yes
pin-based controls
  External interrupt exiting               yes
  NMI exiting                              yes
  Virtual NMIs                             yes
  Activate VMX-preemption timer            no
  Process posted interrupts                no
primary processor-based controls
  Interrupt window exiting                 yes
  Use TSC offsetting                       yes
  HLT exiting                              forced
  INVLPG exiting                           yes
  MWAIT exiting                            forced
  RDPMC exiting                            yes
  RDTSC exiting                            yes
  CR3-load exiting                         default
  CR3-store exiting                        default
  CR8-load exiting                         yes
  CR8-store exiting                        yes
  Use TPR shadow                           yes
  NMI-window exiting                       yes
  MOV-DR exiting                           yes
  Unconditional I/O exiting                yes
  Use I/O bitmaps                          yes
  Monitor trap flag                        no
  Use MSR bitmaps                          yes
  MONITOR exiting                          forced
  PAUSE exiting                            yes
  Activate secondary control               yes
secondary processor-based controls
  Virtualize APIC accesses                 no
  Enable EPT                               yes
  Descriptor-table exiting                 yes
  Enable RDTSCP                            yes
  Virtualize x2APIC mode                   no
  Enable VPID                              yes
  WBINVD exiting                           no
  Unrestricted guest                       no
  APIC register emulation                  no
  Virtual interrupt delivery               no
  PAUSE-loop exiting                       no
  RDRAND exiting                           yes
  Enable INVPCID                           yes
  Enable VM functions                      no
  VMCS shadowing                           no
  Enable ENCLS exiting                     no
  RDSEED exiting                           no
  Enable PML                               no
  EPT-violation #VE                        no
  Conceal non-root operation from PT       no
  Enable XSAVES/XRSTORS                    no
  Mode-based execute control (XS/XU)       no
  Sub-page write permissions               no
  GPA translation for PT                   no
  TSC scaling                              no
  User wait and pause                      no
  ENCLV exiting                            no
VM-Exit controls
  Save debug controls                      default
  Host address-space size                  forced
  Load IA32_PERF_GLOBAL_CTRL               no
  Acknowledge interrupt on exit            yes
  Save IA32_PAT                            yes
  Load IA32_PAT                            yes
  Save IA32_EFER                           yes
  Load IA32_EFER                           yes
  Save VMX-preemption timer value          no
  Clear IA32_BNDCFGS                       no
  Conceal VM exits from PT                 no
  Clear IA32_RTIT_CTL                      no
VM-Entry controls
  Load debug controls                      default
  IA-32e mode guest                        yes
  Entry to SMM                             no
  Deactivate dual-monitor treatment        no
  Load IA32_PERF_GLOBAL_CTRL               no
  Load IA32_PAT                            yes
  Load IA32_EFER                           yes
  Load IA32_BNDCFGS                        no
  Conceal VM entries from PT               no
  Load IA32_RTIT_CTL                       no
Miscellaneous data
  Hex: 0x40
  VMX-preemption timer scale (log2)        0
  Store EFER.LMA into IA-32e mode guest control no
  HLT activity state                       yes
  Shutdown activity state                  no
  Wait-for-SIPI activity state             no
  PT in VMX operation                      no
  IA32_SMBASE support                      no
  Number of CR3-target values              0
  MSR-load/store count recommendation      0
  IA32_SMM_MONITOR_CTL[2] can be set to 1  no
  VMWRITE to VM-exit information fields    no
  Inject event with insn length=0          no
  MSEG revision identifier                 0
VPID and EPT capabilities
  Hex: 0xf0106114040
  Execute-only EPT translations            no
  Page-walk length 4                       yes
  Paging-structure memory type UC          no
  Paging-structure memory type WB          yes
  2MB EPT pages                            yes
  1GB EPT pages                            no
  INVEPT supported                         yes
  EPT accessed and dirty flags             no
  Advanced VM-exit information for EPT violations no
  Single-context INVEPT                    yes
  All-context INVEPT                       yes
  INVVPID supported                        yes
  Individual-address INVVPID               yes
  Single-context INVVPID                   yes
  All-context INVVPID                      yes
  Single-context-retaining-globals INVVPID yes
VM Functions
  Hex: 0x0
  EPTP Switching                           no
Dr. David Alan Gilbert March 31, 2020, 5:26 p.m. UTC | #4
* Montes, Julio (julio.montes@intel.com) wrote:
> Sorry for my last email, it was incomplete
> 
> Hi Vitaly
> 
> thanks for raising this, unfortunately this patch didn't work for me, I still get the same error:

Are you trying that on top of 5.0 or ontop of the older 4.2 world?

> qemu-system-x86_64: error: failed to set MSR 0x48b to 0x1582e00000000
> qemu-system-x86_64: /home/testpmem/go/src/github.com/kata-containers/qemu/target/i386/kvm.c:2695: kvm_buf_set_msrs: Assertion `ret == cpu->kvm_msr_buf->nmsrs

If my reading of 0x1582e00000000 is correct then we have:
                                               0x1582e 00000000
VMX_SECONDARY_EXEC_RDSEED_EXITING           0x00010000  !
 
VMX_SECONDARY_EXEC_SHADOW_VMCS              0x00004000  !
VMX_SECONDARY_EXEC_ENABLE_INVPCID           0x00001000
 
VMX_SECONDARY_EXEC_RDRAND_EXITING           0x00000800
 
VMX_SECONDARY_EXEC_ENABLE_VPID              0x00000020
 
VMX_SECONDARY_EXEC_ENABLE_EPT               0x00000002
VMX_SECONDARY_EXEC_DESC                     0x00000004
VMX_SECONDARY_EXEC_RDTSCP                   0x00000008

> 
> my qemu command line:
> /usr/bin/qemu-system-x86_64 -name sandbox-f218abcb05f6e05cc68768f74e9106303066f377a877c03ddc64e1e5e8685633 -uuid 8189ac12-5a5c-4989-bf82-c0218f8a3d33 -machine pc,accel=kvm,kernel_irqchip,nvdimm -cpu host,pmu=off -qmp unix:/run/vc/vm/f218abcb05f6e05cc68768f74e9106303066f377a877c03ddc64e1e5e8685633/qmp.sock,server,nowait -m 2048M,slots=10,maxmem=17041M -device pci-bridge,bus=pci.0,id=pci-bridge-0,chassis_nr=1,shpc=on,addr=2,romfile= -device virtio-serial-pci,disable-modern=true,id=serial0,romfile= -device virtconsole,chardev=charconsole0,id=console0 -chardev socket,id=charconsole0,path=/run/vc/vm/f218abcb05f6e05cc68768f74e9106303066f377a877c03ddc64e1e5e8685633/console.sock,server,nowait -device nvdimm,id=nv0,memdev=mem0 -object memory-backend-file,id=mem0,mem-path=/usr/share/kata-containers/kata-containers-clearlinux-32700-osbuilder-891b61c-agent-73afd1a.img,size=134217728 -device virtio-scsi-pci,id=scsi0,disable-modern=true,romfile= -object rng-random,id=rng0,filename=/dev/urandom -device virtio-rng-pci,rng=rng0,romfile= -device virtserialport,chardev=charch0,id=channel0,name=agent.channel.0 -chardev socket,id=charch0,path=/run/vc/vm/f218abcb05f6e05cc68768f74e9106303066f377a877c03ddc64e1e5e8685633/kata.sock,server,nowait -device virtio-9p-pci,disable-modern=true,fsdev=extra-9p-kataShared,mount_tag=kataShared,romfile= -fsdev local,id=extra-9p-kataShared,path=/run/kata-containers/shared/sandboxes/f218abcb05f6e05cc68768f74e9106303066f377a877c03ddc64e1e5e8685633,security_model=none -netdev tap,id=network-0,vhost=on,vhostfds=3,fds=4 -device driver=virtio-net-pci,netdev=network-0,mac=02:42:ac:11:00:02,disable-modern=true,mq=on,vectors=4,romfile= -global kvm-pit.lost_tick_policy=discard -vga none -no-user-config -nodefaults -nographic -daemonize -object memory-backend-ram,id=dimm1,size=2048M -numa node,memdev=dimm1 -kernel /usr/share/kata-containers/vmlinuz-5.4.15-71 -append tsc=reliable no_timer_check rcupdate.rcu_expedited=1 i8042.direct=1 i8042.dumbkbd=1 i8042.nopnp=1 i8042.noaux=1 noreplace-smp reboot=k console=hvc0 console=hvc1 iommu=off cryptomgr.notests net.ifnames=0 pci=lastbus=0 root=/dev/pmem0p1 rootflags=dax,data=ordered,errors=remount-ro ro rootfstype=ext4 debug systemd.show_status=true systemd.log_level=debug panic=1 nr_cpus=4 agent.use_vsock=false systemd.unit=kata-containers.target systemd.mask=systemd-networkd.service systemd.mask=systemd-networkd.socket agent.log=debug agent.log=debug -pidfile /run/vc/vm/f218abcb05f6e05cc68768f74e9106303066f37
> 7a877c03ddc64e1e5e8685633/pid -D /run/vc/vm/f218abcb05f6e05cc68768f74e9106303066f377a877c03ddc64e1e5e8685633/qemu.log -smp 1,cores=1,threads=1,sockets=4,maxcpus=4
> 
> 
> 
> ./vmxcap output:
> 
> secondary processor-based controls
>   Virtualize APIC accesses                 no
>   Enable EPT                               yes
>   Descriptor-table exiting                 yes
>   Enable RDTSCP                            yes
>   Virtualize x2APIC mode                   no
>   Enable VPID                              yes
>   WBINVD exiting                           no
>   Unrestricted guest                       no
>   APIC register emulation                  no
>   Virtual interrupt delivery               no
>   PAUSE-loop exiting                       no
>   RDRAND exiting                           yes
>   Enable INVPCID                           yes
>   Enable VM functions                      no
>   VMCS shadowing                           no   <<<<<
>   Enable ENCLS exiting                     no
>   RDSEED exiting                           no   <<<<<
>   Enable PML                               no
>   EPT-violation #VE                        no
>   Conceal non-root operation from PT       no
>   Enable XSAVES/XRSTORS                    no
>   Mode-based execute control (XS/XU)       no
>   Sub-page write permissions               no
>   GPA translation for PT                   no
>   TSC scaling                              no
>   User wait and pause                      no
>   ENCLV exiting                            no


So we're apparently trying to enable both RDSEED_EXITING and SHADOW_VMCS
which are missing.


> On 31/03/20 18:27, Vitaly Kuznetsov wrote:

> >      case MSR_IA32_VMX_PROCBASED_CTLS2:
> > -        /* KVM forgot to add these bits for some time, do this ourselves.  */
> > -        if (kvm_arch_get_supported_cpuid(s, 0xD, 1, R_ECX) & CPUID_XSAVE_XSAVES) {
> > -            value |= (uint64_t)VMX_SECONDARY_EXEC_XSAVES << 32;
> > -        }
> > -        if (kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX) & CPUID_EXT_RDRAND) {
> > -            value |= (uint64_t)VMX_SECONDARY_EXEC_RDRAND_EXITING << 32;
> > -        }
> > -        if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & CPUID_7_0_EBX_INVPCID) {
> > -            value |= (uint64_t)VMX_SECONDARY_EXEC_ENABLE_INVPCID << 32;
> > -        }
> > -        if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & CPUID_7_0_EBX_RDSEED) {
> > -            value |= (uint64_t)VMX_SECONDARY_EXEC_RDSEED_EXITING << 32;
> > -        }
> > -        if (kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_EDX) & CPUID_EXT2_RDTSCP) {
> > -            value |= (uint64_t)VMX_SECONDARY_EXEC_RDTSCP << 32;
> > +        if (!has_msr_vmx_procbased_ctls2) {
> > +            /* KVM forgot to add these bits for some time, do this ourselves. */
> > +            if (kvm_arch_get_supported_cpuid(s, 0xD, 1, R_ECX) &
> > +                CPUID_XSAVE_XSAVES) {
> > +                value |= (uint64_t)VMX_SECONDARY_EXEC_XSAVES << 32;
> > +            }
> > +            if (kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX) &
> > +                CPUID_EXT_RDRAND) {
> > +                value |= (uint64_t)VMX_SECONDARY_EXEC_RDRAND_EXITING << 32;
> > +            }
> > +            if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) &
> > +                CPUID_7_0_EBX_INVPCID) {
> > +                value |= (uint64_t)VMX_SECONDARY_EXEC_ENABLE_INVPCID << 32;
> > +            }
> > +            if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) &
> > +                CPUID_7_0_EBX_RDSEED) {
> > +                value |= (uint64_t)VMX_SECONDARY_EXEC_RDSEED_EXITING << 32;
> > +            }
> > +            if (kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_EDX) &
> > +                CPUID_EXT2_RDTSCP) {
> > +                value |= (uint64_t)VMX_SECONDARY_EXEC_RDTSCP << 32;
> > +            }

So you would think that would tkae care of RDSEED exiting - but what
about VMCS shadowing?

Dave

> >          }
> >          /* fall through */
> >      case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
> > @@ -2060,6 +2068,9 @@ static int kvm_get_supported_msrs(KVMState *s)
> >              case MSR_IA32_UCODE_REV:
> >                  has_msr_ucode_rev = true;
> >                  break;
> > +            case MSR_IA32_VMX_PROCBASED_CTLS2:
> > +                has_msr_vmx_procbased_ctls2 = true;
> > +                break;
> >              }
> >          }
> >      }
> >
> 
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Montes, Julio March 31, 2020, 5:37 p.m. UTC | #5
David

I'm using master
17083d6d1e Merge remote-tracking branch 'remotes/jasowang/tags/net-pull-request' into staging

-
Cheers
Julio
Vitaly Kuznetsov April 1, 2020, 7:05 a.m. UTC | #6
"Montes, Julio" <julio.montes@intel.com> writes:

> Hi Vitaly
>
> thanks for raising this, unfortunately this patch didn't work for me, I still get the same error:
>
>

Does you kernel have 95c5c7c77c ("KVM: nVMX: list VMX MSRs in
KVM_GET_MSR_INDEX_LIST")?
Vitaly Kuznetsov April 1, 2020, 7:08 a.m. UTC | #7
"Dr. David Alan Gilbert" <dgilbert@redhat.com> writes:

> So you would think that would tkae care of RDSEED exiting - but what
> about VMCS shadowing?
>

SECONDARY_EXEC_SHADOW_VMCS is special, we are able to emulate it in KVM
even when it is not supported by hardware, see
nested_vmx_setup_ctls_msrs():

	/*
	 * We can emulate "VMCS shadowing," even if the hardware
	 * doesn't support it.
	 */
	msrs->secondary_ctls_high |=
		SECONDARY_EXEC_SHADOW_VMCS;
Montes, Julio April 1, 2020, 2:23 p.m. UTC | #8
> Does you kernel have 95c5c7c77c ("KVM: nVMX: list VMX MSRs in
> KVM_GET_MSR_INDEX_LIST")?

I was using linux 5.0.0, now I have 5.3.0 and it's working, thanks for fixing this
Vitaly Kuznetsov April 1, 2020, 2:36 p.m. UTC | #9
"Montes, Julio" <julio.montes@intel.com> writes:

>> Does you kernel have 95c5c7c77c ("KVM: nVMX: list VMX MSRs in
>> KVM_GET_MSR_INDEX_LIST")?
>
> I was using linux 5.0.0, now I have 5.3.0 and it's working, thanks for fixing this
>

Thanks for the confirmation!

I don't see any good solution for kernels without 95c5c7c77c, we'll have
either one issue or another.
diff mbox series

Patch

diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index 69eb43d796e6..4901c6dd747d 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -106,6 +106,7 @@  static bool has_msr_arch_capabs;
 static bool has_msr_core_capabs;
 static bool has_msr_vmx_vmfunc;
 static bool has_msr_ucode_rev;
+static bool has_msr_vmx_procbased_ctls2;
 
 static uint32_t has_architectural_pmu_version;
 static uint32_t num_architectural_pmu_gp_counters;
@@ -490,21 +491,28 @@  uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index)
     value = msr_data.entries[0].data;
     switch (index) {
     case MSR_IA32_VMX_PROCBASED_CTLS2:
-        /* KVM forgot to add these bits for some time, do this ourselves.  */
-        if (kvm_arch_get_supported_cpuid(s, 0xD, 1, R_ECX) & CPUID_XSAVE_XSAVES) {
-            value |= (uint64_t)VMX_SECONDARY_EXEC_XSAVES << 32;
-        }
-        if (kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX) & CPUID_EXT_RDRAND) {
-            value |= (uint64_t)VMX_SECONDARY_EXEC_RDRAND_EXITING << 32;
-        }
-        if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & CPUID_7_0_EBX_INVPCID) {
-            value |= (uint64_t)VMX_SECONDARY_EXEC_ENABLE_INVPCID << 32;
-        }
-        if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & CPUID_7_0_EBX_RDSEED) {
-            value |= (uint64_t)VMX_SECONDARY_EXEC_RDSEED_EXITING << 32;
-        }
-        if (kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_EDX) & CPUID_EXT2_RDTSCP) {
-            value |= (uint64_t)VMX_SECONDARY_EXEC_RDTSCP << 32;
+        if (!has_msr_vmx_procbased_ctls2) {
+            /* KVM forgot to add these bits for some time, do this ourselves. */
+            if (kvm_arch_get_supported_cpuid(s, 0xD, 1, R_ECX) &
+                CPUID_XSAVE_XSAVES) {
+                value |= (uint64_t)VMX_SECONDARY_EXEC_XSAVES << 32;
+            }
+            if (kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX) &
+                CPUID_EXT_RDRAND) {
+                value |= (uint64_t)VMX_SECONDARY_EXEC_RDRAND_EXITING << 32;
+            }
+            if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) &
+                CPUID_7_0_EBX_INVPCID) {
+                value |= (uint64_t)VMX_SECONDARY_EXEC_ENABLE_INVPCID << 32;
+            }
+            if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) &
+                CPUID_7_0_EBX_RDSEED) {
+                value |= (uint64_t)VMX_SECONDARY_EXEC_RDSEED_EXITING << 32;
+            }
+            if (kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_EDX) &
+                CPUID_EXT2_RDTSCP) {
+                value |= (uint64_t)VMX_SECONDARY_EXEC_RDTSCP << 32;
+            }
         }
         /* fall through */
     case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
@@ -2060,6 +2068,9 @@  static int kvm_get_supported_msrs(KVMState *s)
             case MSR_IA32_UCODE_REV:
                 has_msr_ucode_rev = true;
                 break;
+            case MSR_IA32_VMX_PROCBASED_CTLS2:
+                has_msr_vmx_procbased_ctls2 = true;
+                break;
             }
         }
     }