Message ID | 1251905916-2834-2-git-send-email-oritw@il.ibm.com (mailing list archive) |
---|---|
State | New, archived |
On 09/02/2009 06:38 PM, oritw@il.ibm.com wrote:
> From: Orit Wasserman <oritw@il.ibm.com>
>
> ---
>  arch/x86/kvm/svm.c |    3 -
>  arch/x86/kvm/vmx.c |  187 +++++++++++++++++++++++++++++++++++++++++++++++++++-
>  arch/x86/kvm/x86.c |    6 ++-
>  arch/x86/kvm/x86.h |    2 +
>  4 files changed, 192 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 2df9b45..3c1f22a 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -124,9 +124,6 @@ static int npt = 1;
>
>  module_param(npt, int, S_IRUGO);
>
> -static int nested = 1;
> -module_param(nested, int, S_IRUGO);
> -
>  static void svm_flush_tlb(struct kvm_vcpu *vcpu);
>  static void svm_complete_interrupts(struct vcpu_svm *svm);
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 78101dd..abba325 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -67,6 +67,11 @@ struct vmcs {
>  	char data[0];
>  };
>
> +struct nested_vmx {
> +	/* Has the level1 guest done vmon? */
> +	bool vmon;
> +};

vmxon

> @@ -967,6 +975,69 @@ static void guest_write_tsc(u64 guest_tsc, u64 host_tsc)
>  }
>
>  /*
> + * Handles msr read for nested virtualization
> + */
> +static int nested_vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index,
> +			      u64 *pdata)
> +{
> +	u32 vmx_msr_low = 0, vmx_msr_high = 0;
> +
> +	switch (msr_index) {
> +	case MSR_IA32_FEATURE_CONTROL:
> +		*pdata = 0;
> +		break;
> +	case MSR_IA32_VMX_BASIC:
> +		rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high);

Use rdmsrl, it's easier.

I think we need to mask it with the capabilities we support. Otherwise
the guest can try to use some new feature which we don't support yet,
and crash.

> +		*pdata = vmx_msr_low | ((u64)vmx_msr_high << 32);
> +		break;
> +	case MSR_IA32_VMX_PINBASED_CTLS:
> +		*pdata = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
> +			PIN_BASED_VIRTUAL_NMIS;

Need to mask with actual cpu capabilities in case we run on an older cpu.

> +		break;
> +	case MSR_IA32_VMX_PROCBASED_CTLS:
> +		*pdata = CPU_BASED_HLT_EXITING |
> +#ifdef CONFIG_X86_64
> +			CPU_BASED_CR8_LOAD_EXITING |
> +			CPU_BASED_CR8_STORE_EXITING |
> +#endif
> +			CPU_BASED_CR3_LOAD_EXITING |
> +			CPU_BASED_CR3_STORE_EXITING |
> +			CPU_BASED_USE_IO_BITMAPS |
> +			CPU_BASED_MOV_DR_EXITING |
> +			CPU_BASED_USE_TSC_OFFSETING |
> +			CPU_BASED_INVLPG_EXITING;

Same here... or are all these guaranteed to be present?

> +
> +static int handle_vmon(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_segment cs;
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +
> +	if (!nested) {
> +		printk(KERN_DEBUG "%s: nested vmx not enabled\n", __func__);
> +		kvm_queue_exception(vcpu, UD_VECTOR);
> +		return 1;
> +	}
> +
> +	vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
> +
> +	if (!(vcpu->arch.cr4 & X86_CR4_VMXE) ||
> +	    !(vcpu->arch.cr0 & X86_CR0_PE) ||
> +	    (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) ||
> +	    ((find_msr_entry(to_vmx(vcpu),
> +			     MSR_EFER)->data & EFER_LMA) && !cs.l)) {

Not everyone has EFER. Better to wrap this in an #ifdef CONFIG_X86_64.
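A minimal sketch of what that last comment suggests, for illustration only (not code from this patch series): consult EFER.LMA only when the kernel is built for 64-bit. It reuses helpers the patch already calls (vmx_get_rflags(), find_msr_entry(), to_vmx()); the function name nested_vmon_mode_ok() is invented here.

/* Illustration only: the long-mode check is compiled in only on 64-bit. */
static bool nested_vmon_mode_ok(struct kvm_vcpu *vcpu, struct kvm_segment *cs)
{
	if (!(vcpu->arch.cr4 & X86_CR4_VMXE) ||
	    !(vcpu->arch.cr0 & X86_CR0_PE) ||
	    (vmx_get_rflags(vcpu) & X86_EFLAGS_VM))
		return false;

#ifdef CONFIG_X86_64
	/* In long mode, VMXON is only valid from a 64-bit code segment. */
	if ((find_msr_entry(to_vmx(vcpu), MSR_EFER)->data & EFER_LMA) && !cs->l)
		return false;
#endif

	return true;
}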
Avi Kivity <avi@redhat.com> wrote on 02/09/2009 22:34:58:

> > +struct nested_vmx {
> > +	/* Has the level1 guest done vmon? */
> > +	bool vmon;
> > +};
>
> vmxon

fixed

> > +	case MSR_IA32_VMX_BASIC:
> > +		rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high);
>
> Use rdmsrl, it's easier.

fixed

> I think we need to mask it with the capabilities we support. Otherwise
> the guest can try to use some new feature which we don't support yet,
> and crash.

I agree, but I went over the Intel spec and didn't find any problematic
feature. We may need to consider it in the future.

> > +	case MSR_IA32_VMX_PINBASED_CTLS:
> > +		*pdata = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
> > +			PIN_BASED_VIRTUAL_NMIS;
>
> Need to mask with actual cpu capabilities in case we run on an older cpu.

fixed

> > +	case MSR_IA32_VMX_PROCBASED_CTLS:
> > +		*pdata = CPU_BASED_HLT_EXITING |
>
> Same here... or are all these guaranteed to be present?

fixed

> > +	if (!(vcpu->arch.cr4 & X86_CR4_VMXE) ||
> > +	    !(vcpu->arch.cr0 & X86_CR0_PE) ||
> > +	    (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) ||
> > +	    ((find_msr_entry(to_vmx(vcpu),
> > +			     MSR_EFER)->data & EFER_LMA) && !cs.l)) {
>
> Not everyone has EFER. Better to wrap this in an #ifdef CONFIG_X86_64.

fixed
On 09/03/2009 03:34 PM, Orit Wasserman wrote:
>> I think we need to mask it with the capabilities we support. Otherwise
>> the guest can try to use some new feature which we don't support yet,
>> and crash.
>
> I agree, but I went over the Intel spec and didn't find any problematic
> feature. We may need to consider it in the future.

We need to do it, since we don't know anything about future processors.
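A minimal sketch of the masking being agreed on here, for illustration only: the VMX capability MSRs report the allowed-1 settings of each control in their high 32 bits, so the control bits advertised to the L1 guest can be clipped to what the host CPU actually supports. The helper name nested_clip_to_host_caps() is made up for this example.

/* Clip the control bits we want to advertise to what the host allows. */
static u32 nested_clip_to_host_caps(u32 wanted, u32 capability_msr)
{
	u64 host_caps;

	rdmsrl(capability_msr, host_caps);
	/* High 32 bits: the bits the host CPU allows to be set to 1. */
	return wanted & (u32)(host_caps >> 32);
}

With such a helper, the MSR_IA32_VMX_PINBASED_CTLS case could read something like *pdata = nested_clip_to_host_caps(PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS, MSR_IA32_VMX_PINBASED_CTLS);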
Avi Kivity <avi@redhat.com> wrote on 03/09/2009 16:39:09:

> We need to do it, since we don't know anything about future processors.

OK
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 2df9b45..3c1f22a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -124,9 +124,6 @@ static int npt = 1;
 
 module_param(npt, int, S_IRUGO);
 
-static int nested = 1;
-module_param(nested, int, S_IRUGO);
-
 static void svm_flush_tlb(struct kvm_vcpu *vcpu);
 static void svm_complete_interrupts(struct vcpu_svm *svm);
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 78101dd..abba325 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -67,6 +67,11 @@ struct vmcs {
 	char data[0];
 };
 
+struct nested_vmx {
+	/* Has the level1 guest done vmon? */
+	bool vmon;
+};
+
 struct vcpu_vmx {
 	struct kvm_vcpu vcpu;
 	struct list_head local_vcpus_link;
@@ -114,6 +119,9 @@ struct vcpu_vmx {
 	ktime_t entry_time;
 	s64 vnmi_blocked_time;
 	u32 exit_reason;
+
+	/* Nested vmx */
+	struct nested_vmx nested;
 };
 
 static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -967,6 +975,69 @@ static void guest_write_tsc(u64 guest_tsc, u64 host_tsc)
 }
 
 /*
+ * Handles msr read for nested virtualization
+ */
+static int nested_vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index,
+			      u64 *pdata)
+{
+	u32 vmx_msr_low = 0, vmx_msr_high = 0;
+
+	switch (msr_index) {
+	case MSR_IA32_FEATURE_CONTROL:
+		*pdata = 0;
+		break;
+	case MSR_IA32_VMX_BASIC:
+		rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high);
+		*pdata = vmx_msr_low | ((u64)vmx_msr_high << 32);
+		break;
+	case MSR_IA32_VMX_PINBASED_CTLS:
+		*pdata = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
+			PIN_BASED_VIRTUAL_NMIS;
+		break;
+	case MSR_IA32_VMX_PROCBASED_CTLS:
+		*pdata = CPU_BASED_HLT_EXITING |
+#ifdef CONFIG_X86_64
+			CPU_BASED_CR8_LOAD_EXITING |
+			CPU_BASED_CR8_STORE_EXITING |
+#endif
+			CPU_BASED_CR3_LOAD_EXITING |
+			CPU_BASED_CR3_STORE_EXITING |
+			CPU_BASED_USE_IO_BITMAPS |
+			CPU_BASED_MOV_DR_EXITING |
+			CPU_BASED_USE_TSC_OFFSETING |
+			CPU_BASED_INVLPG_EXITING;
+
+		if (cpu_has_secondary_exec_ctrls())
+			*pdata |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+
+		if (vm_need_tpr_shadow(vcpu->kvm))
+			*pdata |= CPU_BASED_TPR_SHADOW;
+		break;
+	case MSR_IA32_VMX_EXIT_CTLS:
+		*pdata = 0;
+#ifdef CONFIG_X86_64
+		*pdata |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
+#endif
+		break;
+	case MSR_IA32_VMX_ENTRY_CTLS:
+		*pdata = 0;
+		break;
+	case MSR_IA32_VMX_PROCBASED_CTLS2:
+		*pdata = 0;
+		if (vm_need_virtualize_apic_accesses(vcpu->kvm))
+			*pdata |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+		break;
+	case MSR_IA32_VMX_EPT_VPID_CAP:
+		*pdata = 0;
+		break;
+	default:
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
  * Reads an msr value (of 'msr_index') into 'pdata'.
  * Returns 0 on success, non-0 otherwise.
  * Assumes vcpu_load() was already called.
@@ -1005,6 +1076,9 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 		data = vmcs_readl(GUEST_SYSENTER_ESP);
 		break;
 	default:
+		if (nested &&
+		    !nested_vmx_get_msr(vcpu, msr_index, &data))
+			break;
 		vmx_load_host_state(to_vmx(vcpu));
 		msr = find_msr_entry(to_vmx(vcpu), msr_index);
 		if (msr) {
@@ -1019,6 +1093,27 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 }
 
 /*
+ * Writes msr value for nested virtualization
+ * Returns 0 on success, non-0 otherwise.
+ */
+static int nested_vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
+{
+	switch (msr_index) {
+	case MSR_IA32_FEATURE_CONTROL:
+		if ((data & (FEATURE_CONTROL_LOCKED |
+			     FEATURE_CONTROL_VMXON_ENABLED))
+		    != (FEATURE_CONTROL_LOCKED |
+			FEATURE_CONTROL_VMXON_ENABLED))
+			return 1;
+		break;
+	default:
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
  * Writes msr value into into the appropriate "register".
  * Returns 0 on success, non-0 otherwise.
  * Assumes vcpu_load() was already called.
@@ -1064,6 +1159,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 		}
 		/* Otherwise falls through to kvm_set_msr_common */
 	default:
+		if (nested &&
+		    !nested_vmx_set_msr(vcpu, msr_index, data))
+			break;
 		vmx_load_host_state(vmx);
 		msr = find_msr_entry(vmx, msr_index);
 		if (msr) {
@@ -3095,12 +3193,97 @@ static int handle_vmcall(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+/*
+ * Check to see if vcpu can execute vmx command
+ * Inject the corrseponding exception
+ */
+static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
+{
+	struct kvm_segment cs;
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct kvm_msr_entry *msr;
+
+	vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
+
+	if (!vmx->nested.vmon) {
+		printk(KERN_DEBUG "%s: vmx not on\n", __func__);
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 0;
+	}
+
+	msr = find_msr_entry(vmx, MSR_EFER);
+
+	if ((vmx_get_rflags(vcpu) & X86_EFLAGS_VM) ||
+	    ((msr->data & EFER_LMA) && !cs.l)) {
+		printk(KERN_DEBUG "%s: invalid mode cs.l %d lma %llu\n",
+		       __func__, cs.l, msr->data & EFER_LMA);
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 0;
+	}
+
+	if (vmx_get_cpl(vcpu)) {
+		kvm_inject_gp(vcpu, 0);
+		return 0;
+	}
+
+	return 1;
+}
+
 static int handle_vmx_insn(struct kvm_vcpu *vcpu)
 {
 	kvm_queue_exception(vcpu, UD_VECTOR);
 	return 1;
 }
 
+static int handle_vmoff(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	if (!nested_vmx_check_permission(vcpu))
+		return 1;
+
+	vmx->nested.vmon = 0;
+
+	skip_emulated_instruction(vcpu);
+	return 1;
+}
+
+static int handle_vmon(struct kvm_vcpu *vcpu)
+{
+	struct kvm_segment cs;
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	if (!nested) {
+		printk(KERN_DEBUG "%s: nested vmx not enabled\n", __func__);
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 1;
+	}
+
+	vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
+
+	if (!(vcpu->arch.cr4 & X86_CR4_VMXE) ||
+	    !(vcpu->arch.cr0 & X86_CR0_PE) ||
+	    (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) ||
+	    ((find_msr_entry(to_vmx(vcpu),
+			     MSR_EFER)->data & EFER_LMA) && !cs.l)) {
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		printk(KERN_INFO "%s invalid register state\n", __func__);
+		return 1;
+	}
+
+	if (vmx_get_cpl(vcpu)) {
+		printk(KERN_INFO "%s no permission\n", __func__);
+		kvm_inject_gp(vcpu, 0);
+		return 1;
+	}
+
+	vmx->nested.vmon = 1;
+
+	skip_emulated_instruction(vcpu);
+	return 1;
+}
+
 static int handle_invlpg(struct kvm_vcpu *vcpu)
 {
 	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
@@ -3376,8 +3559,8 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_VMREAD]                  = handle_vmx_insn,
 	[EXIT_REASON_VMRESUME]                = handle_vmx_insn,
 	[EXIT_REASON_VMWRITE]                 = handle_vmx_insn,
-	[EXIT_REASON_VMOFF]                   = handle_vmx_insn,
-	[EXIT_REASON_VMON]                    = handle_vmx_insn,
+	[EXIT_REASON_VMOFF]                   = handle_vmoff,
+	[EXIT_REASON_VMON]                    = handle_vmon,
 	[EXIT_REASON_TPR_BELOW_THRESHOLD]     = handle_tpr_below_threshold,
 	[EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
 	[EXIT_REASON_WBINVD]                  = handle_wbinvd,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8b3a169..9c39092 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -87,6 +87,10 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops);
 int ignore_msrs = 0;
 module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR);
 
+int nested = 1;
+EXPORT_SYMBOL_GPL(nested);
+module_param(nested, int, S_IRUGO);
+
 struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "pf_fixed", VCPU_STAT(pf_fixed) },
 	{ "pf_guest", VCPU_STAT(pf_guest) },
@@ -373,7 +377,7 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 		return;
 	}
 
-	if (cr4 & X86_CR4_VMXE) {
+	if (cr4 & X86_CR4_VMXE && !nested) {
 		printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n");
 		kvm_inject_gp(vcpu, 0);
 		return;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 5eadea5..57204cb 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -35,4 +35,6 @@ static inline bool kvm_exception_is_soft(unsigned int nr)
 struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
 					      u32 function, u32 index);
 
+extern int nested;
+
 #endif
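For reference, a sketch of the rdmsrl() form asked for in review; the diff above still shows the two-register rdmsr(), and the helper name used here is made up for the illustration:

static void nested_vmx_read_basic(u64 *pdata)
{
	/* rdmsrl() fills a single u64, so no manual low/high recombination. */
	rdmsrl(MSR_IA32_VMX_BASIC, *pdata);
}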
From: Orit Wasserman <oritw@il.ibm.com>

---
 arch/x86/kvm/svm.c |    3 -
 arch/x86/kvm/vmx.c |  187 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 arch/x86/kvm/x86.c |    6 ++-
 arch/x86/kvm/x86.h |    2 +
 4 files changed, 192 insertions(+), 6 deletions(-)