Message ID | 1251905916-2834-2-git-send-email-oritw@il.ibm.com (mailing list archive) |
---|---|
State | New, archived |
On 09/02/2009 06:38 PM, oritw@il.ibm.com wrote:
> From: Orit Wasserman <oritw@il.ibm.com>
>
> ---
>  arch/x86/kvm/svm.c |    3 -
>  arch/x86/kvm/vmx.c |  187 +++++++++++++++++++++++++++++++++++++++++++++++++++-
>  arch/x86/kvm/x86.c |    6 ++-
>  arch/x86/kvm/x86.h |    2 +
>  4 files changed, 192 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 2df9b45..3c1f22a 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -124,9 +124,6 @@ static int npt = 1;
>
>  module_param(npt, int, S_IRUGO);
>
> -static int nested = 1;
> -module_param(nested, int, S_IRUGO);
> -
>  static void svm_flush_tlb(struct kvm_vcpu *vcpu);
>  static void svm_complete_interrupts(struct vcpu_svm *svm);
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 78101dd..abba325 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -67,6 +67,11 @@ struct vmcs {
>  	char data[0];
>  };
>
> +struct nested_vmx {
> +	/* Has the level1 guest done vmon? */
> +	bool vmon;
> +};

vmxon

> @@ -967,6 +975,69 @@ static void guest_write_tsc(u64 guest_tsc, u64 host_tsc)
>  }
>
>  /*
> + * Handles msr read for nested virtualization
> + */
> +static int nested_vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index,
> +			      u64 *pdata)
> +{
> +	u32 vmx_msr_low = 0, vmx_msr_high = 0;
> +
> +	switch (msr_index) {
> +	case MSR_IA32_FEATURE_CONTROL:
> +		*pdata = 0;
> +		break;
> +	case MSR_IA32_VMX_BASIC:
> +		rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high);

Use rdmsrl, it's easier.

I think we need to mask it with the capabilities we support. Otherwise
the guest can try to use some new feature which we don't support yet,
and crash.

> +		*pdata = vmx_msr_low | ((u64)vmx_msr_high << 32);
> +		break;
> +	case MSR_IA32_VMX_PINBASED_CTLS:
> +		*pdata = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
> +			PIN_BASED_VIRTUAL_NMIS;

Need to mask with actual cpu capabilities in case we run on an older cpu.

> +		break;
> +	case MSR_IA32_VMX_PROCBASED_CTLS:
> +		*pdata = CPU_BASED_HLT_EXITING |
> +#ifdef CONFIG_X86_64
> +			CPU_BASED_CR8_LOAD_EXITING |
> +			CPU_BASED_CR8_STORE_EXITING |
> +#endif
> +			CPU_BASED_CR3_LOAD_EXITING |
> +			CPU_BASED_CR3_STORE_EXITING |
> +			CPU_BASED_USE_IO_BITMAPS |
> +			CPU_BASED_MOV_DR_EXITING |
> +			CPU_BASED_USE_TSC_OFFSETING |
> +			CPU_BASED_INVLPG_EXITING;

Same here... or are all these guaranteed to be present?

> +
> +static int handle_vmon(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_segment cs;
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +
> +	if (!nested) {
> +		printk(KERN_DEBUG "%s: nested vmx not enabled\n", __func__);
> +		kvm_queue_exception(vcpu, UD_VECTOR);
> +		return 1;
> +	}
> +
> +	vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
> +
> +	if (!(vcpu->arch.cr4 & X86_CR4_VMXE) ||
> +	    !(vcpu->arch.cr0 & X86_CR0_PE) ||
> +	    (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) ||
> +	    ((find_msr_entry(to_vmx(vcpu),
> +			     MSR_EFER)->data & EFER_LMA) && !cs.l)) {

Not everyone has EFER. Better to wrap this in an #ifdef CONFIG_X86_64.
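A minimal sketch of what that last comment suggests, for illustration only (not code from this patch series): consult EFER.LMA only when the kernel is built for 64-bit. It reuses helpers the patch already calls (vmx_get_rflags(), find_msr_entry(), to_vmx()); the function name nested_vmon_mode_ok() is invented here.

/* Illustration only: the long-mode check is compiled in only on 64-bit. */
static bool nested_vmon_mode_ok(struct kvm_vcpu *vcpu, struct kvm_segment *cs)
{
	if (!(vcpu->arch.cr4 & X86_CR4_VMXE) ||
	    !(vcpu->arch.cr0 & X86_CR0_PE) ||
	    (vmx_get_rflags(vcpu) & X86_EFLAGS_VM))
		return false;

#ifdef CONFIG_X86_64
	/* In long mode, VMXON is only valid from a 64-bit code segment. */
	if ((find_msr_entry(to_vmx(vcpu), MSR_EFER)->data & EFER_LMA) && !cs->l)
		return false;
#endif

	return true;
}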
Avi Kivity <avi@redhat.com> wrote on 02/09/2009 22:34:58:

> > +struct nested_vmx {
> > +	/* Has the level1 guest done vmon? */
> > +	bool vmon;
> > +};
>
> vmxon

fixed

> > +	case MSR_IA32_VMX_BASIC:
> > +		rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high);
>
> Use rdmsrl, it's easier.

fixed

> I think we need to mask it with the capabilities we support. Otherwise
> the guest can try to use some new feature which we don't support yet,
> and crash.

I agree, but I went over the Intel spec and didn't find any problematic
feature. We may need to consider it in the future.

> > +	case MSR_IA32_VMX_PINBASED_CTLS:
> > +		*pdata = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
> > +			PIN_BASED_VIRTUAL_NMIS;
>
> Need to mask with actual cpu capabilities in case we run on an older cpu.

fixed

> > +	case MSR_IA32_VMX_PROCBASED_CTLS:
> > +		*pdata = CPU_BASED_HLT_EXITING |
>
> Same here... or are all these guaranteed to be present?

fixed

> > +	if (!(vcpu->arch.cr4 & X86_CR4_VMXE) ||
> > +	    !(vcpu->arch.cr0 & X86_CR0_PE) ||
> > +	    (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) ||
> > +	    ((find_msr_entry(to_vmx(vcpu),
> > +			     MSR_EFER)->data & EFER_LMA) && !cs.l)) {
>
> Not everyone has EFER. Better to wrap this in an #ifdef CONFIG_X86_64.

fixed
On 09/03/2009 03:34 PM, Orit Wasserman wrote:
>> I think we need to mask it with the capabilities we support. Otherwise
>> the guest can try to use some new feature which we don't support yet,
>> and crash.
>
> I agree, but I went over the Intel spec and didn't find any problematic
> feature. We may need to consider it in the future.

We need to do it, since we don't know anything about future processors.
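A minimal sketch of the masking being agreed on here, for illustration only: the VMX capability MSRs report the allowed-1 settings of each control in their high 32 bits, so the control bits advertised to the L1 guest can be clipped to what the host CPU actually supports. The helper name nested_clip_to_host_caps() is made up for this example.

/* Clip the control bits we want to advertise to what the host allows. */
static u32 nested_clip_to_host_caps(u32 wanted, u32 capability_msr)
{
	u64 host_caps;

	rdmsrl(capability_msr, host_caps);
	/* High 32 bits: the bits the host CPU allows to be set to 1. */
	return wanted & (u32)(host_caps >> 32);
}

With such a helper, the MSR_IA32_VMX_PINBASED_CTLS case could read something like *pdata = nested_clip_to_host_caps(PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS, MSR_IA32_VMX_PINBASED_CTLS);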
Avi Kivity <avi@redhat.com> wrote on 03/09/2009 16:39:09:

> We need to do it, since we don't know anything about future processors.

OK
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 2df9b45..3c1f22a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -124,9 +124,6 @@ static int npt = 1;
 
 module_param(npt, int, S_IRUGO);
 
-static int nested = 1;
-module_param(nested, int, S_IRUGO);
-
 static void svm_flush_tlb(struct kvm_vcpu *vcpu);
 static void svm_complete_interrupts(struct vcpu_svm *svm);
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 78101dd..abba325 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -67,6 +67,11 @@ struct vmcs {
 	char data[0];
 };
 
+struct nested_vmx {
+	/* Has the level1 guest done vmon? */
+	bool vmon;
+};
+
 struct vcpu_vmx {
 	struct kvm_vcpu vcpu;
 	struct list_head local_vcpus_link;
@@ -114,6 +119,9 @@ struct vcpu_vmx {
 	ktime_t entry_time;
 	s64 vnmi_blocked_time;
 	u32 exit_reason;
+
+	/* Nested vmx */
+	struct nested_vmx nested;
 };
 
 static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -967,6 +975,69 @@ static void guest_write_tsc(u64 guest_tsc, u64 host_tsc)
 }
 
 /*
+ * Handles msr read for nested virtualization
+ */
+static int nested_vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index,
+			      u64 *pdata)
+{
+	u32 vmx_msr_low = 0, vmx_msr_high = 0;
+
+	switch (msr_index) {
+	case MSR_IA32_FEATURE_CONTROL:
+		*pdata = 0;
+		break;
+	case MSR_IA32_VMX_BASIC:
+		rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high);
+		*pdata = vmx_msr_low | ((u64)vmx_msr_high << 32);
+		break;
+	case MSR_IA32_VMX_PINBASED_CTLS:
+		*pdata = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
+			PIN_BASED_VIRTUAL_NMIS;
+		break;
+	case MSR_IA32_VMX_PROCBASED_CTLS:
+		*pdata = CPU_BASED_HLT_EXITING |
+#ifdef CONFIG_X86_64
+			CPU_BASED_CR8_LOAD_EXITING |
+			CPU_BASED_CR8_STORE_EXITING |
+#endif
+			CPU_BASED_CR3_LOAD_EXITING |
+			CPU_BASED_CR3_STORE_EXITING |
+			CPU_BASED_USE_IO_BITMAPS |
+			CPU_BASED_MOV_DR_EXITING |
+			CPU_BASED_USE_TSC_OFFSETING |
+			CPU_BASED_INVLPG_EXITING;
+
+		if (cpu_has_secondary_exec_ctrls())
+			*pdata |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+
+		if (vm_need_tpr_shadow(vcpu->kvm))
+			*pdata |= CPU_BASED_TPR_SHADOW;
+		break;
+	case MSR_IA32_VMX_EXIT_CTLS:
+		*pdata = 0;
+#ifdef CONFIG_X86_64
+		*pdata |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
+#endif
+		break;
+	case MSR_IA32_VMX_ENTRY_CTLS:
+		*pdata = 0;
+		break;
+	case MSR_IA32_VMX_PROCBASED_CTLS2:
+		*pdata = 0;
+		if (vm_need_virtualize_apic_accesses(vcpu->kvm))
+			*pdata |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+		break;
+	case MSR_IA32_VMX_EPT_VPID_CAP:
+		*pdata = 0;
+		break;
+	default:
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
  * Reads an msr value (of 'msr_index') into 'pdata'.
  * Returns 0 on success, non-0 otherwise.
  * Assumes vcpu_load() was already called.
@@ -1005,6 +1076,9 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 		data = vmcs_readl(GUEST_SYSENTER_ESP);
 		break;
 	default:
+		if (nested &&
+		    !nested_vmx_get_msr(vcpu, msr_index, &data))
+			break;
 		vmx_load_host_state(to_vmx(vcpu));
 		msr = find_msr_entry(to_vmx(vcpu), msr_index);
 		if (msr) {
@@ -1019,6 +1093,27 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 }
 
 /*
+ * Writes msr value for nested virtualization
+ * Returns 0 on success, non-0 otherwise.
+ */
+static int nested_vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
+{
+	switch (msr_index) {
+	case MSR_IA32_FEATURE_CONTROL:
+		if ((data & (FEATURE_CONTROL_LOCKED |
+			     FEATURE_CONTROL_VMXON_ENABLED))
+		    != (FEATURE_CONTROL_LOCKED |
+			FEATURE_CONTROL_VMXON_ENABLED))
+			return 1;
+		break;
+	default:
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
  * Writes msr value into into the appropriate "register".
  * Returns 0 on success, non-0 otherwise.
  * Assumes vcpu_load() was already called.
@@ -1064,6 +1159,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 		}
 		/* Otherwise falls through to kvm_set_msr_common */
 	default:
+		if (nested &&
+		    !nested_vmx_set_msr(vcpu, msr_index, data))
+			break;
 		vmx_load_host_state(vmx);
 		msr = find_msr_entry(vmx, msr_index);
 		if (msr) {
@@ -3095,12 +3193,97 @@ static int handle_vmcall(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+/*
+ * Check to see if vcpu can execute vmx command
+ * Inject the corrseponding exception
+ */
+static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
+{
+	struct kvm_segment cs;
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct kvm_msr_entry *msr;
+
+	vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
+
+	if (!vmx->nested.vmon) {
+		printk(KERN_DEBUG "%s: vmx not on\n", __func__);
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 0;
+	}
+
+	msr = find_msr_entry(vmx, MSR_EFER);
+
+	if ((vmx_get_rflags(vcpu) & X86_EFLAGS_VM) ||
+	    ((msr->data & EFER_LMA) && !cs.l)) {
+		printk(KERN_DEBUG "%s: invalid mode cs.l %d lma %llu\n",
+		       __func__, cs.l, msr->data & EFER_LMA);
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 0;
+	}
+
+	if (vmx_get_cpl(vcpu)) {
+		kvm_inject_gp(vcpu, 0);
+		return 0;
+	}
+
+	return 1;
+}
+
 static int handle_vmx_insn(struct kvm_vcpu *vcpu)
 {
 	kvm_queue_exception(vcpu, UD_VECTOR);
 	return 1;
 }
 
+static int handle_vmoff(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	if (!nested_vmx_check_permission(vcpu))
+		return 1;
+
+	vmx->nested.vmon = 0;
+
+	skip_emulated_instruction(vcpu);
+	return 1;
+}
+
+static int handle_vmon(struct kvm_vcpu *vcpu)
+{
+	struct kvm_segment cs;
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	if (!nested) {
+		printk(KERN_DEBUG "%s: nested vmx not enabled\n", __func__);
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 1;
+	}
+
+	vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
+
+	if (!(vcpu->arch.cr4 & X86_CR4_VMXE) ||
+	    !(vcpu->arch.cr0 & X86_CR0_PE) ||
+	    (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) ||
+	    ((find_msr_entry(to_vmx(vcpu),
+			     MSR_EFER)->data & EFER_LMA) && !cs.l)) {
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		printk(KERN_INFO "%s invalid register state\n", __func__);
+		return 1;
+	}
+
+	if (vmx_get_cpl(vcpu)) {
+		printk(KERN_INFO "%s no permission\n", __func__);
+		kvm_inject_gp(vcpu, 0);
+		return 1;
+	}
+
+	vmx->nested.vmon = 1;
+
+	skip_emulated_instruction(vcpu);
+	return 1;
+}
+
 static int handle_invlpg(struct kvm_vcpu *vcpu)
 {
 	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
@@ -3376,8 +3559,8 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_VMREAD]                  = handle_vmx_insn,
 	[EXIT_REASON_VMRESUME]                = handle_vmx_insn,
 	[EXIT_REASON_VMWRITE]                 = handle_vmx_insn,
-	[EXIT_REASON_VMOFF]                   = handle_vmx_insn,
-	[EXIT_REASON_VMON]                    = handle_vmx_insn,
+	[EXIT_REASON_VMOFF]                   = handle_vmoff,
+	[EXIT_REASON_VMON]                    = handle_vmon,
 	[EXIT_REASON_TPR_BELOW_THRESHOLD]     = handle_tpr_below_threshold,
 	[EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
 	[EXIT_REASON_WBINVD]                  = handle_wbinvd,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8b3a169..9c39092 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -87,6 +87,10 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops);
 int ignore_msrs = 0;
 module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR);
 
+int nested = 1;
+EXPORT_SYMBOL_GPL(nested);
+module_param(nested, int, S_IRUGO);
+
 struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "pf_fixed", VCPU_STAT(pf_fixed) },
 	{ "pf_guest", VCPU_STAT(pf_guest) },
@@ -373,7 +377,7 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 		return;
 	}
 
-	if (cr4 & X86_CR4_VMXE) {
+	if (cr4 & X86_CR4_VMXE && !nested) {
 		printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n");
 		kvm_inject_gp(vcpu, 0);
 		return;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 5eadea5..57204cb 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -35,4 +35,6 @@ static inline bool kvm_exception_is_soft(unsigned int nr)
 struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
 					      u32 function, u32 index);
 
+extern int nested;
+
 #endif
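For reference, a sketch of the rdmsrl() form asked for in review; the diff above still shows the two-register rdmsr(), and the helper name used here is made up for the illustration:

static void nested_vmx_read_basic(u64 *pdata)
{
	/* rdmsrl() fills a single u64, so no manual low/high recombination. */
	rdmsrl(MSR_IA32_VMX_BASIC, *pdata);
}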
From: Orit Wasserman <oritw@il.ibm.com>

---
 arch/x86/kvm/svm.c |    3 -
 arch/x86/kvm/vmx.c |  187 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 arch/x86/kvm/x86.c |    6 ++-
 arch/x86/kvm/x86.h |    2 +
 4 files changed, 192 insertions(+), 6 deletions(-)