
[18/31] nVMX: Implement VMLAUNCH and VMRESUME

Message ID 201105161953.p4GJr8Jo001858@rice.haifa.ibm.com (mailing list archive)
State New, archived

Commit Message

Nadav Har'El May 16, 2011, 7:53 p.m. UTC
Implement the VMLAUNCH and VMRESUME instructions, allowing a guest
hypervisor to run its own guests.

This patch does not include some of the necessary validity checks on
vmcs12 fields before the entry. These will appear in a separate patch
below.

Signed-off-by: Nadav Har'El <nyh@il.ibm.com>
---
 arch/x86/kvm/vmx.c |   84 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 82 insertions(+), 2 deletions(-)


Comments

Tian, Kevin May 24, 2011, 8:45 a.m. UTC | #1
> From: Nadav Har'El
> Sent: Tuesday, May 17, 2011 3:53 AM
> 
> Implement the VMLAUNCH and VMRESUME instructions, allowing a guest
> hypervisor to run its own guests.
> 
> This patch does not include some of the necessary validity checks on
> vmcs12 fields before the entry. These will appear in a separate patch
> below.
> 
> Signed-off-by: Nadav Har'El <nyh@il.ibm.com>
> ---
>  arch/x86/kvm/vmx.c |   84 +++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 82 insertions(+), 2 deletions(-)
> 
> --- .before/arch/x86/kvm/vmx.c	2011-05-16 22:36:49.000000000 +0300
> +++ .after/arch/x86/kvm/vmx.c	2011-05-16 22:36:49.000000000 +0300
> @@ -347,6 +347,9 @@ struct nested_vmx {
>  	/* vmcs02_list cache of VMCSs recently used to run L2 guests */
>  	struct list_head vmcs02_pool;
>  	int vmcs02_num;
> +
> +	/* Saving the VMCS that we used for running L1 */
> +	struct saved_vmcs saved_vmcs01;
>  	u64 vmcs01_tsc_offset;
>  	/*
>  	 * Guest pages referred to in vmcs02 with host-physical pointers, so
> @@ -4668,6 +4671,8 @@ static void nested_free_all_saved_vmcss(
>  		kfree(item);
>  	}
>  	vmx->nested.vmcs02_num = 0;
> +	if (is_guest_mode(&vmx->vcpu))
> +		nested_free_saved_vmcs(vmx, &vmx->nested.saved_vmcs01);
>  }
> 
>  /* Get a vmcs02 for the current vmcs12. */
> @@ -4959,6 +4964,21 @@ static int handle_vmclear(struct kvm_vcp
>  	return 1;
>  }
> 
> +static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch);
> +
> +/* Emulate the VMLAUNCH instruction */
> +static int handle_vmlaunch(struct kvm_vcpu *vcpu)
> +{
> +	return nested_vmx_run(vcpu, true);
> +}
> +
> +/* Emulate the VMRESUME instruction */
> +static int handle_vmresume(struct kvm_vcpu *vcpu)
> +{
> +
> +	return nested_vmx_run(vcpu, false);
> +}
> +
>  enum vmcs_field_type {
>  	VMCS_FIELD_TYPE_U16 = 0,
>  	VMCS_FIELD_TYPE_U64 = 1,
> @@ -5239,11 +5259,11 @@ static int (*kvm_vmx_exit_handlers[])(st
>  	[EXIT_REASON_INVLPG]		      = handle_invlpg,
>  	[EXIT_REASON_VMCALL]                  = handle_vmcall,
>  	[EXIT_REASON_VMCLEAR]	              = handle_vmclear,
> -	[EXIT_REASON_VMLAUNCH]                = handle_vmx_insn,
> +	[EXIT_REASON_VMLAUNCH]                = handle_vmlaunch,
>  	[EXIT_REASON_VMPTRLD]                 = handle_vmptrld,
>  	[EXIT_REASON_VMPTRST]                 = handle_vmptrst,
>  	[EXIT_REASON_VMREAD]                  = handle_vmread,
> -	[EXIT_REASON_VMRESUME]                = handle_vmx_insn,
> +	[EXIT_REASON_VMRESUME]                = handle_vmresume,
>  	[EXIT_REASON_VMWRITE]                 = handle_vmwrite,
>  	[EXIT_REASON_VMOFF]                   = handle_vmoff,
>  	[EXIT_REASON_VMON]                    = handle_vmon,
> @@ -6129,6 +6149,66 @@ static void nested_maintain_per_cpu_list
>  	}
>  }
> 
> +/*
> + * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1
> + * for running an L2 nested guest.
> + */
> +static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
> +{
> +	struct vmcs12 *vmcs12;
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +	int cpu;
> +	struct saved_vmcs *saved_vmcs02;
> +
> +	if (!nested_vmx_check_permission(vcpu))
> +		return 1;
> +	skip_emulated_instruction(vcpu);
> +
> +	vmcs12 = get_vmcs12(vcpu);
> +
> +	enter_guest_mode(vcpu);
> +
> +	vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET);
> +
> +	/*
> +	 * Switch from L1's VMCS (vmcs01), to L2's VMCS (vmcs02). Remember
> +	 * vmcs01, on which CPU it was last loaded, and whether it was launched
> +	 * (we need all these values next time we will use L1). Then recall
> +	 * these values from the last time vmcs02 was used.
> +	 */
> +	saved_vmcs02 = nested_get_current_vmcs02(vmx);
> +	if (!saved_vmcs02)
> +		return -ENOMEM;
> +
> +	cpu = get_cpu();
> +	vmx->nested.saved_vmcs01.vmcs = vmx->vmcs;
> +	vmx->nested.saved_vmcs01.cpu = vcpu->cpu;
> +	vmx->nested.saved_vmcs01.launched = vmx->launched;
> +	vmx->vmcs = saved_vmcs02->vmcs;
> +	vcpu->cpu = saved_vmcs02->cpu;

this may be another valid reason for your check on cpu_online in your
latest [08/31] local_vcpus_link fix, since cpu may be offlined after
this assignment. :-)

> +	vmx->launched = saved_vmcs02->launched;
> +
> +	nested_maintain_per_cpu_lists(vmx,
> +		saved_vmcs02, &vmx->nested.saved_vmcs01);
> +
> +	vmx_vcpu_put(vcpu);
> +	vmx_vcpu_load(vcpu, cpu);
> +	vcpu->cpu = cpu;
> +	put_cpu();
> +
> +	vmcs12->launch_state = 1;
> +
> +	prepare_vmcs02(vcpu, vmcs12);

Since prepare_vmcs02 may fail, add a check here and move the launch_state
assignment after its success?

Thanks
Kevin

> +
> +	/*
> +	 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
> +	 * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
> +	 * returned as far as L1 is concerned. It will only return (and set
> +	 * the success flag) when L2 exits (see nested_vmx_vmexit()).
> +	 */
> +	return 1;
> +}
> +
>  static int vmx_check_intercept(struct kvm_vcpu *vcpu,
>  			       struct x86_instruction_info *info,
>  			       enum x86_intercept_stage stage)
Nadav Har'El May 24, 2011, 9:45 a.m. UTC | #2
On Tue, May 24, 2011, Tian, Kevin wrote about "RE: [PATCH 18/31] nVMX: Implement VMLAUNCH and VMRESUME":
> > +	/*
> > +	 * Switch from L1's VMCS (vmcs01), to L2's VMCS (vmcs02). Remember
> > +	 * vmcs01, on which CPU it was last loaded, and whether it was launched
> > +	 * (we need all these values next time we will use L1). Then recall
> > +	 * these values from the last time vmcs02 was used.
> > +	 */
> > +	saved_vmcs02 = nested_get_current_vmcs02(vmx);
> > +	if (!saved_vmcs02)
> > +		return -ENOMEM;
> > +
> > +	cpu = get_cpu();
> > +	vmx->nested.saved_vmcs01.vmcs = vmx->vmcs;
> > +	vmx->nested.saved_vmcs01.cpu = vcpu->cpu;
> > +	vmx->nested.saved_vmcs01.launched = vmx->launched;
> > +	vmx->vmcs = saved_vmcs02->vmcs;
> > +	vcpu->cpu = saved_vmcs02->cpu;
> 
> this may be another valid reason for your check on cpu_online in your
> latest [08/31] local_vcpus_link fix, since cpu may be offlined after
> this assignment. :-)

I believe that wrapping this part of the code with get_cpu()/put_cpu()
protected me from these kinds of race conditions.
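
For readers of the archive: here is the sequence in question again, with annotation added here (the comments are editorial, not part of the original patch). The argument being made is that get_cpu() disables preemption, and a CPU with preemption disabled cannot complete the hotplug offline path, so vcpu->cpu and the per-cpu VMCS lists stay stable for the whole switch:

	cpu = get_cpu();	/* preemption off until put_cpu() */

	/* remember vmcs01 (L1's VMCS) and switch to vmcs02 (L2's VMCS) */
	vmx->nested.saved_vmcs01.vmcs = vmx->vmcs;
	vmx->nested.saved_vmcs01.cpu = vcpu->cpu;
	vmx->nested.saved_vmcs01.launched = vmx->launched;
	vmx->vmcs = saved_vmcs02->vmcs;
	vcpu->cpu = saved_vmcs02->cpu;
	vmx->launched = saved_vmcs02->launched;

	nested_maintain_per_cpu_lists(vmx,
		saved_vmcs02, &vmx->nested.saved_vmcs01);

	vmx_vcpu_put(vcpu);
	vmx_vcpu_load(vcpu, cpu);
	vcpu->cpu = cpu;
	put_cpu();		/* preemption back on; migration possible again */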

By the way, please note that this part of the code was changed after my
latest loaded_vmcs overhaul. It now looks like this:

	vmcs02 = nested_get_current_vmcs02(vmx);
	if (!vmcs02)
		return -ENOMEM;

	cpu = get_cpu();
	vmx->loaded_vmcs = vmcs02;
	vmx_vcpu_put(vcpu);
	vmx_vcpu_load(vcpu, cpu);
	vcpu->cpu = cpu;
	put_cpu();

(if Avi gives me the green light, I'll send the entire, up-to-date, patch set
again).

> > +	vmcs12->launch_state = 1;
> > +
> > +	prepare_vmcs02(vcpu, vmcs12);
> 
> Since prepare_vmcs02 may fail, add a check here and move the launch_state
> assignment after its success?

prepare_vmcs02() cannot fail. All the checks that need to be done on vmcs12
are done before calling it, in nested_vmx_run().

Currently, there's a single case where prepare_vmcs02 "fails" when it fails
to access apic_access_addr memory. This is wrong - the check should have been
done earlier. I'll fix that, and make prepare_vmcs02() void.
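
As an illustration only (this is not the actual follow-up patch, and the helper name below is made up for the sketch): the shape being described is that every vmcs12 check that can fail, including the apic_access_addr one, runs before any state is committed, which is what lets prepare_vmcs02() become void:

	vmcs12 = get_vmcs12(vcpu);

	/* hypothetical helper: all fallible vmcs12 checks happen up front */
	if (!nested_vmcs12_fields_valid(vcpu, vmcs12))
		return 1;	/* entry refused; no L1/L2 state touched yet */

	enter_guest_mode(vcpu);
	/* ... VMCS switch as in the patch above ... */
	vmcs12->launch_state = 1;
	prepare_vmcs02(vcpu, vmcs12);	/* now void: nothing left to fail */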
Tian, Kevin May 24, 2011, 10:54 a.m. UTC | #3
> From: Nadav Har'El [mailto:nyh@math.technion.ac.il]
> Sent: Tuesday, May 24, 2011 5:45 PM
> 
> On Tue, May 24, 2011, Tian, Kevin wrote about "RE: [PATCH 18/31] nVMX:
> Implement VMLAUNCH and VMRESUME":
> > > +	/*
> > > +	 * Switch from L1's VMCS (vmcs01), to L2's VMCS (vmcs02). Remember
> > > +	 * vmcs01, on which CPU it was last loaded, and whether it was launched
> > > +	 * (we need all these values next time we will use L1). Then recall
> > > +	 * these values from the last time vmcs02 was used.
> > > +	 */
> > > +	saved_vmcs02 = nested_get_current_vmcs02(vmx);
> > > +	if (!saved_vmcs02)
> > > +		return -ENOMEM;
> > > +
> > > +	cpu = get_cpu();
> > > +	vmx->nested.saved_vmcs01.vmcs = vmx->vmcs;
> > > +	vmx->nested.saved_vmcs01.cpu = vcpu->cpu;
> > > +	vmx->nested.saved_vmcs01.launched = vmx->launched;
> > > +	vmx->vmcs = saved_vmcs02->vmcs;
> > > +	vcpu->cpu = saved_vmcs02->cpu;
> >
> > this may be another valid reason for your check on cpu_online in your
> > latest [08/31] local_vcpus_link fix, since cpu may be offlined after
> > this assignment. :-)
> 
> I believe that wrapping this part of the code with get_cpu()/put_cpu()
> protected me from these kinds of race conditions.

you're right.

> 
> By the way, please note that this part of the code was changed after my
> latest loaded_vmcs overhaul. It now looks like this:
> 
> 	vmcs02 = nested_get_current_vmcs02(vmx);
> 	if (!vmcs02)
> 		return -ENOMEM;
> 
> 	cpu = get_cpu();
> 	vmx->loaded_vmcs = vmcs02;
> 	vmx_vcpu_put(vcpu);
> 	vmx_vcpu_load(vcpu, cpu);
> 	vcpu->cpu = cpu;
> 	put_cpu();
> 
> (if Avi gives me the green light, I'll send the entire, up-to-date, patch set
> again).

Generally your new patch looks good.

> 
> > > +	vmcs12->launch_state = 1;
> > > +
> > > +	prepare_vmcs02(vcpu, vmcs12);
> >
> > Since prepare_vmcs02 may fail, add a check here and move the launch_state
> > assignment after its success?
> 
> prepare_vmcs02() cannot fail. All the checks that need to be done on vmcs12
> are done before calling it, in nested_vmx_run().
> 
> Currently, there's a single case where prepare_vmcs02 "fails" when it fails
> to access apic_access_addr memory. This is wrong - the check should have been
> done earlier. I'll fix that, and make prepare_vmcs02() void.
> 

then no problem, as long as you keep this choice clear.

Thanks
Kevin
Tian, Kevin May 25, 2011, 8 a.m. UTC | #4
> From: Nadav Har'El
> Sent: Tuesday, May 17, 2011 3:53 AM
> 
> Implement the VMLAUNCH and VMRESUME instructions, allowing a guest
> hypervisor to run its own guests.
> 
> This patch does not include some of the necessary validity checks on
> vmcs12 fields before the entry. These will appear in a separate patch
> below.
> 
> Signed-off-by: Nadav Har'El <nyh@il.ibm.com>
> ---

[...]

> +static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
> +{
> +	struct vmcs12 *vmcs12;
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +	int cpu;
> +	struct saved_vmcs *saved_vmcs02;
> +
> +	if (!nested_vmx_check_permission(vcpu))
> +		return 1;
> +	skip_emulated_instruction(vcpu);
> +
> +	vmcs12 = get_vmcs12(vcpu);
> +
> +	enter_guest_mode(vcpu);
> +
> +	vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET);
> +
> +	/*
> +	 * Switch from L1's VMCS (vmcs01), to L2's VMCS (vmcs02). Remember
> +	 * vmcs01, on which CPU it was last loaded, and whether it was launched
> +	 * (we need all these values next time we will use L1). Then recall
> +	 * these values from the last time vmcs02 was used.
> +	 */
> +	saved_vmcs02 = nested_get_current_vmcs02(vmx);
> +	if (!saved_vmcs02)
> +		return -ENOMEM;
> +

we shouldn't return an error after guest mode has been updated; alternatively, move
enter_guest_mode to a later place...

Thanks
Kevin
Nadav Har'El May 25, 2011, 1:26 p.m. UTC | #5
On Wed, May 25, 2011, Tian, Kevin wrote about "RE: [PATCH 18/31] nVMX: Implement VMLAUNCH and VMRESUME":
> > +	if (!saved_vmcs02)
> > +		return -ENOMEM;
> > +
> 
> we shouldn't return an error after guest mode has been updated; alternatively, move
> enter_guest_mode to a later place...

I moved things around, but I don't think it matters anyway: If we return
ENOMEM, the KVM ioctl fails, and the whole L1 guest dies - it doesn't matter
at this point if we were in the middle of updating its state.
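
For the archive, a sketch of the ordering being converged on here, based on the loaded_vmcs-style snippet quoted earlier in the thread (it may not match the final patch line for line): the only step that can fail, getting a vmcs02, is attempted before enter_guest_mode(), so an -ENOMEM return never leaves the vcpu marked as being in guest mode:

	vmcs02 = nested_get_current_vmcs02(vmx);
	if (!vmcs02)
		return -ENOMEM;		/* nothing has been committed yet */

	enter_guest_mode(vcpu);		/* only after the allocation succeeded */
	vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET);

	cpu = get_cpu();
	vmx->loaded_vmcs = vmcs02;
	vmx_vcpu_put(vcpu);
	vmx_vcpu_load(vcpu, cpu);
	vcpu->cpu = cpu;
	put_cpu();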
Tian, Kevin May 26, 2011, 12:42 a.m. UTC | #6
> From: Nadav Har'El [mailto:nyh@math.technion.ac.il]
> Sent: Wednesday, May 25, 2011 9:26 PM
> 
> On Wed, May 25, 2011, Tian, Kevin wrote about "RE: [PATCH 18/31] nVMX:
> Implement VMLAUNCH and VMRESUME":
> > > +	if (!saved_vmcs02)
> > > +		return -ENOMEM;
> > > +
> >
> > we shouldn't return an error after guest mode has been updated; alternatively, move
> > enter_guest_mode to a later place...
> 
> I moved things around, but I don't think it matters anyway: If we return
> ENOMEM, the KVM ioctl fails, and the whole L1 guest dies - it doesn't matter
> at this point if we were in the middle of updating its state.
> 

yes, but the code is cleaner this way: guest mode is set only once we know
we will certainly enter L2.

Thanks
Kevin

Patch

--- .before/arch/x86/kvm/vmx.c	2011-05-16 22:36:49.000000000 +0300
+++ .after/arch/x86/kvm/vmx.c	2011-05-16 22:36:49.000000000 +0300
@@ -347,6 +347,9 @@  struct nested_vmx {
 	/* vmcs02_list cache of VMCSs recently used to run L2 guests */
 	struct list_head vmcs02_pool;
 	int vmcs02_num;
+
+	/* Saving the VMCS that we used for running L1 */
+	struct saved_vmcs saved_vmcs01;
 	u64 vmcs01_tsc_offset;
 	/*
 	 * Guest pages referred to in vmcs02 with host-physical pointers, so
@@ -4668,6 +4671,8 @@  static void nested_free_all_saved_vmcss(
 		kfree(item);
 	}
 	vmx->nested.vmcs02_num = 0;
+	if (is_guest_mode(&vmx->vcpu))
+		nested_free_saved_vmcs(vmx, &vmx->nested.saved_vmcs01);
 }
 
 /* Get a vmcs02 for the current vmcs12. */
@@ -4959,6 +4964,21 @@  static int handle_vmclear(struct kvm_vcp
 	return 1;
 }
 
+static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch);
+
+/* Emulate the VMLAUNCH instruction */
+static int handle_vmlaunch(struct kvm_vcpu *vcpu)
+{
+	return nested_vmx_run(vcpu, true);
+}
+
+/* Emulate the VMRESUME instruction */
+static int handle_vmresume(struct kvm_vcpu *vcpu)
+{
+
+	return nested_vmx_run(vcpu, false);
+}
+
 enum vmcs_field_type {
 	VMCS_FIELD_TYPE_U16 = 0,
 	VMCS_FIELD_TYPE_U64 = 1,
@@ -5239,11 +5259,11 @@  static int (*kvm_vmx_exit_handlers[])(st
 	[EXIT_REASON_INVLPG]		      = handle_invlpg,
 	[EXIT_REASON_VMCALL]                  = handle_vmcall,
 	[EXIT_REASON_VMCLEAR]	              = handle_vmclear,
-	[EXIT_REASON_VMLAUNCH]                = handle_vmx_insn,
+	[EXIT_REASON_VMLAUNCH]                = handle_vmlaunch,
 	[EXIT_REASON_VMPTRLD]                 = handle_vmptrld,
 	[EXIT_REASON_VMPTRST]                 = handle_vmptrst,
 	[EXIT_REASON_VMREAD]                  = handle_vmread,
-	[EXIT_REASON_VMRESUME]                = handle_vmx_insn,
+	[EXIT_REASON_VMRESUME]                = handle_vmresume,
 	[EXIT_REASON_VMWRITE]                 = handle_vmwrite,
 	[EXIT_REASON_VMOFF]                   = handle_vmoff,
 	[EXIT_REASON_VMON]                    = handle_vmon,
@@ -6129,6 +6149,66 @@  static void nested_maintain_per_cpu_list
 	}
 }
 
+/*
+ * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1
+ * for running an L2 nested guest.
+ */
+static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
+{
+	struct vmcs12 *vmcs12;
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	int cpu;
+	struct saved_vmcs *saved_vmcs02;
+
+	if (!nested_vmx_check_permission(vcpu))
+		return 1;
+	skip_emulated_instruction(vcpu);
+
+	vmcs12 = get_vmcs12(vcpu);
+
+	enter_guest_mode(vcpu);
+
+	vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET);
+
+	/*
+	 * Switch from L1's VMCS (vmcs01), to L2's VMCS (vmcs02). Remember
+	 * vmcs01, on which CPU it was last loaded, and whether it was launched
+	 * (we need all these values next time we will use L1). Then recall
+	 * these values from the last time vmcs02 was used.
+	 */
+	saved_vmcs02 = nested_get_current_vmcs02(vmx);
+	if (!saved_vmcs02)
+		return -ENOMEM;
+
+	cpu = get_cpu();
+	vmx->nested.saved_vmcs01.vmcs = vmx->vmcs;
+	vmx->nested.saved_vmcs01.cpu = vcpu->cpu;
+	vmx->nested.saved_vmcs01.launched = vmx->launched;
+	vmx->vmcs = saved_vmcs02->vmcs;
+	vcpu->cpu = saved_vmcs02->cpu;
+	vmx->launched = saved_vmcs02->launched;
+
+	nested_maintain_per_cpu_lists(vmx,
+		saved_vmcs02, &vmx->nested.saved_vmcs01);
+
+	vmx_vcpu_put(vcpu);
+	vmx_vcpu_load(vcpu, cpu);
+	vcpu->cpu = cpu;
+	put_cpu();
+
+	vmcs12->launch_state = 1;
+
+	prepare_vmcs02(vcpu, vmcs12);
+
+	/*
+	 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
+	 * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
+	 * returned as far as L1 is concerned. It will only return (and set
+	 * the success flag) when L2 exits (see nested_vmx_vmexit()).
+	 */
+	return 1;
+}
+
 static int vmx_check_intercept(struct kvm_vcpu *vcpu,
 			       struct x86_instruction_info *info,
 			       enum x86_intercept_stage stage)