Message ID | 201105161953.p4GJr8Jo001858@rice.haifa.ibm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
> From: Nadav Har'El > Sent: Tuesday, May 17, 2011 3:53 AM > > Implement the VMLAUNCH and VMRESUME instructions, allowing a guest > hypervisor to run its own guests. > > This patch does not include some of the necessary validity checks on > vmcs12 fields before the entry. These will appear in a separate patch > below. > > Signed-off-by: Nadav Har'El <nyh@il.ibm.com> > --- > arch/x86/kvm/vmx.c | 84 > +++++++++++++++++++++++++++++++++++++++++-- > 1 file changed, 82 insertions(+), 2 deletions(-) > > --- .before/arch/x86/kvm/vmx.c 2011-05-16 22:36:49.000000000 +0300 > +++ .after/arch/x86/kvm/vmx.c 2011-05-16 22:36:49.000000000 +0300 > @@ -347,6 +347,9 @@ struct nested_vmx { > /* vmcs02_list cache of VMCSs recently used to run L2 guests */ > struct list_head vmcs02_pool; > int vmcs02_num; > + > + /* Saving the VMCS that we used for running L1 */ > + struct saved_vmcs saved_vmcs01; > u64 vmcs01_tsc_offset; > /* > * Guest pages referred to in vmcs02 with host-physical pointers, so > @@ -4668,6 +4671,8 @@ static void nested_free_all_saved_vmcss( > kfree(item); > } > vmx->nested.vmcs02_num = 0; > + if (is_guest_mode(&vmx->vcpu)) > + nested_free_saved_vmcs(vmx, &vmx->nested.saved_vmcs01); > } > > /* Get a vmcs02 for the current vmcs12. */ > @@ -4959,6 +4964,21 @@ static int handle_vmclear(struct kvm_vcp > return 1; > } > > +static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch); > + > +/* Emulate the VMLAUNCH instruction */ > +static int handle_vmlaunch(struct kvm_vcpu *vcpu) > +{ > + return nested_vmx_run(vcpu, true); > +} > + > +/* Emulate the VMRESUME instruction */ > +static int handle_vmresume(struct kvm_vcpu *vcpu) > +{ > + > + return nested_vmx_run(vcpu, false); > +} > + > enum vmcs_field_type { > VMCS_FIELD_TYPE_U16 = 0, > VMCS_FIELD_TYPE_U64 = 1, > @@ -5239,11 +5259,11 @@ static int (*kvm_vmx_exit_handlers[])(st > [EXIT_REASON_INVLPG] = handle_invlpg, > [EXIT_REASON_VMCALL] = handle_vmcall, > [EXIT_REASON_VMCLEAR] = handle_vmclear, > - [EXIT_REASON_VMLAUNCH] = handle_vmx_insn, > + [EXIT_REASON_VMLAUNCH] = handle_vmlaunch, > [EXIT_REASON_VMPTRLD] = handle_vmptrld, > [EXIT_REASON_VMPTRST] = handle_vmptrst, > [EXIT_REASON_VMREAD] = handle_vmread, > - [EXIT_REASON_VMRESUME] = handle_vmx_insn, > + [EXIT_REASON_VMRESUME] = handle_vmresume, > [EXIT_REASON_VMWRITE] = handle_vmwrite, > [EXIT_REASON_VMOFF] = handle_vmoff, > [EXIT_REASON_VMON] = handle_vmon, > @@ -6129,6 +6149,66 @@ static void nested_maintain_per_cpu_list > } > } > > +/* > + * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or > VMRESUME on L1 > + * for running an L2 nested guest. > + */ > +static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) > +{ > + struct vmcs12 *vmcs12; > + struct vcpu_vmx *vmx = to_vmx(vcpu); > + int cpu; > + struct saved_vmcs *saved_vmcs02; > + > + if (!nested_vmx_check_permission(vcpu)) > + return 1; > + skip_emulated_instruction(vcpu); > + > + vmcs12 = get_vmcs12(vcpu); > + > + enter_guest_mode(vcpu); > + > + vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); > + > + /* > + * Switch from L1's VMCS (vmcs01), to L2's VMCS (vmcs02). Remember > + * vmcs01, on which CPU it was last loaded, and whether it was launched > + * (we need all these values next time we will use L1). Then recall > + * these values from the last time vmcs02 was used. > + */ > + saved_vmcs02 = nested_get_current_vmcs02(vmx); > + if (!saved_vmcs02) > + return -ENOMEM; > + > + cpu = get_cpu(); > + vmx->nested.saved_vmcs01.vmcs = vmx->vmcs; > + vmx->nested.saved_vmcs01.cpu = vcpu->cpu; > + vmx->nested.saved_vmcs01.launched = vmx->launched; > + vmx->vmcs = saved_vmcs02->vmcs; > + vcpu->cpu = saved_vmcs02->cpu; this may be another valid reason for your check on cpu_online in your latest [08/31] local_vcpus_link fix, since cpu may be offlined after this assignment. :-) > + vmx->launched = saved_vmcs02->launched; > + > + nested_maintain_per_cpu_lists(vmx, > + saved_vmcs02, &vmx->nested.saved_vmcs01); > + > + vmx_vcpu_put(vcpu); > + vmx_vcpu_load(vcpu, cpu); > + vcpu->cpu = cpu; > + put_cpu(); > + > + vmcs12->launch_state = 1; > + > + prepare_vmcs02(vcpu, vmcs12); Since prepare_vmcs may fail, add a check here and move launch_state assignment after its success? Thanks Kevin > + > + /* > + * Note no nested_vmx_succeed or nested_vmx_fail here. At this point > + * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet > + * returned as far as L1 is concerned. It will only return (and set > + * the success flag) when L2 exits (see nested_vmx_vmexit()). > + */ > + return 1; > +} > + > static int vmx_check_intercept(struct kvm_vcpu *vcpu, > struct x86_instruction_info *info, > enum x86_intercept_stage stage) > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, May 24, 2011, Tian, Kevin wrote about "RE: [PATCH 18/31] nVMX: Implement VMLAUNCH and VMRESUME": > > + /* > > + * Switch from L1's VMCS (vmcs01), to L2's VMCS (vmcs02). Remember > > + * vmcs01, on which CPU it was last loaded, and whether it was launched > > + * (we need all these values next time we will use L1). Then recall > > + * these values from the last time vmcs02 was used. > > + */ > > + saved_vmcs02 = nested_get_current_vmcs02(vmx); > > + if (!saved_vmcs02) > > + return -ENOMEM; > > + > > + cpu = get_cpu(); > > + vmx->nested.saved_vmcs01.vmcs = vmx->vmcs; > > + vmx->nested.saved_vmcs01.cpu = vcpu->cpu; > > + vmx->nested.saved_vmcs01.launched = vmx->launched; > > + vmx->vmcs = saved_vmcs02->vmcs; > > + vcpu->cpu = saved_vmcs02->cpu; > > this may be another valid reason for your check on cpu_online in your > latest [08/31] local_vcpus_link fix, since cpu may be offlined after > this assignment. :-) I believe that wrapping this part of the code with get_cpu()/put_cpu() protected me from these kinds of race conditions. By the way, please note that this part of the code was changed after my latest loaded_vmcs overhaul. It now looks like this: vmcs02 = nested_get_current_vmcs02(vmx); if (!vmcs02) return -ENOMEM; cpu = get_cpu(); vmx->loaded_vmcs = vmcs02; vmx_vcpu_put(vcpu); vmx_vcpu_load(vcpu, cpu); vcpu->cpu = cpu; put_cpu(); (if Avi gives me the green light, I'll send the entire, up-to-date, patch set again). > > + vmcs12->launch_state = 1; > > + > > + prepare_vmcs02(vcpu, vmcs12); > > Since prepare_vmcs may fail, add a check here and move launch_state > assignment after its success? prepare_vmcs02() cannot fail. All the checks that need to be done on vmcs12 are done before calling it, in nested_vmx_run(). Currently, there's a single case where prepare_vmcs02 "fails" when it fails to access apic_access_addr memory. This is wrong - the check should have been done earlier. I'll fix that, and make prepare_vmcs02() void.
> From: Nadav Har'El [mailto:nyh@math.technion.ac.il] > Sent: Tuesday, May 24, 2011 5:45 PM > > On Tue, May 24, 2011, Tian, Kevin wrote about "RE: [PATCH 18/31] nVMX: > Implement VMLAUNCH and VMRESUME": > > > + /* > > > + * Switch from L1's VMCS (vmcs01), to L2's VMCS (vmcs02). > Remember > > > + * vmcs01, on which CPU it was last loaded, and whether it was > launched > > > + * (we need all these values next time we will use L1). Then recall > > > + * these values from the last time vmcs02 was used. > > > + */ > > > + saved_vmcs02 = nested_get_current_vmcs02(vmx); > > > + if (!saved_vmcs02) > > > + return -ENOMEM; > > > + > > > + cpu = get_cpu(); > > > + vmx->nested.saved_vmcs01.vmcs = vmx->vmcs; > > > + vmx->nested.saved_vmcs01.cpu = vcpu->cpu; > > > + vmx->nested.saved_vmcs01.launched = vmx->launched; > > > + vmx->vmcs = saved_vmcs02->vmcs; > > > + vcpu->cpu = saved_vmcs02->cpu; > > > > this may be another valid reason for your check on cpu_online in your > > latest [08/31] local_vcpus_link fix, since cpu may be offlined after > > this assignment. :-) > > I believe that wrapping this part of the code with get_cpu()/put_cpu() > protected me from these kinds of race conditions. you're right. > > By the way, please note that this part of the code was changed after my > latest loaded_vmcs overhaul. It now looks like this: > > vmcs02 = nested_get_current_vmcs02(vmx); > if (!vmcs02) > return -ENOMEM; > > cpu = get_cpu(); > vmx->loaded_vmcs = vmcs02; > vmx_vcpu_put(vcpu); > vmx_vcpu_load(vcpu, cpu); > vcpu->cpu = cpu; > put_cpu(); > > (if Avi gives me the green light, I'll send the entire, up-to-date, patch set > again). Generally your new patch looks good. > > > > + vmcs12->launch_state = 1; > > > + > > > + prepare_vmcs02(vcpu, vmcs12); > > > > Since prepare_vmcs may fail, add a check here and move launch_state > > assignment after its success? > > prepare_vmcs02() cannot fail. All the checks that need to be done on vmcs12 > are done before calling it, in nested_vmx_run(). > > Currently, there's a single case where prepare_vmcs02 "fails" when it fails > to access apic_access_addr memory. This is wrong - the check should have > been > done earlier. I'll fix that, and make prepare_vmcs02() void. > then no problem, as long as you keep this choice clear. Thanks Kevin -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> From: Nadav Har'El > Sent: Tuesday, May 17, 2011 3:53 AM > > Implement the VMLAUNCH and VMRESUME instructions, allowing a guest > hypervisor to run its own guests. > > This patch does not include some of the necessary validity checks on > vmcs12 fields before the entry. These will appear in a separate patch > below. > > Signed-off-by: Nadav Har'El <nyh@il.ibm.com> > --- [...] > +static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) > +{ > + struct vmcs12 *vmcs12; > + struct vcpu_vmx *vmx = to_vmx(vcpu); > + int cpu; > + struct saved_vmcs *saved_vmcs02; > + > + if (!nested_vmx_check_permission(vcpu)) > + return 1; > + skip_emulated_instruction(vcpu); > + > + vmcs12 = get_vmcs12(vcpu); > + > + enter_guest_mode(vcpu); > + > + vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); > + > + /* > + * Switch from L1's VMCS (vmcs01), to L2's VMCS (vmcs02). Remember > + * vmcs01, on which CPU it was last loaded, and whether it was launched > + * (we need all these values next time we will use L1). Then recall > + * these values from the last time vmcs02 was used. > + */ > + saved_vmcs02 = nested_get_current_vmcs02(vmx); > + if (!saved_vmcs02) > + return -ENOMEM; > + we shouldn't return error after the guest mode is updated. Or else move enter_guest_mode to a later place... Thanks Kevin -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, May 25, 2011, Tian, Kevin wrote about "RE: [PATCH 18/31] nVMX: Implement VMLAUNCH and VMRESUME": > > + if (!saved_vmcs02) > > + return -ENOMEM; > > + > > we shouldn't return error after the guest mode is updated. Or else move > enter_guest_mode to a later place... I moved things around, but I don't think it matters anyway: If we return ENOMEM, the KVM ioctl fails, and the whole L1 guest dies - it doesn't matter at this point if we were in the middle of updating its state.
> From: Nadav Har'El [mailto:nyh@math.technion.ac.il] > Sent: Wednesday, May 25, 2011 9:26 PM > > On Wed, May 25, 2011, Tian, Kevin wrote about "RE: [PATCH 18/31] nVMX: > Implement VMLAUNCH and VMRESUME": > > > + if (!saved_vmcs02) > > > + return -ENOMEM; > > > + > > > > we shouldn't return error after the guest mode is updated. Or else move > > enter_guest_mode to a later place... > > I moved things around, but I don't think it matters anyway: If we return > ENOMEM, the KVM ioctl fails, and the whole L1 guest dies - it doesn't matter > at this point if we were in the middle of updating its state. > yes but the code this way is cleaner that guest mode is set only when next we'll certainly enter L2. Thanks Kevin -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
--- .before/arch/x86/kvm/vmx.c 2011-05-16 22:36:49.000000000 +0300 +++ .after/arch/x86/kvm/vmx.c 2011-05-16 22:36:49.000000000 +0300 @@ -347,6 +347,9 @@ struct nested_vmx { /* vmcs02_list cache of VMCSs recently used to run L2 guests */ struct list_head vmcs02_pool; int vmcs02_num; + + /* Saving the VMCS that we used for running L1 */ + struct saved_vmcs saved_vmcs01; u64 vmcs01_tsc_offset; /* * Guest pages referred to in vmcs02 with host-physical pointers, so @@ -4668,6 +4671,8 @@ static void nested_free_all_saved_vmcss( kfree(item); } vmx->nested.vmcs02_num = 0; + if (is_guest_mode(&vmx->vcpu)) + nested_free_saved_vmcs(vmx, &vmx->nested.saved_vmcs01); } /* Get a vmcs02 for the current vmcs12. */ @@ -4959,6 +4964,21 @@ static int handle_vmclear(struct kvm_vcp return 1; } +static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch); + +/* Emulate the VMLAUNCH instruction */ +static int handle_vmlaunch(struct kvm_vcpu *vcpu) +{ + return nested_vmx_run(vcpu, true); +} + +/* Emulate the VMRESUME instruction */ +static int handle_vmresume(struct kvm_vcpu *vcpu) +{ + + return nested_vmx_run(vcpu, false); +} + enum vmcs_field_type { VMCS_FIELD_TYPE_U16 = 0, VMCS_FIELD_TYPE_U64 = 1, @@ -5239,11 +5259,11 @@ static int (*kvm_vmx_exit_handlers[])(st [EXIT_REASON_INVLPG] = handle_invlpg, [EXIT_REASON_VMCALL] = handle_vmcall, [EXIT_REASON_VMCLEAR] = handle_vmclear, - [EXIT_REASON_VMLAUNCH] = handle_vmx_insn, + [EXIT_REASON_VMLAUNCH] = handle_vmlaunch, [EXIT_REASON_VMPTRLD] = handle_vmptrld, [EXIT_REASON_VMPTRST] = handle_vmptrst, [EXIT_REASON_VMREAD] = handle_vmread, - [EXIT_REASON_VMRESUME] = handle_vmx_insn, + [EXIT_REASON_VMRESUME] = handle_vmresume, [EXIT_REASON_VMWRITE] = handle_vmwrite, [EXIT_REASON_VMOFF] = handle_vmoff, [EXIT_REASON_VMON] = handle_vmon, @@ -6129,6 +6149,66 @@ static void nested_maintain_per_cpu_list } } +/* + * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1 + * for running an L2 nested guest. + */ +static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) +{ + struct vmcs12 *vmcs12; + struct vcpu_vmx *vmx = to_vmx(vcpu); + int cpu; + struct saved_vmcs *saved_vmcs02; + + if (!nested_vmx_check_permission(vcpu)) + return 1; + skip_emulated_instruction(vcpu); + + vmcs12 = get_vmcs12(vcpu); + + enter_guest_mode(vcpu); + + vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); + + /* + * Switch from L1's VMCS (vmcs01), to L2's VMCS (vmcs02). Remember + * vmcs01, on which CPU it was last loaded, and whether it was launched + * (we need all these values next time we will use L1). Then recall + * these values from the last time vmcs02 was used. + */ + saved_vmcs02 = nested_get_current_vmcs02(vmx); + if (!saved_vmcs02) + return -ENOMEM; + + cpu = get_cpu(); + vmx->nested.saved_vmcs01.vmcs = vmx->vmcs; + vmx->nested.saved_vmcs01.cpu = vcpu->cpu; + vmx->nested.saved_vmcs01.launched = vmx->launched; + vmx->vmcs = saved_vmcs02->vmcs; + vcpu->cpu = saved_vmcs02->cpu; + vmx->launched = saved_vmcs02->launched; + + nested_maintain_per_cpu_lists(vmx, + saved_vmcs02, &vmx->nested.saved_vmcs01); + + vmx_vcpu_put(vcpu); + vmx_vcpu_load(vcpu, cpu); + vcpu->cpu = cpu; + put_cpu(); + + vmcs12->launch_state = 1; + + prepare_vmcs02(vcpu, vmcs12); + + /* + * Note no nested_vmx_succeed or nested_vmx_fail here. At this point + * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet + * returned as far as L1 is concerned. It will only return (and set + * the success flag) when L2 exits (see nested_vmx_vmexit()). + */ + return 1; +} + static int vmx_check_intercept(struct kvm_vcpu *vcpu, struct x86_instruction_info *info, enum x86_intercept_stage stage)
Implement the VMLAUNCH and VMRESUME instructions, allowing a guest hypervisor to run its own guests. This patch does not include some of the necessary validity checks on vmcs12 fields before the entry. These will appear in a separate patch below. Signed-off-by: Nadav Har'El <nyh@il.ibm.com> --- arch/x86/kvm/vmx.c | 84 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 82 insertions(+), 2 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html