diff mbox series

[v6,4/4] KVM: nSVM: implement on demand allocation of the nested state

Message ID 20200922211025.175547-5-mlevitsk@redhat.com (mailing list archive)
State New, archived
Headers show
Series KVM: nSVM: ondemand nested state allocation | expand

Commit Message

Maxim Levitsky Sept. 22, 2020, 9:10 p.m. UTC
This way we don't waste memory on VMs which don't use nesting
virtualization even when the host enabled it for them.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 arch/x86/kvm/svm/nested.c | 42 ++++++++++++++++++++++++++++++
 arch/x86/kvm/svm/svm.c    | 55 ++++++++++++++++++++++-----------------
 arch/x86/kvm/svm/svm.h    |  6 +++++
 3 files changed, 79 insertions(+), 24 deletions(-)

Comments

Sean Christopherson Sept. 29, 2020, 5:15 a.m. UTC | #1
On Wed, Sep 23, 2020 at 12:10:25AM +0300, Maxim Levitsky wrote:
> This way we don't waste memory on VMs which don't use nesting
> virtualization even when the host enabled it for them.
> 
> Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> ---
>  arch/x86/kvm/svm/nested.c | 42 ++++++++++++++++++++++++++++++
>  arch/x86/kvm/svm/svm.c    | 55 ++++++++++++++++++++++-----------------
>  arch/x86/kvm/svm/svm.h    |  6 +++++
>  3 files changed, 79 insertions(+), 24 deletions(-)
> 
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index 09417f5197410..dd13856818a03 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -467,6 +467,9 @@ int nested_svm_vmrun(struct vcpu_svm *svm)
>  
>  	vmcb12 = map.hva;
>  
> +	if (WARN_ON(!svm->nested.initialized))

Probably should use WARN_ON_ONCE, if this is somehow it, userspace could
easily spam the kernel.

Side topic, do we actually need 'initialized'?  Wouldn't checking for a
valid nested.msrpm or nested.hsave suffice?

> +		return 1;
> +
>  	if (!nested_vmcb_checks(svm, vmcb12)) {
>  		vmcb12->control.exit_code    = SVM_EXIT_ERR;
>  		vmcb12->control.exit_code_hi = 0;
> @@ -684,6 +687,45 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
>  	return 0;
>  }
Maxim Levitsky Sept. 30, 2020, 3:35 p.m. UTC | #2
On Mon, 2020-09-28 at 22:15 -0700, Sean Christopherson wrote:
> On Wed, Sep 23, 2020 at 12:10:25AM +0300, Maxim Levitsky wrote:
> > This way we don't waste memory on VMs which don't use nesting
> > virtualization even when the host enabled it for them.
> > 
> > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> > ---
> >  arch/x86/kvm/svm/nested.c | 42 ++++++++++++++++++++++++++++++
> >  arch/x86/kvm/svm/svm.c    | 55 ++++++++++++++++++++++-----------------
> >  arch/x86/kvm/svm/svm.h    |  6 +++++
> >  3 files changed, 79 insertions(+), 24 deletions(-)
> > 
> > diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> > index 09417f5197410..dd13856818a03 100644
> > --- a/arch/x86/kvm/svm/nested.c
> > +++ b/arch/x86/kvm/svm/nested.c
> > @@ -467,6 +467,9 @@ int nested_svm_vmrun(struct vcpu_svm *svm)
> >  
> >  	vmcb12 = map.hva;
> >  
> > +	if (WARN_ON(!svm->nested.initialized))
> 
> Probably should use WARN_ON_ONCE, if this is somehow it, userspace could
> easily spam the kernel.
Makes sense. 

> 
> Side topic, do we actually need 'initialized'?  Wouldn't checking for a
> valid nested.msrpm or nested.hsave suffice?

It a matter of taste - I prefer to have a single variable controlling this,
rather than two. 
a WARN_ON(svm->nested.initialized && !svm->nested.msrpm || !svm->nested.hsave))
would probably be nice to have. IMHO I rather leave this like it is if you
don't object.

> 
> > +		return 1;
> > +
> >  	if (!nested_vmcb_checks(svm, vmcb12)) {
> >  		vmcb12->control.exit_code    = SVM_EXIT_ERR;
> >  		vmcb12->control.exit_code_hi = 0;
> > @@ -684,6 +687,45 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
> >  	return 0;
> >  }

Best regards,
	Maxim Levitsky
Sean Christopherson Oct. 1, 2020, 12:26 a.m. UTC | #3
On Wed, Sep 30, 2020 at 06:35:40PM +0300, Maxim Levitsky wrote:
> On Mon, 2020-09-28 at 22:15 -0700, Sean Christopherson wrote:
> > Side topic, do we actually need 'initialized'?  Wouldn't checking for a
> > valid nested.msrpm or nested.hsave suffice?
> 
> It a matter of taste - I prefer to have a single variable controlling this,
> rather than two. 
> a WARN_ON(svm->nested.initialized && !svm->nested.msrpm || !svm->nested.hsave))
> would probably be nice to have. IMHO I rather leave this like it is if you
> don't object.

I don't have a strong preference.  I wouldn't bother with the second WARN_ON.
Unless you take action, e.g. bail early, a NULL pointer will likely provide a
stack trace soon enough :-).
diff mbox series

Patch

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 09417f5197410..dd13856818a03 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -467,6 +467,9 @@  int nested_svm_vmrun(struct vcpu_svm *svm)
 
 	vmcb12 = map.hva;
 
+	if (WARN_ON(!svm->nested.initialized))
+		return 1;
+
 	if (!nested_vmcb_checks(svm, vmcb12)) {
 		vmcb12->control.exit_code    = SVM_EXIT_ERR;
 		vmcb12->control.exit_code_hi = 0;
@@ -684,6 +687,45 @@  int nested_svm_vmexit(struct vcpu_svm *svm)
 	return 0;
 }
 
+int svm_allocate_nested(struct vcpu_svm *svm)
+{
+	struct page *hsave_page;
+
+	if (svm->nested.initialized)
+		return 0;
+
+	hsave_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+	if (!hsave_page)
+		return -ENOMEM;
+
+	svm->nested.hsave = page_address(hsave_page);
+
+	svm->nested.msrpm = svm_vcpu_init_msrpm();
+	if (!svm->nested.msrpm)
+		goto err_free_hsave;
+
+	svm->nested.initialized = true;
+	return 0;
+
+err_free_hsave:
+	__free_page(hsave_page);
+	return -ENOMEM;
+}
+
+void svm_free_nested(struct vcpu_svm *svm)
+{
+	if (!svm->nested.initialized)
+		return;
+
+	svm_vcpu_free_msrpm(svm->nested.msrpm);
+	svm->nested.msrpm = NULL;
+
+	__free_page(virt_to_page(svm->nested.hsave));
+	svm->nested.hsave = NULL;
+
+	svm->nested.initialized = false;
+}
+
 /*
  * Forcibly leave nested mode in order to be able to reset the VCPU later on.
  */
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 18f8af55e970a..d1265c95e2b0b 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -266,6 +266,7 @@  static int get_max_npt_level(void)
 int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
+	u64 old_efer = vcpu->arch.efer;
 	vcpu->arch.efer = efer;
 
 	if (!npt_enabled) {
@@ -276,9 +277,27 @@  int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
 			efer &= ~EFER_LME;
 	}
 
-	if (!(efer & EFER_SVME)) {
-		svm_leave_nested(svm);
-		svm_set_gif(svm, true);
+	if ((old_efer & EFER_SVME) != (efer & EFER_SVME)) {
+		if (!(efer & EFER_SVME)) {
+			svm_leave_nested(svm);
+			svm_set_gif(svm, true);
+
+			/*
+			 * Free the nested guest state, unless we are in SMM.
+			 * In this case we will return to the nested guest
+			 * as soon as we leave SMM.
+			 */
+			if (!is_smm(&svm->vcpu))
+				svm_free_nested(svm);
+
+		} else {
+			int ret = svm_allocate_nested(svm);
+
+			if (ret) {
+				vcpu->arch.efer = old_efer;
+				return ret;
+			}
+		}
 	}
 
 	svm->vmcb->save.efer = efer | EFER_SVME;
@@ -610,7 +629,7 @@  static void set_msr_interception(u32 *msrpm, unsigned msr,
 	msrpm[offset] = tmp;
 }
 
-static u32 *svm_vcpu_init_msrpm(void)
+u32 *svm_vcpu_init_msrpm(void)
 {
 	int i;
 	u32 *msrpm;
@@ -630,7 +649,7 @@  static u32 *svm_vcpu_init_msrpm(void)
 	return msrpm;
 }
 
-static void svm_vcpu_free_msrpm(u32 *msrpm)
+void svm_vcpu_free_msrpm(u32 *msrpm)
 {
 	__free_pages(virt_to_page(msrpm), MSRPM_ALLOC_ORDER);
 }
@@ -1204,7 +1223,6 @@  static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm;
 	struct page *vmcb_page;
-	struct page *hsave_page;
 	int err;
 
 	BUILD_BUG_ON(offsetof(struct vcpu_svm, vcpu) != 0);
@@ -1215,13 +1233,9 @@  static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 	if (!vmcb_page)
 		goto out;
 
-	hsave_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
-	if (!hsave_page)
-		goto error_free_vmcb_page;
-
 	err = avic_init_vcpu(svm);
 	if (err)
-		goto error_free_hsave_page;
+		goto out;
 
 	/* We initialize this flag to true to make sure that the is_running
 	 * bit would be set the first time the vcpu is loaded.
@@ -1229,15 +1243,9 @@  static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 	if (irqchip_in_kernel(vcpu->kvm) && kvm_apicv_activated(vcpu->kvm))
 		svm->avic_is_running = true;
 
-	svm->nested.hsave = page_address(hsave_page);
-
 	svm->msrpm = svm_vcpu_init_msrpm();
 	if (!svm->msrpm)
-		goto error_free_hsave_page;
-
-	svm->nested.msrpm = svm_vcpu_init_msrpm();
-	if (!svm->nested.msrpm)
-		goto error_free_msrpm;
+		goto error_free_vmcb_page;
 
 	svm->vmcb = page_address(vmcb_page);
 	svm->vmcb_pa = __sme_set(page_to_pfn(vmcb_page) << PAGE_SHIFT);
@@ -1249,10 +1257,6 @@  static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 
 	return 0;
 
-error_free_msrpm:
-	svm_vcpu_free_msrpm(svm->msrpm);
-error_free_hsave_page:
-	__free_page(hsave_page);
 error_free_vmcb_page:
 	__free_page(vmcb_page);
 out:
@@ -1278,10 +1282,10 @@  static void svm_free_vcpu(struct kvm_vcpu *vcpu)
 	 */
 	svm_clear_current_vmcb(svm->vmcb);
 
+	svm_free_nested(svm);
+
 	__free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT));
 	__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
-	__free_page(virt_to_page(svm->nested.hsave));
-	__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
 }
 
 static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -3964,6 +3968,9 @@  static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
 					 gpa_to_gfn(vmcb12_gpa), &map) == -EINVAL)
 				return 1;
 
+			if (svm_allocate_nested(svm))
+				return 1;
+
 			ret = enter_svm_guest_mode(svm, vmcb12_gpa, map.hva);
 			kvm_vcpu_unmap(&svm->vcpu, &map, true);
 		}
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 1e1842de0efe7..10453abc5bed3 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -96,6 +96,8 @@  struct svm_nested_state {
 
 	/* cache for control fields of the guest */
 	struct vmcb_control_area ctl;
+
+	bool initialized;
 };
 
 struct vcpu_svm {
@@ -339,6 +341,8 @@  static inline bool gif_set(struct vcpu_svm *svm)
 
 u32 svm_msrpm_offset(u32 msr);
 int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer);
+u32 *svm_vcpu_init_msrpm(void);
+void svm_vcpu_free_msrpm(u32 *msrpm);
 void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
 int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
 void svm_flush_tlb(struct kvm_vcpu *vcpu);
@@ -379,6 +383,8 @@  static inline bool nested_exit_on_nmi(struct vcpu_svm *svm)
 int enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
 			 struct vmcb *nested_vmcb);
 void svm_leave_nested(struct vcpu_svm *svm);
+void svm_free_nested(struct vcpu_svm *svm);
+int svm_allocate_nested(struct vcpu_svm *svm);
 int nested_svm_vmrun(struct vcpu_svm *svm);
 void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb);
 int nested_svm_vmexit(struct vcpu_svm *svm);