
[v8,12/18] KVM: SVM: Add support for static allocation of unified Page Encryption Bitmap.

Message ID 17c14245a404ff679253313ffe899c5f4e966717.1588711355.git.ashish.kalra@amd.com (mailing list archive)
State New, archived
Series Add AMD SEV guest live migration support

Commit Message

Kalra, Ashish May 5, 2020, 9:18 p.m. UTC
From: Ashish Kalra <ashish.kalra@amd.com>

Add support for static allocation of the unified page encryption bitmap by
extending the kvm_arch_commit_memory_region() callback with an SVM-specific
x86 op which reads the userspace-provided memory regions/memslots, computes
the amount of guest RAM managed by KVM, and grows the bitmap to cover it,
i.e. sizes it for the highest guest PA that is mapped by a memslot.
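
For a sense of scale, the static bitmap stays small relative to guest RAM:
one bit per 4 KiB guest page up to the highest mapped guest PA. A minimal
userspace sketch of that sizing math (illustrative only; the in-kernel
allocation itself is done by sev_resize_page_enc_bitmap() in this series):

#include <stdio.h>

#define PAGE_SHIFT    12
#define BITS_PER_LONG (8 * sizeof(unsigned long))

/* Bytes needed for a one-bit-per-page bitmap covering gfns [0, end_gfn). */
static unsigned long enc_bitmap_bytes(unsigned long end_gfn)
{
	unsigned long nlongs = (end_gfn + BITS_PER_LONG - 1) / BITS_PER_LONG;

	return nlongs * sizeof(unsigned long);
}

int main(void)
{
	unsigned long guest_ram = 16UL << 30;	/* 16 GiB guest, 64-bit longs */
	unsigned long end_gfn = guest_ram >> PAGE_SHIFT;

	/* 16 GiB -> 4M pages -> 4M bits -> 512 KiB of bitmap */
	printf("bitmap size: %lu KiB\n", enc_bitmap_bytes(end_gfn) / 1024);
	return 0;
}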

Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/svm/sev.c          | 35 +++++++++++++++++++++++++++++++++
 arch/x86/kvm/svm/svm.c          |  1 +
 arch/x86/kvm/svm/svm.h          |  1 +
 arch/x86/kvm/x86.c              |  5 +++++
 5 files changed, 43 insertions(+)

Comments

Steve Rutherford May 30, 2020, 2:07 a.m. UTC | #1
On Tue, May 5, 2020 at 2:18 PM Ashish Kalra <Ashish.Kalra@amd.com> wrote:
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index c4166d7a0493..8938de868d42 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -10133,6 +10133,11 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
>                 kvm_mmu_change_mmu_pages(kvm,
>                                 kvm_mmu_calculate_default_mmu_pages(kvm));
>
> +       if (change == KVM_MR_CREATE || change == KVM_MR_DELETE) {
> +               if (kvm_x86_ops.commit_memory_region)
> +                       kvm_x86_ops.commit_memory_region(kvm, change);
Why not just call this every time (if it exists) and have the
kvm_x86_op determine if it should do anything?

It seems like it's a nop anyway unless you are doing a create.

Kalra, Ashish May 30, 2020, 5:49 a.m. UTC | #2
Hello Steve,

On Fri, May 29, 2020 at 07:07:33PM -0700, Steve Rutherford wrote:
> > +       if (change == KVM_MR_CREATE || change == KVM_MR_DELETE) {
> > +               if (kvm_x86_ops.commit_memory_region)
> > +                       kvm_x86_ops.commit_memory_region(kvm, change);
> Why not just call this every time (if it exists) and have the
> kvm_x86_op determine if it should do anything?
> 
> It seems like it's a nop anyway unless you are doing a create.
> 

Yes, this makes sense. 

I will call it unconditionally if it exists and let the callback
determine what to do with it.
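
Something along these lines (an untested sketch of the rework, not the code
that was posted), in kvm_arch_commit_memory_region():

-	if (change == KVM_MR_CREATE || change == KVM_MR_DELETE) {
-		if (kvm_x86_ops.commit_memory_region)
-			kvm_x86_ops.commit_memory_region(kvm, change);
-	}
+	/* Let the vendor callback decide whether the change is relevant. */
+	if (kvm_x86_ops.commit_memory_region)
+		kvm_x86_ops.commit_memory_region(kvm, change);

and at the top of svm_commit_memory_region():

+	/* Only a newly created memslot can grow the bitmap. */
+	if (change != KVM_MR_CREATE)
+		return;

The existing KVM_MR_CREATE check around the memslot walk then becomes
redundant and can be dropped.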

Thanks,
Ashish

Paolo Bonzini Dec. 4, 2020, 11:08 a.m. UTC | #3
On 05/05/20 23:18, Ashish Kalra wrote:
> Add support for static allocation of the unified page encryption bitmap by
> extending the kvm_arch_commit_memory_region() callback with an SVM-specific
> x86 op which reads the userspace-provided memory regions/memslots, computes
> the amount of guest RAM managed by KVM, and grows the bitmap to cover it,
> i.e. sizes it for the highest guest PA that is mapped by a memslot.

Hi Ashish,

the commit message should explain why this is needed or useful.

Paolo
Kalra, Ashish Dec. 4, 2020, 9:38 p.m. UTC | #4
Hello Paolo,

On Fri, Dec 04, 2020 at 12:08:20PM +0100, Paolo Bonzini wrote:
> 
> Hi Ashish,
> 
> the commit message should explain why this is needed or useful.
> 

Earlier we did dynamic resizing of the page encryption bitmap based on the
guest hypercall, but a malicious guest could potentially issue a hypercall
that triggers a really large memory allocation on the host side and may
eventually cause a denial of service.

Hence we no longer resize the page encryption bitmap in response to the
hypercall; instead we allocate it statically, based on the guest memory
allocation, by walking through the memslots and computing its size.
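
To put a number on the worst case (illustrative arithmetic only, assuming one
bit per 4 KiB page and a guest-supplied GPA near the top of a 48-bit
guest-physical address space):

#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
	/* GPA a malicious guest could pass to the page-enc-status hypercall. */
	unsigned long long evil_gpa = 1ULL << 47;	/* 128 TiB */
	unsigned long long gfns = evil_gpa >> PAGE_SHIFT;
	unsigned long long bytes = gfns / 8;		/* one bit per page */

	/* Prints 4294967296 bytes, i.e. a 4 GiB host allocation. */
	printf("bitmap bytes: %llu (%llu GiB)\n", bytes, bytes >> 30);
	return 0;
}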

I will add the above explanation to the fresh series of the patch-set I am
going to post.

Thanks,
Ashish
Paolo Bonzini Dec. 6, 2020, 10:19 a.m. UTC | #5
On 04/12/20 22:38, Ashish Kalra wrote:
> Earlier we did dynamic resizing of the page encryption bitmap based on the
> guest hypercall, but a malicious guest could potentially issue a hypercall
> that triggers a really large memory allocation on the host side and may
> eventually cause a denial of service.
> 
> Hence we no longer resize the page encryption bitmap in response to the
> hypercall; instead we allocate it statically, based on the guest memory
> allocation, by walking through the memslots and computing its size.
> 
> I will add the above explanation to the fresh series of the patch-set I am
> going to post.

Sounds good, thanks.  If there are no other changes I can include this 
in the commit message myself.

Paolo

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fc74144d5ab0..b573ea85b57e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1254,6 +1254,7 @@  struct kvm_x86_ops {
 
 	bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
 	int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
+	void (*commit_memory_region)(struct kvm *kvm, enum kvm_mr_change change);
 	int (*page_enc_status_hc)(struct kvm *kvm, unsigned long gpa,
 				  unsigned long sz, unsigned long mode);
 	int (*get_page_enc_bitmap)(struct kvm *kvm,
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 30efc1068707..c0d7043a0627 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1377,6 +1377,41 @@  static int sev_resize_page_enc_bitmap(struct kvm *kvm, unsigned long new_size)
 	return 0;
 }
 
+void svm_commit_memory_region(struct kvm *kvm, enum kvm_mr_change change)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
+	gfn_t start, end = 0;
+
+	spin_lock(&kvm->mmu_lock);
+	if (change == KVM_MR_CREATE) {
+		slots = kvm_memslots(kvm);
+		kvm_for_each_memslot(memslot, slots) {
+			start = memslot->base_gfn;
+			end = memslot->base_gfn + memslot->npages;
+			/*
+			 * The KVM memslots list is sorted, starting with
+			 * the memslot that maps the highest guest PA, so
+			 * pick the topmost valid guest PA.
+			 */
+			if (memslot->npages)
+				break;
+		}
+	}
+	spin_unlock(&kvm->mmu_lock);
+
+	if (end) {
+		/*
+		 * NOTE: This callback is invoked in the vm ioctl
+		 * set_user_memory_region, hence we can use a
+		 * mutex here.
+		 */
+		mutex_lock(&kvm->lock);
+		sev_resize_page_enc_bitmap(kvm, end);
+		mutex_unlock(&kvm->lock);
+	}
+}
+
 int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa,
 				  unsigned long npages, unsigned long enc)
 {
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 501e82f5593c..442adbbb0641 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4015,6 +4015,7 @@  static struct kvm_x86_ops svm_x86_ops __initdata = {
 
 	.check_nested_events = svm_check_nested_events,
 
+	.commit_memory_region = svm_commit_memory_region,
 	.page_enc_status_hc = svm_page_enc_status_hc,
 	.get_page_enc_bitmap = svm_get_page_enc_bitmap,
 	.set_page_enc_bitmap = svm_set_page_enc_bitmap,
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 2ebdcce50312..fd99e0a5417a 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -406,6 +406,7 @@  int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa,
 				  unsigned long npages, unsigned long enc);
 int svm_get_page_enc_bitmap(struct kvm *kvm, struct kvm_page_enc_bitmap *bmap);
 int svm_set_page_enc_bitmap(struct kvm *kvm, struct kvm_page_enc_bitmap *bmap);
+void svm_commit_memory_region(struct kvm *kvm, enum kvm_mr_change change);
 
 /* avic.c */
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c4166d7a0493..8938de868d42 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10133,6 +10133,11 @@  void kvm_arch_commit_memory_region(struct kvm *kvm,
 		kvm_mmu_change_mmu_pages(kvm,
 				kvm_mmu_calculate_default_mmu_pages(kvm));
 
+	if (change == KVM_MR_CREATE || change == KVM_MR_DELETE) {
+		if (kvm_x86_ops.commit_memory_region)
+			kvm_x86_ops.commit_memory_region(kvm, change);
+	}
+
 	/*
 	 * Dirty logging tracks sptes in 4k granularity, meaning that large
 	 * sptes have to be split.  If live migration is successful, the guest