Message ID | fc5e111e0a4eda0e6ea1ee3923327384906aff36.1581555616.git.ashish.kalra@amd.com
---|---
State | New, archived
Series | SEV Live Migration Patchset
On Wed, Feb 12, 2020 at 5:17 PM Ashish Kalra <Ashish.Kalra@amd.com> wrote:
> +static int sev_resize_page_enc_bitmap(struct kvm *kvm, unsigned long new_size)
> +{
> +        struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> +        unsigned long *map;
> +        unsigned long sz;
> +
> +        if (sev->page_enc_bmap_size >= new_size)
> +                return 0;
> +
> +        sz = ALIGN(new_size, BITS_PER_LONG) / 8;
> +
> +        map = vmalloc(sz);
> +        if (!map) {
> +                pr_err_once("Failed to allocate encrypted bitmap size %lx\n",
> +                                sz);
> +                return -ENOMEM;
> +        }
> +
> +        /* mark the page encrypted (by default) */
> +        memset(map, 0xff, sz);
> +
> +        bitmap_copy(map, sev->page_enc_bmap, sev->page_enc_bmap_size);

Personally, I would do the arithmetic and swap the `memset(map, 0xff, sz);`
for `memset(map + sev->page_enc_bmap_size, 0xff, sz - sev->page_enc_bmap_size);`,
but gcc might be smart enough to do this for you.

> +        kvfree(sev->page_enc_bmap);
> +
> +        sev->page_enc_bmap = map;
> +        sev->page_enc_bmap_size = new_size;
> +
> +        return 0;
> +}
> +
> +static int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa,
> +                                  unsigned long npages, unsigned long enc)
> +{
> +        struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> +        gfn_t gfn_start, gfn_end;
> +        int ret;
> +
> +        if (!sev_guest(kvm))
> +                return -EINVAL;
> +
> +        if (!npages)
> +                return 0;
> +
> +        gfn_start = gpa_to_gfn(gpa);
> +        gfn_end = gfn_start + npages;
> +
> +        /* out of bound access error check */
> +        if (gfn_end <= gfn_start)
> +                return -EINVAL;
> +
> +        /* lets make sure that gpa exist in our memslot */
> +        pfn_start = gfn_to_pfn(kvm, gfn_start);
> +        pfn_end = gfn_to_pfn(kvm, gfn_end);

I believe these functions assume as_id==0, which is probably fine in
practice. If one were to want to migrate a VM with SMM support (which I
believe is the only current usage of non-zero as_ids), it feels like SMM
would need to be in control of its own c-bit tracking, but that doesn't
seem super feasible (otherwise the guest kernel could corrupt SMM by
passing invalid c-bit statuses). I'm not certain anyone wants SMM with
SEV anyway?

> +
> +        if (is_error_noslot_pfn(pfn_start) && !is_noslot_pfn(pfn_start)) {
> +                /*
> +                 * Allow guest MMIO range(s) to be added
> +                 * to the page encryption bitmap.
> +                 */
> +                return -EINVAL;
> +        }
> +
> +        if (is_error_noslot_pfn(pfn_end) && !is_noslot_pfn(pfn_end)) {
> +                /*
> +                 * Allow guest MMIO range(s) to be added
> +                 * to the page encryption bitmap.
> +                 */
> +                return -EINVAL;
> +        }
> +
> +        mutex_lock(&kvm->lock);
> +        ret = sev_resize_page_enc_bitmap(kvm, gfn_end);
> +        if (ret)
> +                goto unlock;
> +
> +        if (enc)
> +                __bitmap_set(sev->page_enc_bmap, gfn_start,
> +                                gfn_end - gfn_start);
> +        else
> +                __bitmap_clear(sev->page_enc_bmap, gfn_start,
> +                                gfn_end - gfn_start);
> +
> +unlock:
> +        mutex_unlock(&kvm->lock);
> +        return ret;
> +}
> +
>  static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
>  {
>          struct kvm_sev_cmd sev_cmd;
> @@ -7972,6 +8064,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
>          .need_emulation_on_page_fault = svm_need_emulation_on_page_fault,
>
>          .apic_init_signal_blocked = svm_apic_init_signal_blocked,
> +
> +        .page_enc_status_hc = svm_page_enc_status_hc,
>  };
>
>  static int __init svm_init(void)
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 9a6664886f2e..7963f2979fdf 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -7879,6 +7879,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
>          .nested_get_evmcs_version = NULL,
>          .need_emulation_on_page_fault = vmx_need_emulation_on_page_fault,
>          .apic_init_signal_blocked = vmx_apic_init_signal_blocked,
> +        .page_enc_status_hc = NULL,
>  };
>
>  static void vmx_cleanup_l1d_flush(void)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index fbabb2f06273..298627fa3d39 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -7547,6 +7547,12 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
>                  kvm_sched_yield(vcpu->kvm, a0);
>                  ret = 0;
>                  break;
> +        case KVM_HC_PAGE_ENC_STATUS:
> +                ret = -KVM_ENOSYS;
> +                if (kvm_x86_ops->page_enc_status_hc)
> +                        ret = kvm_x86_ops->page_enc_status_hc(vcpu->kvm,
> +                                a0, a1, a2);
> +                break;
>          default:
>                  ret = -KVM_ENOSYS;
>                  break;

Add a cap to kvm_vm_ioctl_enable_cap so that the vmm can configure
whether or not this hypercall is offered. Moving to an enable cap
would also allow the vmm to pass down the expected size of the c-bit
tracking buffer, so that you don't need to handle dynamic resizing in
response to guest hypercall, otherwise KVM will sporadically start
copying around large buffers when working with large VMs.

Stepping back a bit, I'm a little surprised by the fact that you don't
treat the c-bit buffers the same way as the dirty tracking buffers and
put them alongside the memslots. That's probably more effort, and the
strategy of using one large buffer should work fine (assuming you
don't need to support non-zero as_ids).
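For reference, a sketch of the in-place fill suggested above, with the
bit-to-byte conversion written out (untested; it assumes, as elsewhere in
the patch, that page_enc_bmap_size counts bits):

        /*
         * Sketch only: fill just the newly allocated tail with 1s, then
         * copy the old bits over the head, instead of memset()ing the
         * whole buffer.  page_enc_bmap_size is in bits, so round it up to
         * whole longs to get the byte offset where the new tail starts.
         */
        unsigned long old_sz = BITS_TO_LONGS(sev->page_enc_bmap_size) *
                               sizeof(unsigned long);

        memset((u8 *)map + old_sz, 0xff, sz - old_sz);
        bitmap_copy(map, sev->page_enc_bmap, sev->page_enc_bmap_size);

This should behave the same as the original code, because the bits past
page_enc_bmap_size in the last long of the old bitmap were already set to
1 when that bitmap was allocated.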
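And a rough shape of the enable-cap plumbing being asked for; the
KVM_CAP_PAGE_ENC_BITMAP name and the set_page_enc_bitmap_size() hook below
are hypothetical, only to illustrate where a VMM-provided size could land:

        /* In kvm_vm_ioctl_enable_cap(), arch/x86/kvm/x86.c -- sketch only. */
        case KVM_CAP_PAGE_ENC_BITMAP:           /* hypothetical cap */
                r = -ENOTTY;
                if (kvm_x86_ops->page_enc_status_hc)
                        /*
                         * cap->args[0]: highest gfn the VMM expects, i.e. the
                         * bitmap size in bits, so the allocation happens once
                         * up front.  sev_resize_page_enc_bitmap() is static to
                         * svm.c, so it would have to be reached through a new
                         * (hypothetical) kvm_x86_ops callback.
                         */
                        r = kvm_x86_ops->set_page_enc_bitmap_size(kvm,
                                                                  cap->args[0]);
                break;

The VMM would enable the cap at VM creation time, and the hypercall handler
could then reject or clamp any gfn beyond the pre-sized bitmap instead of
resizing under kvm->lock.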
On Wed, Feb 19, 2020 at 06:39:39PM -0800, Steve Rutherford wrote:
> On Wed, Feb 12, 2020 at 5:17 PM Ashish Kalra <Ashish.Kalra@amd.com> wrote:
> >
> >  static void vmx_cleanup_l1d_flush(void)
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index fbabb2f06273..298627fa3d39 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -7547,6 +7547,12 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
> >                  kvm_sched_yield(vcpu->kvm, a0);
> >                  ret = 0;
> >                  break;
> > +        case KVM_HC_PAGE_ENC_STATUS:
> > +                ret = -KVM_ENOSYS;
> > +                if (kvm_x86_ops->page_enc_status_hc)
> > +                        ret = kvm_x86_ops->page_enc_status_hc(vcpu->kvm,
> > +                                a0, a1, a2);
> > +                break;
> >          default:
> >                  ret = -KVM_ENOSYS;
> >                  break;
> Add a cap to kvm_vm_ioctl_enable_cap so that the vmm can configure
> whether or not this hypercall is offered. Moving to an enable cap
> would also allow the vmm to pass down the expected size of the c-bit
> tracking buffer, so that you don't need to handle dynamic resizing in
> response to guest hypercall, otherwise KVM will sporadically start
> copying around large buffers when working with large VMs.
>

Yes, that is something we have been looking at adding.

But, how will the VMM know the expected size of the c-bit tracking buffer?

The guest kernel and firmware make the hypercall to mark page encryption
status, and depending on the GPA range being marked, the kernel's page
encryption bitmap needs to be dynamically resized in response to the guest
hypercall.

> Stepping back a bit, I'm a little surprised by the fact that you don't
> treat the c-bit buffers the same way as the dirty tracking buffers and
> put them alongside the memslots. That's probably more effort, and the
> strategy of using one large buffer should work fine (assuming you
> don't need to support non-zero as_ids).

Thanks,
Ashish
On Thu, Feb 20, 2020 at 05:28:21AM +0000, Ashish Kalra wrote:
> On Wed, Feb 19, 2020 at 06:39:39PM -0800, Steve Rutherford wrote:
> > On Wed, Feb 12, 2020 at 5:17 PM Ashish Kalra <Ashish.Kalra@amd.com> wrote:
> > >
> > >  static void vmx_cleanup_l1d_flush(void)
> > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > > index fbabb2f06273..298627fa3d39 100644
> > > --- a/arch/x86/kvm/x86.c
> > > +++ b/arch/x86/kvm/x86.c
> > > @@ -7547,6 +7547,12 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
> > >                  kvm_sched_yield(vcpu->kvm, a0);
> > >                  ret = 0;
> > >                  break;
> > > +        case KVM_HC_PAGE_ENC_STATUS:
> > > +                ret = -KVM_ENOSYS;
> > > +                if (kvm_x86_ops->page_enc_status_hc)
> > > +                        ret = kvm_x86_ops->page_enc_status_hc(vcpu->kvm,
> > > +                                a0, a1, a2);
> > > +                break;
> > >          default:
> > >                  ret = -KVM_ENOSYS;
> > >                  break;
> > Add a cap to kvm_vm_ioctl_enable_cap so that the vmm can configure
> > whether or not this hypercall is offered. Moving to an enable cap
> > would also allow the vmm to pass down the expected size of the c-bit
> > tracking buffer, so that you don't need to handle dynamic resizing in
> > response to guest hypercall, otherwise KVM will sporadically start
> > copying around large buffers when working with large VMs.
> >
> Yes, that is something we have been looking at adding.
>
> But, how will the VMM know the expected size of the c-bit tracking buffer?
>
> The guest kernel and firmware make the hypercall to mark page encryption
> status, and depending on the GPA range being marked, the kernel's page
> encryption bitmap needs to be dynamically resized in response to the guest
> hypercall.
>

We discussed this with Brijesh: KVM could provide a hint about the expected
(max.) size of the c-bit tracking buffer, but there is still an issue for
hotplugged guest memory, hence a dynamically sized encryption bitmap is
probably the right approach.

Thanks,
Ashish
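For scale, the sz = ALIGN(new_size, BITS_PER_LONG) / 8 computation in the
patch works out to one bit per 4 KiB guest page, i.e. roughly
guest_size / 32768 bytes of bitmap: about 32 KB for a 1 GB guest and about
32 MB for a 1 TB guest, all of which gets reallocated and copied whenever
the guest reports a gfn beyond the current bitmap size.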
diff --git a/Documentation/virt/kvm/hypercalls.txt b/Documentation/virt/kvm/hypercalls.txt
index 5f6d291bd004..8ff0e4adcb13 100644
--- a/Documentation/virt/kvm/hypercalls.txt
+++ b/Documentation/virt/kvm/hypercalls.txt
@@ -152,3 +152,17 @@ a0: destination APIC ID
 
 Usage example: When sending a call-function IPI-many to vCPUs, yield if
 any of the IPI target vCPUs was preempted.
+
+8. KVM_HC_PAGE_ENC_STATUS
+-------------------------
+Architecture: x86
+Status: active
+Purpose: Notify the encryption status changes in guest page table (SEV guest)
+
+a0: the guest physical address of the start page
+a1: the number of pages
+a2: encryption attribute
+
+   Where:
+        * 1: Encryption attribute is set
+        * 0: Encryption attribute is cleared
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4dffbc10d3f8..4ae7293033b2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1256,6 +1256,8 @@ struct kvm_x86_ops {
 
         bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
         int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
+        int (*page_enc_status_hc)(struct kvm *kvm, unsigned long gpa,
+                                  unsigned long sz, unsigned long mode);
 };
 
 struct kvm_arch_async_pf {
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d86b02bece3a..f09791109075 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -134,6 +134,8 @@ struct kvm_sev_info {
         int fd;                 /* SEV device fd */
         unsigned long pages_locked; /* Number of pages locked */
         struct list_head regions_list;  /* List of registered regions */
+        unsigned long *page_enc_bmap;
+        unsigned long page_enc_bmap_size;
 };
 
 struct kvm_svm {
@@ -1992,6 +1994,9 @@ static void sev_vm_destroy(struct kvm *kvm)
 
         sev_unbind_asid(kvm, sev->handle);
         sev_asid_free(sev->asid);
+
+        kvfree(sev->page_enc_bmap);
+        sev->page_enc_bmap = NULL;
 }
 
 static void avic_vm_destroy(struct kvm *kvm)
@@ -7581,6 +7586,93 @@ static int sev_receive_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
         return ret;
 }
 
+static int sev_resize_page_enc_bitmap(struct kvm *kvm, unsigned long new_size)
+{
+        struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+        unsigned long *map;
+        unsigned long sz;
+
+        if (sev->page_enc_bmap_size >= new_size)
+                return 0;
+
+        sz = ALIGN(new_size, BITS_PER_LONG) / 8;
+
+        map = vmalloc(sz);
+        if (!map) {
+                pr_err_once("Failed to allocate encrypted bitmap size %lx\n",
+                                sz);
+                return -ENOMEM;
+        }
+
+        /* mark the page encrypted (by default) */
+        memset(map, 0xff, sz);
+
+        bitmap_copy(map, sev->page_enc_bmap, sev->page_enc_bmap_size);
+        kvfree(sev->page_enc_bmap);
+
+        sev->page_enc_bmap = map;
+        sev->page_enc_bmap_size = new_size;
+
+        return 0;
+}
+
+static int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa,
+                                  unsigned long npages, unsigned long enc)
+{
+        struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+        gfn_t gfn_start, gfn_end;
+        int ret;
+
+        if (!sev_guest(kvm))
+                return -EINVAL;
+
+        if (!npages)
+                return 0;
+
+        gfn_start = gpa_to_gfn(gpa);
+        gfn_end = gfn_start + npages;
+
+        /* out of bound access error check */
+        if (gfn_end <= gfn_start)
+                return -EINVAL;
+
+        /* lets make sure that gpa exist in our memslot */
+        pfn_start = gfn_to_pfn(kvm, gfn_start);
+        pfn_end = gfn_to_pfn(kvm, gfn_end);
+
+        if (is_error_noslot_pfn(pfn_start) && !is_noslot_pfn(pfn_start)) {
+                /*
+                 * Allow guest MMIO range(s) to be added
+                 * to the page encryption bitmap.
+                 */
+                return -EINVAL;
+        }
+
+        if (is_error_noslot_pfn(pfn_end) && !is_noslot_pfn(pfn_end)) {
+                /*
+                 * Allow guest MMIO range(s) to be added
+                 * to the page encryption bitmap.
+                 */
+                return -EINVAL;
+        }
+
+        mutex_lock(&kvm->lock);
+        ret = sev_resize_page_enc_bitmap(kvm, gfn_end);
+        if (ret)
+                goto unlock;
+
+        if (enc)
+                __bitmap_set(sev->page_enc_bmap, gfn_start,
+                                gfn_end - gfn_start);
+        else
+                __bitmap_clear(sev->page_enc_bmap, gfn_start,
+                                gfn_end - gfn_start);
+
+unlock:
+        mutex_unlock(&kvm->lock);
+        return ret;
+}
+
 static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
 {
         struct kvm_sev_cmd sev_cmd;
@@ -7972,6 +8064,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
         .need_emulation_on_page_fault = svm_need_emulation_on_page_fault,
 
         .apic_init_signal_blocked = svm_apic_init_signal_blocked,
+
+        .page_enc_status_hc = svm_page_enc_status_hc,
 };
 
 static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 9a6664886f2e..7963f2979fdf 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7879,6 +7879,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
         .nested_get_evmcs_version = NULL,
         .need_emulation_on_page_fault = vmx_need_emulation_on_page_fault,
         .apic_init_signal_blocked = vmx_apic_init_signal_blocked,
+        .page_enc_status_hc = NULL,
 };
 
 static void vmx_cleanup_l1d_flush(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fbabb2f06273..298627fa3d39 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7547,6 +7547,12 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
                 kvm_sched_yield(vcpu->kvm, a0);
                 ret = 0;
                 break;
+        case KVM_HC_PAGE_ENC_STATUS:
+                ret = -KVM_ENOSYS;
+                if (kvm_x86_ops->page_enc_status_hc)
+                        ret = kvm_x86_ops->page_enc_status_hc(vcpu->kvm,
+                                a0, a1, a2);
+                break;
         default:
                 ret = -KVM_ENOSYS;
                 break;
diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h
index 8b86609849b9..847b83b75dc8 100644
--- a/include/uapi/linux/kvm_para.h
+++ b/include/uapi/linux/kvm_para.h
@@ -29,6 +29,7 @@
 #define KVM_HC_CLOCK_PAIRING            9
 #define KVM_HC_SEND_IPI                 10
 #define KVM_HC_SCHED_YIELD              11
+#define KVM_HC_PAGE_ENC_STATUS          12
 
 /*
  * hypercalls use architecture specific
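As a usage sketch for the hypercall documented above: a guest changing the
C-bit on a range of pages would notify the host along these lines (the
helper below is illustrative only, not part of this patch):

#include <linux/types.h>
#include <linux/kvm_para.h>

/*
 * Illustrative guest-side helper: report that npages 4K pages starting at
 * paddr are now mapped encrypted (enc = 1) or decrypted (enc = 0), matching
 * the a0/a1/a2 layout documented for KVM_HC_PAGE_ENC_STATUS.
 */
static void notify_page_enc_status(unsigned long paddr, unsigned long npages,
                                   bool enc)
{
        kvm_hypercall3(KVM_HC_PAGE_ENC_STATUS, paddr, npages, enc ? 1 : 0);
}

The natural call sites would be wherever the guest flips the encryption
attribute in its page tables, e.g. the set_memory_encrypted() /
set_memory_decrypted() paths.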