Message ID | 5d05799fc61994684aa2b2ddb8c5b326a3279e25.1655761627.git.ashish.kalra@amd.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Add AMD Secure Nested Paging (SEV-SNP) | expand |
On Mon, Jun 20, 2022 at 5:13 PM Ashish Kalra <Ashish.Kalra@amd.com> wrote: > > From: Brijesh Singh <brijesh.singh@amd.com> > > Version 2 of GHCB specification added the support for two SNP Guest > Request Message NAE events. The events allows for an SEV-SNP guest to > make request to the SEV-SNP firmware through hypervisor using the > SNP_GUEST_REQUEST API define in the SEV-SNP firmware specification. > > The SNP_EXT_GUEST_REQUEST is similar to SNP_GUEST_REQUEST with the > difference of an additional certificate blob that can be passed through > the SNP_SET_CONFIG ioctl defined in the CCP driver. The CCP driver > provides snp_guest_ext_guest_request() that is used by the KVM to get > both the report and certificate data at once. > > Signed-off-by: Brijesh Singh <brijesh.singh@amd.com> > --- > arch/x86/kvm/svm/sev.c | 196 +++++++++++++++++++++++++++++++++++++++-- > arch/x86/kvm/svm/svm.h | 2 + > 2 files changed, 192 insertions(+), 6 deletions(-) > > diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c > index 7fc0fad87054..089af21a4efe 100644 > --- a/arch/x86/kvm/svm/sev.c > +++ b/arch/x86/kvm/svm/sev.c > @@ -343,6 +343,7 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) > > spin_lock_init(&sev->psc_lock); > ret = sev_snp_init(&argp->error); > + mutex_init(&sev->guest_req_lock); > } else { > ret = sev_platform_init(&argp->error); > } > @@ -1884,23 +1885,39 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd) > > static void *snp_context_create(struct kvm *kvm, struct kvm_sev_cmd *argp) > { > + void *context = NULL, *certs_data = NULL, *resp_page = NULL; Is the NULL setting here unnecessary since all of these are set via functions snp_alloc_firmware_page(), kmalloc(), and snp_alloc_firmware_page() respectively? > + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; > struct sev_data_snp_gctx_create data = {}; > - void *context; > int rc; > > + /* Allocate memory used for the certs data in SNP guest request */ > + certs_data = kmalloc(SEV_FW_BLOB_MAX_SIZE, GFP_KERNEL_ACCOUNT); > + if (!certs_data) > + return NULL; I think we want to use kzalloc() here to ensure we never give the guest uninitialized kernel memory. > + > /* Allocate memory for context page */ > context = snp_alloc_firmware_page(GFP_KERNEL_ACCOUNT); > if (!context) > - return NULL; > + goto e_free; > + > + /* Allocate a firmware buffer used during the guest command handling. */ > + resp_page = snp_alloc_firmware_page(GFP_KERNEL_ACCOUNT); > + if (!resp_page) > + goto e_free; |resp_page| doesn't appear to be used anywhere? > > data.gctx_paddr = __psp_pa(context); > rc = __sev_issue_cmd(argp->sev_fd, SEV_CMD_SNP_GCTX_CREATE, &data, &argp->error); > - if (rc) { > - snp_free_firmware_page(context); > - return NULL; > - } > + if (rc) > + goto e_free; > + > + sev->snp_certs_data = certs_data; > > return context; > + > +e_free: > + snp_free_firmware_page(context); > + kfree(certs_data); > + return NULL; > } > > static int snp_bind_asid(struct kvm *kvm, int *error) > @@ -2565,6 +2582,8 @@ static int snp_decommission_context(struct kvm *kvm) > snp_free_firmware_page(sev->snp_context); > sev->snp_context = NULL; > > + kfree(sev->snp_certs_data); > + > return 0; > } > > @@ -3077,6 +3096,8 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm, u64 *exit_code) > case SVM_VMGEXIT_UNSUPPORTED_EVENT: > case SVM_VMGEXIT_HV_FEATURES: > case SVM_VMGEXIT_PSC: > + case SVM_VMGEXIT_GUEST_REQUEST: > + case SVM_VMGEXIT_EXT_GUEST_REQUEST: > break; > default: > reason = GHCB_ERR_INVALID_EVENT; > @@ -3502,6 +3523,155 @@ static unsigned long snp_handle_page_state_change(struct vcpu_svm *svm) > return rc ? map_to_psc_vmgexit_code(rc) : 0; > } > > +static unsigned long snp_setup_guest_buf(struct vcpu_svm *svm, > + struct sev_data_snp_guest_request *data, > + gpa_t req_gpa, gpa_t resp_gpa) > +{ > + struct kvm_vcpu *vcpu = &svm->vcpu; > + struct kvm *kvm = vcpu->kvm; > + kvm_pfn_t req_pfn, resp_pfn; > + struct kvm_sev_info *sev; > + > + sev = &to_kvm_svm(kvm)->sev_info; This is normally done at declaration in this file. Why not here? struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; > + > + if (!IS_ALIGNED(req_gpa, PAGE_SIZE) || !IS_ALIGNED(resp_gpa, PAGE_SIZE)) > + return SEV_RET_INVALID_PARAM; > + > + req_pfn = gfn_to_pfn(kvm, gpa_to_gfn(req_gpa)); > + if (is_error_noslot_pfn(req_pfn)) > + return SEV_RET_INVALID_ADDRESS; > + > + resp_pfn = gfn_to_pfn(kvm, gpa_to_gfn(resp_gpa)); > + if (is_error_noslot_pfn(resp_pfn)) > + return SEV_RET_INVALID_ADDRESS; > + > + if (rmp_make_private(resp_pfn, 0, PG_LEVEL_4K, 0, true)) > + return SEV_RET_INVALID_ADDRESS; > + > + data->gctx_paddr = __psp_pa(sev->snp_context); > + data->req_paddr = __sme_set(req_pfn << PAGE_SHIFT); > + data->res_paddr = __sme_set(resp_pfn << PAGE_SHIFT); > + > + return 0; > +} > + > +static void snp_cleanup_guest_buf(struct sev_data_snp_guest_request *data, unsigned long *rc) > +{ > + u64 pfn = __sme_clr(data->res_paddr) >> PAGE_SHIFT; > + int ret; > + > + ret = snp_page_reclaim(pfn); > + if (ret) > + *rc = SEV_RET_INVALID_ADDRESS; Do we need a diff error code here? This means the page the guest gives us is now "stuck" in the FW owned state. How would the guest know this is the case? We return the exact same error in snp_setup_guest_buf() if the resp_gpa isn't page aligned so now if the guest ever sees a SEV_RET_INVALID_ADDRESS I think its only safe option is to either try and page_state_change it to a know state or mark it as unusable memory. > + > + ret = rmp_make_shared(pfn, PG_LEVEL_4K); > + if (ret) > + *rc = SEV_RET_INVALID_ADDRESS; Ditto here I think we need some way to signal to the guest what state this page is on return to guest execution. Also these errors shadow over FW successes, this means the guest's guest-request-sequence-numbers are now out of sync meaning this VMPCK is unusable less the guest risk reusing the AES IV (which would break the confidentiality/integrity). Should we have a way to signal to the guest the FW has successfully run your command but we could not change the page states back correctly, so the guest should increment their sequence numbers. > +} > + > +static void snp_handle_guest_request(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t resp_gpa) > +{ > + struct sev_data_snp_guest_request data = {0}; > + struct kvm_vcpu *vcpu = &svm->vcpu; > + struct kvm *kvm = vcpu->kvm; > + struct kvm_sev_info *sev; > + unsigned long rc; > + int err; > + > + if (!sev_snp_guest(vcpu->kvm)) { > + rc = SEV_RET_INVALID_GUEST; > + goto e_fail; > + } > + > + sev = &to_kvm_svm(kvm)->sev_info; Ditto why not due this above? > + > + mutex_lock(&sev->guest_req_lock); > + > + rc = snp_setup_guest_buf(svm, &data, req_gpa, resp_gpa); > + if (rc) > + goto unlock; > + > + rc = sev_issue_cmd(kvm, SEV_CMD_SNP_GUEST_REQUEST, &data, &err); > + if (rc) > + /* use the firmware error code */ > + rc = err; > + > + snp_cleanup_guest_buf(&data, &rc); > + > +unlock: > + mutex_unlock(&sev->guest_req_lock); > + > +e_fail: > + svm_set_ghcb_sw_exit_info_2(vcpu, rc); > +} > + > +static void snp_handle_ext_guest_request(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t resp_gpa) > +{ > + struct sev_data_snp_guest_request req = {0}; > + struct kvm_vcpu *vcpu = &svm->vcpu; > + struct kvm *kvm = vcpu->kvm; > + unsigned long data_npages; > + struct kvm_sev_info *sev; > + unsigned long rc, err; > + u64 data_gpa; > + > + if (!sev_snp_guest(vcpu->kvm)) { > + rc = SEV_RET_INVALID_GUEST; > + goto e_fail; > + } > + > + sev = &to_kvm_svm(kvm)->sev_info; > + > + data_gpa = vcpu->arch.regs[VCPU_REGS_RAX]; > + data_npages = vcpu->arch.regs[VCPU_REGS_RBX]; > + > + if (!IS_ALIGNED(data_gpa, PAGE_SIZE)) { > + rc = SEV_RET_INVALID_ADDRESS; > + goto e_fail; > + } > + > + /* Verify that requested blob will fit in certificate buffer */ > + if ((data_npages << PAGE_SHIFT) > SEV_FW_BLOB_MAX_SIZE) { > + rc = SEV_RET_INVALID_PARAM; > + goto e_fail; > + } > + > + mutex_lock(&sev->guest_req_lock); > + > + rc = snp_setup_guest_buf(svm, &req, req_gpa, resp_gpa); > + if (rc) > + goto unlock; > + > + rc = snp_guest_ext_guest_request(&req, (unsigned long)sev->snp_certs_data, > + &data_npages, &err); > + if (rc) { > + /* > + * If buffer length is small then return the expected > + * length in rbx. > + */ > + if (err == SNP_GUEST_REQ_INVALID_LEN) > + vcpu->arch.regs[VCPU_REGS_RBX] = data_npages; > + > + /* pass the firmware error code */ > + rc = err; > + goto cleanup; > + } > + > + /* Copy the certificate blob in the guest memory */ > + if (data_npages && > + kvm_write_guest(kvm, data_gpa, sev->snp_certs_data, data_npages << PAGE_SHIFT)) > + rc = SEV_RET_INVALID_ADDRESS; Since at this point the PSP FW has correctly executed the command and incremented the VMPCK sequence number I think we need another error signal here since this will tell the guest the PSP had an error so it will not know if the VMPCK sequence number should be incremented. > + > +cleanup: > + snp_cleanup_guest_buf(&req, &rc); > + > +unlock: > + mutex_unlock(&sev->guest_req_lock); > + > +e_fail: > + svm_set_ghcb_sw_exit_info_2(vcpu, rc); > +} > + > static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) > { > struct vmcb_control_area *control = &svm->vmcb->control; > @@ -3753,6 +3923,20 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) > svm_set_ghcb_sw_exit_info_2(vcpu, rc); > break; > } > + case SVM_VMGEXIT_GUEST_REQUEST: { > + snp_handle_guest_request(svm, control->exit_info_1, control->exit_info_2); > + > + ret = 1; > + break; > + } > + case SVM_VMGEXIT_EXT_GUEST_REQUEST: { > + snp_handle_ext_guest_request(svm, > + control->exit_info_1, > + control->exit_info_2); > + > + ret = 1; > + break; > + } > case SVM_VMGEXIT_UNSUPPORTED_EVENT: > vcpu_unimpl(vcpu, > "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n", > diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h > index 3fd95193ed8d..3be24da1a743 100644 > --- a/arch/x86/kvm/svm/svm.h > +++ b/arch/x86/kvm/svm/svm.h > @@ -98,6 +98,8 @@ struct kvm_sev_info { > u64 snp_init_flags; > void *snp_context; /* SNP guest context page */ > spinlock_t psc_lock; > + void *snp_certs_data; > + struct mutex guest_req_lock; > }; > > struct kvm_svm { > -- > 2.25.1 >
[Public] Hello Peter, -----Original Message----- From: Peter Gonda <pgonda@google.com> Sent: Friday, June 24, 2022 11:25 AM To: Kalra, Ashish <Ashish.Kalra@amd.com> Cc: the arch/x86 maintainers <x86@kernel.org>; LKML <linux-kernel@vger.kernel.org>; kvm list <kvm@vger.kernel.org>; linux-coco@lists.linux.dev; linux-mm@kvack.org; Linux Crypto Mailing List <linux-crypto@vger.kernel.org>; Thomas Gleixner <tglx@linutronix.de>; Ingo Molnar <mingo@redhat.com>; Joerg Roedel <jroedel@suse.de>; Lendacky, Thomas <Thomas.Lendacky@amd.com>; H. Peter Anvin <hpa@zytor.com>; Ard Biesheuvel <ardb@kernel.org>; Paolo Bonzini <pbonzini@redhat.com>; Sean Christopherson <seanjc@google.com>; Vitaly Kuznetsov <vkuznets@redhat.com>; Jim Mattson <jmattson@google.com>; Andy Lutomirski <luto@kernel.org>; Dave Hansen <dave.hansen@linux.intel.com>; Sergio Lopez <slp@redhat.com>; Peter Zijlstra <peterz@infradead.org>; Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>; David Rientjes <rientjes@google.com>; Dov Murik <dovmurik@linux.ibm.com>; Tobin Feldman-Fitzthum <tobin@ibm.com>; Borislav Petkov <bp@alien8.de>; Roth, Michael <Michael.Roth@amd.com>; Vlastimil Babka <vbabka@suse.cz>; Kirill A . Shutemov <kirill@shutemov.name>; Andi Kleen <ak@linux.intel.com>; Tony Luck <tony.luck@intel.com>; Marc Orr <marcorr@google.com>; Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@linux.intel.com>; Alper Gun <alpergun@google.com>; Dr. David Alan Gilbert <dgilbert@redhat.com>; jarkko@kernel.org Subject: Re: [PATCH Part2 v6 42/49] KVM: SVM: Provide support for SNP_GUEST_REQUEST NAE event On Mon, Jun 20, 2022 at 5:13 PM Ashish Kalra <Ashish.Kalra@amd.com> wrote: > > From: Brijesh Singh <brijesh.singh@amd.com> > > Version 2 of GHCB specification added the support for two SNP Guest > Request Message NAE events. The events allows for an SEV-SNP guest to > make request to the SEV-SNP firmware through hypervisor using the > SNP_GUEST_REQUEST API define in the SEV-SNP firmware specification. > > The SNP_EXT_GUEST_REQUEST is similar to SNP_GUEST_REQUEST with the > difference of an additional certificate blob that can be passed > through the SNP_SET_CONFIG ioctl defined in the CCP driver. The CCP > driver provides snp_guest_ext_guest_request() that is used by the KVM > to get both the report and certificate data at once. > > Signed-off-by: Brijesh Singh <brijesh.singh@amd.com> > --- > arch/x86/kvm/svm/sev.c | 196 +++++++++++++++++++++++++++++++++++++++-- > arch/x86/kvm/svm/svm.h | 2 + > 2 files changed, 192 insertions(+), 6 deletions(-) > > diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index > 7fc0fad87054..089af21a4efe 100644 > --- a/arch/x86/kvm/svm/sev.c > +++ b/arch/x86/kvm/svm/sev.c > @@ -343,6 +343,7 @@ static int sev_guest_init(struct kvm *kvm, struct > kvm_sev_cmd *argp) > > spin_lock_init(&sev->psc_lock); > ret = sev_snp_init(&argp->error); > + mutex_init(&sev->guest_req_lock); > } else { > ret = sev_platform_init(&argp->error); > } > @@ -1884,23 +1885,39 @@ int sev_vm_move_enc_context_from(struct kvm > *kvm, unsigned int source_fd) > > static void *snp_context_create(struct kvm *kvm, struct kvm_sev_cmd > *argp) { > + void *context = NULL, *certs_data = NULL, *resp_page = NULL; >Is the NULL setting here unnecessary since all of these are set via functions snp_alloc_firmware_page(), kmalloc(), and >snp_alloc_firmware_page() respectively? Yes, they don't need to be set to NULL. > + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; > struct sev_data_snp_gctx_create data = {}; > - void *context; > int rc; > > + /* Allocate memory used for the certs data in SNP guest request */ > + certs_data = kmalloc(SEV_FW_BLOB_MAX_SIZE, GFP_KERNEL_ACCOUNT); > + if (!certs_data) > + return NULL; >I think we want to use kzalloc() here to ensure we never give the guest uninitialized kernel memory. Yes. > + > /* Allocate memory for context page */ > context = snp_alloc_firmware_page(GFP_KERNEL_ACCOUNT); > if (!context) > - return NULL; > + goto e_free; > + > + /* Allocate a firmware buffer used during the guest command handling. */ > + resp_page = snp_alloc_firmware_page(GFP_KERNEL_ACCOUNT); > + if (!resp_page) > + goto e_free; >|resp_page| doesn't appear to be used anywhere? > > data.gctx_paddr = __psp_pa(context); > rc = __sev_issue_cmd(argp->sev_fd, SEV_CMD_SNP_GCTX_CREATE, &data, &argp->error); > - if (rc) { > - snp_free_firmware_page(context); > - return NULL; > - } > + if (rc) > + goto e_free; > + > + sev->snp_certs_data = certs_data; > > return context; > + > +e_free: > + snp_free_firmware_page(context); > + kfree(certs_data); > + return NULL; > } > > static int snp_bind_asid(struct kvm *kvm, int *error) @@ -2565,6 > +2582,8 @@ static int snp_decommission_context(struct kvm *kvm) > snp_free_firmware_page(sev->snp_context); > sev->snp_context = NULL; > > + kfree(sev->snp_certs_data); > + > return 0; > } > > @@ -3077,6 +3096,8 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm, u64 *exit_code) > case SVM_VMGEXIT_UNSUPPORTED_EVENT: > case SVM_VMGEXIT_HV_FEATURES: > case SVM_VMGEXIT_PSC: > + case SVM_VMGEXIT_GUEST_REQUEST: > + case SVM_VMGEXIT_EXT_GUEST_REQUEST: > break; > default: > reason = GHCB_ERR_INVALID_EVENT; @@ -3502,6 +3523,155 > @@ static unsigned long snp_handle_page_state_change(struct vcpu_svm *svm) > return rc ? map_to_psc_vmgexit_code(rc) : 0; } > > +static unsigned long snp_setup_guest_buf(struct vcpu_svm *svm, > + struct sev_data_snp_guest_request *data, > + gpa_t req_gpa, gpa_t > +resp_gpa) { > + struct kvm_vcpu *vcpu = &svm->vcpu; > + struct kvm *kvm = vcpu->kvm; > + kvm_pfn_t req_pfn, resp_pfn; > + struct kvm_sev_info *sev; > + > + sev = &to_kvm_svm(kvm)->sev_info; >This is normally done at declaration in this file. Why not here? > struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; Ok. > + > + if (!IS_ALIGNED(req_gpa, PAGE_SIZE) || !IS_ALIGNED(resp_gpa, PAGE_SIZE)) > + return SEV_RET_INVALID_PARAM; > + > + req_pfn = gfn_to_pfn(kvm, gpa_to_gfn(req_gpa)); > + if (is_error_noslot_pfn(req_pfn)) > + return SEV_RET_INVALID_ADDRESS; > + > + resp_pfn = gfn_to_pfn(kvm, gpa_to_gfn(resp_gpa)); > + if (is_error_noslot_pfn(resp_pfn)) > + return SEV_RET_INVALID_ADDRESS; > + > + if (rmp_make_private(resp_pfn, 0, PG_LEVEL_4K, 0, true)) > + return SEV_RET_INVALID_ADDRESS; > + > + data->gctx_paddr = __psp_pa(sev->snp_context); > + data->req_paddr = __sme_set(req_pfn << PAGE_SHIFT); > + data->res_paddr = __sme_set(resp_pfn << PAGE_SHIFT); > + > + return 0; > +} > + > +static void snp_cleanup_guest_buf(struct sev_data_snp_guest_request > +*data, unsigned long *rc) { > + u64 pfn = __sme_clr(data->res_paddr) >> PAGE_SHIFT; > + int ret; > + > + ret = snp_page_reclaim(pfn); > + if (ret) > + *rc = SEV_RET_INVALID_ADDRESS; >Do we need a diff error code here? This means the page the guest gives us is now "stuck" in the FW owned state. How would the guest know this is the case? We return the exact same error in snp_setup_guest_buf() if the resp_gpa isn't page aligned >so now if the guest ever sees a SEV_RET_INVALID_ADDRESS I think its only safe option is to either try and page_state_change it to a know state or mark it as unusable memory. If snp_page_reclaim() fails, it will invoke snp_leak_pages() which will indicate memory failure and trigger memory recovery mechanisms and that should drop the pages or mark them unusable. > + > + ret = rmp_make_shared(pfn, PG_LEVEL_4K); > + if (ret) > + *rc = SEV_RET_INVALID_ADDRESS; >Ditto here I think we need some way to signal to the guest what state this page is on return to guest execution. >Also these errors shadow over FW successes, this means the guest's guest-request-sequence-numbers are now out of sync meaning this VMPCK is unusable less the guest risk reusing the AES IV (which would break the confidentiality/integrity). >Should we have a way to signal to the guest the FW has successfully run your command but we could not change the page states back correctly, so the guest should increment their sequence numbers. Yes, that is an important observation, and the sequence numbers are now out of sync. But as this is an error path, so what's the guarantee that the next guest message request will succeed completely, isn’t it better to let the FW reject any subsequent guest messages once it has detected that the sequence numbers are out of sync ? > +} > + > +static void snp_handle_guest_request(struct vcpu_svm *svm, gpa_t > +req_gpa, gpa_t resp_gpa) { > + struct sev_data_snp_guest_request data = {0}; > + struct kvm_vcpu *vcpu = &svm->vcpu; > + struct kvm *kvm = vcpu->kvm; > + struct kvm_sev_info *sev; > + unsigned long rc; > + int err; > + > + if (!sev_snp_guest(vcpu->kvm)) { > + rc = SEV_RET_INVALID_GUEST; > + goto e_fail; > + } > + > + sev = &to_kvm_svm(kvm)->sev_info; >Ditto why not due this above? Ok. > + > + mutex_lock(&sev->guest_req_lock); > + > + rc = snp_setup_guest_buf(svm, &data, req_gpa, resp_gpa); > + if (rc) > + goto unlock; > + > + rc = sev_issue_cmd(kvm, SEV_CMD_SNP_GUEST_REQUEST, &data, &err); > + if (rc) > + /* use the firmware error code */ > + rc = err; > + > + snp_cleanup_guest_buf(&data, &rc); > + > +unlock: > + mutex_unlock(&sev->guest_req_lock); > + > +e_fail: > + svm_set_ghcb_sw_exit_info_2(vcpu, rc); } > + > +static void snp_handle_ext_guest_request(struct vcpu_svm *svm, gpa_t > +req_gpa, gpa_t resp_gpa) { > + struct sev_data_snp_guest_request req = {0}; > + struct kvm_vcpu *vcpu = &svm->vcpu; > + struct kvm *kvm = vcpu->kvm; > + unsigned long data_npages; > + struct kvm_sev_info *sev; > + unsigned long rc, err; > + u64 data_gpa; > + > + if (!sev_snp_guest(vcpu->kvm)) { > + rc = SEV_RET_INVALID_GUEST; > + goto e_fail; > + } > + > + sev = &to_kvm_svm(kvm)->sev_info; > + > + data_gpa = vcpu->arch.regs[VCPU_REGS_RAX]; > + data_npages = vcpu->arch.regs[VCPU_REGS_RBX]; > + > + if (!IS_ALIGNED(data_gpa, PAGE_SIZE)) { > + rc = SEV_RET_INVALID_ADDRESS; > + goto e_fail; > + } > + > + /* Verify that requested blob will fit in certificate buffer */ > + if ((data_npages << PAGE_SHIFT) > SEV_FW_BLOB_MAX_SIZE) { > + rc = SEV_RET_INVALID_PARAM; > + goto e_fail; > + } > + > + mutex_lock(&sev->guest_req_lock); > + > + rc = snp_setup_guest_buf(svm, &req, req_gpa, resp_gpa); > + if (rc) > + goto unlock; > + > + rc = snp_guest_ext_guest_request(&req, (unsigned long)sev->snp_certs_data, > + &data_npages, &err); > + if (rc) { > + /* > + * If buffer length is small then return the expected > + * length in rbx. > + */ > + if (err == SNP_GUEST_REQ_INVALID_LEN) > + vcpu->arch.regs[VCPU_REGS_RBX] = data_npages; > + > + /* pass the firmware error code */ > + rc = err; > + goto cleanup; > + } > + > + /* Copy the certificate blob in the guest memory */ > + if (data_npages && > + kvm_write_guest(kvm, data_gpa, sev->snp_certs_data, data_npages << PAGE_SHIFT)) > + rc = SEV_RET_INVALID_ADDRESS; >Since at this point the PSP FW has correctly executed the command and incremented the VMPCK sequence number I think we need another error signal here since this will tell the guest the PSP had an error so it will not know if the VMPCK sequence >number should be incremented. Similarly as above, as this is an error path, so what's the guarantee that the next guest message request will succeed completely, isn’t it better to let the FW reject any subsequent guest messages once it has detected that the sequence numbers are out of sync ? > + > +cleanup: > + snp_cleanup_guest_buf(&req, &rc); > + > +unlock: > + mutex_unlock(&sev->guest_req_lock); > + > +e_fail: > + svm_set_ghcb_sw_exit_info_2(vcpu, rc); } > + > static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) { > struct vmcb_control_area *control = &svm->vmcb->control; @@ > -3753,6 +3923,20 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) > svm_set_ghcb_sw_exit_info_2(vcpu, rc); > break; > } > + case SVM_VMGEXIT_GUEST_REQUEST: { > + snp_handle_guest_request(svm, control->exit_info_1, > + control->exit_info_2); > + > + ret = 1; > + break; > + } > + case SVM_VMGEXIT_EXT_GUEST_REQUEST: { > + snp_handle_ext_guest_request(svm, > + control->exit_info_1, > + control->exit_info_2); > + > + ret = 1; > + break; > + } > case SVM_VMGEXIT_UNSUPPORTED_EVENT: > vcpu_unimpl(vcpu, > "vmgexit: unsupported event - > exit_info_1=%#llx, exit_info_2=%#llx\n", diff --git > a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index > 3fd95193ed8d..3be24da1a743 100644 > --- a/arch/x86/kvm/svm/svm.h > +++ b/arch/x86/kvm/svm/svm.h > @@ -98,6 +98,8 @@ struct kvm_sev_info { > u64 snp_init_flags; > void *snp_context; /* SNP guest context page */ > spinlock_t psc_lock; > + void *snp_certs_data; > + struct mutex guest_req_lock; > }; > > struct kvm_svm { > -- > 2.25.1 >
[Public] >> +static void snp_handle_ext_guest_request(struct vcpu_svm *svm, gpa_t >> +req_gpa, gpa_t resp_gpa) { >> + struct sev_data_snp_guest_request req = {0}; >> + struct kvm_vcpu *vcpu = &svm->vcpu; >> + struct kvm *kvm = vcpu->kvm; >> + unsigned long data_npages; >> + struct kvm_sev_info *sev; >> + unsigned long rc, err; >> + u64 data_gpa; >> + >> + if (!sev_snp_guest(vcpu->kvm)) { >> + rc = SEV_RET_INVALID_GUEST; >> + goto e_fail; >> + } >> + >> + sev = &to_kvm_svm(kvm)->sev_info; >> + >> + data_gpa = vcpu->arch.regs[VCPU_REGS_RAX]; >> + data_npages = vcpu->arch.regs[VCPU_REGS_RBX]; >> + >> + if (!IS_ALIGNED(data_gpa, PAGE_SIZE)) { >> + rc = SEV_RET_INVALID_ADDRESS; >> + goto e_fail; >> + } >> + >> + /* Verify that requested blob will fit in certificate buffer */ >> + if ((data_npages << PAGE_SHIFT) > SEV_FW_BLOB_MAX_SIZE) { >> + rc = SEV_RET_INVALID_PARAM; >> + goto e_fail; >> + } >> + >> + mutex_lock(&sev->guest_req_lock); >> + >> + rc = snp_setup_guest_buf(svm, &req, req_gpa, resp_gpa); >> + if (rc) >> + goto unlock; >> + >> + rc = snp_guest_ext_guest_request(&req, (unsigned long)sev->snp_certs_data, >> + &data_npages, &err); >> + if (rc) { >> + /* >> + * If buffer length is small then return the expected >> + * length in rbx. >> + */ >> + if (err == SNP_GUEST_REQ_INVALID_LEN) >> + vcpu->arch.regs[VCPU_REGS_RBX] = data_npages; >> + >> + /* pass the firmware error code */ >> + rc = err; >> + goto cleanup; >> + } >> + >> + /* Copy the certificate blob in the guest memory */ >> + if (data_npages && >> + kvm_write_guest(kvm, data_gpa, sev->snp_certs_data, data_npages << PAGE_SHIFT)) >> + rc = SEV_RET_INVALID_ADDRESS; >>Since at this point the PSP FW has correctly executed the command and incremented the VMPCK sequence number I think we need another error signal here since this will tell the guest the PSP had an error so it will not know if the VMPCK sequence >number should be incremented. >Similarly as above, as this is an error path, so what's the guarantee that the next guest message request will succeed completely, isn’t it better to let the >FW reject any subsequent guest messages once it has detected that the sequence numbers are out of sync ? Alternately, we probably can return SEV_RET_INVALID_PAGE_STATE/SEV_RET_INVALID_PAGE_OWNER here, but that still does not indicate to the guest that the FW has successfully executed the command and the error occurred during cleanup/result phase and it needs to increment the VMPCK sequence number. There is nothing as such defined in SNP FW API specs to indicate such kind of failures to guest. As I mentioned earlier, this is probably indicative of a bigger system failure and it is better to let the FW reject subsequent guest messages/requests once it has detected that the sequence numbers are out of sync. Thanks, Ashish
On Wed, Jun 29, 2022 at 1:15 PM Kalra, Ashish <Ashish.Kalra@amd.com> wrote: > > [Public] > > > >> +static void snp_handle_ext_guest_request(struct vcpu_svm *svm, gpa_t > >> +req_gpa, gpa_t resp_gpa) { > >> + struct sev_data_snp_guest_request req = {0}; > >> + struct kvm_vcpu *vcpu = &svm->vcpu; > >> + struct kvm *kvm = vcpu->kvm; > >> + unsigned long data_npages; > >> + struct kvm_sev_info *sev; > >> + unsigned long rc, err; > >> + u64 data_gpa; > >> + > >> + if (!sev_snp_guest(vcpu->kvm)) { > >> + rc = SEV_RET_INVALID_GUEST; > >> + goto e_fail; > >> + } > >> + > >> + sev = &to_kvm_svm(kvm)->sev_info; > >> + > >> + data_gpa = vcpu->arch.regs[VCPU_REGS_RAX]; > >> + data_npages = vcpu->arch.regs[VCPU_REGS_RBX]; > >> + > >> + if (!IS_ALIGNED(data_gpa, PAGE_SIZE)) { > >> + rc = SEV_RET_INVALID_ADDRESS; > >> + goto e_fail; > >> + } > >> + > >> + /* Verify that requested blob will fit in certificate buffer */ > >> + if ((data_npages << PAGE_SHIFT) > SEV_FW_BLOB_MAX_SIZE) { > >> + rc = SEV_RET_INVALID_PARAM; > >> + goto e_fail; > >> + } > >> + > >> + mutex_lock(&sev->guest_req_lock); > >> + > >> + rc = snp_setup_guest_buf(svm, &req, req_gpa, resp_gpa); > >> + if (rc) > >> + goto unlock; > >> + > >> + rc = snp_guest_ext_guest_request(&req, (unsigned long)sev->snp_certs_data, > >> + &data_npages, &err); > >> + if (rc) { > >> + /* > >> + * If buffer length is small then return the expected > >> + * length in rbx. > >> + */ > >> + if (err == SNP_GUEST_REQ_INVALID_LEN) > >> + vcpu->arch.regs[VCPU_REGS_RBX] = data_npages; > >> + > >> + /* pass the firmware error code */ > >> + rc = err; > >> + goto cleanup; > >> + } > >> + > >> + /* Copy the certificate blob in the guest memory */ > >> + if (data_npages && > >> + kvm_write_guest(kvm, data_gpa, sev->snp_certs_data, data_npages << PAGE_SHIFT)) > >> + rc = SEV_RET_INVALID_ADDRESS; > > >>Since at this point the PSP FW has correctly executed the command and incremented the VMPCK sequence number I think we need another error signal here since this will tell the guest the PSP had an error so it will not know if the VMPCK sequence >number should be incremented. > > >Similarly as above, as this is an error path, so what's the guarantee that the next guest message request will succeed completely, isn’t it better to let the > >FW reject any subsequent guest messages once it has detected that the sequence numbers are out of sync ? > > Alternately, we probably can return SEV_RET_INVALID_PAGE_STATE/SEV_RET_INVALID_PAGE_OWNER here, but that still does not indicate to the guest > that the FW has successfully executed the command and the error occurred during cleanup/result phase and it needs to increment the VMPCK sequence number. There is nothing as such defined in SNP FW API specs to indicate such kind of failures to guest. As I mentioned earlier, this is probably indicative of > a bigger system failure and it is better to let the FW reject subsequent guest messages/requests once it has detected that the sequence numbers are out of sync. Hmm I think the guest must be careful here because the guest could not trust the hypervisor here to be truthful about the sequence numbers incrementing. That's unfortunate since this means if these operations do fail with a well behaved hypervisor the guest cannot use that VMPCK again. But there is no harm in the guest re-issuing the SNP_GUEST_REQUEST (or extended version) with the exact same request just in at a different address. The GHCB spec actually calls this out " It is recommended that the hypervisor validate the guest physical address of the response page before invoking the SNP_GUEST_REQUEST API so that the sequence numbers do not get out of sync for the guest, possibly resulting in all successive requests failing". Currently SVM_VMGEXIT_GUEST_REQUEST and SVM_VMGEXIT_EXT_GUEST_REQUEST have different hypervisor -> guest usage for SW_EXITINFO2. I think they both should be defined as what SVM_VMGEXIT_EXT_GUEST_REQUEST is now: the high 32bits are the hypervisor error code, the low 32bits are the FW error code. This would allow for both NAEs to have some signal to the guest say SEV_RET_INVALID_REQ_ADDRESS. The hypervisor can use this error code when doing the validation on the request and response regions, if some is wrong with them the guest can retry with the exact same request (so no IV reuse) in a corrected region. But another reason I think SVM_VMGEXIT_GUEST_REQUEST SW_EXITINFO2 hypervisor->guest state should include this change is because in this patch we are currently overloading the lower 32bits with hypervisor error codes. In snp_handle_guest_request() if sev_snp_guest(), snp_setup_guest_buf(), or snp_cleanup_guest_buf() fails we use the low 32bits of SW_EXITINFO2 to return hypervisor errors to the guest. > > Thanks, > Ashish
On 6/20/22 18:13, Ashish Kalra wrote: > From: Brijesh Singh <brijesh.singh@amd.com> > > Version 2 of GHCB specification added the support for two SNP Guest > Request Message NAE events. The events allows for an SEV-SNP guest to > make request to the SEV-SNP firmware through hypervisor using the > SNP_GUEST_REQUEST API define in the SEV-SNP firmware specification. > > The SNP_EXT_GUEST_REQUEST is similar to SNP_GUEST_REQUEST with the > difference of an additional certificate blob that can be passed through > the SNP_SET_CONFIG ioctl defined in the CCP driver. The CCP driver > provides snp_guest_ext_guest_request() that is used by the KVM to get > both the report and certificate data at once. > > Signed-off-by: Brijesh Singh <brijesh.singh@amd.com> > --- > arch/x86/kvm/svm/sev.c | 196 +++++++++++++++++++++++++++++++++++++++-- > arch/x86/kvm/svm/svm.h | 2 + > 2 files changed, 192 insertions(+), 6 deletions(-) > > diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c > index 7fc0fad87054..089af21a4efe 100644 > --- a/arch/x86/kvm/svm/sev.c > +++ b/arch/x86/kvm/svm/sev.c > +static void snp_handle_ext_guest_request(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t resp_gpa) > +{ > + struct sev_data_snp_guest_request req = {0}; > + struct kvm_vcpu *vcpu = &svm->vcpu; > + struct kvm *kvm = vcpu->kvm; > + unsigned long data_npages; > + struct kvm_sev_info *sev; > + unsigned long rc, err; > + u64 data_gpa; > + > + if (!sev_snp_guest(vcpu->kvm)) { > + rc = SEV_RET_INVALID_GUEST; > + goto e_fail; > + } > + > + sev = &to_kvm_svm(kvm)->sev_info; > + > + data_gpa = vcpu->arch.regs[VCPU_REGS_RAX]; > + data_npages = vcpu->arch.regs[VCPU_REGS_RBX]; > + > + if (!IS_ALIGNED(data_gpa, PAGE_SIZE)) { > + rc = SEV_RET_INVALID_ADDRESS; > + goto e_fail; > + } > + > + /* Verify that requested blob will fit in certificate buffer */ > + if ((data_npages << PAGE_SHIFT) > SEV_FW_BLOB_MAX_SIZE) { Not sure this is a valid check... Isn't it OK if the guest has supplied more room than is required? If the guest supplies 8 pages and the hypervisor only needs to copy 1 page of data (or the SEV_FW_BLOB_MAX_SIZE number of pages) that shouldn't be an error. I think this check can go, right? Thanks, Tom > + rc = SEV_RET_INVALID_PARAM; > + goto e_fail; > + } > + > + mutex_lock(&sev->guest_req_lock); > + > + rc = snp_setup_guest_buf(svm, &req, req_gpa, resp_gpa); > + if (rc) > + goto unlock; > + > + rc = snp_guest_ext_guest_request(&req, (unsigned long)sev->snp_certs_data, > + &data_npages, &err); > + if (rc) { > + /* > + * If buffer length is small then return the expected > + * length in rbx. > + */ > + if (err == SNP_GUEST_REQ_INVALID_LEN) > + vcpu->arch.regs[VCPU_REGS_RBX] = data_npages; > + > + /* pass the firmware error code */ > + rc = err; > + goto cleanup; > + } > + > + /* Copy the certificate blob in the guest memory */ > + if (data_npages && > + kvm_write_guest(kvm, data_gpa, sev->snp_certs_data, data_npages << PAGE_SHIFT)) > + rc = SEV_RET_INVALID_ADDRESS; > + > +cleanup: > + snp_cleanup_guest_buf(&req, &rc); > + > +unlock: > + mutex_unlock(&sev->guest_req_lock); > + > +e_fail: > + svm_set_ghcb_sw_exit_info_2(vcpu, rc); > +} > +
Hello Tom, On 10/21/2022 2:06 PM, Tom Lendacky wrote: > On 6/20/22 18:13, Ashish Kalra wrote: >> From: Brijesh Singh <brijesh.singh@amd.com> >> >> Version 2 of GHCB specification added the support for two SNP Guest >> Request Message NAE events. The events allows for an SEV-SNP guest to >> make request to the SEV-SNP firmware through hypervisor using the >> SNP_GUEST_REQUEST API define in the SEV-SNP firmware specification. >> >> The SNP_EXT_GUEST_REQUEST is similar to SNP_GUEST_REQUEST with the >> difference of an additional certificate blob that can be passed through >> the SNP_SET_CONFIG ioctl defined in the CCP driver. The CCP driver >> provides snp_guest_ext_guest_request() that is used by the KVM to get >> both the report and certificate data at once. >> >> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com> >> --- >> arch/x86/kvm/svm/sev.c | 196 +++++++++++++++++++++++++++++++++++++++-- >> arch/x86/kvm/svm/svm.h | 2 + >> 2 files changed, 192 insertions(+), 6 deletions(-) >> >> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c >> index 7fc0fad87054..089af21a4efe 100644 >> --- a/arch/x86/kvm/svm/sev.c >> +++ b/arch/x86/kvm/svm/sev.c > >> +static void snp_handle_ext_guest_request(struct vcpu_svm *svm, gpa_t >> req_gpa, gpa_t resp_gpa) >> +{ >> + struct sev_data_snp_guest_request req = {0}; >> + struct kvm_vcpu *vcpu = &svm->vcpu; >> + struct kvm *kvm = vcpu->kvm; >> + unsigned long data_npages; >> + struct kvm_sev_info *sev; >> + unsigned long rc, err; >> + u64 data_gpa; >> + >> + if (!sev_snp_guest(vcpu->kvm)) { >> + rc = SEV_RET_INVALID_GUEST; >> + goto e_fail; >> + } >> + >> + sev = &to_kvm_svm(kvm)->sev_info; >> + >> + data_gpa = vcpu->arch.regs[VCPU_REGS_RAX]; >> + data_npages = vcpu->arch.regs[VCPU_REGS_RBX]; >> + >> + if (!IS_ALIGNED(data_gpa, PAGE_SIZE)) { >> + rc = SEV_RET_INVALID_ADDRESS; >> + goto e_fail; >> + } >> + >> + /* Verify that requested blob will fit in certificate buffer */ >> + if ((data_npages << PAGE_SHIFT) > SEV_FW_BLOB_MAX_SIZE) { > > Not sure this is a valid check... Isn't it OK if the guest has supplied > more room than is required? If the guest supplies 8 pages and the > hypervisor only needs to copy 1 page of data (or the > SEV_FW_BLOB_MAX_SIZE number of pages) that shouldn't be an error. I > think this check can go, right? > Agreed. The check should probably be if ((data_npages << PAGE_SHIFT) < SEV_FW_BLOB_MAX_SIZE) and that check already exists in: snp_guest_ext_guest_request(...) { ... ... /* * Check if there is enough space to copy the certificate chain. Otherwise * return ERROR code defined in the GHCB specification. */ expected_npages = sev->snp_certs_len >> PAGE_SHIFT; if (*npages < expected_npages) { *npages = expected_npages; *fw_err = SNP_GUEST_REQ_INVALID_LEN; return -EINVAL; } ... Thanks, Ashish > Thanks, > Tom > >> + rc = SEV_RET_INVALID_PARAM; >> + goto e_fail; >> + } >> + >> + mutex_lock(&sev->guest_req_lock); >> + >> + rc = snp_setup_guest_buf(svm, &req, req_gpa, resp_gpa); >> + if (rc) >> + goto unlock; >> + >> + rc = snp_guest_ext_guest_request(&req, (unsigned >> long)sev->snp_certs_data, >> + &data_npages, &err); >> + if (rc) { >> + /* >> + * If buffer length is small then return the expected >> + * length in rbx. >> + */ >> + if (err == SNP_GUEST_REQ_INVALID_LEN) >> + vcpu->arch.regs[VCPU_REGS_RBX] = data_npages; >> + >> + /* pass the firmware error code */ >> + rc = err; >> + goto cleanup; >> + } >> + >> + /* Copy the certificate blob in the guest memory */ >> + if (data_npages && >> + kvm_write_guest(kvm, data_gpa, sev->snp_certs_data, >> data_npages << PAGE_SHIFT)) >> + rc = SEV_RET_INVALID_ADDRESS; >> + >> +cleanup: >> + snp_cleanup_guest_buf(&req, &rc); >> + >> +unlock: >> + mutex_unlock(&sev->guest_req_lock); >> + >> +e_fail: >> + svm_set_ghcb_sw_exit_info_2(vcpu, rc); >> +} >> +
On 10/21/22 16:12, Kalra, Ashish wrote: > Hello Tom, > > On 10/21/2022 2:06 PM, Tom Lendacky wrote: >> On 6/20/22 18:13, Ashish Kalra wrote: >>> From: Brijesh Singh <brijesh.singh@amd.com> >>> >>> Version 2 of GHCB specification added the support for two SNP Guest >>> Request Message NAE events. The events allows for an SEV-SNP guest to >>> make request to the SEV-SNP firmware through hypervisor using the >>> SNP_GUEST_REQUEST API define in the SEV-SNP firmware specification. >>> >>> The SNP_EXT_GUEST_REQUEST is similar to SNP_GUEST_REQUEST with the >>> difference of an additional certificate blob that can be passed through >>> the SNP_SET_CONFIG ioctl defined in the CCP driver. The CCP driver >>> provides snp_guest_ext_guest_request() that is used by the KVM to get >>> both the report and certificate data at once. >>> >>> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com> >>> --- >>> arch/x86/kvm/svm/sev.c | 196 +++++++++++++++++++++++++++++++++++++++-- >>> arch/x86/kvm/svm/svm.h | 2 + >>> 2 files changed, 192 insertions(+), 6 deletions(-) >>> >>> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c >>> index 7fc0fad87054..089af21a4efe 100644 >>> --- a/arch/x86/kvm/svm/sev.c >>> +++ b/arch/x86/kvm/svm/sev.c >> >>> +static void snp_handle_ext_guest_request(struct vcpu_svm *svm, gpa_t >>> req_gpa, gpa_t resp_gpa) >>> +{ >>> + struct sev_data_snp_guest_request req = {0}; >>> + struct kvm_vcpu *vcpu = &svm->vcpu; >>> + struct kvm *kvm = vcpu->kvm; >>> + unsigned long data_npages; >>> + struct kvm_sev_info *sev; >>> + unsigned long rc, err; >>> + u64 data_gpa; >>> + >>> + if (!sev_snp_guest(vcpu->kvm)) { >>> + rc = SEV_RET_INVALID_GUEST; >>> + goto e_fail; >>> + } >>> + >>> + sev = &to_kvm_svm(kvm)->sev_info; >>> + >>> + data_gpa = vcpu->arch.regs[VCPU_REGS_RAX]; >>> + data_npages = vcpu->arch.regs[VCPU_REGS_RBX]; >>> + >>> + if (!IS_ALIGNED(data_gpa, PAGE_SIZE)) { >>> + rc = SEV_RET_INVALID_ADDRESS; >>> + goto e_fail; >>> + } >>> + >>> + /* Verify that requested blob will fit in certificate buffer */ >>> + if ((data_npages << PAGE_SHIFT) > SEV_FW_BLOB_MAX_SIZE) { >> >> Not sure this is a valid check... Isn't it OK if the guest has supplied >> more room than is required? If the guest supplies 8 pages and the >> hypervisor only needs to copy 1 page of data (or the >> SEV_FW_BLOB_MAX_SIZE number of pages) that shouldn't be an error. I >> think this check can go, right? >> > > Agreed. > > The check should probably be > if ((data_npages << PAGE_SHIFT) < SEV_FW_BLOB_MAX_SIZE) No, the check should just be removed. If the number of pages required to hold the cert data is only 1, then a data_npages value of 1 is just fine (see below). > > and that check already exists in: > > snp_guest_ext_guest_request(...) > { > ... > ... > /* > * Check if there is enough space to copy the certificate chain. > Otherwise > * return ERROR code defined in the GHCB specification. > */ > expected_npages = sev->snp_certs_len >> PAGE_SHIFT; > if (*npages < expected_npages) { If expected_npages is 1, then an *npages value of 1 is OK. But if you put the check in above that you want, you would never get here with an *npages value of 1. Thanks, Tom > *npages = expected_npages; > *fw_err = SNP_GUEST_REQ_INVALID_LEN; > return -EINVAL; > } > ... > > Thanks, > Ashish > >> Thanks, >> Tom >> >>> + rc = SEV_RET_INVALID_PARAM; >>> + goto e_fail; >>> + } >>> + >>> + mutex_lock(&sev->guest_req_lock); >>> + >>> + rc = snp_setup_guest_buf(svm, &req, req_gpa, resp_gpa); >>> + if (rc) >>> + goto unlock; >>> + >>> + rc = snp_guest_ext_guest_request(&req, (unsigned >>> long)sev->snp_certs_data, >>> + &data_npages, &err); >>> + if (rc) { >>> + /* >>> + * If buffer length is small then return the expected >>> + * length in rbx. >>> + */ >>> + if (err == SNP_GUEST_REQ_INVALID_LEN) >>> + vcpu->arch.regs[VCPU_REGS_RBX] = data_npages; >>> + >>> + /* pass the firmware error code */ >>> + rc = err; >>> + goto cleanup; >>> + } >>> + >>> + /* Copy the certificate blob in the guest memory */ >>> + if (data_npages && >>> + kvm_write_guest(kvm, data_gpa, sev->snp_certs_data, >>> data_npages << PAGE_SHIFT)) >>> + rc = SEV_RET_INVALID_ADDRESS; >>> + >>> +cleanup: >>> + snp_cleanup_guest_buf(&req, &rc); >>> + >>> +unlock: >>> + mutex_unlock(&sev->guest_req_lock); >>> + >>> +e_fail: >>> + svm_set_ghcb_sw_exit_info_2(vcpu, rc); >>> +} >>> +
On 10/21/2022 4:30 PM, Tom Lendacky wrote: > On 10/21/22 16:12, Kalra, Ashish wrote: >> Hello Tom, >> >> On 10/21/2022 2:06 PM, Tom Lendacky wrote: >>> On 6/20/22 18:13, Ashish Kalra wrote: >>>> From: Brijesh Singh <brijesh.singh@amd.com> >>>> >>>> Version 2 of GHCB specification added the support for two SNP Guest >>>> Request Message NAE events. The events allows for an SEV-SNP guest to >>>> make request to the SEV-SNP firmware through hypervisor using the >>>> SNP_GUEST_REQUEST API define in the SEV-SNP firmware specification. >>>> >>>> The SNP_EXT_GUEST_REQUEST is similar to SNP_GUEST_REQUEST with the >>>> difference of an additional certificate blob that can be passed through >>>> the SNP_SET_CONFIG ioctl defined in the CCP driver. The CCP driver >>>> provides snp_guest_ext_guest_request() that is used by the KVM to get >>>> both the report and certificate data at once. >>>> >>>> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com> >>>> --- >>>> arch/x86/kvm/svm/sev.c | 196 >>>> +++++++++++++++++++++++++++++++++++++++-- >>>> arch/x86/kvm/svm/svm.h | 2 + >>>> 2 files changed, 192 insertions(+), 6 deletions(-) >>>> >>>> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c >>>> index 7fc0fad87054..089af21a4efe 100644 >>>> --- a/arch/x86/kvm/svm/sev.c >>>> +++ b/arch/x86/kvm/svm/sev.c >>> >>>> +static void snp_handle_ext_guest_request(struct vcpu_svm *svm, >>>> gpa_t req_gpa, gpa_t resp_gpa) >>>> +{ >>>> + struct sev_data_snp_guest_request req = {0}; >>>> + struct kvm_vcpu *vcpu = &svm->vcpu; >>>> + struct kvm *kvm = vcpu->kvm; >>>> + unsigned long data_npages; >>>> + struct kvm_sev_info *sev; >>>> + unsigned long rc, err; >>>> + u64 data_gpa; >>>> + >>>> + if (!sev_snp_guest(vcpu->kvm)) { >>>> + rc = SEV_RET_INVALID_GUEST; >>>> + goto e_fail; >>>> + } >>>> + >>>> + sev = &to_kvm_svm(kvm)->sev_info; >>>> + >>>> + data_gpa = vcpu->arch.regs[VCPU_REGS_RAX]; >>>> + data_npages = vcpu->arch.regs[VCPU_REGS_RBX]; >>>> + >>>> + if (!IS_ALIGNED(data_gpa, PAGE_SIZE)) { >>>> + rc = SEV_RET_INVALID_ADDRESS; >>>> + goto e_fail; >>>> + } >>>> + >>>> + /* Verify that requested blob will fit in certificate buffer */ >>>> + if ((data_npages << PAGE_SHIFT) > SEV_FW_BLOB_MAX_SIZE) { >>> >>> Not sure this is a valid check... Isn't it OK if the guest has >>> supplied more room than is required? If the guest supplies 8 pages >>> and the hypervisor only needs to copy 1 page of data (or the >>> SEV_FW_BLOB_MAX_SIZE number of pages) that shouldn't be an error. I >>> think this check can go, right? >>> >> >> Agreed. >> >> The check should probably be >> if ((data_npages << PAGE_SHIFT) < SEV_FW_BLOB_MAX_SIZE) > > No, the check should just be removed. If the number of pages required to > hold the cert data is only 1, then a data_npages value of 1 is just fine > (see below). > Ok. >> >> and that check already exists in: >> >> snp_guest_ext_guest_request(...) >> { >> ... >> ... >> /* >> * Check if there is enough space to copy the certificate >> chain. Otherwise >> * return ERROR code defined in the GHCB specification. >> */ >> expected_npages = sev->snp_certs_len >> PAGE_SHIFT; >> if (*npages < expected_npages) { > > If expected_npages is 1, then an *npages value of 1 is OK. But if you > put the check in above that you want, you would never get here with an > *npages value of 1. Yes that is correct, i incorrectly assumed that sev->snp_certs_len == SEV_FW_BLOB_MAX_SIZE Thanks, Ashish > > Thanks, > Tom > >> *npages = expected_npages; >> *fw_err = SNP_GUEST_REQ_INVALID_LEN; >> return -EINVAL; >> } >> ... >> >> Thanks, >> Ashish >> >>> Thanks, >>> Tom >>> >>>> + rc = SEV_RET_INVALID_PARAM; >>>> + goto e_fail; >>>> + } >>>> + >>>> + mutex_lock(&sev->guest_req_lock); >>>> + >>>> + rc = snp_setup_guest_buf(svm, &req, req_gpa, resp_gpa); >>>> + if (rc) >>>> + goto unlock; >>>> + >>>> + rc = snp_guest_ext_guest_request(&req, (unsigned >>>> long)sev->snp_certs_data, >>>> + &data_npages, &err); >>>> + if (rc) { >>>> + /* >>>> + * If buffer length is small then return the expected >>>> + * length in rbx. >>>> + */ >>>> + if (err == SNP_GUEST_REQ_INVALID_LEN) >>>> + vcpu->arch.regs[VCPU_REGS_RBX] = data_npages; >>>> + >>>> + /* pass the firmware error code */ >>>> + rc = err; >>>> + goto cleanup; >>>> + } >>>> + >>>> + /* Copy the certificate blob in the guest memory */ >>>> + if (data_npages && >>>> + kvm_write_guest(kvm, data_gpa, sev->snp_certs_data, >>>> data_npages << PAGE_SHIFT)) >>>> + rc = SEV_RET_INVALID_ADDRESS; >>>> + >>>> +cleanup: >>>> + snp_cleanup_guest_buf(&req, &rc); >>>> + >>>> +unlock: >>>> + mutex_unlock(&sev->guest_req_lock); >>>> + >>>> +e_fail: >>>> + svm_set_ghcb_sw_exit_info_2(vcpu, rc); >>>> +} >>>> +
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 7fc0fad87054..089af21a4efe 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -343,6 +343,7 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) spin_lock_init(&sev->psc_lock); ret = sev_snp_init(&argp->error); + mutex_init(&sev->guest_req_lock); } else { ret = sev_platform_init(&argp->error); } @@ -1884,23 +1885,39 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd) static void *snp_context_create(struct kvm *kvm, struct kvm_sev_cmd *argp) { + void *context = NULL, *certs_data = NULL, *resp_page = NULL; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct sev_data_snp_gctx_create data = {}; - void *context; int rc; + /* Allocate memory used for the certs data in SNP guest request */ + certs_data = kmalloc(SEV_FW_BLOB_MAX_SIZE, GFP_KERNEL_ACCOUNT); + if (!certs_data) + return NULL; + /* Allocate memory for context page */ context = snp_alloc_firmware_page(GFP_KERNEL_ACCOUNT); if (!context) - return NULL; + goto e_free; + + /* Allocate a firmware buffer used during the guest command handling. */ + resp_page = snp_alloc_firmware_page(GFP_KERNEL_ACCOUNT); + if (!resp_page) + goto e_free; data.gctx_paddr = __psp_pa(context); rc = __sev_issue_cmd(argp->sev_fd, SEV_CMD_SNP_GCTX_CREATE, &data, &argp->error); - if (rc) { - snp_free_firmware_page(context); - return NULL; - } + if (rc) + goto e_free; + + sev->snp_certs_data = certs_data; return context; + +e_free: + snp_free_firmware_page(context); + kfree(certs_data); + return NULL; } static int snp_bind_asid(struct kvm *kvm, int *error) @@ -2565,6 +2582,8 @@ static int snp_decommission_context(struct kvm *kvm) snp_free_firmware_page(sev->snp_context); sev->snp_context = NULL; + kfree(sev->snp_certs_data); + return 0; } @@ -3077,6 +3096,8 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm, u64 *exit_code) case SVM_VMGEXIT_UNSUPPORTED_EVENT: case SVM_VMGEXIT_HV_FEATURES: case SVM_VMGEXIT_PSC: + case SVM_VMGEXIT_GUEST_REQUEST: + case SVM_VMGEXIT_EXT_GUEST_REQUEST: break; default: reason = GHCB_ERR_INVALID_EVENT; @@ -3502,6 +3523,155 @@ static unsigned long snp_handle_page_state_change(struct vcpu_svm *svm) return rc ? map_to_psc_vmgexit_code(rc) : 0; } +static unsigned long snp_setup_guest_buf(struct vcpu_svm *svm, + struct sev_data_snp_guest_request *data, + gpa_t req_gpa, gpa_t resp_gpa) +{ + struct kvm_vcpu *vcpu = &svm->vcpu; + struct kvm *kvm = vcpu->kvm; + kvm_pfn_t req_pfn, resp_pfn; + struct kvm_sev_info *sev; + + sev = &to_kvm_svm(kvm)->sev_info; + + if (!IS_ALIGNED(req_gpa, PAGE_SIZE) || !IS_ALIGNED(resp_gpa, PAGE_SIZE)) + return SEV_RET_INVALID_PARAM; + + req_pfn = gfn_to_pfn(kvm, gpa_to_gfn(req_gpa)); + if (is_error_noslot_pfn(req_pfn)) + return SEV_RET_INVALID_ADDRESS; + + resp_pfn = gfn_to_pfn(kvm, gpa_to_gfn(resp_gpa)); + if (is_error_noslot_pfn(resp_pfn)) + return SEV_RET_INVALID_ADDRESS; + + if (rmp_make_private(resp_pfn, 0, PG_LEVEL_4K, 0, true)) + return SEV_RET_INVALID_ADDRESS; + + data->gctx_paddr = __psp_pa(sev->snp_context); + data->req_paddr = __sme_set(req_pfn << PAGE_SHIFT); + data->res_paddr = __sme_set(resp_pfn << PAGE_SHIFT); + + return 0; +} + +static void snp_cleanup_guest_buf(struct sev_data_snp_guest_request *data, unsigned long *rc) +{ + u64 pfn = __sme_clr(data->res_paddr) >> PAGE_SHIFT; + int ret; + + ret = snp_page_reclaim(pfn); + if (ret) + *rc = SEV_RET_INVALID_ADDRESS; + + ret = rmp_make_shared(pfn, PG_LEVEL_4K); + if (ret) + *rc = SEV_RET_INVALID_ADDRESS; +} + +static void snp_handle_guest_request(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t resp_gpa) +{ + struct sev_data_snp_guest_request data = {0}; + struct kvm_vcpu *vcpu = &svm->vcpu; + struct kvm *kvm = vcpu->kvm; + struct kvm_sev_info *sev; + unsigned long rc; + int err; + + if (!sev_snp_guest(vcpu->kvm)) { + rc = SEV_RET_INVALID_GUEST; + goto e_fail; + } + + sev = &to_kvm_svm(kvm)->sev_info; + + mutex_lock(&sev->guest_req_lock); + + rc = snp_setup_guest_buf(svm, &data, req_gpa, resp_gpa); + if (rc) + goto unlock; + + rc = sev_issue_cmd(kvm, SEV_CMD_SNP_GUEST_REQUEST, &data, &err); + if (rc) + /* use the firmware error code */ + rc = err; + + snp_cleanup_guest_buf(&data, &rc); + +unlock: + mutex_unlock(&sev->guest_req_lock); + +e_fail: + svm_set_ghcb_sw_exit_info_2(vcpu, rc); +} + +static void snp_handle_ext_guest_request(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t resp_gpa) +{ + struct sev_data_snp_guest_request req = {0}; + struct kvm_vcpu *vcpu = &svm->vcpu; + struct kvm *kvm = vcpu->kvm; + unsigned long data_npages; + struct kvm_sev_info *sev; + unsigned long rc, err; + u64 data_gpa; + + if (!sev_snp_guest(vcpu->kvm)) { + rc = SEV_RET_INVALID_GUEST; + goto e_fail; + } + + sev = &to_kvm_svm(kvm)->sev_info; + + data_gpa = vcpu->arch.regs[VCPU_REGS_RAX]; + data_npages = vcpu->arch.regs[VCPU_REGS_RBX]; + + if (!IS_ALIGNED(data_gpa, PAGE_SIZE)) { + rc = SEV_RET_INVALID_ADDRESS; + goto e_fail; + } + + /* Verify that requested blob will fit in certificate buffer */ + if ((data_npages << PAGE_SHIFT) > SEV_FW_BLOB_MAX_SIZE) { + rc = SEV_RET_INVALID_PARAM; + goto e_fail; + } + + mutex_lock(&sev->guest_req_lock); + + rc = snp_setup_guest_buf(svm, &req, req_gpa, resp_gpa); + if (rc) + goto unlock; + + rc = snp_guest_ext_guest_request(&req, (unsigned long)sev->snp_certs_data, + &data_npages, &err); + if (rc) { + /* + * If buffer length is small then return the expected + * length in rbx. + */ + if (err == SNP_GUEST_REQ_INVALID_LEN) + vcpu->arch.regs[VCPU_REGS_RBX] = data_npages; + + /* pass the firmware error code */ + rc = err; + goto cleanup; + } + + /* Copy the certificate blob in the guest memory */ + if (data_npages && + kvm_write_guest(kvm, data_gpa, sev->snp_certs_data, data_npages << PAGE_SHIFT)) + rc = SEV_RET_INVALID_ADDRESS; + +cleanup: + snp_cleanup_guest_buf(&req, &rc); + +unlock: + mutex_unlock(&sev->guest_req_lock); + +e_fail: + svm_set_ghcb_sw_exit_info_2(vcpu, rc); +} + static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) { struct vmcb_control_area *control = &svm->vmcb->control; @@ -3753,6 +3923,20 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) svm_set_ghcb_sw_exit_info_2(vcpu, rc); break; } + case SVM_VMGEXIT_GUEST_REQUEST: { + snp_handle_guest_request(svm, control->exit_info_1, control->exit_info_2); + + ret = 1; + break; + } + case SVM_VMGEXIT_EXT_GUEST_REQUEST: { + snp_handle_ext_guest_request(svm, + control->exit_info_1, + control->exit_info_2); + + ret = 1; + break; + } case SVM_VMGEXIT_UNSUPPORTED_EVENT: vcpu_unimpl(vcpu, "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n", diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 3fd95193ed8d..3be24da1a743 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -98,6 +98,8 @@ struct kvm_sev_info { u64 snp_init_flags; void *snp_context; /* SNP guest context page */ spinlock_t psc_lock; + void *snp_certs_data; + struct mutex guest_req_lock; }; struct kvm_svm {