Message ID | 20210707183616.5620-35-brijesh.singh@amd.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Add AMD Secure Nested Paging (SEV-SNP) Hypervisor Support | expand |
On Wed, Jul 07, 2021, Brijesh Singh wrote: > +static unsigned long snp_handle_psc(struct vcpu_svm *svm, struct ghcb *ghcb) > +{ > + struct kvm_vcpu *vcpu = &svm->vcpu; > + int level, op, rc = PSC_UNDEF_ERR; > + struct snp_psc_desc *info; > + struct psc_entry *entry; > + gpa_t gpa; > + > + if (!sev_snp_guest(vcpu->kvm)) > + goto out; > + > + if (!setup_vmgexit_scratch(svm, true, sizeof(ghcb->save.sw_scratch))) { > + pr_err("vmgexit: scratch area is not setup.\n"); > + rc = PSC_INVALID_HDR; > + goto out; > + } > + > + info = (struct snp_psc_desc *)svm->ghcb_sa; > + entry = &info->entries[info->hdr.cur_entry]; Grabbing "entry" here is unnecessary and confusing. > + > + if ((info->hdr.cur_entry >= VMGEXIT_PSC_MAX_ENTRY) || > + (info->hdr.end_entry >= VMGEXIT_PSC_MAX_ENTRY) || > + (info->hdr.cur_entry > info->hdr.end_entry)) { There's a TOCTOU bug here if the guest uses the GHCB instead of a scratch area. If the guest uses the scratch area, then KVM makes a full copy into kernel memory. But if the guest uses the GHCB, then KVM maps the GHCB into kernel address space but doesn't make a full copy, i.e. the guest can modify the data while it's being processed by KVM. IIRC, Peter and I discussed the sketchiness of the GHCB mapping offline a few times, but determined that there were no existing SEV-ES bugs because the guest could only submarine its own emulation request. But here, it could coerce KVM into running off the end of a buffer. I think you can get away with capturing cur_entry/end_entry locally, though copying the GHCB would be more robust. That would also make the code a bit prettier, e.g. cur = info->hdr.cur_entry; end = info->hdr.end_entry; > + rc = PSC_INVALID_ENTRY; > + goto out; > + } > + > + while (info->hdr.cur_entry <= info->hdr.end_entry) { Make this a for loop? for ( ; cur_entry < end_entry; cur_entry++) > + entry = &info->entries[info->hdr.cur_entry]; Does this need array_index_nospec() treatment? 
> + gpa = gfn_to_gpa(entry->gfn); > + level = RMP_TO_X86_PG_LEVEL(entry->pagesize); > + op = entry->operation; > + > + if (!IS_ALIGNED(gpa, page_level_size(level))) { > + rc = PSC_INVALID_ENTRY; > + goto out; > + } > + > + rc = __snp_handle_psc(vcpu, op, gpa, level); > + if (rc) > + goto out; > + > + info->hdr.cur_entry++; > + } > + > +out: And for the copy case: info->hdr.cur_entry = cur; > + return rc ? map_to_psc_vmgexit_code(rc) : 0; > +}
On 7/16/21 4:14 PM, Sean Christopherson wrote: > On Wed, Jul 07, 2021, Brijesh Singh wrote: >> +static unsigned long snp_handle_psc(struct vcpu_svm *svm, struct ghcb *ghcb) >> +{ >> + struct kvm_vcpu *vcpu = &svm->vcpu; >> + int level, op, rc = PSC_UNDEF_ERR; >> + struct snp_psc_desc *info; >> + struct psc_entry *entry; >> + gpa_t gpa; >> + >> + if (!sev_snp_guest(vcpu->kvm)) >> + goto out; >> + >> + if (!setup_vmgexit_scratch(svm, true, sizeof(ghcb->save.sw_scratch))) { >> + pr_err("vmgexit: scratch area is not setup.\n"); >> + rc = PSC_INVALID_HDR; >> + goto out; >> + } >> + >> + info = (struct snp_psc_desc *)svm->ghcb_sa; >> + entry = &info->entries[info->hdr.cur_entry]; > > Grabbing "entry" here is unnecessary and confusing. Noted. > >> + >> + if ((info->hdr.cur_entry >= VMGEXIT_PSC_MAX_ENTRY) || >> + (info->hdr.end_entry >= VMGEXIT_PSC_MAX_ENTRY) || >> + (info->hdr.cur_entry > info->hdr.end_entry)) { > > There's a TOCTOU bug here if the guest uses the GHCB instead of a scratch area. > If the guest uses the scratch area, then KVM makes a full copy into kernel memory. > But if the guest uses the GHCB, then KVM maps the GHCB into kernel address space > but doesn't make a full copy, i.e. the guest can modify the data while it's being > processed by KVM. > Sure, I can make a full copy of the page-state change buffer. > IIRC, Peter and I discussed the sketchiness of the GHCB mapping offline a few > times, but determined that there were no existing SEV-ES bugs because the guest > could only submarine its own emulation request. But here, it could coerce KVM > into running off the end of a buffer. > > I think you can get away with capturing cur_entry/end_entry locally, though > copying the GHCB would be more robust. That would also make the code a bit > prettier, e.g. > > cur = info->hdr.cur_entry; > end = info->hdr.end_entry; > >> + rc = PSC_INVALID_ENTRY; >> + goto out; >> + } >> + >> + while (info->hdr.cur_entry <= info->hdr.end_entry) { > > Make this a for loop? 
Sure, I can use the for loop. IIRC, in previous review feedback I got the feeling that while() was preferred in part 1, so I used a similar approach here. > > for ( ; cur_entry < end_entry; cur_entry++) > >> + entry = &info->entries[info->hdr.cur_entry]; > > Does this need array_index_nospec() treatment? > I don't think so. thanks
diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index 2561413cb316..a02175752f2d 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -101,6 +101,13 @@ /* SNP Page State Change NAE event */ #define VMGEXIT_PSC_MAX_ENTRY 253 +/* The page state change hdr structure is not valid */ +#define PSC_INVALID_HDR 1 +/* The hdr.cur_entry or hdr.end_entry is not valid */ +#define PSC_INVALID_ENTRY 2 +/* Page state change encountered undefined error */ +#define PSC_UNDEF_ERR 3 + struct __packed psc_hdr { u16 cur_entry; u16 end_entry; diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 68d275b2a660..0155d9b3127d 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2662,6 +2662,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm) case SVM_VMGEXIT_AP_JUMP_TABLE: case SVM_VMGEXIT_UNSUPPORTED_EVENT: case SVM_VMGEXIT_HV_FT: + case SVM_VMGEXIT_PSC: break; default: goto vmgexit_err; @@ -2910,7 +2911,8 @@ static int snp_make_page_private(struct kvm_vcpu *vcpu, gpa_t gpa, kvm_pfn_t pfn static int __snp_handle_psc(struct kvm_vcpu *vcpu, int op, gpa_t gpa, int level) { struct kvm *kvm = vcpu->kvm; - int rc, tdp_level; + int rc = PSC_UNDEF_ERR; + int tdp_level; kvm_pfn_t pfn; gpa_t gpa_end; @@ -2945,8 +2947,11 @@ static int __snp_handle_psc(struct kvm_vcpu *vcpu, int op, gpa_t gpa, int level) case SNP_PAGE_STATE_PRIVATE: rc = snp_make_page_private(vcpu, gpa, pfn, level); break; + case SNP_PAGE_STATE_PSMASH: + case SNP_PAGE_STATE_UNSMASH: + /* TODO: Add support to handle it */ default: - rc = -EINVAL; + rc = PSC_INVALID_ENTRY; break; } @@ -2965,6 +2970,68 @@ static int __snp_handle_psc(struct kvm_vcpu *vcpu, int op, gpa_t gpa, int level) return rc; } +static inline unsigned long map_to_psc_vmgexit_code(int rc) +{ + switch (rc) { + case PSC_INVALID_HDR: + return ((1ul << 32) | 1); + case PSC_INVALID_ENTRY: + return ((1ul << 32) | 2); + case RMPUPDATE_FAIL_OVERLAP: + return ((3ul << 
32) | 2); + default: return (4ul << 32); + } +} + +static unsigned long snp_handle_psc(struct vcpu_svm *svm, struct ghcb *ghcb) +{ + struct kvm_vcpu *vcpu = &svm->vcpu; + int level, op, rc = PSC_UNDEF_ERR; + struct snp_psc_desc *info; + struct psc_entry *entry; + gpa_t gpa; + + if (!sev_snp_guest(vcpu->kvm)) + goto out; + + if (!setup_vmgexit_scratch(svm, true, sizeof(ghcb->save.sw_scratch))) { + pr_err("vmgexit: scratch area is not setup.\n"); + rc = PSC_INVALID_HDR; + goto out; + } + + info = (struct snp_psc_desc *)svm->ghcb_sa; + entry = &info->entries[info->hdr.cur_entry]; + + if ((info->hdr.cur_entry >= VMGEXIT_PSC_MAX_ENTRY) || + (info->hdr.end_entry >= VMGEXIT_PSC_MAX_ENTRY) || + (info->hdr.cur_entry > info->hdr.end_entry)) { + rc = PSC_INVALID_ENTRY; + goto out; + } + + while (info->hdr.cur_entry <= info->hdr.end_entry) { + entry = &info->entries[info->hdr.cur_entry]; + gpa = gfn_to_gpa(entry->gfn); + level = RMP_TO_X86_PG_LEVEL(entry->pagesize); + op = entry->operation; + + if (!IS_ALIGNED(gpa, page_level_size(level))) { + rc = PSC_INVALID_ENTRY; + goto out; + } + + rc = __snp_handle_psc(vcpu, op, gpa, level); + if (rc) + goto out; + + info->hdr.cur_entry++; + } + +out: + return rc ? map_to_psc_vmgexit_code(rc) : 0; +} + static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) { struct vmcb_control_area *control = &svm->vmcb->control; @@ -3209,6 +3276,15 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) ret = 1; break; } + case SVM_VMGEXIT_PSC: { + unsigned long rc; + + ret = 1; + + rc = snp_handle_psc(svm, ghcb); + ghcb_set_sw_exit_info_2(ghcb, rc); + break; + } case SVM_VMGEXIT_UNSUPPORTED_EVENT: vcpu_unimpl(vcpu, "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n", diff --git a/include/linux/sev.h b/include/linux/sev.h index 82e804a2ee0d..d96900b52aa5 100644 --- a/include/linux/sev.h +++ b/include/linux/sev.h @@ -57,6 +57,9 @@ struct rmpupdate { */ #define FAIL_INUSE 3 +/* RMPUPDATE detected 4K page and 2MB page overlap. 
*/ +#define RMPUPDATE_FAIL_OVERLAP 7 + #ifdef CONFIG_AMD_MEM_ENCRYPT struct rmpentry *snp_lookup_page_in_rmptable(struct page *page, int *level); int psmash(struct page *page);
SEV-SNP VMs can ask the hypervisor to change the page state in the RMP table to be private or shared using the Page State Change NAE event as defined in the GHCB specification section 4.1.6. Signed-off-by: Brijesh Singh <brijesh.singh@amd.com> --- arch/x86/include/asm/sev-common.h | 7 +++ arch/x86/kvm/svm/sev.c | 80 ++++++++++++++++++++++++++++++- include/linux/sev.h | 3 ++ 3 files changed, 88 insertions(+), 2 deletions(-)