Message ID | 20230220183847.59159-45-michael.roth@amd.com (mailing list archive) |
---|---|
State | Not Applicable |
Delegated to | Herbert Xu |
Series | Add AMD Secure Nested Paging (SEV-SNP) Hypervisor Support |
On Mon, 20 Feb 2023 12:38:35 -0600
Michael Roth <michael.roth@amd.com> wrote:

> From: Brijesh Singh <brijesh.singh@amd.com>
>
> When SEV-SNP is enabled in the guest, the hardware places restrictions
> on all memory accesses based on the contents of the RMP table. When
> hardware encounters RMP check failure caused by the guest memory access
> it raises the #NPF. The error code contains additional information on
> the access type. See the APM volume 2 for additional information.
>
> Page state changes are handled by userspace, so if an RMP fault is
> triggered as a result of an RMP NPT fault, exit to userspace just like
> with explicit page-state change requests.
>
> RMP NPT faults can also occur if the guest pvalidates a 2M page as 4K,
> in which case the RMP entries need to be PSMASH'd. Handle this case
> immediately in the kernel.
>
> Co-developed-by: Michael Roth <michael.roth@amd.com>
> Signed-off-by: Michael Roth <michael.roth@amd.com>
> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
> ---
>  arch/x86/kvm/svm/sev.c | 84 ++++++++++++++++++++++++++++++++++++++++++
>  arch/x86/kvm/svm/svm.c | 21 +++++++++--
>  arch/x86/kvm/svm/svm.h |  1 +
>  3 files changed, 102 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index 102966c43e28..197b1f904567 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -3347,6 +3347,13 @@ static void set_ghcb_msr(struct vcpu_svm *svm, u64 value)
>  	svm->vmcb->control.ghcb_gpa = value;
>  }
>
> +static int snp_rmptable_psmash(struct kvm *kvm, kvm_pfn_t pfn)
> +{
> +	pfn = pfn & ~(KVM_PAGES_PER_HPAGE(PG_LEVEL_2M) - 1);
> +
> +	return psmash(pfn);
> +}
> +
>  /*
>   * TODO: need to get the value set by userspace in vcpu->run->vmgexit.ghcb_msr
>   * and process that here accordingly.
> @@ -3872,3 +3879,80 @@ void sev_adjust_mapping_level(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int *le
>  	pr_debug("%s: GFN: 0x%llx, PFN: 0x%llx, level: %d, rmp_level: %d, level_orig: %d, assigned: %d\n",
>  		 __func__, gfn, pfn, *level, rmp_level, level_orig, assigned);
>  }
> +
> +void handle_rmp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code)
> +{
> +	int order, rmp_level, assigned, ret;
> +	struct kvm_memory_slot *slot;
> +	struct kvm *kvm = vcpu->kvm;
> +	kvm_pfn_t pfn;
> +	gfn_t gfn;
> +
> +	/*
> +	 * Private memslots punt handling of implicit page state changes to
                              ^put
> +	 * userspace, so the only RMP faults expected here for
> +	 * PFERR_GUEST_SIZEM_MASK. Anything else suggests that the RMP table has
> +	 * gotten out of sync with the private memslot.
> +	 *
> +	 * TODO: However, this case has also been noticed when an access occurs
> +	 * to an NPT mapping that has just been split/PSMASHED, in which case
> +	 * PFERR_GUEST_SIZEM_MASK might not be set. In those cases it should be
> +	 * safe to ignore and let the guest retry, but log these just in case
> +	 * for now.
> +	 */
> +	if (!(error_code & PFERR_GUEST_SIZEM_MASK)) {
> +		pr_warn_ratelimited("Unexpected RMP fault for GPA 0x%llx, error_code 0x%llx",
> +				    gpa, error_code);
> +		return;
> +	}
> +
> +	gfn = gpa >> PAGE_SHIFT;
> +
> +	/*
> +	 * Only RMPADJUST/PVALIDATE should cause PFERR_GUEST_SIZEM.
> +	 *
> +	 * For PVALIDATE, this should only happen if a guest PVALIDATEs a 4K GFN
> +	 * that is backed by a huge page in the host whose RMP entry has the
> +	 * hugepage/assigned bits set. With UPM, that should only ever happen
> +	 * for private pages.
> +	 *
> +	 * For RMPADJUST, this assumption might not hold, in which case handling
> +	 * for obtaining the PFN from HVA-backed memory may be needed. For now,
> +	 * just print warnings.
> +	 */
> +	if (!kvm_mem_is_private(kvm, gfn)) {
> +		pr_warn_ratelimited("Unexpected RMP fault, size-mismatch for non-private GPA 0x%llx\n",
> +				    gpa);
> +		return;
> +	}
> +
> +	slot = gfn_to_memslot(kvm, gfn);
> +	if (!kvm_slot_can_be_private(slot)) {
> +		pr_warn_ratelimited("Unexpected RMP fault, non-private slot for GPA 0x%llx\n",
> +				    gpa);
> +		return;
> +	}
> +
> +	ret = kvm_restrictedmem_get_pfn(slot, gfn, &pfn, &order);
> +	if (ret) {
> +		pr_warn_ratelimited("Unexpected RMP fault, no private backing page for GPA 0x%llx\n",
> +				    gpa);
> +		return;
> +	}
> +
> +	assigned = snp_lookup_rmpentry(pfn, &rmp_level);
> +	if (assigned != 1) {
> +		pr_warn_ratelimited("Unexpected RMP fault, no assigned RMP entry for GPA 0x%llx\n",
> +				    gpa);
> +		goto out;
> +	}
> +
> +	ret = snp_rmptable_psmash(kvm, pfn);
> +	if (ret)
> +		pr_err_ratelimited("Unable to split RMP entries for GPA 0x%llx PFN 0x%llx ret %d\n",
> +				   gpa, pfn, ret);
> +
> +out:
> +	kvm_zap_gfn_range(kvm, gfn, gfn + PTRS_PER_PMD);
> +	put_page(pfn_to_page(pfn));
> +}
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 9eb750c8b04c..f9ab4bf6d245 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -1976,15 +1976,28 @@ static int pf_interception(struct kvm_vcpu *vcpu)
>  static int npf_interception(struct kvm_vcpu *vcpu)
>  {
>  	struct vcpu_svm *svm = to_svm(vcpu);
> +	int rc;
>
>  	u64 fault_address = svm->vmcb->control.exit_info_2;
>  	u64 error_code = svm->vmcb->control.exit_info_1;
>
>  	trace_kvm_page_fault(vcpu, fault_address, error_code);
> -	return kvm_mmu_page_fault(vcpu, fault_address, error_code,
> -			static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
> -			svm->vmcb->control.insn_bytes : NULL,
> -			svm->vmcb->control.insn_len);
> +	rc = kvm_mmu_page_fault(vcpu, fault_address, error_code,
> +				static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
> +				svm->vmcb->control.insn_bytes : NULL,
> +				svm->vmcb->control.insn_len);
> +
> +	/*
> +	 * rc == 0 indicates a userspace exit is needed to handle page
> +	 * transitions, so do that first before updating the RMP table.
> +	 */
> +	if (error_code & PFERR_GUEST_RMP_MASK) {
> +		if (rc == 0)
> +			return rc;
> +		handle_rmp_page_fault(vcpu, fault_address, error_code);
> +	}
> +
> +	return rc;
>  }
>
>  static int db_interception(struct kvm_vcpu *vcpu)
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index 0c655a4d32d5..13b00233b315 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -714,6 +714,7 @@ void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa);
>  void sev_es_unmap_ghcb(struct vcpu_svm *svm);
>  struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu);
>  void sev_adjust_mapping_level(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int *level);
> +void handle_rmp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code);
>
>  /* vmenter.S */
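A note on the PSMASH step in the sev.c hunk above: psmash() demotes one 2MB RMP entry into 512 4KB entries, which is why snp_rmptable_psmash() first rounds the faulting PFN down to the start of its 2MB region. Below is a minimal standalone sketch of just that masking arithmetic; `PAGES_PER_2M_HPAGE` and `align_pfn_to_2m()` are hypothetical stand-ins for `KVM_PAGES_PER_HPAGE(PG_LEVEL_2M)` and the in-kernel helper, assuming 4KB base pages.

```c
/*
 * Sketch of the masking in snp_rmptable_psmash(): with 4KB base pages,
 * a 2MB hugepage spans 512 PFNs, so clearing the low nine bits of a PFN
 * yields the 2MB-aligned base PFN that the RMP split operates on. All
 * names here are illustrative stand-ins, not kernel API.
 */
#include <stdint.h>
#include <stdio.h>

#define PAGES_PER_2M_HPAGE 512ULL	/* 2MB / 4KB */

static uint64_t align_pfn_to_2m(uint64_t pfn)
{
	return pfn & ~(PAGES_PER_2M_HPAGE - 1);
}

int main(void)
{
	uint64_t pfn = 0x1234a7;	/* arbitrary PFN inside some 2MB region */

	/* Prints: pfn 0x1234a7 -> 2MB-aligned base 0x123400 */
	printf("pfn 0x%llx -> 2MB-aligned base 0x%llx\n",
	       (unsigned long long)pfn,
	       (unsigned long long)align_pfn_to_2m(pfn));
	return 0;
}
```

The same 512-page granularity shows up again at the end of handle_rmp_page_fault(), where kvm_zap_gfn_range(kvm, gfn, gfn + PTRS_PER_PMD) drops NPT mappings for a PTRS_PER_PMD-sized (512-GFN) window so they can be rebuilt at the new granularity.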
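The svm.c hunk also encodes an ordering rule worth spelling out: a zero return from kvm_mmu_page_fault() means an exit to userspace is pending (as with explicit page-state change requests), and that exit must complete before any host-side RMP fixup is attempted. Here is a self-contained sketch of that control flow; `FAKE_PFERR_GUEST_RMP`, `mmu_page_fault()`, and `fixup_rmp()` are hypothetical stand-ins, not the real KVM definitions.

```c
/*
 * Hypothetical sketch of the dispatch in npf_interception() above;
 * none of these names or bit positions are the real KVM definitions.
 */
#include <stdint.h>
#include <stdio.h>

#define FAKE_PFERR_GUEST_RMP (1ULL << 31)	/* stand-in for PFERR_GUEST_RMP_MASK */

/* Stand-in for kvm_mmu_page_fault(); 0 means "exit to userspace needed". */
static int mmu_page_fault(uint64_t gpa, uint64_t error_code)
{
	(void)gpa;
	(void)error_code;
	return 0;	/* pretend userspace must complete a page-state change */
}

/* Stand-in for handle_rmp_page_fault(): the PSMASH + zap would go here. */
static void fixup_rmp(uint64_t gpa, uint64_t error_code)
{
	printf("would PSMASH/zap for GPA 0x%llx (error_code 0x%llx)\n",
	       (unsigned long long)gpa, (unsigned long long)error_code);
}

static int handle_npf(uint64_t gpa, uint64_t error_code)
{
	int rc = mmu_page_fault(gpa, error_code);

	if (error_code & FAKE_PFERR_GUEST_RMP) {
		/* The userspace exit takes priority over updating the RMP table. */
		if (rc == 0)
			return rc;
		fixup_rmp(gpa, error_code);
	}
	return rc;
}

int main(void)
{
	/* rc == 0 here, so the RMP fixup is deliberately skipped. */
	return handle_npf(0x200000, FAKE_PFERR_GUEST_RMP) ? 1 : 0;
}
```

In the rc == 0 case the RMP table is left untouched at this point; per the commit message, userspace handles the page-state change and the guest then retries the access.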