diff mbox series

[Part2,v5,41/45] KVM: SVM: Add support to handle the RMP nested page fault

Message ID 20210820155918.7518-42-brijesh.singh@amd.com (mailing list archive)
State New
Headers show
Series Add AMD Secure Nested Paging (SEV-SNP) Hypervisor Support | expand

Commit Message

Brijesh Singh Aug. 20, 2021, 3:59 p.m. UTC
When SEV-SNP is enabled in the guest, the hardware places restrictions on
all memory accesses based on the contents of the RMP table. When hardware
encounters RMP check failure caused by the guest memory access it raises
the #NPF. The error code contains additional information on the access
type. See the APM volume 2 for additional information.

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
---
 arch/x86/kvm/svm/sev.c | 76 ++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/svm/svm.c | 14 +++++---
 arch/x86/kvm/svm/svm.h |  1 +
 3 files changed, 87 insertions(+), 4 deletions(-)

Comments

Dr. David Alan Gilbert Sept. 29, 2021, 12:24 p.m. UTC | #1
* Brijesh Singh (brijesh.singh@amd.com) wrote:
> When SEV-SNP is enabled in the guest, the hardware places restrictions on
> all memory accesses based on the contents of the RMP table. When hardware
> encounters RMP check failure caused by the guest memory access it raises
> the #NPF. The error code contains additional information on the access
> type. See the APM volume 2 for additional information.
> 
> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> ---
>  arch/x86/kvm/svm/sev.c | 76 ++++++++++++++++++++++++++++++++++++++++++
>  arch/x86/kvm/svm/svm.c | 14 +++++---
>  arch/x86/kvm/svm/svm.h |  1 +
>  3 files changed, 87 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index 65b578463271..712e8907bc39 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -3651,3 +3651,79 @@ void sev_post_unmap_gfn(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int token)
>  
>  	srcu_read_unlock(&sev->psc_srcu, token);
>  }
> +
> +void handle_rmp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code)
> +{
> +	int rmp_level, npt_level, rc, assigned;
> +	struct kvm *kvm = vcpu->kvm;
> +	gfn_t gfn = gpa_to_gfn(gpa);
> +	bool need_psc = false;
> +	enum psc_op psc_op;
> +	kvm_pfn_t pfn;
> +	bool private;
> +
> +	write_lock(&kvm->mmu_lock);
> +
> +	if (unlikely(!kvm_mmu_get_tdp_walk(vcpu, gpa, &pfn, &npt_level)))
> +		goto unlock;
> +
> +	assigned = snp_lookup_rmpentry(pfn, &rmp_level);
> +	if (unlikely(assigned < 0))
> +		goto unlock;
> +
> +	private = !!(error_code & PFERR_GUEST_ENC_MASK);
> +
> +	/*
> +	 * If the fault was due to size mismatch, or NPT and RMP page level's
> +	 * are not in sync, then use PSMASH to split the RMP entry into 4K.
> +	 */
> +	if ((error_code & PFERR_GUEST_SIZEM_MASK) ||
> +	    (npt_level == PG_LEVEL_4K && rmp_level == PG_LEVEL_2M && private)) {
> +		rc = snp_rmptable_psmash(kvm, pfn);
> +		if (rc)
> +			pr_err_ratelimited("psmash failed, gpa 0x%llx pfn 0x%llx rc %d\n",
> +					   gpa, pfn, rc);
> +		goto out;
> +	}
> +
> +	/*
> +	 * If it's a private access, and the page is not assigned in the
> +	 * RMP table, create a new private RMP entry. This can happen if
> +	 * guest did not use the PSC VMGEXIT to transition the page state
> +	 * before the access.
> +	 */
> +	if (!assigned && private) {
> +		need_psc = 1;
> +		psc_op = SNP_PAGE_STATE_PRIVATE;
> +		goto out;
> +	}
> +
> +	/*
> +	 * If it's a shared access, but the page is private in the RMP table
> +	 * then make the page shared in the RMP table. This can happen if
> +	 * the guest did not use the PSC VMGEXIT to transition the page
> +	 * state before the access.
> +	 */
> +	if (assigned && !private) {
> +		need_psc = 1;
> +		psc_op = SNP_PAGE_STATE_SHARED;
> +	}
> +
> +out:
> +	write_unlock(&kvm->mmu_lock);
> +
> +	if (need_psc)
> +		rc = __snp_handle_page_state_change(vcpu, psc_op, gpa, PG_LEVEL_4K);

That 'rc' never goes anywhere - should it?

> +	/*
> +	 * The fault handler has updated the RMP pagesize, zap the existing
> +	 * rmaps for large entry ranges so that nested page table gets rebuilt
> +	 * with the updated RMP pagesize.
> +	 */
> +	gfn = gpa_to_gfn(gpa) & ~(KVM_PAGES_PER_HPAGE(PG_LEVEL_2M) - 1);
> +	kvm_zap_gfn_range(kvm, gfn, gfn + PTRS_PER_PMD);
> +	return;
> +
> +unlock:
> +	write_unlock(&kvm->mmu_lock);
> +}
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 3784d389247b..3ba62f21b113 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -1933,15 +1933,21 @@ static int pf_interception(struct kvm_vcpu *vcpu)
>  static int npf_interception(struct kvm_vcpu *vcpu)
>  {
>  	struct vcpu_svm *svm = to_svm(vcpu);
> +	int rc;
>  
>  	u64 fault_address = svm->vmcb->control.exit_info_2;
>  	u64 error_code = svm->vmcb->control.exit_info_1;
>  
>  	trace_kvm_page_fault(fault_address, error_code);
> -	return kvm_mmu_page_fault(vcpu, fault_address, error_code,
> -			static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
> -			svm->vmcb->control.insn_bytes : NULL,
> -			svm->vmcb->control.insn_len);
> +	rc = kvm_mmu_page_fault(vcpu, fault_address, error_code,
> +				static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
> +				svm->vmcb->control.insn_bytes : NULL,
> +				svm->vmcb->control.insn_len);

If kvm_mmu_page_fault failed, (rc!=0) do you still want to call your
handler?

Dave

> +	if (error_code & PFERR_GUEST_RMP_MASK)
> +		handle_rmp_page_fault(vcpu, fault_address, error_code);
> +
> +	return rc;
>  }
>  
>  static int db_interception(struct kvm_vcpu *vcpu)
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index ff91184f9b4a..280072995306 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -626,6 +626,7 @@ struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu);
>  void sev_rmp_page_level_adjust(struct kvm *kvm, kvm_pfn_t pfn, int *level);
>  int sev_post_map_gfn(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int *token);
>  void sev_post_unmap_gfn(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int token);
> +void handle_rmp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code);
>  
>  /* vmenter.S */
>  
> -- 
> 2.17.1
> 
>
Sean Christopherson Oct. 13, 2021, 5:57 p.m. UTC | #2
On Fri, Aug 20, 2021, Brijesh Singh wrote:
> When SEV-SNP is enabled in the guest, the hardware places restrictions on
> all memory accesses based on the contents of the RMP table. When hardware
> encounters RMP check failure caused by the guest memory access it raises
> the #NPF. The error code contains additional information on the access
> type. See the APM volume 2 for additional information.
> 
> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> ---
>  arch/x86/kvm/svm/sev.c | 76 ++++++++++++++++++++++++++++++++++++++++++
>  arch/x86/kvm/svm/svm.c | 14 +++++---
>  arch/x86/kvm/svm/svm.h |  1 +
>  3 files changed, 87 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index 65b578463271..712e8907bc39 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -3651,3 +3651,79 @@ void sev_post_unmap_gfn(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int token)
>  
>  	srcu_read_unlock(&sev->psc_srcu, token);
>  }
> +
> +void handle_rmp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code)
> +{
> +	int rmp_level, npt_level, rc, assigned;

Really silly nit: can you use 'r' or 'ret' instead of 'rc'?  Outside of the
emulator, which should never be the gold standard for code ;-), 'rc' isn't used
in x86 KVM.

> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 3784d389247b..3ba62f21b113 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -1933,15 +1933,21 @@ static int pf_interception(struct kvm_vcpu *vcpu)
>  static int npf_interception(struct kvm_vcpu *vcpu)
>  {
>  	struct vcpu_svm *svm = to_svm(vcpu);
> +	int rc;
>  
>  	u64 fault_address = svm->vmcb->control.exit_info_2;
>  	u64 error_code = svm->vmcb->control.exit_info_1;
>  
>  	trace_kvm_page_fault(fault_address, error_code);
> -	return kvm_mmu_page_fault(vcpu, fault_address, error_code,
> -			static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
> -			svm->vmcb->control.insn_bytes : NULL,
> -			svm->vmcb->control.insn_len);
> +	rc = kvm_mmu_page_fault(vcpu, fault_address, error_code,
> +				static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
> +				svm->vmcb->control.insn_bytes : NULL,
> +				svm->vmcb->control.insn_len);
> +
> +	if (error_code & PFERR_GUEST_RMP_MASK)

Don't think it will matter in the end, but shouldn't this consult 'rc' before
diving into RMP handling?

> +		handle_rmp_page_fault(vcpu, fault_address, error_code);

Similar to my comments on the PSC patches, I think this is backwards.  The MMU
should provide the control logic to convert between private and shared, and vendor
code should only get involved when there are side effects from the conversion.

Once I've made a dent in my review backlog, I'll fiddle with this and some of the
other MMU interactions, and hopefully come up with a workable sketch for the MMU
stuff.  Yell at me if you haven't gotten an update by Wednesday or so.
diff mbox series

Patch

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 65b578463271..712e8907bc39 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -3651,3 +3651,79 @@  void sev_post_unmap_gfn(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int token)
 
 	srcu_read_unlock(&sev->psc_srcu, token);
 }
+
+void handle_rmp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code)
+{
+	int rmp_level, npt_level, rc, assigned;
+	struct kvm *kvm = vcpu->kvm;
+	gfn_t gfn = gpa_to_gfn(gpa);
+	bool need_psc = false;
+	enum psc_op psc_op;
+	kvm_pfn_t pfn;
+	bool private;
+
+	write_lock(&kvm->mmu_lock);
+
+	if (unlikely(!kvm_mmu_get_tdp_walk(vcpu, gpa, &pfn, &npt_level)))
+		goto unlock;
+
+	assigned = snp_lookup_rmpentry(pfn, &rmp_level);
+	if (unlikely(assigned < 0))
+		goto unlock;
+
+	private = !!(error_code & PFERR_GUEST_ENC_MASK);
+
+	/*
+	 * If the fault was due to size mismatch, or NPT and RMP page level's
+	 * are not in sync, then use PSMASH to split the RMP entry into 4K.
+	 */
+	if ((error_code & PFERR_GUEST_SIZEM_MASK) ||
+	    (npt_level == PG_LEVEL_4K && rmp_level == PG_LEVEL_2M && private)) {
+		rc = snp_rmptable_psmash(kvm, pfn);
+		if (rc)
+			pr_err_ratelimited("psmash failed, gpa 0x%llx pfn 0x%llx rc %d\n",
+					   gpa, pfn, rc);
+		goto out;
+	}
+
+	/*
+	 * If it's a private access, and the page is not assigned in the
+	 * RMP table, create a new private RMP entry. This can happen if
+	 * guest did not use the PSC VMGEXIT to transition the page state
+	 * before the access.
+	 */
+	if (!assigned && private) {
+		need_psc = 1;
+		psc_op = SNP_PAGE_STATE_PRIVATE;
+		goto out;
+	}
+
+	/*
+	 * If it's a shared access, but the page is private in the RMP table
+	 * then make the page shared in the RMP table. This can happen if
+	 * the guest did not use the PSC VMGEXIT to transition the page
+	 * state before the access.
+	 */
+	if (assigned && !private) {
+		need_psc = 1;
+		psc_op = SNP_PAGE_STATE_SHARED;
+	}
+
+out:
+	write_unlock(&kvm->mmu_lock);
+
+	if (need_psc)
+		rc = __snp_handle_page_state_change(vcpu, psc_op, gpa, PG_LEVEL_4K);
+
+	/*
+	 * The fault handler has updated the RMP pagesize, zap the existing
+	 * rmaps for large entry ranges so that nested page table gets rebuilt
+	 * with the updated RMP pagesize.
+	 */
+	gfn = gpa_to_gfn(gpa) & ~(KVM_PAGES_PER_HPAGE(PG_LEVEL_2M) - 1);
+	kvm_zap_gfn_range(kvm, gfn, gfn + PTRS_PER_PMD);
+	return;
+
+unlock:
+	write_unlock(&kvm->mmu_lock);
+}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 3784d389247b..3ba62f21b113 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1933,15 +1933,21 @@  static int pf_interception(struct kvm_vcpu *vcpu)
 static int npf_interception(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
+	int rc;
 
 	u64 fault_address = svm->vmcb->control.exit_info_2;
 	u64 error_code = svm->vmcb->control.exit_info_1;
 
 	trace_kvm_page_fault(fault_address, error_code);
-	return kvm_mmu_page_fault(vcpu, fault_address, error_code,
-			static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
-			svm->vmcb->control.insn_bytes : NULL,
-			svm->vmcb->control.insn_len);
+	rc = kvm_mmu_page_fault(vcpu, fault_address, error_code,
+				static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
+				svm->vmcb->control.insn_bytes : NULL,
+				svm->vmcb->control.insn_len);
+
+	if (error_code & PFERR_GUEST_RMP_MASK)
+		handle_rmp_page_fault(vcpu, fault_address, error_code);
+
+	return rc;
 }
 
 static int db_interception(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index ff91184f9b4a..280072995306 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -626,6 +626,7 @@  struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu);
 void sev_rmp_page_level_adjust(struct kvm *kvm, kvm_pfn_t pfn, int *level);
 int sev_post_map_gfn(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int *token);
 void sev_post_unmap_gfn(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int token);
+void handle_rmp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code);
 
 /* vmenter.S */