
[Part2,RFC,v4,34/40] KVM: SVM: Add support to handle Page State Change VMGEXIT

Message ID 20210707183616.5620-35-brijesh.singh@amd.com (mailing list archive)
State New
Series Add AMD Secure Nested Paging (SEV-SNP) Hypervisor Support

Commit Message

Brijesh Singh July 7, 2021, 6:36 p.m. UTC
SEV-SNP VMs can ask the hypervisor to change the page state in the RMP
table to be private or shared using the Page State Change NAE event
as defined in the GHCB specification section 4.1.6.

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
---
 arch/x86/include/asm/sev-common.h |  7 +++
 arch/x86/kvm/svm/sev.c            | 80 ++++++++++++++++++++++++++++++-
 include/linux/sev.h               |  3 ++
 3 files changed, 88 insertions(+), 2 deletions(-)
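
The guest describes the requested conversions in a Page State Change
descriptor placed in the GHCB shared buffer. For reference, a sketch of
the descriptor layout as used by this series (field widths follow the
GHCB specification and are shown here for illustration only):

	struct __packed psc_hdr {
		u16 cur_entry;
		u16 end_entry;
		u32 reserved;
	};

	struct __packed psc_entry {
		u64	cur_page	: 12,
			gfn		: 40,
			operation	: 4,
			pagesize	: 1,
			reserved	: 7;
	};

	struct __packed snp_psc_desc {
		struct psc_hdr hdr;
		struct psc_entry entries[VMGEXIT_PSC_MAX_ENTRY];
	};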

Comments

Sean Christopherson July 16, 2021, 9:14 p.m. UTC | #1
On Wed, Jul 07, 2021, Brijesh Singh wrote:
> +static unsigned long snp_handle_psc(struct vcpu_svm *svm, struct ghcb *ghcb)
> +{
> +	struct kvm_vcpu *vcpu = &svm->vcpu;
> +	int level, op, rc = PSC_UNDEF_ERR;
> +	struct snp_psc_desc *info;
> +	struct psc_entry *entry;
> +	gpa_t gpa;
> +
> +	if (!sev_snp_guest(vcpu->kvm))
> +		goto out;
> +
> +	if (!setup_vmgexit_scratch(svm, true, sizeof(ghcb->save.sw_scratch))) {
> +		pr_err("vmgexit: scratch area is not setup.\n");
> +		rc = PSC_INVALID_HDR;
> +		goto out;
> +	}
> +
> +	info = (struct snp_psc_desc *)svm->ghcb_sa;
> +	entry = &info->entries[info->hdr.cur_entry];

Grabbing "entry" here is unnecessary and confusing.

> +
> +	if ((info->hdr.cur_entry >= VMGEXIT_PSC_MAX_ENTRY) ||
> +	    (info->hdr.end_entry >= VMGEXIT_PSC_MAX_ENTRY) ||
> +	    (info->hdr.cur_entry > info->hdr.end_entry)) {

There's a TOCTOU bug here if the guest uses the GHCB instead of a scratch area.
If the guest uses the scratch area, then KVM makes a full copy into kernel memory.
But if the guest uses the GHCB, then KVM maps the GHCB into kernel address space
but doesn't make a full copy, i.e. the guest can modify the data while it's being
processed by KVM.

IIRC, Peter and I discussed the sketchiness of the GHCB mapping offline a few
times, but determined that there were no existing SEV-ES bugs because the guest
could only submarine its own emulation request.  But here, it could coerce KVM
into running off the end of a buffer.

I think you can get away with capturing cur_entry/end_entry locally, though
copying the GHCB would be more robust.  That would also make the code a bit
prettier, e.g.

	cur = info->hdr.cur_entry;
	end = info->hdr.end_entry;

> +		rc = PSC_INVALID_ENTRY;
> +		goto out;
> +	}
> +
> +	while (info->hdr.cur_entry <= info->hdr.end_entry) {

Make this a for loop?

	for ( ; cur_entry < end_entry; cur_entry++)

> +		entry = &info->entries[info->hdr.cur_entry];

Does this need array_index_nospec() treatment?

> +		gpa = gfn_to_gpa(entry->gfn);
> +		level = RMP_TO_X86_PG_LEVEL(entry->pagesize);
> +		op = entry->operation;
> +
> +		if (!IS_ALIGNED(gpa, page_level_size(level))) {
> +			rc = PSC_INVALID_ENTRY;
> +			goto out;
> +		}
> +
> +		rc = __snp_handle_psc(vcpu, op, gpa, level);
> +		if (rc)
> +			goto out;
> +
> +		info->hdr.cur_entry++;
> +	}
> +
> +out:

And for the copy case:

	info->hdr.cur_entry = cur;

> +	return rc ? map_to_psc_vmgexit_code(rc) : 0;
> +}
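
Taken together, the suggested rework could look roughly like the
following (an illustrative sketch, not code from the posted series;
cur and end are u16 locals holding the snapshot of the header fields):

	u16 cur, end;

	cur = info->hdr.cur_entry;
	end = info->hdr.end_entry;

	if (cur >= VMGEXIT_PSC_MAX_ENTRY ||
	    end >= VMGEXIT_PSC_MAX_ENTRY || cur > end) {
		rc = PSC_INVALID_ENTRY;
		goto out;
	}

	for (; cur <= end; cur++) {
		entry = &info->entries[cur];
		gpa = gfn_to_gpa(entry->gfn);
		level = RMP_TO_X86_PG_LEVEL(entry->pagesize);
		op = entry->operation;

		if (!IS_ALIGNED(gpa, page_level_size(level))) {
			rc = PSC_INVALID_ENTRY;
			break;
		}

		rc = __snp_handle_psc(vcpu, op, gpa, level);
		if (rc)
			break;
	}

	/* Report progress back through the shared header. */
	info->hdr.cur_entry = cur;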
Brijesh Singh July 19, 2021, 2:24 p.m. UTC | #2
On 7/16/21 4:14 PM, Sean Christopherson wrote:
> On Wed, Jul 07, 2021, Brijesh Singh wrote:
>> +static unsigned long snp_handle_psc(struct vcpu_svm *svm, struct ghcb *ghcb)
>> +{
>> +	struct kvm_vcpu *vcpu = &svm->vcpu;
>> +	int level, op, rc = PSC_UNDEF_ERR;
>> +	struct snp_psc_desc *info;
>> +	struct psc_entry *entry;
>> +	gpa_t gpa;
>> +
>> +	if (!sev_snp_guest(vcpu->kvm))
>> +		goto out;
>> +
>> +	if (!setup_vmgexit_scratch(svm, true, sizeof(ghcb->save.sw_scratch))) {
>> +		pr_err("vmgexit: scratch area is not setup.\n");
>> +		rc = PSC_INVALID_HDR;
>> +		goto out;
>> +	}
>> +
>> +	info = (struct snp_psc_desc *)svm->ghcb_sa;
>> +	entry = &info->entries[info->hdr.cur_entry];
> 
> Grabbing "entry" here is unnecessary and confusing.

Noted.

> 
>> +
>> +	if ((info->hdr.cur_entry >= VMGEXIT_PSC_MAX_ENTRY) ||
>> +	    (info->hdr.end_entry >= VMGEXIT_PSC_MAX_ENTRY) ||
>> +	    (info->hdr.cur_entry > info->hdr.end_entry)) {
> 
> There's a TOCTOU bug here if the guest uses the GHCB instead of a scratch area.
> If the guest uses the scratch area, then KVM makes a full copy into kernel memory.
> But if the guest uses the GHCB, then KVM maps the GHCB into kernel address space
> but doesn't make a full copy, i.e. the guest can modify the data while it's being
> processed by KVM.
> 
Sure, I can make a full copy of the page-state change buffer.
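
A rough sketch of that full-copy variant (illustrative only; the
descriptor is about 2KB, so a real implementation would likely use a
preallocated per-VCPU buffer -- "psc_desc_copy" below is hypothetical):

	struct snp_psc_desc *copy = svm->psc_desc_copy;	/* hypothetical preallocated buffer */

	/* Snapshot the guest-controlled descriptor before validating it. */
	memcpy(copy, svm->ghcb_sa, sizeof(*copy));

	/* ... validate copy->hdr and walk copy->entries[] as before ... */

	/* Publish the final cur_entry back to the shared buffer. */
	((struct snp_psc_desc *)svm->ghcb_sa)->hdr.cur_entry = copy->hdr.cur_entry;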


> IIRC, Peter and I discussed the sketchiness of the GHCB mapping offline a few
> times, but determined that there were no existing SEV-ES bugs because the guest
> could only submarine its own emulation request.  But here, it could coerce KVM
> into running off the end of a buffer.
> 
> I think you can get away with capturing cur_entry/end_entry locally, though
> copying the GHCB would be more robust.  That would also make the code a bit
> prettier, e.g.
> 
> 	cur = info->hdr.cur_entry;
> 	end = info->hdr.end_entry;
> 
>> +		rc = PSC_INVALID_ENTRY;
>> +		goto out;
>> +	}
>> +
>> +	while (info->hdr.cur_entry <= info->hdr.end_entry) {
> 
> Make this a for loop?

Sure, I can use a for loop. IIRC, in previous review feedback I got
the impression that while() was preferred in part 1, so I used a
similar approach here.

> 
> 	for ( ; cur_entry < end_entry; cur_entry++)
> 
>> +		entry = &info->entries[info->hdr.cur_entry];
> 
> Does this need array_index_nospec() treatment?
> 

I don't think so.

thanks
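
If array_index_nospec() were used, it would amount to clamping the
guest-controlled index before dereferencing the entry, along these
lines (a sketch; idx is a local index variable, and whether the
treatment is warranted was left open above):

	/* array_index_nospec() comes from <linux/nospec.h>. */
	idx = array_index_nospec(info->hdr.cur_entry, VMGEXIT_PSC_MAX_ENTRY);
	entry = &info->entries[idx];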

Patch

diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
index 2561413cb316..a02175752f2d 100644
--- a/arch/x86/include/asm/sev-common.h
+++ b/arch/x86/include/asm/sev-common.h
@@ -101,6 +101,13 @@ 
 /* SNP Page State Change NAE event */
 #define VMGEXIT_PSC_MAX_ENTRY		253
 
+/* The page state change hdr structure is not valid */
+#define PSC_INVALID_HDR			1
+/* The hdr.cur_entry or hdr.end_entry is not valid */
+#define PSC_INVALID_ENTRY		2
+/* Page state change encountered undefined error */
+#define PSC_UNDEF_ERR			3
+
 struct __packed psc_hdr {
 	u16 cur_entry;
 	u16 end_entry;
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 68d275b2a660..0155d9b3127d 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2662,6 +2662,7 @@  static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
 	case SVM_VMGEXIT_AP_JUMP_TABLE:
 	case SVM_VMGEXIT_UNSUPPORTED_EVENT:
 	case SVM_VMGEXIT_HV_FT:
+	case SVM_VMGEXIT_PSC:
 		break;
 	default:
 		goto vmgexit_err;
@@ -2910,7 +2911,8 @@  static int snp_make_page_private(struct kvm_vcpu *vcpu, gpa_t gpa, kvm_pfn_t pfn
 static int __snp_handle_psc(struct kvm_vcpu *vcpu, int op, gpa_t gpa, int level)
 {
 	struct kvm *kvm = vcpu->kvm;
-	int rc, tdp_level;
+	int rc = PSC_UNDEF_ERR;
+	int tdp_level;
 	kvm_pfn_t pfn;
 	gpa_t gpa_end;
 
@@ -2945,8 +2947,11 @@  static int __snp_handle_psc(struct kvm_vcpu *vcpu, int op, gpa_t gpa, int level)
 		case SNP_PAGE_STATE_PRIVATE:
 			rc = snp_make_page_private(vcpu, gpa, pfn, level);
 			break;
+		case SNP_PAGE_STATE_PSMASH:
+		case SNP_PAGE_STATE_UNSMASH:
+			/* TODO: Add support to handle it */
 		default:
-			rc = -EINVAL;
+			rc = PSC_INVALID_ENTRY;
 			break;
 		}
 
@@ -2965,6 +2970,68 @@  static int __snp_handle_psc(struct kvm_vcpu *vcpu, int op, gpa_t gpa, int level)
 	return rc;
 }
 
+static inline unsigned long map_to_psc_vmgexit_code(int rc)
+{
+	switch (rc) {
+	case PSC_INVALID_HDR:
+		return ((1ul << 32) | 1);
+	case PSC_INVALID_ENTRY:
+		return ((1ul << 32) | 2);
+	case RMPUPDATE_FAIL_OVERLAP:
+		return ((3ul << 32) | 2);
+	default: return (4ul << 32);
+	}
+}
+
+static unsigned long snp_handle_psc(struct vcpu_svm *svm, struct ghcb *ghcb)
+{
+	struct kvm_vcpu *vcpu = &svm->vcpu;
+	int level, op, rc = PSC_UNDEF_ERR;
+	struct snp_psc_desc *info;
+	struct psc_entry *entry;
+	gpa_t gpa;
+
+	if (!sev_snp_guest(vcpu->kvm))
+		goto out;
+
+	if (!setup_vmgexit_scratch(svm, true, sizeof(ghcb->save.sw_scratch))) {
+		pr_err("vmgexit: scratch area is not setup.\n");
+		rc = PSC_INVALID_HDR;
+		goto out;
+	}
+
+	info = (struct snp_psc_desc *)svm->ghcb_sa;
+	entry = &info->entries[info->hdr.cur_entry];
+
+	if ((info->hdr.cur_entry >= VMGEXIT_PSC_MAX_ENTRY) ||
+	    (info->hdr.end_entry >= VMGEXIT_PSC_MAX_ENTRY) ||
+	    (info->hdr.cur_entry > info->hdr.end_entry)) {
+		rc = PSC_INVALID_ENTRY;
+		goto out;
+	}
+
+	while (info->hdr.cur_entry <= info->hdr.end_entry) {
+		entry = &info->entries[info->hdr.cur_entry];
+		gpa = gfn_to_gpa(entry->gfn);
+		level = RMP_TO_X86_PG_LEVEL(entry->pagesize);
+		op = entry->operation;
+
+		if (!IS_ALIGNED(gpa, page_level_size(level))) {
+			rc = PSC_INVALID_ENTRY;
+			goto out;
+		}
+
+		rc = __snp_handle_psc(vcpu, op, gpa, level);
+		if (rc)
+			goto out;
+
+		info->hdr.cur_entry++;
+	}
+
+out:
+	return rc ? map_to_psc_vmgexit_code(rc) : 0;
+}
+
 static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
 {
 	struct vmcb_control_area *control = &svm->vmcb->control;
@@ -3209,6 +3276,15 @@  int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
 		ret = 1;
 		break;
 	}
+	case SVM_VMGEXIT_PSC: {
+		unsigned long rc;
+
+		ret = 1;
+
+		rc = snp_handle_psc(svm, ghcb);
+		ghcb_set_sw_exit_info_2(ghcb, rc);
+		break;
+	}
 	case SVM_VMGEXIT_UNSUPPORTED_EVENT:
 		vcpu_unimpl(vcpu,
 			    "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
diff --git a/include/linux/sev.h b/include/linux/sev.h
index 82e804a2ee0d..d96900b52aa5 100644
--- a/include/linux/sev.h
+++ b/include/linux/sev.h
@@ -57,6 +57,9 @@  struct rmpupdate {
  */
 #define FAIL_INUSE              3
 
+/* RMPUPDATE detected a 4K page and 2MB page overlap. */
+#define RMPUPDATE_FAIL_OVERLAP	7
+
 #ifdef CONFIG_AMD_MEM_ENCRYPT
 struct rmpentry *snp_lookup_page_in_rmptable(struct page *page, int *level);
 int psmash(struct page *page);