diff mbox series

[v19,101/130] KVM: TDX: handle ept violation/misconfig exit

Message ID f05b978021522d70a259472337e0b53658d47636.1708933498.git.isaku.yamahata@intel.com (mailing list archive)
State New, archived
Headers show
Series [v19,001/130] x86/virt/tdx: Rename _offset to _member for TD_SYSINFO_MAP() macro | expand

Commit Message

Isaku Yamahata Feb. 26, 2024, 8:26 a.m. UTC
From: Isaku Yamahata <isaku.yamahata@intel.com>

On EPT violation, call a common function, __vmx_handle_ept_violation() to
trigger x86 MMU code.  On EPT misconfiguration, exit to ring 3 with
KVM_EXIT_UNKNOWN.  because EPT misconfiguration can't happen as MMIO is
trigged by TDG.VP.VMCALL. No point to set a misconfiguration value for the
fast path.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>

---
v14 -> v15:
- use PFERR_GUEST_ENC_MASK to tell the fault is private

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
---
 arch/x86/kvm/vmx/common.h |  3 +++
 arch/x86/kvm/vmx/tdx.c    | 49 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+)

Comments

Chenyi Qiang March 4, 2024, 7:39 a.m. UTC | #1
On 2/26/2024 4:26 PM, isaku.yamahata@intel.com wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
> 
> On EPT violation, call a common function, __vmx_handle_ept_violation() to
> trigger x86 MMU code.  On EPT misconfiguration, exit to ring 3 with
> KVM_EXIT_UNKNOWN.  because EPT misconfiguration can't happen as MMIO is
> trigged by TDG.VP.VMCALL. No point to set a misconfiguration value for the

s/trigged/triggered

> fast path.
> 
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> 
> ---
> v14 -> v15:
> - use PFERR_GUEST_ENC_MASK to tell the fault is private
> 
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>

duplicated SOB

> ---
>  arch/x86/kvm/vmx/common.h |  3 +++
>  arch/x86/kvm/vmx/tdx.c    | 49 +++++++++++++++++++++++++++++++++++++++
>  2 files changed, 52 insertions(+)
> 
> diff --git a/arch/x86/kvm/vmx/common.h b/arch/x86/kvm/vmx/common.h
> index 632af7a76d0a..027aa4175d2c 100644
> --- a/arch/x86/kvm/vmx/common.h
> +++ b/arch/x86/kvm/vmx/common.h
> @@ -87,6 +87,9 @@ static inline int __vmx_handle_ept_violation(struct kvm_vcpu *vcpu, gpa_t gpa,
>  	error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) != 0 ?
>  	       PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;
>  
> +	if (kvm_is_private_gpa(vcpu->kvm, gpa))
> +		error_code |= PFERR_GUEST_ENC_MASK;
> +
>  	return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
>  }
>  
> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> index 2f68e6f2b53a..0db80fa020d2 100644
> --- a/arch/x86/kvm/vmx/tdx.c
> +++ b/arch/x86/kvm/vmx/tdx.c
> @@ -1285,6 +1285,51 @@ void tdx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
>  	__vmx_deliver_posted_interrupt(vcpu, &tdx->pi_desc, vector);
>  }
>  
> +static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu)
> +{
> +	unsigned long exit_qual;
> +
> +	if (kvm_is_private_gpa(vcpu->kvm, tdexit_gpa(vcpu))) {
> +		/*
> +		 * Always treat SEPT violations as write faults.  Ignore the
> +		 * EXIT_QUALIFICATION reported by TDX-SEAM for SEPT violations.
> +		 * TD private pages are always RWX in the SEPT tables,
> +		 * i.e. they're always mapped writable.  Just as importantly,
> +		 * treating SEPT violations as write faults is necessary to
> +		 * avoid COW allocations, which will cause TDAUGPAGE failures
> +		 * due to aliasing a single HPA to multiple GPAs.
> +		 */
> +#define TDX_SEPT_VIOLATION_EXIT_QUAL	EPT_VIOLATION_ACC_WRITE
> +		exit_qual = TDX_SEPT_VIOLATION_EXIT_QUAL;
> +	} else {
> +		exit_qual = tdexit_exit_qual(vcpu);
> +		if (exit_qual & EPT_VIOLATION_ACC_INSTR) {
> +			pr_warn("kvm: TDX instr fetch to shared GPA = 0x%lx @ RIP = 0x%lx\n",
> +				tdexit_gpa(vcpu), kvm_rip_read(vcpu));
> +			vcpu->run->exit_reason = KVM_EXIT_EXCEPTION;
> +			vcpu->run->ex.exception = PF_VECTOR;
> +			vcpu->run->ex.error_code = exit_qual;
> +			return 0;
> +		}
> +	}
> +
> +	trace_kvm_page_fault(vcpu, tdexit_gpa(vcpu), exit_qual);
> +	return __vmx_handle_ept_violation(vcpu, tdexit_gpa(vcpu), exit_qual);
> +}
> +
> +static int tdx_handle_ept_misconfig(struct kvm_vcpu *vcpu)
> +{
> +	WARN_ON_ONCE(1);
> +
> +	vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
> +	vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
> +	vcpu->run->internal.ndata = 2;
> +	vcpu->run->internal.data[0] = EXIT_REASON_EPT_MISCONFIG;
> +	vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
> +
> +	return 0;
> +}
> +
>  int tdx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t fastpath)
>  {
>  	union tdx_exit_reason exit_reason = to_tdx(vcpu)->exit_reason;
> @@ -1345,6 +1390,10 @@ int tdx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t fastpath)
>  	WARN_ON_ONCE(fastpath != EXIT_FASTPATH_NONE);
>  
>  	switch (exit_reason.basic) {
> +	case EXIT_REASON_EPT_VIOLATION:
> +		return tdx_handle_ept_violation(vcpu);
> +	case EXIT_REASON_EPT_MISCONFIG:
> +		return tdx_handle_ept_misconfig(vcpu);
>  	case EXIT_REASON_OTHER_SMI:
>  		/*
>  		 * If reach here, it's not a Machine Check System Management
Chao Gao April 1, 2024, 4:10 a.m. UTC | #2
>+static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu)
>+{
>+	unsigned long exit_qual;
>+
>+	if (kvm_is_private_gpa(vcpu->kvm, tdexit_gpa(vcpu))) {
>+		/*
>+		 * Always treat SEPT violations as write faults.  Ignore the
>+		 * EXIT_QUALIFICATION reported by TDX-SEAM for SEPT violations.
>+		 * TD private pages are always RWX in the SEPT tables,
>+		 * i.e. they're always mapped writable.  Just as importantly,
>+		 * treating SEPT violations as write faults is necessary to
>+		 * avoid COW allocations, which will cause TDAUGPAGE failures
>+		 * due to aliasing a single HPA to multiple GPAs.
>+		 */
>+#define TDX_SEPT_VIOLATION_EXIT_QUAL	EPT_VIOLATION_ACC_WRITE
>+		exit_qual = TDX_SEPT_VIOLATION_EXIT_QUAL;
>+	} else {
>+		exit_qual = tdexit_exit_qual(vcpu);
>+		if (exit_qual & EPT_VIOLATION_ACC_INSTR) {

Unless the CPU has a bug, instruction fetch in TD from shared memory causes a
#PF. I think you can add a comment for this.

Maybe KVM_BUG_ON() is more appropriate as it signifies a potential bug.

>+			pr_warn("kvm: TDX instr fetch to shared GPA = 0x%lx @ RIP = 0x%lx\n",
>+				tdexit_gpa(vcpu), kvm_rip_read(vcpu));
>+			vcpu->run->exit_reason = KVM_EXIT_EXCEPTION;
>+			vcpu->run->ex.exception = PF_VECTOR;
>+			vcpu->run->ex.error_code = exit_qual;
>+			return 0;
>+		}
>+	}
>+
>+	trace_kvm_page_fault(vcpu, tdexit_gpa(vcpu), exit_qual);
>+	return __vmx_handle_ept_violation(vcpu, tdexit_gpa(vcpu), exit_qual);
>+}
>+
>+static int tdx_handle_ept_misconfig(struct kvm_vcpu *vcpu)
>+{
>+	WARN_ON_ONCE(1);
>+
>+	vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
>+	vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
>+	vcpu->run->internal.ndata = 2;
>+	vcpu->run->internal.data[0] = EXIT_REASON_EPT_MISCONFIG;
>+	vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
>+
>+	return 0;
>+}
>+
> int tdx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t fastpath)
> {
> 	union tdx_exit_reason exit_reason = to_tdx(vcpu)->exit_reason;
>@@ -1345,6 +1390,10 @@ int tdx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t fastpath)
> 	WARN_ON_ONCE(fastpath != EXIT_FASTPATH_NONE);
> 
> 	switch (exit_reason.basic) {
>+	case EXIT_REASON_EPT_VIOLATION:
>+		return tdx_handle_ept_violation(vcpu);
>+	case EXIT_REASON_EPT_MISCONFIG:
>+		return tdx_handle_ept_misconfig(vcpu);

Handling EPT misconfiguration can be dropped because the "default" case handles
all unexpected exits in the same way


> 	case EXIT_REASON_OTHER_SMI:
> 		/*
> 		 * If reach here, it's not a Machine Check System Management
>-- 
>2.25.1
>
>
Isaku Yamahata April 3, 2024, 6:42 p.m. UTC | #3
On Mon, Apr 01, 2024 at 12:10:58PM +0800,
Chao Gao <chao.gao@intel.com> wrote:

> >+static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu)
> >+{
> >+	unsigned long exit_qual;
> >+
> >+	if (kvm_is_private_gpa(vcpu->kvm, tdexit_gpa(vcpu))) {
> >+		/*
> >+		 * Always treat SEPT violations as write faults.  Ignore the
> >+		 * EXIT_QUALIFICATION reported by TDX-SEAM for SEPT violations.
> >+		 * TD private pages are always RWX in the SEPT tables,
> >+		 * i.e. they're always mapped writable.  Just as importantly,
> >+		 * treating SEPT violations as write faults is necessary to
> >+		 * avoid COW allocations, which will cause TDAUGPAGE failures
> >+		 * due to aliasing a single HPA to multiple GPAs.
> >+		 */
> >+#define TDX_SEPT_VIOLATION_EXIT_QUAL	EPT_VIOLATION_ACC_WRITE
> >+		exit_qual = TDX_SEPT_VIOLATION_EXIT_QUAL;
> >+	} else {
> >+		exit_qual = tdexit_exit_qual(vcpu);
> >+		if (exit_qual & EPT_VIOLATION_ACC_INSTR) {
> 
> Unless the CPU has a bug, instruction fetch in TD from shared memory causes a
> #PF. I think you can add a comment for this.

Yes.


> Maybe KVM_BUG_ON() is more appropriate as it signifies a potential bug.

Bug of what component? CPU. If so, I think KVM_EXIT_INTERNAL_ERROR +
KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON is more appropriate.


> >+			pr_warn("kvm: TDX instr fetch to shared GPA = 0x%lx @ RIP = 0x%lx\n",
> >+				tdexit_gpa(vcpu), kvm_rip_read(vcpu));
> >+			vcpu->run->exit_reason = KVM_EXIT_EXCEPTION;
> >+			vcpu->run->ex.exception = PF_VECTOR;
> >+			vcpu->run->ex.error_code = exit_qual;
> >+			return 0;
> >+		}
> >+	}
> >+
> >+	trace_kvm_page_fault(vcpu, tdexit_gpa(vcpu), exit_qual);
> >+	return __vmx_handle_ept_violation(vcpu, tdexit_gpa(vcpu), exit_qual);
> >+}
> >+
> >+static int tdx_handle_ept_misconfig(struct kvm_vcpu *vcpu)
> >+{
> >+	WARN_ON_ONCE(1);
> >+
> >+	vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
> >+	vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
> >+	vcpu->run->internal.ndata = 2;
> >+	vcpu->run->internal.data[0] = EXIT_REASON_EPT_MISCONFIG;
> >+	vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
> >+
> >+	return 0;
> >+}
> >+
> > int tdx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t fastpath)
> > {
> > 	union tdx_exit_reason exit_reason = to_tdx(vcpu)->exit_reason;
> >@@ -1345,6 +1390,10 @@ int tdx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t fastpath)
> > 	WARN_ON_ONCE(fastpath != EXIT_FASTPATH_NONE);
> > 
> > 	switch (exit_reason.basic) {
> >+	case EXIT_REASON_EPT_VIOLATION:
> >+		return tdx_handle_ept_violation(vcpu);
> >+	case EXIT_REASON_EPT_MISCONFIG:
> >+		return tdx_handle_ept_misconfig(vcpu);
> 
> Handling EPT misconfiguration can be dropped because the "default" case handles
> all unexpected exits in the same way

Ah, right. Will update it.
Reinette Chatre April 30, 2024, 8:47 p.m. UTC | #4
Hi Isaku,

On 4/3/2024 11:42 AM, Isaku Yamahata wrote:
> On Mon, Apr 01, 2024 at 12:10:58PM +0800,
> Chao Gao <chao.gao@intel.com> wrote:
> 
>>> +static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu)
>>> +{
>>> +	unsigned long exit_qual;
>>> +
>>> +	if (kvm_is_private_gpa(vcpu->kvm, tdexit_gpa(vcpu))) {
>>> +		/*
>>> +		 * Always treat SEPT violations as write faults.  Ignore the
>>> +		 * EXIT_QUALIFICATION reported by TDX-SEAM for SEPT violations.
>>> +		 * TD private pages are always RWX in the SEPT tables,
>>> +		 * i.e. they're always mapped writable.  Just as importantly,
>>> +		 * treating SEPT violations as write faults is necessary to
>>> +		 * avoid COW allocations, which will cause TDAUGPAGE failures
>>> +		 * due to aliasing a single HPA to multiple GPAs.
>>> +		 */
>>> +#define TDX_SEPT_VIOLATION_EXIT_QUAL	EPT_VIOLATION_ACC_WRITE
>>> +		exit_qual = TDX_SEPT_VIOLATION_EXIT_QUAL;
>>> +	} else {
>>> +		exit_qual = tdexit_exit_qual(vcpu);
>>> +		if (exit_qual & EPT_VIOLATION_ACC_INSTR) {
>>
>> Unless the CPU has a bug, instruction fetch in TD from shared memory causes a
>> #PF. I think you can add a comment for this.
> 
> Yes.
> 
> 
>> Maybe KVM_BUG_ON() is more appropriate as it signifies a potential bug.
> 
> Bug of what component? CPU. If so, I think KVM_EXIT_INTERNAL_ERROR +
> KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON is more appropriate.
> 

Is below what you have in mind?

diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 499c6cd9633f..bd30b4c4d710 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -1305,11 +1305,18 @@ static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu)
 	} else {
 		exit_qual = tdexit_exit_qual(vcpu);
 		if (exit_qual & EPT_VIOLATION_ACC_INSTR) {
+			/*
+			 * Instruction fetch in TD from shared memory
+			 * causes a #PF.
+			 */
 			pr_warn("kvm: TDX instr fetch to shared GPA = 0x%lx @ RIP = 0x%lx\n",
 				tdexit_gpa(vcpu), kvm_rip_read(vcpu));
-			vcpu->run->exit_reason = KVM_EXIT_EXCEPTION;
-			vcpu->run->ex.exception = PF_VECTOR;
-			vcpu->run->ex.error_code = exit_qual;
+			vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+			vcpu->run->internal.suberror =
+				KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
+			vcpu->run->internal.ndata = 2;
+			vcpu->run->internal.data[0] = EXIT_REASON_EPT_VIOLATION;
+			vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
 			return 0;
 		}
 	}

Thank you

Reinette
Isaku Yamahata May 1, 2024, 3:56 p.m. UTC | #5
On Tue, Apr 30, 2024 at 01:47:07PM -0700,
Reinette Chatre <reinette.chatre@intel.com> wrote:

> Hi Isaku,
> 
> On 4/3/2024 11:42 AM, Isaku Yamahata wrote:
> > On Mon, Apr 01, 2024 at 12:10:58PM +0800,
> > Chao Gao <chao.gao@intel.com> wrote:
> > 
> >>> +static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu)
> >>> +{
> >>> +	unsigned long exit_qual;
> >>> +
> >>> +	if (kvm_is_private_gpa(vcpu->kvm, tdexit_gpa(vcpu))) {
> >>> +		/*
> >>> +		 * Always treat SEPT violations as write faults.  Ignore the
> >>> +		 * EXIT_QUALIFICATION reported by TDX-SEAM for SEPT violations.
> >>> +		 * TD private pages are always RWX in the SEPT tables,
> >>> +		 * i.e. they're always mapped writable.  Just as importantly,
> >>> +		 * treating SEPT violations as write faults is necessary to
> >>> +		 * avoid COW allocations, which will cause TDAUGPAGE failures
> >>> +		 * due to aliasing a single HPA to multiple GPAs.
> >>> +		 */
> >>> +#define TDX_SEPT_VIOLATION_EXIT_QUAL	EPT_VIOLATION_ACC_WRITE
> >>> +		exit_qual = TDX_SEPT_VIOLATION_EXIT_QUAL;
> >>> +	} else {
> >>> +		exit_qual = tdexit_exit_qual(vcpu);
> >>> +		if (exit_qual & EPT_VIOLATION_ACC_INSTR) {
> >>
> >> Unless the CPU has a bug, instruction fetch in TD from shared memory causes a
> >> #PF. I think you can add a comment for this.
> > 
> > Yes.
> > 
> > 
> >> Maybe KVM_BUG_ON() is more appropriate as it signifies a potential bug.
> > 
> > Bug of what component? CPU. If so, I think KVM_EXIT_INTERNAL_ERROR +
> > KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON is more appropriate.
> > 
> 
> Is below what you have in mind?

Yes. data[0] should be the raw value of exit reason if possible.
data[2] should be exit_qual.  Hmm, I don't find document on data[] for
KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON.
Qemu doesn't assumt ndata = 2. Just report all data within ndata.


> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> index 499c6cd9633f..bd30b4c4d710 100644
> --- a/arch/x86/kvm/vmx/tdx.c
> +++ b/arch/x86/kvm/vmx/tdx.c
> @@ -1305,11 +1305,18 @@ static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu)
>  	} else {
>  		exit_qual = tdexit_exit_qual(vcpu);
>  		if (exit_qual & EPT_VIOLATION_ACC_INSTR) {
> +			/*
> +			 * Instruction fetch in TD from shared memory
> +			 * causes a #PF.
> +			 */
>  			pr_warn("kvm: TDX instr fetch to shared GPA = 0x%lx @ RIP = 0x%lx\n",
>  				tdexit_gpa(vcpu), kvm_rip_read(vcpu));
> -			vcpu->run->exit_reason = KVM_EXIT_EXCEPTION;
> -			vcpu->run->ex.exception = PF_VECTOR;
> -			vcpu->run->ex.error_code = exit_qual;
> +			vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
> +			vcpu->run->internal.suberror =
> +				KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
> +			vcpu->run->internal.ndata = 2;
> +			vcpu->run->internal.data[0] = EXIT_REASON_EPT_VIOLATION;
> +			vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
>  			return 0;
>  		}
>  	}
> 
> Thank you
> 
> Reinette
> 
> 
>
Reinette Chatre May 1, 2024, 4:54 p.m. UTC | #6
Hi Isaku,

On 5/1/2024 8:56 AM, Isaku Yamahata wrote:
> On Tue, Apr 30, 2024 at 01:47:07PM -0700,
> Reinette Chatre <reinette.chatre@intel.com> wrote:
>> On 4/3/2024 11:42 AM, Isaku Yamahata wrote:
>>> On Mon, Apr 01, 2024 at 12:10:58PM +0800,
>>> Chao Gao <chao.gao@intel.com> wrote:
>>>
>>>>> +static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu)
>>>>> +{
>>>>> +	unsigned long exit_qual;
>>>>> +
>>>>> +	if (kvm_is_private_gpa(vcpu->kvm, tdexit_gpa(vcpu))) {
>>>>> +		/*
>>>>> +		 * Always treat SEPT violations as write faults.  Ignore the
>>>>> +		 * EXIT_QUALIFICATION reported by TDX-SEAM for SEPT violations.
>>>>> +		 * TD private pages are always RWX in the SEPT tables,
>>>>> +		 * i.e. they're always mapped writable.  Just as importantly,
>>>>> +		 * treating SEPT violations as write faults is necessary to
>>>>> +		 * avoid COW allocations, which will cause TDAUGPAGE failures
>>>>> +		 * due to aliasing a single HPA to multiple GPAs.
>>>>> +		 */
>>>>> +#define TDX_SEPT_VIOLATION_EXIT_QUAL	EPT_VIOLATION_ACC_WRITE
>>>>> +		exit_qual = TDX_SEPT_VIOLATION_EXIT_QUAL;
>>>>> +	} else {
>>>>> +		exit_qual = tdexit_exit_qual(vcpu);
>>>>> +		if (exit_qual & EPT_VIOLATION_ACC_INSTR) {
>>>>
>>>> Unless the CPU has a bug, instruction fetch in TD from shared memory causes a
>>>> #PF. I think you can add a comment for this.
>>>
>>> Yes.
>>>
>>>
>>>> Maybe KVM_BUG_ON() is more appropriate as it signifies a potential bug.
>>>
>>> Bug of what component? CPU. If so, I think KVM_EXIT_INTERNAL_ERROR +
>>> KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON is more appropriate.
>>>
>>
>> Is below what you have in mind?
> 
> Yes. data[0] should be the raw value of exit reason if possible.
> data[2] should be exit_qual.  Hmm, I don't find document on data[] for

Did you perhaps intend to write "data[1] should be exit_qual" or would you
like to see ndata = 3? I followed existing usages, for example [1] and [2],
that have ndata = 2 with "data[1] = vcpu->arch.last_vmentry_cpu".

> KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON.
> Qemu doesn't assumt ndata = 2. Just report all data within ndata.

I am not sure I interpreted your response correctly so I share one possible
snippet below as I interpret it. Could you please check where I misinterpreted
you? I could also make ndata = 3 to break the existing custom and add
"data[2] = vcpu->arch.last_vmentry_cpu" to match existing pattern. What do you
think?

diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 499c6cd9633f..ba81e6f68c97 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -1305,11 +1305,20 @@ static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu)
 	} else {
 		exit_qual = tdexit_exit_qual(vcpu);
 		if (exit_qual & EPT_VIOLATION_ACC_INSTR) {
+			union tdx_exit_reason exit_reason = to_tdx(vcpu)->exit_reason;
+
+			/*
+			 * Instruction fetch in TD from shared memory
+			 * causes a #PF.
+			 */
 			pr_warn("kvm: TDX instr fetch to shared GPA = 0x%lx @ RIP = 0x%lx\n",
 				tdexit_gpa(vcpu), kvm_rip_read(vcpu));
-			vcpu->run->exit_reason = KVM_EXIT_EXCEPTION;
-			vcpu->run->ex.exception = PF_VECTOR;
-			vcpu->run->ex.error_code = exit_qual;
+			vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+			vcpu->run->internal.suberror =
+				KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
+			vcpu->run->internal.ndata = 2;
+			vcpu->run->internal.data[0] = exit_reason.full;
+			vcpu->run->internal.data[1] = exit_qual;
 			return 0;
 		}
 	}

Reinette

[1] https://github.com/kvm-x86/linux/blob/next/arch/x86/kvm/vmx/vmx.c#L6587
[2] https://github.com/kvm-x86/linux/blob/next/arch/x86/kvm/svm/svm.c#L3436
Isaku Yamahata May 1, 2024, 6:19 p.m. UTC | #7
On Wed, May 01, 2024 at 09:54:07AM -0700,
Reinette Chatre <reinette.chatre@intel.com> wrote:

> Hi Isaku,
> 
> On 5/1/2024 8:56 AM, Isaku Yamahata wrote:
> > On Tue, Apr 30, 2024 at 01:47:07PM -0700,
> > Reinette Chatre <reinette.chatre@intel.com> wrote:
> >> On 4/3/2024 11:42 AM, Isaku Yamahata wrote:
> >>> On Mon, Apr 01, 2024 at 12:10:58PM +0800,
> >>> Chao Gao <chao.gao@intel.com> wrote:
> >>>
> >>>>> +static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu)
> >>>>> +{
> >>>>> +	unsigned long exit_qual;
> >>>>> +
> >>>>> +	if (kvm_is_private_gpa(vcpu->kvm, tdexit_gpa(vcpu))) {
> >>>>> +		/*
> >>>>> +		 * Always treat SEPT violations as write faults.  Ignore the
> >>>>> +		 * EXIT_QUALIFICATION reported by TDX-SEAM for SEPT violations.
> >>>>> +		 * TD private pages are always RWX in the SEPT tables,
> >>>>> +		 * i.e. they're always mapped writable.  Just as importantly,
> >>>>> +		 * treating SEPT violations as write faults is necessary to
> >>>>> +		 * avoid COW allocations, which will cause TDAUGPAGE failures
> >>>>> +		 * due to aliasing a single HPA to multiple GPAs.
> >>>>> +		 */
> >>>>> +#define TDX_SEPT_VIOLATION_EXIT_QUAL	EPT_VIOLATION_ACC_WRITE
> >>>>> +		exit_qual = TDX_SEPT_VIOLATION_EXIT_QUAL;
> >>>>> +	} else {
> >>>>> +		exit_qual = tdexit_exit_qual(vcpu);
> >>>>> +		if (exit_qual & EPT_VIOLATION_ACC_INSTR) {
> >>>>
> >>>> Unless the CPU has a bug, instruction fetch in TD from shared memory causes a
> >>>> #PF. I think you can add a comment for this.
> >>>
> >>> Yes.
> >>>
> >>>
> >>>> Maybe KVM_BUG_ON() is more appropriate as it signifies a potential bug.
> >>>
> >>> Bug of what component? CPU. If so, I think KVM_EXIT_INTERNAL_ERROR +
> >>> KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON is more appropriate.
> >>>
> >>
> >> Is below what you have in mind?
> > 
> > Yes. data[0] should be the raw value of exit reason if possible.
> > data[2] should be exit_qual.  Hmm, I don't find document on data[] for
> 
> Did you perhaps intend to write "data[1] should be exit_qual" or would you
> like to see ndata = 3? I followed existing usages, for example [1] and [2],
> that have ndata = 2 with "data[1] = vcpu->arch.last_vmentry_cpu".
> 
> > KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON.
> > Qemu doesn't assumt ndata = 2. Just report all data within ndata.
> 
> I am not sure I interpreted your response correctly so I share one possible
> snippet below as I interpret it. Could you please check where I misinterpreted
> you? I could also make ndata = 3 to break the existing custom and add
> "data[2] = vcpu->arch.last_vmentry_cpu" to match existing pattern. What do you
> think?
> 
Sorry, I wasn't clear enough. I meant
  ndata = 3;
  data[0] = exit_reason.full;
  data[1] = vcpu->arch.last_vmentry_cpu;
  data[2] = exit_qual;

Because I hesitate to change the meaning of data[1] from other usage, I
appended exit_qual as data[2].
Reinette Chatre May 1, 2024, 6:22 p.m. UTC | #8
Hi Isaku,

On 5/1/2024 11:19 AM, Isaku Yamahata wrote:
> On Wed, May 01, 2024 at 09:54:07AM -0700,
> Reinette Chatre <reinette.chatre@intel.com> wrote:
> 
>> Hi Isaku,
>>
>> On 5/1/2024 8:56 AM, Isaku Yamahata wrote:
>>> On Tue, Apr 30, 2024 at 01:47:07PM -0700,
>>> Reinette Chatre <reinette.chatre@intel.com> wrote:
>>>> On 4/3/2024 11:42 AM, Isaku Yamahata wrote:
>>>>> On Mon, Apr 01, 2024 at 12:10:58PM +0800,
>>>>> Chao Gao <chao.gao@intel.com> wrote:
>>>>>
>>>>>>> +static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu)
>>>>>>> +{
>>>>>>> +	unsigned long exit_qual;
>>>>>>> +
>>>>>>> +	if (kvm_is_private_gpa(vcpu->kvm, tdexit_gpa(vcpu))) {
>>>>>>> +		/*
>>>>>>> +		 * Always treat SEPT violations as write faults.  Ignore the
>>>>>>> +		 * EXIT_QUALIFICATION reported by TDX-SEAM for SEPT violations.
>>>>>>> +		 * TD private pages are always RWX in the SEPT tables,
>>>>>>> +		 * i.e. they're always mapped writable.  Just as importantly,
>>>>>>> +		 * treating SEPT violations as write faults is necessary to
>>>>>>> +		 * avoid COW allocations, which will cause TDAUGPAGE failures
>>>>>>> +		 * due to aliasing a single HPA to multiple GPAs.
>>>>>>> +		 */
>>>>>>> +#define TDX_SEPT_VIOLATION_EXIT_QUAL	EPT_VIOLATION_ACC_WRITE
>>>>>>> +		exit_qual = TDX_SEPT_VIOLATION_EXIT_QUAL;
>>>>>>> +	} else {
>>>>>>> +		exit_qual = tdexit_exit_qual(vcpu);
>>>>>>> +		if (exit_qual & EPT_VIOLATION_ACC_INSTR) {
>>>>>>
>>>>>> Unless the CPU has a bug, instruction fetch in TD from shared memory causes a
>>>>>> #PF. I think you can add a comment for this.
>>>>>
>>>>> Yes.
>>>>>
>>>>>
>>>>>> Maybe KVM_BUG_ON() is more appropriate as it signifies a potential bug.
>>>>>
>>>>> Bug of what component? CPU. If so, I think KVM_EXIT_INTERNAL_ERROR +
>>>>> KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON is more appropriate.
>>>>>
>>>>
>>>> Is below what you have in mind?
>>>
>>> Yes. data[0] should be the raw value of exit reason if possible.
>>> data[2] should be exit_qual.  Hmm, I don't find document on data[] for
>>
>> Did you perhaps intend to write "data[1] should be exit_qual" or would you
>> like to see ndata = 3? I followed existing usages, for example [1] and [2],
>> that have ndata = 2 with "data[1] = vcpu->arch.last_vmentry_cpu".
>>
>>> KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON.
>>> Qemu doesn't assumt ndata = 2. Just report all data within ndata.
>>
>> I am not sure I interpreted your response correctly so I share one possible
>> snippet below as I interpret it. Could you please check where I misinterpreted
>> you? I could also make ndata = 3 to break the existing custom and add
>> "data[2] = vcpu->arch.last_vmentry_cpu" to match existing pattern. What do you
>> think?
>>
> Sorry, I wasn't clear enough. I meant
>   ndata = 3;
>   data[0] = exit_reason.full;
>   data[1] = vcpu->arch.last_vmentry_cpu;
>   data[2] = exit_qual;
> 
> Because I hesitate to change the meaning of data[1] from other usage, I
> appended exit_qual as data[2].

I understand it now. Thank you very much Isaku.

Reinette
Chao Gao May 6, 2024, 7:21 a.m. UTC | #9
On Wed, May 01, 2024 at 09:54:07AM -0700, Reinette Chatre wrote:
>diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
>index 499c6cd9633f..ba81e6f68c97 100644
>--- a/arch/x86/kvm/vmx/tdx.c
>+++ b/arch/x86/kvm/vmx/tdx.c
>@@ -1305,11 +1305,20 @@ static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu)
> 	} else {
> 		exit_qual = tdexit_exit_qual(vcpu);
> 		if (exit_qual & EPT_VIOLATION_ACC_INSTR) {
>+			union tdx_exit_reason exit_reason = to_tdx(vcpu)->exit_reason;
>+
>+			/*
>+			 * Instruction fetch in TD from shared memory
>+			 * causes a #PF.
>+			 */

It is better to hoist this comment above the if-statement.

		/*
		 * Instruction fetch in TD from shared memory never causes EPT
		 * violation. Warn if such an EPT violation occurs as the CPU
		 * probably is buggy.
		 */
 		if (exit_qual & EPT_VIOLATION_ACC_INSTR) {
		...
		}


but, I am wondering why KVM needs to perform this check. I prefer to drop this
check if KVM doesn't rely on it to handle EPT violation correctly.

> 			pr_warn("kvm: TDX instr fetch to shared GPA = 0x%lx @ RIP = 0x%lx\n",
> 				tdexit_gpa(vcpu), kvm_rip_read(vcpu));

how about using vcpu_unimpl()? it is a wrapper for printing such a message.

>-			vcpu->run->exit_reason = KVM_EXIT_EXCEPTION;
>-			vcpu->run->ex.exception = PF_VECTOR;
>-			vcpu->run->ex.error_code = exit_qual;
>+			vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
>+			vcpu->run->internal.suberror =
>+				KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
>+			vcpu->run->internal.ndata = 2;
>+			vcpu->run->internal.data[0] = exit_reason.full;
>+			vcpu->run->internal.data[1] = exit_qual;
> 			return 0;
> 		}
> 	}
>
>Reinette
>
>[1] https://github.com/kvm-x86/linux/blob/next/arch/x86/kvm/vmx/vmx.c#L6587
>[2] https://github.com/kvm-x86/linux/blob/next/arch/x86/kvm/svm/svm.c#L3436
Sean Christopherson May 6, 2024, 2:21 p.m. UTC | #10
On Mon, May 06, 2024, Chao Gao wrote:
> On Wed, May 01, 2024 at 09:54:07AM -0700, Reinette Chatre wrote:
> >diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> >index 499c6cd9633f..ba81e6f68c97 100644
> >--- a/arch/x86/kvm/vmx/tdx.c
> >+++ b/arch/x86/kvm/vmx/tdx.c
> >@@ -1305,11 +1305,20 @@ static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu)
> > 	} else {
> > 		exit_qual = tdexit_exit_qual(vcpu);
> > 		if (exit_qual & EPT_VIOLATION_ACC_INSTR) {
> >+			union tdx_exit_reason exit_reason = to_tdx(vcpu)->exit_reason;
> >+
> >+			/*
> >+			 * Instruction fetch in TD from shared memory
> >+			 * causes a #PF.
> >+			 */
> 
> It is better to hoist this comment above the if-statement.
> 
> 		/*
> 		 * Instruction fetch in TD from shared memory never causes EPT
> 		 * violation. Warn if such an EPT violation occurs as the CPU
> 		 * probably is buggy.
> 		 */
>  		if (exit_qual & EPT_VIOLATION_ACC_INSTR) {
> 		...
> 		}
> 
> 
> but, I am wondering why KVM needs to perform this check. I prefer to drop this
> check if KVM doesn't rely on it to handle EPT violation correctly.
> 
> > 			pr_warn("kvm: TDX instr fetch to shared GPA = 0x%lx @ RIP = 0x%lx\n",
> > 				tdexit_gpa(vcpu), kvm_rip_read(vcpu));
> 
> how about using vcpu_unimpl()? it is a wrapper for printing such a message.

This isn't an "unimplemented" scenario in KVM, this is a "hardware is broken"
scenario.  Just WARN_ON_ONCE() and move on.  Or I suppose since EPT misconfig
needs to do something as well:

		if (KVM_BUG_ON(exit_qual & EPT_VIOLATION_ACC_INSTR, vcpu->kvm))
			return -EIO;
Sean Christopherson May 6, 2024, 2:22 p.m. UTC | #11
On Mon, Feb 26, 2024, isaku.yamahata@intel.com wrote:
> +static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu)
> +{
> +	unsigned long exit_qual;
> +
> +	if (kvm_is_private_gpa(vcpu->kvm, tdexit_gpa(vcpu))) {
> +		/*
> +		 * Always treat SEPT violations as write faults.  Ignore the
> +		 * EXIT_QUALIFICATION reported by TDX-SEAM for SEPT violations.
> +		 * TD private pages are always RWX in the SEPT tables,
> +		 * i.e. they're always mapped writable.  Just as importantly,
> +		 * treating SEPT violations as write faults is necessary to
> +		 * avoid COW allocations, which will cause TDAUGPAGE failures
> +		 * due to aliasing a single HPA to multiple GPAs.
> +		 */
> +#define TDX_SEPT_VIOLATION_EXIT_QUAL	EPT_VIOLATION_ACC_WRITE

This does not needd a #define.  It's use in literally one place, one line below.
diff mbox series

Patch

diff --git a/arch/x86/kvm/vmx/common.h b/arch/x86/kvm/vmx/common.h
index 632af7a76d0a..027aa4175d2c 100644
--- a/arch/x86/kvm/vmx/common.h
+++ b/arch/x86/kvm/vmx/common.h
@@ -87,6 +87,9 @@  static inline int __vmx_handle_ept_violation(struct kvm_vcpu *vcpu, gpa_t gpa,
 	error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) != 0 ?
 	       PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;
 
+	if (kvm_is_private_gpa(vcpu->kvm, gpa))
+		error_code |= PFERR_GUEST_ENC_MASK;
+
 	return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
 }
 
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 2f68e6f2b53a..0db80fa020d2 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -1285,6 +1285,51 @@  void tdx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
 	__vmx_deliver_posted_interrupt(vcpu, &tdx->pi_desc, vector);
 }
 
+static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu)
+{
+	unsigned long exit_qual;
+
+	if (kvm_is_private_gpa(vcpu->kvm, tdexit_gpa(vcpu))) {
+		/*
+		 * Always treat SEPT violations as write faults.  Ignore the
+		 * EXIT_QUALIFICATION reported by TDX-SEAM for SEPT violations.
+		 * TD private pages are always RWX in the SEPT tables,
+		 * i.e. they're always mapped writable.  Just as importantly,
+		 * treating SEPT violations as write faults is necessary to
+		 * avoid COW allocations, which will cause TDAUGPAGE failures
+		 * due to aliasing a single HPA to multiple GPAs.
+		 */
+#define TDX_SEPT_VIOLATION_EXIT_QUAL	EPT_VIOLATION_ACC_WRITE
+		exit_qual = TDX_SEPT_VIOLATION_EXIT_QUAL;
+	} else {
+		exit_qual = tdexit_exit_qual(vcpu);
+		if (exit_qual & EPT_VIOLATION_ACC_INSTR) {
+			pr_warn("kvm: TDX instr fetch to shared GPA = 0x%lx @ RIP = 0x%lx\n",
+				tdexit_gpa(vcpu), kvm_rip_read(vcpu));
+			vcpu->run->exit_reason = KVM_EXIT_EXCEPTION;
+			vcpu->run->ex.exception = PF_VECTOR;
+			vcpu->run->ex.error_code = exit_qual;
+			return 0;
+		}
+	}
+
+	trace_kvm_page_fault(vcpu, tdexit_gpa(vcpu), exit_qual);
+	return __vmx_handle_ept_violation(vcpu, tdexit_gpa(vcpu), exit_qual);
+}
+
+static int tdx_handle_ept_misconfig(struct kvm_vcpu *vcpu)
+{
+	WARN_ON_ONCE(1);
+
+	vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+	vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
+	vcpu->run->internal.ndata = 2;
+	vcpu->run->internal.data[0] = EXIT_REASON_EPT_MISCONFIG;
+	vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
+
+	return 0;
+}
+
 int tdx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t fastpath)
 {
 	union tdx_exit_reason exit_reason = to_tdx(vcpu)->exit_reason;
@@ -1345,6 +1390,10 @@  int tdx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t fastpath)
 	WARN_ON_ONCE(fastpath != EXIT_FASTPATH_NONE);
 
 	switch (exit_reason.basic) {
+	case EXIT_REASON_EPT_VIOLATION:
+		return tdx_handle_ept_violation(vcpu);
+	case EXIT_REASON_EPT_MISCONFIG:
+		return tdx_handle_ept_misconfig(vcpu);
 	case EXIT_REASON_OTHER_SMI:
 		/*
 		 * If reach here, it's not a Machine Check System Management