Message ID | f05b978021522d70a259472337e0b53658d47636.1708933498.git.isaku.yamahata@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v19,001/130] x86/virt/tdx: Rename _offset to _member for TD_SYSINFO_MAP() macro | expand |
On 2/26/2024 4:26 PM, isaku.yamahata@intel.com wrote: > From: Isaku Yamahata <isaku.yamahata@intel.com> > > On EPT violation, call a common function, __vmx_handle_ept_violation() to > trigger x86 MMU code. On EPT misconfiguration, exit to ring 3 with > KVM_EXIT_UNKNOWN. because EPT misconfiguration can't happen as MMIO is > trigged by TDG.VP.VMCALL. No point to set a misconfiguration value for the s/trigged/triggered > fast path. > > Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com> > > --- > v14 -> v15: > - use PFERR_GUEST_ENC_MASK to tell the fault is private > > Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com> duplicated SOB > --- > arch/x86/kvm/vmx/common.h | 3 +++ > arch/x86/kvm/vmx/tdx.c | 49 +++++++++++++++++++++++++++++++++++++++ > 2 files changed, 52 insertions(+) > > diff --git a/arch/x86/kvm/vmx/common.h b/arch/x86/kvm/vmx/common.h > index 632af7a76d0a..027aa4175d2c 100644 > --- a/arch/x86/kvm/vmx/common.h > +++ b/arch/x86/kvm/vmx/common.h > @@ -87,6 +87,9 @@ static inline int __vmx_handle_ept_violation(struct kvm_vcpu *vcpu, gpa_t gpa, > error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) != 0 ? > PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK; > > + if (kvm_is_private_gpa(vcpu->kvm, gpa)) > + error_code |= PFERR_GUEST_ENC_MASK; > + > return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); > } > > diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c > index 2f68e6f2b53a..0db80fa020d2 100644 > --- a/arch/x86/kvm/vmx/tdx.c > +++ b/arch/x86/kvm/vmx/tdx.c > @@ -1285,6 +1285,51 @@ void tdx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode, > __vmx_deliver_posted_interrupt(vcpu, &tdx->pi_desc, vector); > } > > +static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu) > +{ > + unsigned long exit_qual; > + > + if (kvm_is_private_gpa(vcpu->kvm, tdexit_gpa(vcpu))) { > + /* > + * Always treat SEPT violations as write faults. Ignore the > + * EXIT_QUALIFICATION reported by TDX-SEAM for SEPT violations. > + * TD private pages are always RWX in the SEPT tables, > + * i.e. they're always mapped writable. Just as importantly, > + * treating SEPT violations as write faults is necessary to > + * avoid COW allocations, which will cause TDAUGPAGE failures > + * due to aliasing a single HPA to multiple GPAs. > + */ > +#define TDX_SEPT_VIOLATION_EXIT_QUAL EPT_VIOLATION_ACC_WRITE > + exit_qual = TDX_SEPT_VIOLATION_EXIT_QUAL; > + } else { > + exit_qual = tdexit_exit_qual(vcpu); > + if (exit_qual & EPT_VIOLATION_ACC_INSTR) { > + pr_warn("kvm: TDX instr fetch to shared GPA = 0x%lx @ RIP = 0x%lx\n", > + tdexit_gpa(vcpu), kvm_rip_read(vcpu)); > + vcpu->run->exit_reason = KVM_EXIT_EXCEPTION; > + vcpu->run->ex.exception = PF_VECTOR; > + vcpu->run->ex.error_code = exit_qual; > + return 0; > + } > + } > + > + trace_kvm_page_fault(vcpu, tdexit_gpa(vcpu), exit_qual); > + return __vmx_handle_ept_violation(vcpu, tdexit_gpa(vcpu), exit_qual); > +} > + > +static int tdx_handle_ept_misconfig(struct kvm_vcpu *vcpu) > +{ > + WARN_ON_ONCE(1); > + > + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; > + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON; > + vcpu->run->internal.ndata = 2; > + vcpu->run->internal.data[0] = EXIT_REASON_EPT_MISCONFIG; > + vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu; > + > + return 0; > +} > + > int tdx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t fastpath) > { > union tdx_exit_reason exit_reason = to_tdx(vcpu)->exit_reason; > @@ -1345,6 +1390,10 @@ int tdx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t fastpath) > WARN_ON_ONCE(fastpath != EXIT_FASTPATH_NONE); > > switch (exit_reason.basic) { > + case EXIT_REASON_EPT_VIOLATION: > + return tdx_handle_ept_violation(vcpu); > + case EXIT_REASON_EPT_MISCONFIG: > + return tdx_handle_ept_misconfig(vcpu); > case EXIT_REASON_OTHER_SMI: > /* > * If reach here, it's not a Machine Check System Management
>+static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu) >+{ >+ unsigned long exit_qual; >+ >+ if (kvm_is_private_gpa(vcpu->kvm, tdexit_gpa(vcpu))) { >+ /* >+ * Always treat SEPT violations as write faults. Ignore the >+ * EXIT_QUALIFICATION reported by TDX-SEAM for SEPT violations. >+ * TD private pages are always RWX in the SEPT tables, >+ * i.e. they're always mapped writable. Just as importantly, >+ * treating SEPT violations as write faults is necessary to >+ * avoid COW allocations, which will cause TDAUGPAGE failures >+ * due to aliasing a single HPA to multiple GPAs. >+ */ >+#define TDX_SEPT_VIOLATION_EXIT_QUAL EPT_VIOLATION_ACC_WRITE >+ exit_qual = TDX_SEPT_VIOLATION_EXIT_QUAL; >+ } else { >+ exit_qual = tdexit_exit_qual(vcpu); >+ if (exit_qual & EPT_VIOLATION_ACC_INSTR) { Unless the CPU has a bug, instruction fetch in TD from shared memory causes a #PF. I think you can add a comment for this. Maybe KVM_BUG_ON() is more appropriate as it signifies a potential bug. >+ pr_warn("kvm: TDX instr fetch to shared GPA = 0x%lx @ RIP = 0x%lx\n", >+ tdexit_gpa(vcpu), kvm_rip_read(vcpu)); >+ vcpu->run->exit_reason = KVM_EXIT_EXCEPTION; >+ vcpu->run->ex.exception = PF_VECTOR; >+ vcpu->run->ex.error_code = exit_qual; >+ return 0; >+ } >+ } >+ >+ trace_kvm_page_fault(vcpu, tdexit_gpa(vcpu), exit_qual); >+ return __vmx_handle_ept_violation(vcpu, tdexit_gpa(vcpu), exit_qual); >+} >+ >+static int tdx_handle_ept_misconfig(struct kvm_vcpu *vcpu) >+{ >+ WARN_ON_ONCE(1); >+ >+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; >+ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON; >+ vcpu->run->internal.ndata = 2; >+ vcpu->run->internal.data[0] = EXIT_REASON_EPT_MISCONFIG; >+ vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu; >+ >+ return 0; >+} >+ > int tdx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t fastpath) > { > union tdx_exit_reason exit_reason = to_tdx(vcpu)->exit_reason; >@@ -1345,6 +1390,10 @@ int tdx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t fastpath) > WARN_ON_ONCE(fastpath != EXIT_FASTPATH_NONE); > > switch (exit_reason.basic) { >+ case EXIT_REASON_EPT_VIOLATION: >+ return tdx_handle_ept_violation(vcpu); >+ case EXIT_REASON_EPT_MISCONFIG: >+ return tdx_handle_ept_misconfig(vcpu); Handling EPT misconfiguration can be dropped because the "default" case handles all unexpected exits in the same way > case EXIT_REASON_OTHER_SMI: > /* > * If reach here, it's not a Machine Check System Management >-- >2.25.1 > >
On Mon, Apr 01, 2024 at 12:10:58PM +0800, Chao Gao <chao.gao@intel.com> wrote: > >+static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu) > >+{ > >+ unsigned long exit_qual; > >+ > >+ if (kvm_is_private_gpa(vcpu->kvm, tdexit_gpa(vcpu))) { > >+ /* > >+ * Always treat SEPT violations as write faults. Ignore the > >+ * EXIT_QUALIFICATION reported by TDX-SEAM for SEPT violations. > >+ * TD private pages are always RWX in the SEPT tables, > >+ * i.e. they're always mapped writable. Just as importantly, > >+ * treating SEPT violations as write faults is necessary to > >+ * avoid COW allocations, which will cause TDAUGPAGE failures > >+ * due to aliasing a single HPA to multiple GPAs. > >+ */ > >+#define TDX_SEPT_VIOLATION_EXIT_QUAL EPT_VIOLATION_ACC_WRITE > >+ exit_qual = TDX_SEPT_VIOLATION_EXIT_QUAL; > >+ } else { > >+ exit_qual = tdexit_exit_qual(vcpu); > >+ if (exit_qual & EPT_VIOLATION_ACC_INSTR) { > > Unless the CPU has a bug, instruction fetch in TD from shared memory causes a > #PF. I think you can add a comment for this. Yes. > Maybe KVM_BUG_ON() is more appropriate as it signifies a potential bug. Bug of what component? CPU. If so, I think KVM_EXIT_INTERNAL_ERROR + KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON is more appropriate. > >+ pr_warn("kvm: TDX instr fetch to shared GPA = 0x%lx @ RIP = 0x%lx\n", > >+ tdexit_gpa(vcpu), kvm_rip_read(vcpu)); > >+ vcpu->run->exit_reason = KVM_EXIT_EXCEPTION; > >+ vcpu->run->ex.exception = PF_VECTOR; > >+ vcpu->run->ex.error_code = exit_qual; > >+ return 0; > >+ } > >+ } > >+ > >+ trace_kvm_page_fault(vcpu, tdexit_gpa(vcpu), exit_qual); > >+ return __vmx_handle_ept_violation(vcpu, tdexit_gpa(vcpu), exit_qual); > >+} > >+ > >+static int tdx_handle_ept_misconfig(struct kvm_vcpu *vcpu) > >+{ > >+ WARN_ON_ONCE(1); > >+ > >+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; > >+ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON; > >+ vcpu->run->internal.ndata = 2; > >+ vcpu->run->internal.data[0] = EXIT_REASON_EPT_MISCONFIG; > >+ vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu; > >+ > >+ return 0; > >+} > >+ > > int tdx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t fastpath) > > { > > union tdx_exit_reason exit_reason = to_tdx(vcpu)->exit_reason; > >@@ -1345,6 +1390,10 @@ int tdx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t fastpath) > > WARN_ON_ONCE(fastpath != EXIT_FASTPATH_NONE); > > > > switch (exit_reason.basic) { > >+ case EXIT_REASON_EPT_VIOLATION: > >+ return tdx_handle_ept_violation(vcpu); > >+ case EXIT_REASON_EPT_MISCONFIG: > >+ return tdx_handle_ept_misconfig(vcpu); > > Handling EPT misconfiguration can be dropped because the "default" case handles > all unexpected exits in the same way Ah, right. Will update it.
Hi Isaku, On 4/3/2024 11:42 AM, Isaku Yamahata wrote: > On Mon, Apr 01, 2024 at 12:10:58PM +0800, > Chao Gao <chao.gao@intel.com> wrote: > >>> +static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu) >>> +{ >>> + unsigned long exit_qual; >>> + >>> + if (kvm_is_private_gpa(vcpu->kvm, tdexit_gpa(vcpu))) { >>> + /* >>> + * Always treat SEPT violations as write faults. Ignore the >>> + * EXIT_QUALIFICATION reported by TDX-SEAM for SEPT violations. >>> + * TD private pages are always RWX in the SEPT tables, >>> + * i.e. they're always mapped writable. Just as importantly, >>> + * treating SEPT violations as write faults is necessary to >>> + * avoid COW allocations, which will cause TDAUGPAGE failures >>> + * due to aliasing a single HPA to multiple GPAs. >>> + */ >>> +#define TDX_SEPT_VIOLATION_EXIT_QUAL EPT_VIOLATION_ACC_WRITE >>> + exit_qual = TDX_SEPT_VIOLATION_EXIT_QUAL; >>> + } else { >>> + exit_qual = tdexit_exit_qual(vcpu); >>> + if (exit_qual & EPT_VIOLATION_ACC_INSTR) { >> >> Unless the CPU has a bug, instruction fetch in TD from shared memory causes a >> #PF. I think you can add a comment for this. > > Yes. > > >> Maybe KVM_BUG_ON() is more appropriate as it signifies a potential bug. > > Bug of what component? CPU. If so, I think KVM_EXIT_INTERNAL_ERROR + > KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON is more appropriate. > Is below what you have in mind? diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c index 499c6cd9633f..bd30b4c4d710 100644 --- a/arch/x86/kvm/vmx/tdx.c +++ b/arch/x86/kvm/vmx/tdx.c @@ -1305,11 +1305,18 @@ static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu) } else { exit_qual = tdexit_exit_qual(vcpu); if (exit_qual & EPT_VIOLATION_ACC_INSTR) { + /* + * Instruction fetch in TD from shared memory + * causes a #PF. + */ pr_warn("kvm: TDX instr fetch to shared GPA = 0x%lx @ RIP = 0x%lx\n", tdexit_gpa(vcpu), kvm_rip_read(vcpu)); - vcpu->run->exit_reason = KVM_EXIT_EXCEPTION; - vcpu->run->ex.exception = PF_VECTOR; - vcpu->run->ex.error_code = exit_qual; + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = + KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON; + vcpu->run->internal.ndata = 2; + vcpu->run->internal.data[0] = EXIT_REASON_EPT_VIOLATION; + vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu; return 0; } } Thank you Reinette
On Tue, Apr 30, 2024 at 01:47:07PM -0700, Reinette Chatre <reinette.chatre@intel.com> wrote: > Hi Isaku, > > On 4/3/2024 11:42 AM, Isaku Yamahata wrote: > > On Mon, Apr 01, 2024 at 12:10:58PM +0800, > > Chao Gao <chao.gao@intel.com> wrote: > > > >>> +static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu) > >>> +{ > >>> + unsigned long exit_qual; > >>> + > >>> + if (kvm_is_private_gpa(vcpu->kvm, tdexit_gpa(vcpu))) { > >>> + /* > >>> + * Always treat SEPT violations as write faults. Ignore the > >>> + * EXIT_QUALIFICATION reported by TDX-SEAM for SEPT violations. > >>> + * TD private pages are always RWX in the SEPT tables, > >>> + * i.e. they're always mapped writable. Just as importantly, > >>> + * treating SEPT violations as write faults is necessary to > >>> + * avoid COW allocations, which will cause TDAUGPAGE failures > >>> + * due to aliasing a single HPA to multiple GPAs. > >>> + */ > >>> +#define TDX_SEPT_VIOLATION_EXIT_QUAL EPT_VIOLATION_ACC_WRITE > >>> + exit_qual = TDX_SEPT_VIOLATION_EXIT_QUAL; > >>> + } else { > >>> + exit_qual = tdexit_exit_qual(vcpu); > >>> + if (exit_qual & EPT_VIOLATION_ACC_INSTR) { > >> > >> Unless the CPU has a bug, instruction fetch in TD from shared memory causes a > >> #PF. I think you can add a comment for this. > > > > Yes. > > > > > >> Maybe KVM_BUG_ON() is more appropriate as it signifies a potential bug. > > > > Bug of what component? CPU. If so, I think KVM_EXIT_INTERNAL_ERROR + > > KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON is more appropriate. > > > > Is below what you have in mind? Yes. data[0] should be the raw value of exit reason if possible. data[2] should be exit_qual. Hmm, I don't find document on data[] for KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON. Qemu doesn't assumt ndata = 2. Just report all data within ndata. > diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c > index 499c6cd9633f..bd30b4c4d710 100644 > --- a/arch/x86/kvm/vmx/tdx.c > +++ b/arch/x86/kvm/vmx/tdx.c > @@ -1305,11 +1305,18 @@ static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu) > } else { > exit_qual = tdexit_exit_qual(vcpu); > if (exit_qual & EPT_VIOLATION_ACC_INSTR) { > + /* > + * Instruction fetch in TD from shared memory > + * causes a #PF. > + */ > pr_warn("kvm: TDX instr fetch to shared GPA = 0x%lx @ RIP = 0x%lx\n", > tdexit_gpa(vcpu), kvm_rip_read(vcpu)); > - vcpu->run->exit_reason = KVM_EXIT_EXCEPTION; > - vcpu->run->ex.exception = PF_VECTOR; > - vcpu->run->ex.error_code = exit_qual; > + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; > + vcpu->run->internal.suberror = > + KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON; > + vcpu->run->internal.ndata = 2; > + vcpu->run->internal.data[0] = EXIT_REASON_EPT_VIOLATION; > + vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu; > return 0; > } > } > > Thank you > > Reinette > > >
Hi Isaku, On 5/1/2024 8:56 AM, Isaku Yamahata wrote: > On Tue, Apr 30, 2024 at 01:47:07PM -0700, > Reinette Chatre <reinette.chatre@intel.com> wrote: >> On 4/3/2024 11:42 AM, Isaku Yamahata wrote: >>> On Mon, Apr 01, 2024 at 12:10:58PM +0800, >>> Chao Gao <chao.gao@intel.com> wrote: >>> >>>>> +static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu) >>>>> +{ >>>>> + unsigned long exit_qual; >>>>> + >>>>> + if (kvm_is_private_gpa(vcpu->kvm, tdexit_gpa(vcpu))) { >>>>> + /* >>>>> + * Always treat SEPT violations as write faults. Ignore the >>>>> + * EXIT_QUALIFICATION reported by TDX-SEAM for SEPT violations. >>>>> + * TD private pages are always RWX in the SEPT tables, >>>>> + * i.e. they're always mapped writable. Just as importantly, >>>>> + * treating SEPT violations as write faults is necessary to >>>>> + * avoid COW allocations, which will cause TDAUGPAGE failures >>>>> + * due to aliasing a single HPA to multiple GPAs. >>>>> + */ >>>>> +#define TDX_SEPT_VIOLATION_EXIT_QUAL EPT_VIOLATION_ACC_WRITE >>>>> + exit_qual = TDX_SEPT_VIOLATION_EXIT_QUAL; >>>>> + } else { >>>>> + exit_qual = tdexit_exit_qual(vcpu); >>>>> + if (exit_qual & EPT_VIOLATION_ACC_INSTR) { >>>> >>>> Unless the CPU has a bug, instruction fetch in TD from shared memory causes a >>>> #PF. I think you can add a comment for this. >>> >>> Yes. >>> >>> >>>> Maybe KVM_BUG_ON() is more appropriate as it signifies a potential bug. >>> >>> Bug of what component? CPU. If so, I think KVM_EXIT_INTERNAL_ERROR + >>> KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON is more appropriate. >>> >> >> Is below what you have in mind? > > Yes. data[0] should be the raw value of exit reason if possible. > data[2] should be exit_qual. Hmm, I don't find document on data[] for Did you perhaps intend to write "data[1] should be exit_qual" or would you like to see ndata = 3? I followed existing usages, for example [1] and [2], that have ndata = 2 with "data[1] = vcpu->arch.last_vmentry_cpu". > KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON. > Qemu doesn't assumt ndata = 2. Just report all data within ndata. I am not sure I interpreted your response correctly so I share one possible snippet below as I interpret it. Could you please check where I misinterpreted you? I could also make ndata = 3 to break the existing custom and add "data[2] = vcpu->arch.last_vmentry_cpu" to match existing pattern. What do you think? diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c index 499c6cd9633f..ba81e6f68c97 100644 --- a/arch/x86/kvm/vmx/tdx.c +++ b/arch/x86/kvm/vmx/tdx.c @@ -1305,11 +1305,20 @@ static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu) } else { exit_qual = tdexit_exit_qual(vcpu); if (exit_qual & EPT_VIOLATION_ACC_INSTR) { + union tdx_exit_reason exit_reason = to_tdx(vcpu)->exit_reason; + + /* + * Instruction fetch in TD from shared memory + * causes a #PF. + */ pr_warn("kvm: TDX instr fetch to shared GPA = 0x%lx @ RIP = 0x%lx\n", tdexit_gpa(vcpu), kvm_rip_read(vcpu)); - vcpu->run->exit_reason = KVM_EXIT_EXCEPTION; - vcpu->run->ex.exception = PF_VECTOR; - vcpu->run->ex.error_code = exit_qual; + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = + KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON; + vcpu->run->internal.ndata = 2; + vcpu->run->internal.data[0] = exit_reason.full; + vcpu->run->internal.data[1] = exit_qual; return 0; } } Reinette [1] https://github.com/kvm-x86/linux/blob/next/arch/x86/kvm/vmx/vmx.c#L6587 [2] https://github.com/kvm-x86/linux/blob/next/arch/x86/kvm/svm/svm.c#L3436
On Wed, May 01, 2024 at 09:54:07AM -0700, Reinette Chatre <reinette.chatre@intel.com> wrote: > Hi Isaku, > > On 5/1/2024 8:56 AM, Isaku Yamahata wrote: > > On Tue, Apr 30, 2024 at 01:47:07PM -0700, > > Reinette Chatre <reinette.chatre@intel.com> wrote: > >> On 4/3/2024 11:42 AM, Isaku Yamahata wrote: > >>> On Mon, Apr 01, 2024 at 12:10:58PM +0800, > >>> Chao Gao <chao.gao@intel.com> wrote: > >>> > >>>>> +static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu) > >>>>> +{ > >>>>> + unsigned long exit_qual; > >>>>> + > >>>>> + if (kvm_is_private_gpa(vcpu->kvm, tdexit_gpa(vcpu))) { > >>>>> + /* > >>>>> + * Always treat SEPT violations as write faults. Ignore the > >>>>> + * EXIT_QUALIFICATION reported by TDX-SEAM for SEPT violations. > >>>>> + * TD private pages are always RWX in the SEPT tables, > >>>>> + * i.e. they're always mapped writable. Just as importantly, > >>>>> + * treating SEPT violations as write faults is necessary to > >>>>> + * avoid COW allocations, which will cause TDAUGPAGE failures > >>>>> + * due to aliasing a single HPA to multiple GPAs. > >>>>> + */ > >>>>> +#define TDX_SEPT_VIOLATION_EXIT_QUAL EPT_VIOLATION_ACC_WRITE > >>>>> + exit_qual = TDX_SEPT_VIOLATION_EXIT_QUAL; > >>>>> + } else { > >>>>> + exit_qual = tdexit_exit_qual(vcpu); > >>>>> + if (exit_qual & EPT_VIOLATION_ACC_INSTR) { > >>>> > >>>> Unless the CPU has a bug, instruction fetch in TD from shared memory causes a > >>>> #PF. I think you can add a comment for this. > >>> > >>> Yes. > >>> > >>> > >>>> Maybe KVM_BUG_ON() is more appropriate as it signifies a potential bug. > >>> > >>> Bug of what component? CPU. If so, I think KVM_EXIT_INTERNAL_ERROR + > >>> KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON is more appropriate. > >>> > >> > >> Is below what you have in mind? > > > > Yes. data[0] should be the raw value of exit reason if possible. > > data[2] should be exit_qual. Hmm, I don't find document on data[] for > > Did you perhaps intend to write "data[1] should be exit_qual" or would you > like to see ndata = 3? I followed existing usages, for example [1] and [2], > that have ndata = 2 with "data[1] = vcpu->arch.last_vmentry_cpu". > > > KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON. > > Qemu doesn't assumt ndata = 2. Just report all data within ndata. > > I am not sure I interpreted your response correctly so I share one possible > snippet below as I interpret it. Could you please check where I misinterpreted > you? I could also make ndata = 3 to break the existing custom and add > "data[2] = vcpu->arch.last_vmentry_cpu" to match existing pattern. What do you > think? > Sorry, I wasn't clear enough. I meant ndata = 3; data[0] = exit_reason.full; data[1] = vcpu->arch.last_vmentry_cpu; data[2] = exit_qual; Because I hesitate to change the meaning of data[1] from other usage, I appended exit_qual as data[2].
Hi Isaku, On 5/1/2024 11:19 AM, Isaku Yamahata wrote: > On Wed, May 01, 2024 at 09:54:07AM -0700, > Reinette Chatre <reinette.chatre@intel.com> wrote: > >> Hi Isaku, >> >> On 5/1/2024 8:56 AM, Isaku Yamahata wrote: >>> On Tue, Apr 30, 2024 at 01:47:07PM -0700, >>> Reinette Chatre <reinette.chatre@intel.com> wrote: >>>> On 4/3/2024 11:42 AM, Isaku Yamahata wrote: >>>>> On Mon, Apr 01, 2024 at 12:10:58PM +0800, >>>>> Chao Gao <chao.gao@intel.com> wrote: >>>>> >>>>>>> +static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu) >>>>>>> +{ >>>>>>> + unsigned long exit_qual; >>>>>>> + >>>>>>> + if (kvm_is_private_gpa(vcpu->kvm, tdexit_gpa(vcpu))) { >>>>>>> + /* >>>>>>> + * Always treat SEPT violations as write faults. Ignore the >>>>>>> + * EXIT_QUALIFICATION reported by TDX-SEAM for SEPT violations. >>>>>>> + * TD private pages are always RWX in the SEPT tables, >>>>>>> + * i.e. they're always mapped writable. Just as importantly, >>>>>>> + * treating SEPT violations as write faults is necessary to >>>>>>> + * avoid COW allocations, which will cause TDAUGPAGE failures >>>>>>> + * due to aliasing a single HPA to multiple GPAs. >>>>>>> + */ >>>>>>> +#define TDX_SEPT_VIOLATION_EXIT_QUAL EPT_VIOLATION_ACC_WRITE >>>>>>> + exit_qual = TDX_SEPT_VIOLATION_EXIT_QUAL; >>>>>>> + } else { >>>>>>> + exit_qual = tdexit_exit_qual(vcpu); >>>>>>> + if (exit_qual & EPT_VIOLATION_ACC_INSTR) { >>>>>> >>>>>> Unless the CPU has a bug, instruction fetch in TD from shared memory causes a >>>>>> #PF. I think you can add a comment for this. >>>>> >>>>> Yes. >>>>> >>>>> >>>>>> Maybe KVM_BUG_ON() is more appropriate as it signifies a potential bug. >>>>> >>>>> Bug of what component? CPU. If so, I think KVM_EXIT_INTERNAL_ERROR + >>>>> KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON is more appropriate. >>>>> >>>> >>>> Is below what you have in mind? >>> >>> Yes. data[0] should be the raw value of exit reason if possible. >>> data[2] should be exit_qual. Hmm, I don't find document on data[] for >> >> Did you perhaps intend to write "data[1] should be exit_qual" or would you >> like to see ndata = 3? I followed existing usages, for example [1] and [2], >> that have ndata = 2 with "data[1] = vcpu->arch.last_vmentry_cpu". >> >>> KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON. >>> Qemu doesn't assumt ndata = 2. Just report all data within ndata. >> >> I am not sure I interpreted your response correctly so I share one possible >> snippet below as I interpret it. Could you please check where I misinterpreted >> you? I could also make ndata = 3 to break the existing custom and add >> "data[2] = vcpu->arch.last_vmentry_cpu" to match existing pattern. What do you >> think? >> > Sorry, I wasn't clear enough. I meant > ndata = 3; > data[0] = exit_reason.full; > data[1] = vcpu->arch.last_vmentry_cpu; > data[2] = exit_qual; > > Because I hesitate to change the meaning of data[1] from other usage, I > appended exit_qual as data[2]. I understand it now. Thank you very much Isaku. Reinette
On Wed, May 01, 2024 at 09:54:07AM -0700, Reinette Chatre wrote: >diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c >index 499c6cd9633f..ba81e6f68c97 100644 >--- a/arch/x86/kvm/vmx/tdx.c >+++ b/arch/x86/kvm/vmx/tdx.c >@@ -1305,11 +1305,20 @@ static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu) > } else { > exit_qual = tdexit_exit_qual(vcpu); > if (exit_qual & EPT_VIOLATION_ACC_INSTR) { >+ union tdx_exit_reason exit_reason = to_tdx(vcpu)->exit_reason; >+ >+ /* >+ * Instruction fetch in TD from shared memory >+ * causes a #PF. >+ */ It is better to hoist this comment above the if-statement. /* * Instruction fetch in TD from shared memory never causes EPT * violation. Warn if such an EPT violation occurs as the CPU * probably is buggy. */ if (exit_qual & EPT_VIOLATION_ACC_INSTR) { ... } but, I am wondering why KVM needs to perform this check. I prefer to drop this check if KVM doesn't rely on it to handle EPT violation correctly. > pr_warn("kvm: TDX instr fetch to shared GPA = 0x%lx @ RIP = 0x%lx\n", > tdexit_gpa(vcpu), kvm_rip_read(vcpu)); how about using vcpu_unimpl()? it is a wrapper for printing such a message. >- vcpu->run->exit_reason = KVM_EXIT_EXCEPTION; >- vcpu->run->ex.exception = PF_VECTOR; >- vcpu->run->ex.error_code = exit_qual; >+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; >+ vcpu->run->internal.suberror = >+ KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON; >+ vcpu->run->internal.ndata = 2; >+ vcpu->run->internal.data[0] = exit_reason.full; >+ vcpu->run->internal.data[1] = exit_qual; > return 0; > } > } > >Reinette > >[1] https://github.com/kvm-x86/linux/blob/next/arch/x86/kvm/vmx/vmx.c#L6587 >[2] https://github.com/kvm-x86/linux/blob/next/arch/x86/kvm/svm/svm.c#L3436
On Mon, May 06, 2024, Chao Gao wrote: > On Wed, May 01, 2024 at 09:54:07AM -0700, Reinette Chatre wrote: > >diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c > >index 499c6cd9633f..ba81e6f68c97 100644 > >--- a/arch/x86/kvm/vmx/tdx.c > >+++ b/arch/x86/kvm/vmx/tdx.c > >@@ -1305,11 +1305,20 @@ static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu) > > } else { > > exit_qual = tdexit_exit_qual(vcpu); > > if (exit_qual & EPT_VIOLATION_ACC_INSTR) { > >+ union tdx_exit_reason exit_reason = to_tdx(vcpu)->exit_reason; > >+ > >+ /* > >+ * Instruction fetch in TD from shared memory > >+ * causes a #PF. > >+ */ > > It is better to hoist this comment above the if-statement. > > /* > * Instruction fetch in TD from shared memory never causes EPT > * violation. Warn if such an EPT violation occurs as the CPU > * probably is buggy. > */ > if (exit_qual & EPT_VIOLATION_ACC_INSTR) { > ... > } > > > but, I am wondering why KVM needs to perform this check. I prefer to drop this > check if KVM doesn't rely on it to handle EPT violation correctly. > > > pr_warn("kvm: TDX instr fetch to shared GPA = 0x%lx @ RIP = 0x%lx\n", > > tdexit_gpa(vcpu), kvm_rip_read(vcpu)); > > how about using vcpu_unimpl()? it is a wrapper for printing such a message. This isn't an "unimplemented" scenario in KVM, this is a "hardware is broken" scenario. Just WARN_ON_ONCE() and move on. Or I suppose since EPT misconfig needs to do something as well: if (KVM_BUG_ON(exit_qual & EPT_VIOLATION_ACC_INSTR, vcpu->kvm)) return -EIO;
On Mon, Feb 26, 2024, isaku.yamahata@intel.com wrote: > +static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu) > +{ > + unsigned long exit_qual; > + > + if (kvm_is_private_gpa(vcpu->kvm, tdexit_gpa(vcpu))) { > + /* > + * Always treat SEPT violations as write faults. Ignore the > + * EXIT_QUALIFICATION reported by TDX-SEAM for SEPT violations. > + * TD private pages are always RWX in the SEPT tables, > + * i.e. they're always mapped writable. Just as importantly, > + * treating SEPT violations as write faults is necessary to > + * avoid COW allocations, which will cause TDAUGPAGE failures > + * due to aliasing a single HPA to multiple GPAs. > + */ > +#define TDX_SEPT_VIOLATION_EXIT_QUAL EPT_VIOLATION_ACC_WRITE This does not needd a #define. It's use in literally one place, one line below.
diff --git a/arch/x86/kvm/vmx/common.h b/arch/x86/kvm/vmx/common.h index 632af7a76d0a..027aa4175d2c 100644 --- a/arch/x86/kvm/vmx/common.h +++ b/arch/x86/kvm/vmx/common.h @@ -87,6 +87,9 @@ static inline int __vmx_handle_ept_violation(struct kvm_vcpu *vcpu, gpa_t gpa, error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) != 0 ? PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK; + if (kvm_is_private_gpa(vcpu->kvm, gpa)) + error_code |= PFERR_GUEST_ENC_MASK; + return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); } diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c index 2f68e6f2b53a..0db80fa020d2 100644 --- a/arch/x86/kvm/vmx/tdx.c +++ b/arch/x86/kvm/vmx/tdx.c @@ -1285,6 +1285,51 @@ void tdx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode, __vmx_deliver_posted_interrupt(vcpu, &tdx->pi_desc, vector); } +static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu) +{ + unsigned long exit_qual; + + if (kvm_is_private_gpa(vcpu->kvm, tdexit_gpa(vcpu))) { + /* + * Always treat SEPT violations as write faults. Ignore the + * EXIT_QUALIFICATION reported by TDX-SEAM for SEPT violations. + * TD private pages are always RWX in the SEPT tables, + * i.e. they're always mapped writable. Just as importantly, + * treating SEPT violations as write faults is necessary to + * avoid COW allocations, which will cause TDAUGPAGE failures + * due to aliasing a single HPA to multiple GPAs. + */ +#define TDX_SEPT_VIOLATION_EXIT_QUAL EPT_VIOLATION_ACC_WRITE + exit_qual = TDX_SEPT_VIOLATION_EXIT_QUAL; + } else { + exit_qual = tdexit_exit_qual(vcpu); + if (exit_qual & EPT_VIOLATION_ACC_INSTR) { + pr_warn("kvm: TDX instr fetch to shared GPA = 0x%lx @ RIP = 0x%lx\n", + tdexit_gpa(vcpu), kvm_rip_read(vcpu)); + vcpu->run->exit_reason = KVM_EXIT_EXCEPTION; + vcpu->run->ex.exception = PF_VECTOR; + vcpu->run->ex.error_code = exit_qual; + return 0; + } + } + + trace_kvm_page_fault(vcpu, tdexit_gpa(vcpu), exit_qual); + return __vmx_handle_ept_violation(vcpu, tdexit_gpa(vcpu), exit_qual); +} + +static int tdx_handle_ept_misconfig(struct kvm_vcpu *vcpu) +{ + WARN_ON_ONCE(1); + + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON; + vcpu->run->internal.ndata = 2; + vcpu->run->internal.data[0] = EXIT_REASON_EPT_MISCONFIG; + vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu; + + return 0; +} + int tdx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t fastpath) { union tdx_exit_reason exit_reason = to_tdx(vcpu)->exit_reason; @@ -1345,6 +1390,10 @@ int tdx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t fastpath) WARN_ON_ONCE(fastpath != EXIT_FASTPATH_NONE); switch (exit_reason.basic) { + case EXIT_REASON_EPT_VIOLATION: + return tdx_handle_ept_violation(vcpu); + case EXIT_REASON_EPT_MISCONFIG: + return tdx_handle_ept_misconfig(vcpu); case EXIT_REASON_OTHER_SMI: /* * If reach here, it's not a Machine Check System Management