Message ID | c083430632ba9e80abd09bccd5609fb3cd9d9c63.1708933498.git.isaku.yamahata@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v19,001/130] x86/virt/tdx: Rename _offset to _member for TD_SYSINFO_MAP() macro | expand |
On Mon, Feb 26, 2024 at 12:26:50AM -0800, isaku.yamahata@intel.com wrote: >From: Isaku Yamahata <isaku.yamahata@intel.com> > >Wire up TDX PV HLT hypercall to the KVM backend function. > >Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com> >--- >v19: >- move tdvps_state_non_arch_check() to this patch > >v18: >- drop buggy_hlt_workaround and use TDH.VP.RD(TD_VCPU_STATE_DETAILS) > >Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com> >--- > arch/x86/kvm/vmx/tdx.c | 26 +++++++++++++++++++++++++- > arch/x86/kvm/vmx/tdx.h | 4 ++++ > 2 files changed, 29 insertions(+), 1 deletion(-) > >diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c >index eb68d6c148b6..a2caf2ae838c 100644 >--- a/arch/x86/kvm/vmx/tdx.c >+++ b/arch/x86/kvm/vmx/tdx.c >@@ -688,7 +688,18 @@ void tdx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) > > bool tdx_protected_apic_has_interrupt(struct kvm_vcpu *vcpu) > { >- return pi_has_pending_interrupt(vcpu); >+ bool ret = pi_has_pending_interrupt(vcpu); Maybe bool has_pending_interrupt = pi_has_pending_interrupt(vcpu); "ret" isn't a good name. or even call pi_has_pending_interrupt() directly in the if statement below. >+ union tdx_vcpu_state_details details; >+ struct vcpu_tdx *tdx = to_tdx(vcpu); >+ >+ if (ret || vcpu->arch.mp_state != KVM_MP_STATE_HALTED) >+ return true; Question: why mp_state matters here? >+ >+ if (tdx->interrupt_disabled_hlt) >+ return false; Shouldn't we move this into vt_interrupt_allowed()? VMX calls the function to check if interrupt is disabled. KVM can clear tdx->interrupt_disabled_hlt on every TD-enter and set it only on TD-exit due to the guest making a TDVMCALL(hlt) w/ interrupt disabled. >+ >+ details.full = td_state_non_arch_read64(tdx, TD_VCPU_STATE_DETAILS_NON_ARCH); >+ return !!details.vmxip; > } > > void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) >@@ -1130,6 +1141,17 @@ static int tdx_emulate_cpuid(struct kvm_vcpu *vcpu) > return 1; > } > >+static int tdx_emulate_hlt(struct kvm_vcpu *vcpu) >+{ >+ struct vcpu_tdx *tdx = to_tdx(vcpu); >+ >+ /* See tdx_protected_apic_has_interrupt() to avoid heavy seamcall */ >+ tdx->interrupt_disabled_hlt = tdvmcall_a0_read(vcpu); >+ >+ tdvmcall_set_return_code(vcpu, TDVMCALL_SUCCESS); >+ return kvm_emulate_halt_noskip(vcpu); >+} >+ > static int handle_tdvmcall(struct kvm_vcpu *vcpu) > { > if (tdvmcall_exit_type(vcpu)) >@@ -1138,6 +1160,8 @@ static int handle_tdvmcall(struct kvm_vcpu *vcpu) > switch (tdvmcall_leaf(vcpu)) { > case EXIT_REASON_CPUID: > return tdx_emulate_cpuid(vcpu); >+ case EXIT_REASON_HLT: >+ return tdx_emulate_hlt(vcpu); > default: > break; > } >diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h >index 4399d474764f..11c74c34555f 100644 >--- a/arch/x86/kvm/vmx/tdx.h >+++ b/arch/x86/kvm/vmx/tdx.h >@@ -104,6 +104,8 @@ struct vcpu_tdx { > bool host_state_need_restore; > u64 msr_host_kernel_gs_base; > >+ bool interrupt_disabled_hlt; >+ > /* > * Dummy to make pmu_intel not corrupt memory. > * TODO: Support PMU for TDX. Future work. >@@ -166,6 +168,7 @@ static __always_inline void tdvps_vmcs_check(u32 field, u8 bits) > } > > static __always_inline void tdvps_management_check(u64 field, u8 bits) {} >+static __always_inline void tdvps_state_non_arch_check(u64 field, u8 bits) {} > > #define TDX_BUILD_TDVPS_ACCESSORS(bits, uclass, lclass) \ > static __always_inline u##bits td_##lclass##_read##bits(struct vcpu_tdx *tdx, \ >@@ -226,6 +229,7 @@ TDX_BUILD_TDVPS_ACCESSORS(32, VMCS, vmcs); > TDX_BUILD_TDVPS_ACCESSORS(64, VMCS, vmcs); > > TDX_BUILD_TDVPS_ACCESSORS(8, MANAGEMENT, management); >+TDX_BUILD_TDVPS_ACCESSORS(64, STATE_NON_ARCH, state_non_arch); > > static __always_inline u64 td_tdcs_exec_read64(struct kvm_tdx *kvm_tdx, u32 field) > { >-- >2.25.1 > >
On Wed, Apr 03, 2024, Chao Gao wrote: > On Mon, Feb 26, 2024 at 12:26:50AM -0800, isaku.yamahata@intel.com wrote: > >From: Isaku Yamahata <isaku.yamahata@intel.com> > > > >Wire up TDX PV HLT hypercall to the KVM backend function. > > > >Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com> > >--- > >v19: > >- move tdvps_state_non_arch_check() to this patch > > > >v18: > >- drop buggy_hlt_workaround and use TDH.VP.RD(TD_VCPU_STATE_DETAILS) > > > >Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com> > >--- > > arch/x86/kvm/vmx/tdx.c | 26 +++++++++++++++++++++++++- > > arch/x86/kvm/vmx/tdx.h | 4 ++++ > > 2 files changed, 29 insertions(+), 1 deletion(-) > > > >diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c > >index eb68d6c148b6..a2caf2ae838c 100644 > >--- a/arch/x86/kvm/vmx/tdx.c > >+++ b/arch/x86/kvm/vmx/tdx.c > >@@ -688,7 +688,18 @@ void tdx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) > > > > bool tdx_protected_apic_has_interrupt(struct kvm_vcpu *vcpu) > > { > >- return pi_has_pending_interrupt(vcpu); > >+ bool ret = pi_has_pending_interrupt(vcpu); > > Maybe > bool has_pending_interrupt = pi_has_pending_interrupt(vcpu); > > "ret" isn't a good name. or even call pi_has_pending_interrupt() directly in > the if statement below. Ya, or split the if-statement into multiple chucks, with comments explaining what each non-intuitive chunk is doing. The pi_has_pending_interrupt(vcpu) check is self-explanatory, the halted thing, not so much. They are terminal statements, there's zero reason to pre-check the PID. E.g. /* * Comment explaining why KVM needs to assume a non-halted vCPU has a * pending interrupt (KVM can't see RFLAGS.IF). */ if (vcpu->arch.mp_state != KVM_MP_STATE_HALTED) return true; if (pi_has_pending_interrupt(vcpu)) return; > >+ union tdx_vcpu_state_details details; > >+ struct vcpu_tdx *tdx = to_tdx(vcpu); > >+ > >+ if (ret || vcpu->arch.mp_state != KVM_MP_STATE_HALTED) > >+ return true; > > Question: why mp_state matters here? > >+ > >+ if (tdx->interrupt_disabled_hlt) > >+ return false; > > Shouldn't we move this into vt_interrupt_allowed()? VMX calls the function to > check if interrupt is disabled. KVM can clear tdx->interrupt_disabled_hlt on > every TD-enter and set it only on TD-exit due to the guest making a > TDVMCALL(hlt) w/ interrupt disabled. I'm pretty sure interrupt_disabled_hlt shouldn't exist, should "a0", a.k.a. r12, be preserved at this point? /* Another comment explaning magic code. */ if (to_vmx(vcpu)->exit_reason.basic == EXIT_REASON_HLT && tdvmcall_a0_read(vcpu)) return false; Actually, can't this all be: if (to_vmx(vcpu)->exit_reason.basic != EXIT_REASON_HLT) return true; if (!tdvmcall_a0_read(vcpu)) return false; if (pi_has_pending_interrupt(vcpu)) return true; return tdx_has_pending_virtual_interrupt(vcpu);
On Wed, Apr 03, 2024 at 07:49:28AM -0700, Sean Christopherson <seanjc@google.com> wrote: > On Wed, Apr 03, 2024, Chao Gao wrote: > > On Mon, Feb 26, 2024 at 12:26:50AM -0800, isaku.yamahata@intel.com wrote: > > >From: Isaku Yamahata <isaku.yamahata@intel.com> > > > > > >Wire up TDX PV HLT hypercall to the KVM backend function. > > > > > >Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com> > > >--- > > >v19: > > >- move tdvps_state_non_arch_check() to this patch > > > > > >v18: > > >- drop buggy_hlt_workaround and use TDH.VP.RD(TD_VCPU_STATE_DETAILS) > > > > > >Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com> > > >--- > > > arch/x86/kvm/vmx/tdx.c | 26 +++++++++++++++++++++++++- > > > arch/x86/kvm/vmx/tdx.h | 4 ++++ > > > 2 files changed, 29 insertions(+), 1 deletion(-) > > > > > >diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c > > >index eb68d6c148b6..a2caf2ae838c 100644 > > >--- a/arch/x86/kvm/vmx/tdx.c > > >+++ b/arch/x86/kvm/vmx/tdx.c > > >@@ -688,7 +688,18 @@ void tdx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) > > > > > > bool tdx_protected_apic_has_interrupt(struct kvm_vcpu *vcpu) > > > { > > >- return pi_has_pending_interrupt(vcpu); > > >+ bool ret = pi_has_pending_interrupt(vcpu); > > > > Maybe > > bool has_pending_interrupt = pi_has_pending_interrupt(vcpu); > > > > "ret" isn't a good name. or even call pi_has_pending_interrupt() directly in > > the if statement below. > > Ya, or split the if-statement into multiple chucks, with comments explaining > what each non-intuitive chunk is doing. The pi_has_pending_interrupt(vcpu) check > is self-explanatory, the halted thing, not so much. They are terminal statements, > there's zero reason to pre-check the PID. > > E.g. > > /* > * Comment explaining why KVM needs to assume a non-halted vCPU has a > * pending interrupt (KVM can't see RFLAGS.IF). > */ > if (vcpu->arch.mp_state != KVM_MP_STATE_HALTED) > return true; > > if (pi_has_pending_interrupt(vcpu)) > return; > > > >+ union tdx_vcpu_state_details details; > > >+ struct vcpu_tdx *tdx = to_tdx(vcpu); > > >+ > > >+ if (ret || vcpu->arch.mp_state != KVM_MP_STATE_HALTED) > > >+ return true; > > > > Question: why mp_state matters here? > > >+ > > >+ if (tdx->interrupt_disabled_hlt) > > >+ return false; > > > > Shouldn't we move this into vt_interrupt_allowed()? VMX calls the function to > > check if interrupt is disabled. Chao, are you suggesting to implement tdx_interrupt_allowed() as "EXIT_REASON_HLT && a0" instead of "return true"? I don't think it makes sense because it's rare case and we can't avoid spurious wakeup for TDX case. > >KVM can clear tdx->interrupt_disabled_hlt on > > every TD-enter and set it only on TD-exit due to the guest making a > > TDVMCALL(hlt) w/ interrupt disabled. > > I'm pretty sure interrupt_disabled_hlt shouldn't exist, should "a0", a.k.a. r12, > be preserved at this point? > > /* Another comment explaning magic code. */ > if (to_vmx(vcpu)->exit_reason.basic == EXIT_REASON_HLT && > tdvmcall_a0_read(vcpu)) > return false; > > > Actually, can't this all be: > > if (to_vmx(vcpu)->exit_reason.basic != EXIT_REASON_HLT) > return true; > > if (!tdvmcall_a0_read(vcpu)) > return false; > > if (pi_has_pending_interrupt(vcpu)) > return true; > > return tdx_has_pending_virtual_interrupt(vcpu); > Thanks for the suggestion. This is much cleaner. Will update the function.
>> > >+ union tdx_vcpu_state_details details; >> > >+ struct vcpu_tdx *tdx = to_tdx(vcpu); >> > >+ >> > >+ if (ret || vcpu->arch.mp_state != KVM_MP_STATE_HALTED) >> > >+ return true; >> > >> > Question: why mp_state matters here? >> > >+ >> > >+ if (tdx->interrupt_disabled_hlt) >> > >+ return false; >> > >> > Shouldn't we move this into vt_interrupt_allowed()? VMX calls the function to >> > check if interrupt is disabled. > >Chao, are you suggesting to implement tdx_interrupt_allowed() as >"EXIT_REASON_HLT && a0" instead of "return true"? >I don't think it makes sense because it's rare case and we can't avoid spurious >wakeup for TDX case. Yes. KVM differeniates "interrupt allowed" from "has interrupt", e.g., static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) ... if (kvm_arch_interrupt_allowed(vcpu) && (kvm_cpu_has_interrupt(vcpu) || kvm_guest_apic_has_interrupt(vcpu))) return true; I think tdx_protected_apic_has_interrupt() mixes them together, which isn't good. Probably it is a minor thing; if no one else thinks it is better to move the "interrupt allowed" check to tdx_interrupt_allowed(), I am also fine with not doing that.
On Sun, Apr 07, 2024 at 11:50:04AM +0800, Chao Gao <chao.gao@intel.com> wrote: > >> > >+ union tdx_vcpu_state_details details; > >> > >+ struct vcpu_tdx *tdx = to_tdx(vcpu); > >> > >+ > >> > >+ if (ret || vcpu->arch.mp_state != KVM_MP_STATE_HALTED) > >> > >+ return true; > >> > > >> > Question: why mp_state matters here? > >> > >+ > >> > >+ if (tdx->interrupt_disabled_hlt) > >> > >+ return false; > >> > > >> > Shouldn't we move this into vt_interrupt_allowed()? VMX calls the function to > >> > check if interrupt is disabled. > > > >Chao, are you suggesting to implement tdx_interrupt_allowed() as > >"EXIT_REASON_HLT && a0" instead of "return true"? > >I don't think it makes sense because it's rare case and we can't avoid spurious > >wakeup for TDX case. > > Yes. KVM differeniates "interrupt allowed" from "has interrupt", e.g., > > static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) > ... > > if (kvm_arch_interrupt_allowed(vcpu) && > (kvm_cpu_has_interrupt(vcpu) || > kvm_guest_apic_has_interrupt(vcpu))) > return true; > > > I think tdx_protected_apic_has_interrupt() mixes them together, which isn't > good. Your point is code clarity. Ok, we can code in that way. I don't expect any performance difference.
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c index eb68d6c148b6..a2caf2ae838c 100644 --- a/arch/x86/kvm/vmx/tdx.c +++ b/arch/x86/kvm/vmx/tdx.c @@ -688,7 +688,18 @@ void tdx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) bool tdx_protected_apic_has_interrupt(struct kvm_vcpu *vcpu) { - return pi_has_pending_interrupt(vcpu); + bool ret = pi_has_pending_interrupt(vcpu); + union tdx_vcpu_state_details details; + struct vcpu_tdx *tdx = to_tdx(vcpu); + + if (ret || vcpu->arch.mp_state != KVM_MP_STATE_HALTED) + return true; + + if (tdx->interrupt_disabled_hlt) + return false; + + details.full = td_state_non_arch_read64(tdx, TD_VCPU_STATE_DETAILS_NON_ARCH); + return !!details.vmxip; } void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) @@ -1130,6 +1141,17 @@ static int tdx_emulate_cpuid(struct kvm_vcpu *vcpu) return 1; } +static int tdx_emulate_hlt(struct kvm_vcpu *vcpu) +{ + struct vcpu_tdx *tdx = to_tdx(vcpu); + + /* See tdx_protected_apic_has_interrupt() to avoid heavy seamcall */ + tdx->interrupt_disabled_hlt = tdvmcall_a0_read(vcpu); + + tdvmcall_set_return_code(vcpu, TDVMCALL_SUCCESS); + return kvm_emulate_halt_noskip(vcpu); +} + static int handle_tdvmcall(struct kvm_vcpu *vcpu) { if (tdvmcall_exit_type(vcpu)) @@ -1138,6 +1160,8 @@ static int handle_tdvmcall(struct kvm_vcpu *vcpu) switch (tdvmcall_leaf(vcpu)) { case EXIT_REASON_CPUID: return tdx_emulate_cpuid(vcpu); + case EXIT_REASON_HLT: + return tdx_emulate_hlt(vcpu); default: break; } diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h index 4399d474764f..11c74c34555f 100644 --- a/arch/x86/kvm/vmx/tdx.h +++ b/arch/x86/kvm/vmx/tdx.h @@ -104,6 +104,8 @@ struct vcpu_tdx { bool host_state_need_restore; u64 msr_host_kernel_gs_base; + bool interrupt_disabled_hlt; + /* * Dummy to make pmu_intel not corrupt memory. * TODO: Support PMU for TDX. Future work. @@ -166,6 +168,7 @@ static __always_inline void tdvps_vmcs_check(u32 field, u8 bits) } static __always_inline void tdvps_management_check(u64 field, u8 bits) {} +static __always_inline void tdvps_state_non_arch_check(u64 field, u8 bits) {} #define TDX_BUILD_TDVPS_ACCESSORS(bits, uclass, lclass) \ static __always_inline u##bits td_##lclass##_read##bits(struct vcpu_tdx *tdx, \ @@ -226,6 +229,7 @@ TDX_BUILD_TDVPS_ACCESSORS(32, VMCS, vmcs); TDX_BUILD_TDVPS_ACCESSORS(64, VMCS, vmcs); TDX_BUILD_TDVPS_ACCESSORS(8, MANAGEMENT, management); +TDX_BUILD_TDVPS_ACCESSORS(64, STATE_NON_ARCH, state_non_arch); static __always_inline u64 td_tdcs_exec_read64(struct kvm_tdx *kvm_tdx, u32 field) {