Message ID | 6c7774a44515d6787c9512cb05c3b305e9b5855c.1708933498.git.isaku.yamahata@intel.com (mailing list archive)
---|---
State | New, archived
Series | [v19,001/130] x86/virt/tdx: Rename _offset to _member for TD_SYSINFO_MAP() macro
On 2/26/2024 4:26 PM, isaku.yamahata@intel.com wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
>
> As TDX will use posted_interrupt.c, the use of struct vcpu_vmx is a
> blocker. Because the members of

Extra "of"

> struct pi_desc pi_desc and struct
> list_head pi_wakeup_list are only used in posted_interrupt.c, introduce
> common structure, struct vcpu_pi, make vcpu_vmx and vcpu_tdx has same
> layout in the top of structure.
>
> To minimize the diff size, avoid code conversion like,
> vmx->pi_desc => vmx->common->pi_desc. Instead add compile time check
> if the layout is expected.
>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> ---
>  arch/x86/kvm/vmx/posted_intr.c | 41 ++++++++++++++++++++++++++--------
>  arch/x86/kvm/vmx/posted_intr.h | 11 +++++++++
>  arch/x86/kvm/vmx/tdx.c         |  1 +
>  arch/x86/kvm/vmx/tdx.h         |  8 +++++++
>  arch/x86/kvm/vmx/vmx.h         | 14 +++++++-----
>  5 files changed, 60 insertions(+), 15 deletions(-)

[...]

> diff --git a/arch/x86/kvm/vmx/posted_intr.h b/arch/x86/kvm/vmx/posted_intr.h
> index 26992076552e..2fe8222308b2 100644
> --- a/arch/x86/kvm/vmx/posted_intr.h
> +++ b/arch/x86/kvm/vmx/posted_intr.h
> @@ -94,6 +94,17 @@ static inline bool pi_test_sn(struct pi_desc *pi_desc)
>  		(unsigned long *)&pi_desc->control);
>  }
>
> +struct vcpu_pi {
> +	struct kvm_vcpu	vcpu;
> +
> +	/* Posted interrupt descriptor */
> +	struct pi_desc pi_desc;
> +
> +	/* Used if this vCPU is waiting for PI notification wakeup. */
> +	struct list_head pi_wakeup_list;
> +	/* Until here common layout betwwn vcpu_vmx and vcpu_tdx. */

s/betwwn/between

Also, in pi_wakeup_handler(), it is still using struct vcpu_vmx, but it could
be vcpu_tdx.
Functionally it is OK, however, since you have added vcpu_pi, should it use
vcpu_pi instead of vcpu_vmx in pi_wakeup_handler()?

> +};

[...]
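As an aside on the layout trick itself: the sketch below is a minimal user-space analogue, not the kernel code, and its types and members are stand-ins. It shows why the bare cast in vcpu_to_pi() is safe: both containers begin with the same members, and the static_assert()s on offsetof() turn any future layout drift into a build error.

#include <stddef.h>
#include <stdio.h>

/* Stand-ins for the kernel types; members are illustrative only. */
struct kvm_vcpu { int id; };
struct pi_desc { unsigned long control; };
struct list_head { struct list_head *next, *prev; };

/* Common view of the head of both container structures. */
struct vcpu_pi {
        struct kvm_vcpu vcpu;
        struct pi_desc pi_desc;
        struct list_head pi_wakeup_list;
};

/* The two containers deliberately repeat the same leading members ... */
struct vcpu_vmx {
        struct kvm_vcpu vcpu;
        struct pi_desc pi_desc;
        struct list_head pi_wakeup_list;
        int vmx_only_state;
};

struct vcpu_tdx {
        struct kvm_vcpu vcpu;
        struct pi_desc pi_desc;
        struct list_head pi_wakeup_list;
        int tdx_only_state;
};

/* ... so the compile-time checks from the patch hold. */
_Static_assert(offsetof(struct vcpu_pi, pi_desc) ==
               offsetof(struct vcpu_vmx, pi_desc), "vmx pi_desc");
_Static_assert(offsetof(struct vcpu_pi, pi_wakeup_list) ==
               offsetof(struct vcpu_vmx, pi_wakeup_list), "vmx pi_wakeup_list");
_Static_assert(offsetof(struct vcpu_pi, pi_desc) ==
               offsetof(struct vcpu_tdx, pi_desc), "tdx pi_desc");
_Static_assert(offsetof(struct vcpu_pi, pi_wakeup_list) ==
               offsetof(struct vcpu_tdx, pi_wakeup_list), "tdx pi_wakeup_list");

/* The cast the patch adds in vcpu_to_pi(): kvm_vcpu is the first member of
 * every container and the PI fields sit at identical offsets, so either a
 * vcpu_vmx or a vcpu_tdx can be viewed through struct vcpu_pi. */
static struct vcpu_pi *vcpu_to_pi(struct kvm_vcpu *vcpu)
{
        return (struct vcpu_pi *)vcpu;
}

int main(void)
{
        struct vcpu_vmx vmx = { .vcpu = { .id = 1 } };
        struct vcpu_tdx tdx = { .vcpu = { .id = 2 } };

        printf("%d %d\n", vcpu_to_pi(&vmx.vcpu)->vcpu.id,
               vcpu_to_pi(&tdx.vcpu)->vcpu.id);
        return 0;
}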
On Tue, Feb 27, 2024 at 04:52:01PM +0800,
Binbin Wu <binbin.wu@linux.intel.com> wrote:
>
> On 2/26/2024 4:26 PM, isaku.yamahata@intel.com wrote:
> > From: Isaku Yamahata <isaku.yamahata@intel.com>
> >
[...]
> > +struct vcpu_pi {
> > +	struct kvm_vcpu	vcpu;
> > +
> > +	/* Posted interrupt descriptor */
> > +	struct pi_desc pi_desc;
> > +
> > +	/* Used if this vCPU is waiting for PI notification wakeup. */
> > +	struct list_head pi_wakeup_list;
> > +	/* Until here common layout betwwn vcpu_vmx and vcpu_tdx. */
>
> s/betwwn/between
>
> Also, in pi_wakeup_handler(), it is still using struct vcpu_vmx, but it
> could
> be vcpu_tdx.
> Functionally it is OK, however, since you have added vcpu_pi, should it use
> vcpu_pi instead of vcpu_vmx in pi_wakeup_handler()?

Makes sense.

diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
index b66add9da0f3..5b71aef931dc 100644
--- a/arch/x86/kvm/vmx/posted_intr.c
+++ b/arch/x86/kvm/vmx/posted_intr.c
@@ -243,13 +243,13 @@ void pi_wakeup_handler(void)
 	int cpu = smp_processor_id();
 	struct list_head *wakeup_list = &per_cpu(wakeup_vcpus_on_cpu, cpu);
 	raw_spinlock_t *spinlock = &per_cpu(wakeup_vcpus_on_cpu_lock, cpu);
-	struct vcpu_vmx *vmx;
+	struct vcpu_pi *pi;
 
 	raw_spin_lock(spinlock);
-	list_for_each_entry(vmx, wakeup_list, pi_wakeup_list) {
+	list_for_each_entry(pi, wakeup_list, pi_wakeup_list) {
 
-		if (pi_test_on(&vmx->pi_desc))
-			kvm_vcpu_wake_up(&vmx->vcpu);
+		if (pi_test_on(&pi->pi_desc))
+			kvm_vcpu_wake_up(&pi->vcpu);
 	}
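The one-line iterator type change above is enough because list_for_each_entry() recovers the containing structure from the embedded list_head by subtracting the member's offset, so any type that keeps pi_wakeup_list at the same offset works for entries that are really vcpu_vmx or vcpu_tdx. Below is a minimal user-space sketch of that mechanism, with a simplified list_head and container_of() and a hand-wired two-entry list instead of list_add_tail(); it is not the kernel implementation.

#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

/* Simplified container_of(): step back from the embedded member to the
 * enclosing structure. */
#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct vcpu_pi {
        int id;                          /* stand-in for struct kvm_vcpu */
        struct list_head pi_wakeup_list;
};

int main(void)
{
        struct vcpu_pi a = { .id = 1 }, b = { .id = 2 };
        struct list_head head = { &a.pi_wakeup_list, &b.pi_wakeup_list };

        /* Wire up head -> a -> b -> head by hand. */
        a.pi_wakeup_list.next = &b.pi_wakeup_list;
        a.pi_wakeup_list.prev = &head;
        b.pi_wakeup_list.next = &head;
        b.pi_wakeup_list.prev = &a.pi_wakeup_list;

        /* Roughly what list_for_each_entry(pi, &head, pi_wakeup_list) does:
         * walk the list_head nodes and recover each vcpu_pi by offset. */
        for (struct list_head *pos = head.next; pos != &head; pos = pos->next) {
                struct vcpu_pi *pi = container_of(pos, struct vcpu_pi, pi_wakeup_list);
                printf("wake vcpu %d\n", pi->id);
        }
        return 0;
}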
On Mon, Feb 26, 2024 at 12:26:33AM -0800, isaku.yamahata@intel.com wrote:
>@@ -190,7 +211,8 @@ static bool vmx_needs_pi_wakeup(struct kvm_vcpu *vcpu)
> 	 * notification vector is switched to the one that calls
> 	 * back to the pi_wakeup_handler() function.
> 	 */
>-	return vmx_can_use_ipiv(vcpu) || vmx_can_use_vtd_pi(vcpu->kvm);
>+	return (vmx_can_use_ipiv(vcpu) && !is_td_vcpu(vcpu)) ||
>+		vmx_can_use_vtd_pi(vcpu->kvm);

It is better to separate this functional change from the code refactoring.

> }
>
> void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
>@@ -200,7 +222,8 @@ void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
> 	if (!vmx_needs_pi_wakeup(vcpu))
> 		return;
>
>-	if (kvm_vcpu_is_blocking(vcpu) && !vmx_interrupt_blocked(vcpu))
>+	if (kvm_vcpu_is_blocking(vcpu) &&
>+	    (is_td_vcpu(vcpu) || !vmx_interrupt_blocked(vcpu)))

Ditto.

This looks incorrect to me. here we assume interrupt is always enabled for TD.
But on TDVMCALL(HLT), the guest tells KVM if hlt is called with interrupt
disabled. KVM can just check that interrupt status passed from the guest.

> 		pi_enable_wakeup_handler(vcpu);
>
On Thu, Mar 28, 2024 at 04:12:36PM +0800,
Chao Gao <chao.gao@intel.com> wrote:

> On Mon, Feb 26, 2024 at 12:26:33AM -0800, isaku.yamahata@intel.com wrote:
> >@@ -190,7 +211,8 @@ static bool vmx_needs_pi_wakeup(struct kvm_vcpu *vcpu)
> > 	 * notification vector is switched to the one that calls
> > 	 * back to the pi_wakeup_handler() function.
> > 	 */
> >-	return vmx_can_use_ipiv(vcpu) || vmx_can_use_vtd_pi(vcpu->kvm);
> >+	return (vmx_can_use_ipiv(vcpu) && !is_td_vcpu(vcpu)) ||
> >+		vmx_can_use_vtd_pi(vcpu->kvm);
>
> It is better to separate this functional change from the code refactoring.

Agreed. Let's split this patch.

> > }
> >
> > void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
> >@@ -200,7 +222,8 @@ void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
> > 	if (!vmx_needs_pi_wakeup(vcpu))
> > 		return;
> >
> >-	if (kvm_vcpu_is_blocking(vcpu) && !vmx_interrupt_blocked(vcpu))
> >+	if (kvm_vcpu_is_blocking(vcpu) &&
> >+	    (is_td_vcpu(vcpu) || !vmx_interrupt_blocked(vcpu)))
>
> Ditto.
>
> This looks incorrect to me. here we assume interrupt is always enabled for TD.
> But on TDVMCALL(HLT), the guest tells KVM if hlt is called with interrupt
> disabled. KVM can just check that interrupt status passed from the guest.

That's true. We can complicate this function and HLT emulation. But I don't
think it's worthwhile because HLT with interrupt masked is rare. Only for
CPU online.
On 3/29/2024 5:10 AM, Isaku Yamahata wrote:
> On Thu, Mar 28, 2024 at 04:12:36PM +0800,
> Chao Gao <chao.gao@intel.com> wrote:
>
>>> }
>>>
>>> void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
>>> @@ -200,7 +222,8 @@ void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
>>> 	if (!vmx_needs_pi_wakeup(vcpu))
>>> 		return;
>>>
>>> -	if (kvm_vcpu_is_blocking(vcpu) && !vmx_interrupt_blocked(vcpu))
>>> +	if (kvm_vcpu_is_blocking(vcpu) &&
>>> +	    (is_td_vcpu(vcpu) || !vmx_interrupt_blocked(vcpu)))
>> Ditto.
>>
>> This looks incorrect to me. here we assume interrupt is always enabled for TD.
>> But on TDVMCALL(HLT), the guest tells KVM if hlt is called with interrupt
>> disabled. KVM can just check that interrupt status passed from the guest.
> That's true. We can complicate this function and HLT emulation. But I don't
> think it's worthwhile because HLT with interrupt masked is rare. Only for
> CPU online.

Then, it's better to add some comments?
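For illustration only, one shape such a comment could take on top of the hunk quoted above; the wording below is a suggestion, not something posted in this thread:

 	if (!vmx_needs_pi_wakeup(vcpu))
 		return;
 
+	/*
+	 * Assume interrupts are enabled for a TD vCPU: TDVMCALL(HLT) does
+	 * report the guest's interrupt status, but HLT with interrupts
+	 * masked is rare (CPU online only), so don't bother checking it.
+	 */
 	if (kvm_vcpu_is_blocking(vcpu) &&
 	    (is_td_vcpu(vcpu) || !vmx_interrupt_blocked(vcpu)))
 		pi_enable_wakeup_handler(vcpu);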
diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
index af662312fd07..b66add9da0f3 100644
--- a/arch/x86/kvm/vmx/posted_intr.c
+++ b/arch/x86/kvm/vmx/posted_intr.c
@@ -11,6 +11,7 @@
 #include "posted_intr.h"
 #include "trace.h"
 #include "vmx.h"
+#include "tdx.h"
 
 /*
  * Maintain a per-CPU list of vCPUs that need to be awakened by wakeup_handler()
@@ -31,9 +32,29 @@ static DEFINE_PER_CPU(struct list_head, wakeup_vcpus_on_cpu);
  */
 static DEFINE_PER_CPU(raw_spinlock_t, wakeup_vcpus_on_cpu_lock);
 
+/*
+ * The layout of the head of struct vcpu_vmx and struct vcpu_tdx must match with
+ * struct vcpu_pi.
+ */
+static_assert(offsetof(struct vcpu_pi, pi_desc) ==
+	      offsetof(struct vcpu_vmx, pi_desc));
+static_assert(offsetof(struct vcpu_pi, pi_wakeup_list) ==
+	      offsetof(struct vcpu_vmx, pi_wakeup_list));
+#ifdef CONFIG_INTEL_TDX_HOST
+static_assert(offsetof(struct vcpu_pi, pi_desc) ==
+	      offsetof(struct vcpu_tdx, pi_desc));
+static_assert(offsetof(struct vcpu_pi, pi_wakeup_list) ==
+	      offsetof(struct vcpu_tdx, pi_wakeup_list));
+#endif
+
+static inline struct vcpu_pi *vcpu_to_pi(struct kvm_vcpu *vcpu)
+{
+	return (struct vcpu_pi *)vcpu;
+}
+
 static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
 {
-	return &(to_vmx(vcpu)->pi_desc);
+	return &vcpu_to_pi(vcpu)->pi_desc;
 }
 
 static int pi_try_set_control(struct pi_desc *pi_desc, u64 *pold, u64 new)
@@ -52,8 +73,8 @@ static int pi_try_set_control(struct pi_desc *pi_desc, u64 *pold, u64 new)
 
 void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
 {
-	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct vcpu_pi *vcpu_pi = vcpu_to_pi(vcpu);
+	struct pi_desc *pi_desc = &vcpu_pi->pi_desc;
 	struct pi_desc old, new;
 	unsigned long flags;
 	unsigned int dest;
@@ -90,7 +111,7 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
 	 */
 	if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR) {
 		raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
-		list_del(&vmx->pi_wakeup_list);
+		list_del(&vcpu_pi->pi_wakeup_list);
 		raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
 	}
 
@@ -145,15 +166,15 @@ static bool vmx_can_use_vtd_pi(struct kvm *kvm)
  */
 static void pi_enable_wakeup_handler(struct kvm_vcpu *vcpu)
 {
-	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct vcpu_pi *vcpu_pi = vcpu_to_pi(vcpu);
+	struct pi_desc *pi_desc = &vcpu_pi->pi_desc;
 	struct pi_desc old, new;
 	unsigned long flags;
 
 	local_irq_save(flags);
 
 	raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
-	list_add_tail(&vmx->pi_wakeup_list,
+	list_add_tail(&vcpu_pi->pi_wakeup_list,
 		      &per_cpu(wakeup_vcpus_on_cpu, vcpu->cpu));
 	raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
 
@@ -190,7 +211,8 @@ static bool vmx_needs_pi_wakeup(struct kvm_vcpu *vcpu)
 	 * notification vector is switched to the one that calls
 	 * back to the pi_wakeup_handler() function.
 	 */
-	return vmx_can_use_ipiv(vcpu) || vmx_can_use_vtd_pi(vcpu->kvm);
+	return (vmx_can_use_ipiv(vcpu) && !is_td_vcpu(vcpu)) ||
+		vmx_can_use_vtd_pi(vcpu->kvm);
 }
 
 void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
@@ -200,7 +222,8 @@ void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
 	if (!vmx_needs_pi_wakeup(vcpu))
 		return;
 
-	if (kvm_vcpu_is_blocking(vcpu) && !vmx_interrupt_blocked(vcpu))
+	if (kvm_vcpu_is_blocking(vcpu) &&
+	    (is_td_vcpu(vcpu) || !vmx_interrupt_blocked(vcpu)))
 		pi_enable_wakeup_handler(vcpu);
 
 	/*
diff --git a/arch/x86/kvm/vmx/posted_intr.h b/arch/x86/kvm/vmx/posted_intr.h
index 26992076552e..2fe8222308b2 100644
--- a/arch/x86/kvm/vmx/posted_intr.h
+++ b/arch/x86/kvm/vmx/posted_intr.h
@@ -94,6 +94,17 @@ static inline bool pi_test_sn(struct pi_desc *pi_desc)
 		(unsigned long *)&pi_desc->control);
 }
 
+struct vcpu_pi {
+	struct kvm_vcpu	vcpu;
+
+	/* Posted interrupt descriptor */
+	struct pi_desc pi_desc;
+
+	/* Used if this vCPU is waiting for PI notification wakeup. */
+	struct list_head pi_wakeup_list;
+	/* Until here common layout betwwn vcpu_vmx and vcpu_tdx. */
+};
+
 void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu);
 void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu);
 void pi_wakeup_handler(void);
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index a5b52aa6d153..1da58c36217c 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -584,6 +584,7 @@ int tdx_vcpu_create(struct kvm_vcpu *vcpu)
 
 	fpstate_set_confidential(&vcpu->arch.guest_fpu);
 	vcpu->arch.apic->guest_apic_protected = true;
+	INIT_LIST_HEAD(&tdx->pi_wakeup_list);
 
 	vcpu->arch.efer = EFER_SCE | EFER_LME | EFER_LMA | EFER_NX;
 
diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h
index 7f8c78f06508..eaffa7384725 100644
--- a/arch/x86/kvm/vmx/tdx.h
+++ b/arch/x86/kvm/vmx/tdx.h
@@ -4,6 +4,7 @@
 
 #ifdef CONFIG_INTEL_TDX_HOST
 
+#include "posted_intr.h"
 #include "pmu_intel.h"
 #include "tdx_ops.h"
 
@@ -69,6 +70,13 @@ union tdx_exit_reason {
 struct vcpu_tdx {
 	struct kvm_vcpu	vcpu;
 
+	/* Posted interrupt descriptor */
+	struct pi_desc pi_desc;
+
+	/* Used if this vCPU is waiting for PI notification wakeup. */
+	struct list_head pi_wakeup_list;
+	/* Until here same layout to struct vcpu_pi. */
+
 	unsigned long tdvpr_pa;
 	unsigned long *tdvpx_pa;
 	bool td_vcpu_created;
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 79ff54f08fee..634a9a250b95 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -235,6 +235,14 @@ struct nested_vmx {
 
 struct vcpu_vmx {
 	struct kvm_vcpu vcpu;
+
+	/* Posted interrupt descriptor */
+	struct pi_desc pi_desc;
+
+	/* Used if this vCPU is waiting for PI notification wakeup. */
+	struct list_head pi_wakeup_list;
+	/* Until here same layout to struct vcpu_pi. */
+
 	u8 fail;
 	u8 x2apic_msr_bitmap_mode;
 
@@ -304,12 +312,6 @@ struct vcpu_vmx {
 
 	union vmx_exit_reason exit_reason;
 
-	/* Posted interrupt descriptor */
-	struct pi_desc pi_desc;
-
-	/* Used if this vCPU is waiting for PI notification wakeup. */
-	struct list_head pi_wakeup_list;
-
 	/* Support for a guest hypervisor (nested VMX) */
 	struct nested_vmx nested;
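A closing note on the design choice in the commit message: repeating the members in vcpu_vmx and vcpu_tdx, rather than embedding struct vcpu_pi as a named field, keeps every existing vmx->pi_desc access compiling and pushes the consistency burden onto the static_assert()s. Below is a hedged sketch of the two shapes, using stub types rather than the kernel definitions; compiling it is the whole test.

#include <stddef.h>

/* Stub types standing in for the kernel structures. */
struct kvm_vcpu { int id; };
struct pi_desc { unsigned long control; };
struct list_head { struct list_head *next, *prev; };

struct vcpu_pi {
        struct kvm_vcpu vcpu;
        struct pi_desc pi_desc;
        struct list_head pi_wakeup_list;
};

/* Shape the commit message avoids: embed the common part as a named member,
 * which would turn every vmx->pi_desc access into vmx->pi.pi_desc, a large
 * mechanical diff. */
struct vcpu_vmx_embedded {
        struct vcpu_pi pi;
        int vmx_only_state;
};

/* Shape the patch uses: repeat the leading members so existing accesses
 * still compile, and let the layout checks catch any future drift. */
struct vcpu_vmx_flat {
        struct kvm_vcpu vcpu;
        struct pi_desc pi_desc;
        struct list_head pi_wakeup_list;
        int vmx_only_state;
};

_Static_assert(offsetof(struct vcpu_vmx_flat, pi_desc) ==
               offsetof(struct vcpu_pi, pi_desc), "pi_desc drifted");
_Static_assert(offsetof(struct vcpu_vmx_flat, pi_wakeup_list) ==
               offsetof(struct vcpu_pi, pi_wakeup_list), "pi_wakeup_list drifted");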