
[v19,091/130] KVM: TDX: remove use of struct vcpu_vmx from posted_interrupt.c

Message ID 6c7774a44515d6787c9512cb05c3b305e9b5855c.1708933498.git.isaku.yamahata@intel.com (mailing list archive)
State New, archived
Series [v19,001/130] x86/virt/tdx: Rename _offset to _member for TD_SYSINFO_MAP() macro

Commit Message

Isaku Yamahata Feb. 26, 2024, 8:26 a.m. UTC
From: Isaku Yamahata <isaku.yamahata@intel.com>

As TDX will use posted_interrupt.c, the use of struct vcpu_vmx is a
blocker.  Because the members struct pi_desc pi_desc and struct
list_head pi_wakeup_list are only used in posted_interrupt.c, introduce
a common structure, struct vcpu_pi, and make vcpu_vmx and vcpu_tdx
share the same layout at the top of the structure.

To minimize the diff size, avoid code conversion such as
vmx->pi_desc => vmx->common->pi_desc.  Instead, add a compile-time
check that the layout is as expected.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
---
 arch/x86/kvm/vmx/posted_intr.c | 41 ++++++++++++++++++++++++++--------
 arch/x86/kvm/vmx/posted_intr.h | 11 +++++++++
 arch/x86/kvm/vmx/tdx.c         |  1 +
 arch/x86/kvm/vmx/tdx.h         |  8 +++++++
 arch/x86/kvm/vmx/vmx.h         | 14 +++++++-----
 5 files changed, 60 insertions(+), 15 deletions(-)
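
The layout trick can be summarized with a minimal, self-contained C sketch.
The names below (vcpu_common, vcpu_foo, foo_private) are placeholders for
illustration only and are not KVM code:

#include <assert.h>
#include <stddef.h>

/* Common prefix: shared code only ever touches these members. */
struct vcpu_common {
	int id;
	void *pi_desc;
};

/* Backend-specific struct that repeats the common members at the top. */
struct vcpu_foo {
	int id;
	void *pi_desc;
	long foo_private;
};

/*
 * Compile-time check that the prefix layouts really match, mirroring the
 * static_assert()s this patch adds for vcpu_vmx and vcpu_tdx.
 */
static_assert(offsetof(struct vcpu_common, pi_desc) ==
	      offsetof(struct vcpu_foo, pi_desc), "layout mismatch");

/* Shared code can then view any backend struct through the common prefix. */
static inline struct vcpu_common *to_common(struct vcpu_foo *foo)
{
	return (struct vcpu_common *)foo;
}

The cast in vcpu_to_pi() is safe for the same reason: vcpu_vmx and vcpu_tdx
are declared to start with the member sequence of struct vcpu_pi, and the
static_assert()s catch accidental divergence of the offsets at build time.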

Comments

Binbin Wu Feb. 27, 2024, 8:52 a.m. UTC | #1
On 2/26/2024 4:26 PM, isaku.yamahata@intel.com wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
>
> As TDX will use posted_interrupt.c, the use of struct vcpu_vmx is a
> blocker.  Because the members of

Extra "of"

> struct pi_desc pi_desc and struct
> list_head pi_wakeup_list are only used in posted_interrupt.c, introduce
> common structure, struct vcpu_pi, make vcpu_vmx and vcpu_tdx has same
> layout in the top of structure.
>
> To minimize the diff size, avoid code conversion like,
> vmx->pi_desc => vmx->common->pi_desc.  Instead add compile time check
> if the layout is expected.
>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> ---
>   arch/x86/kvm/vmx/posted_intr.c | 41 ++++++++++++++++++++++++++--------
>   arch/x86/kvm/vmx/posted_intr.h | 11 +++++++++
>   arch/x86/kvm/vmx/tdx.c         |  1 +
>   arch/x86/kvm/vmx/tdx.h         |  8 +++++++
>   arch/x86/kvm/vmx/vmx.h         | 14 +++++++-----
>   5 files changed, 60 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
> index af662312fd07..b66add9da0f3 100644
> --- a/arch/x86/kvm/vmx/posted_intr.c
> +++ b/arch/x86/kvm/vmx/posted_intr.c
> @@ -11,6 +11,7 @@
>   #include "posted_intr.h"
>   #include "trace.h"
>   #include "vmx.h"
> +#include "tdx.h"
>   
>   /*
>    * Maintain a per-CPU list of vCPUs that need to be awakened by wakeup_handler()
> @@ -31,9 +32,29 @@ static DEFINE_PER_CPU(struct list_head, wakeup_vcpus_on_cpu);
>    */
>   static DEFINE_PER_CPU(raw_spinlock_t, wakeup_vcpus_on_cpu_lock);
>   
> +/*
> + * The layout of the head of struct vcpu_vmx and struct vcpu_tdx must match with
> + * struct vcpu_pi.
> + */
> +static_assert(offsetof(struct vcpu_pi, pi_desc) ==
> +	      offsetof(struct vcpu_vmx, pi_desc));
> +static_assert(offsetof(struct vcpu_pi, pi_wakeup_list) ==
> +	      offsetof(struct vcpu_vmx, pi_wakeup_list));
> +#ifdef CONFIG_INTEL_TDX_HOST
> +static_assert(offsetof(struct vcpu_pi, pi_desc) ==
> +	      offsetof(struct vcpu_tdx, pi_desc));
> +static_assert(offsetof(struct vcpu_pi, pi_wakeup_list) ==
> +	      offsetof(struct vcpu_tdx, pi_wakeup_list));
> +#endif
> +
> +static inline struct vcpu_pi *vcpu_to_pi(struct kvm_vcpu *vcpu)
> +{
> +	return (struct vcpu_pi *)vcpu;
> +}
> +
>   static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
>   {
> -	return &(to_vmx(vcpu)->pi_desc);
> +	return &vcpu_to_pi(vcpu)->pi_desc;
>   }
>   
>   static int pi_try_set_control(struct pi_desc *pi_desc, u64 *pold, u64 new)
> @@ -52,8 +73,8 @@ static int pi_try_set_control(struct pi_desc *pi_desc, u64 *pold, u64 new)
>   
>   void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
>   {
> -	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
> -	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +	struct vcpu_pi *vcpu_pi = vcpu_to_pi(vcpu);
> +	struct pi_desc *pi_desc = &vcpu_pi->pi_desc;
>   	struct pi_desc old, new;
>   	unsigned long flags;
>   	unsigned int dest;
> @@ -90,7 +111,7 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
>   	 */
>   	if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR) {
>   		raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
> -		list_del(&vmx->pi_wakeup_list);
> +		list_del(&vcpu_pi->pi_wakeup_list);
>   		raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
>   	}
>   
> @@ -145,15 +166,15 @@ static bool vmx_can_use_vtd_pi(struct kvm *kvm)
>    */
>   static void pi_enable_wakeup_handler(struct kvm_vcpu *vcpu)
>   {
> -	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
> -	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +	struct vcpu_pi *vcpu_pi = vcpu_to_pi(vcpu);
> +	struct pi_desc *pi_desc = &vcpu_pi->pi_desc;
>   	struct pi_desc old, new;
>   	unsigned long flags;
>   
>   	local_irq_save(flags);
>   
>   	raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
> -	list_add_tail(&vmx->pi_wakeup_list,
> +	list_add_tail(&vcpu_pi->pi_wakeup_list,
>   		      &per_cpu(wakeup_vcpus_on_cpu, vcpu->cpu));
>   	raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
>   
> @@ -190,7 +211,8 @@ static bool vmx_needs_pi_wakeup(struct kvm_vcpu *vcpu)
>   	 * notification vector is switched to the one that calls
>   	 * back to the pi_wakeup_handler() function.
>   	 */
> -	return vmx_can_use_ipiv(vcpu) || vmx_can_use_vtd_pi(vcpu->kvm);
> +	return (vmx_can_use_ipiv(vcpu) && !is_td_vcpu(vcpu)) ||
> +		vmx_can_use_vtd_pi(vcpu->kvm);
>   }
>   
>   void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
> @@ -200,7 +222,8 @@ void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
>   	if (!vmx_needs_pi_wakeup(vcpu))
>   		return;
>   
> -	if (kvm_vcpu_is_blocking(vcpu) && !vmx_interrupt_blocked(vcpu))
> +	if (kvm_vcpu_is_blocking(vcpu) &&
> +	    (is_td_vcpu(vcpu) || !vmx_interrupt_blocked(vcpu)))
>   		pi_enable_wakeup_handler(vcpu);
>   
>   	/*
> diff --git a/arch/x86/kvm/vmx/posted_intr.h b/arch/x86/kvm/vmx/posted_intr.h
> index 26992076552e..2fe8222308b2 100644
> --- a/arch/x86/kvm/vmx/posted_intr.h
> +++ b/arch/x86/kvm/vmx/posted_intr.h
> @@ -94,6 +94,17 @@ static inline bool pi_test_sn(struct pi_desc *pi_desc)
>   			(unsigned long *)&pi_desc->control);
>   }
>   
> +struct vcpu_pi {
> +	struct kvm_vcpu	vcpu;
> +
> +	/* Posted interrupt descriptor */
> +	struct pi_desc pi_desc;
> +
> +	/* Used if this vCPU is waiting for PI notification wakeup. */
> +	struct list_head pi_wakeup_list;
> +	/* Until here common layout betwwn vcpu_vmx and vcpu_tdx. */

s/betwwn/between

Also, pi_wakeup_handler() still uses struct vcpu_vmx, but the vCPU on the
wakeup list could be a vcpu_tdx.
Functionally it is OK; however, since you have added vcpu_pi, should
pi_wakeup_handler() use vcpu_pi instead of vcpu_vmx?

> +};
> +
>   void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu);
>   void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu);
>   void pi_wakeup_handler(void);
> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> index a5b52aa6d153..1da58c36217c 100644
> --- a/arch/x86/kvm/vmx/tdx.c
> +++ b/arch/x86/kvm/vmx/tdx.c
> @@ -584,6 +584,7 @@ int tdx_vcpu_create(struct kvm_vcpu *vcpu)
>   
>   	fpstate_set_confidential(&vcpu->arch.guest_fpu);
>   	vcpu->arch.apic->guest_apic_protected = true;
> +	INIT_LIST_HEAD(&tdx->pi_wakeup_list);
>   
>   	vcpu->arch.efer = EFER_SCE | EFER_LME | EFER_LMA | EFER_NX;
>   
> diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h
> index 7f8c78f06508..eaffa7384725 100644
> --- a/arch/x86/kvm/vmx/tdx.h
> +++ b/arch/x86/kvm/vmx/tdx.h
> @@ -4,6 +4,7 @@
>   
>   #ifdef CONFIG_INTEL_TDX_HOST
>   
> +#include "posted_intr.h"
>   #include "pmu_intel.h"
>   #include "tdx_ops.h"
>   
> @@ -69,6 +70,13 @@ union tdx_exit_reason {
>   struct vcpu_tdx {
>   	struct kvm_vcpu	vcpu;
>   
> +	/* Posted interrupt descriptor */
> +	struct pi_desc pi_desc;
> +
> +	/* Used if this vCPU is waiting for PI notification wakeup. */
> +	struct list_head pi_wakeup_list;
> +	/* Until here same layout to struct vcpu_pi. */
> +
>   	unsigned long tdvpr_pa;
>   	unsigned long *tdvpx_pa;
>   	bool td_vcpu_created;
> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> index 79ff54f08fee..634a9a250b95 100644
> --- a/arch/x86/kvm/vmx/vmx.h
> +++ b/arch/x86/kvm/vmx/vmx.h
> @@ -235,6 +235,14 @@ struct nested_vmx {
>   
>   struct vcpu_vmx {
>   	struct kvm_vcpu       vcpu;
> +
> +	/* Posted interrupt descriptor */
> +	struct pi_desc pi_desc;
> +
> +	/* Used if this vCPU is waiting for PI notification wakeup. */
> +	struct list_head pi_wakeup_list;
> +	/* Until here same layout to struct vcpu_pi. */
> +
>   	u8                    fail;
>   	u8		      x2apic_msr_bitmap_mode;
>   
> @@ -304,12 +312,6 @@ struct vcpu_vmx {
>   
>   	union vmx_exit_reason exit_reason;
>   
> -	/* Posted interrupt descriptor */
> -	struct pi_desc pi_desc;
> -
> -	/* Used if this vCPU is waiting for PI notification wakeup. */
> -	struct list_head pi_wakeup_list;
> -
>   	/* Support for a guest hypervisor (nested VMX) */
>   	struct nested_vmx nested;
>
Isaku Yamahata March 5, 2024, 8:35 a.m. UTC | #2
On Tue, Feb 27, 2024 at 04:52:01PM +0800,
Binbin Wu <binbin.wu@linux.intel.com> wrote:

> 
> 
> On 2/26/2024 4:26 PM, isaku.yamahata@intel.com wrote:
> > From: Isaku Yamahata <isaku.yamahata@intel.com>
> > 
> > As TDX will use posted_interrupt.c, the use of struct vcpu_vmx is a
> > blocker.  Because the members of
> 
> Extra "of"
> 
> > struct pi_desc pi_desc and struct
> > list_head pi_wakeup_list are only used in posted_interrupt.c, introduce
> > common structure, struct vcpu_pi, make vcpu_vmx and vcpu_tdx has same
> > layout in the top of structure.
> > 
> > To minimize the diff size, avoid code conversion like,
> > vmx->pi_desc => vmx->common->pi_desc.  Instead add compile time check
> > if the layout is expected.
> > 
> > Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> > ---
> >   arch/x86/kvm/vmx/posted_intr.c | 41 ++++++++++++++++++++++++++--------
> >   arch/x86/kvm/vmx/posted_intr.h | 11 +++++++++
> >   arch/x86/kvm/vmx/tdx.c         |  1 +
> >   arch/x86/kvm/vmx/tdx.h         |  8 +++++++
> >   arch/x86/kvm/vmx/vmx.h         | 14 +++++++-----
> >   5 files changed, 60 insertions(+), 15 deletions(-)
> > 
> > diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
> > index af662312fd07..b66add9da0f3 100644
> > --- a/arch/x86/kvm/vmx/posted_intr.c
> > +++ b/arch/x86/kvm/vmx/posted_intr.c
> > @@ -11,6 +11,7 @@
> >   #include "posted_intr.h"
> >   #include "trace.h"
> >   #include "vmx.h"
> > +#include "tdx.h"
> >   /*
> >    * Maintain a per-CPU list of vCPUs that need to be awakened by wakeup_handler()
> > @@ -31,9 +32,29 @@ static DEFINE_PER_CPU(struct list_head, wakeup_vcpus_on_cpu);
> >    */
> >   static DEFINE_PER_CPU(raw_spinlock_t, wakeup_vcpus_on_cpu_lock);
> > +/*
> > + * The layout of the head of struct vcpu_vmx and struct vcpu_tdx must match with
> > + * struct vcpu_pi.
> > + */
> > +static_assert(offsetof(struct vcpu_pi, pi_desc) ==
> > +	      offsetof(struct vcpu_vmx, pi_desc));
> > +static_assert(offsetof(struct vcpu_pi, pi_wakeup_list) ==
> > +	      offsetof(struct vcpu_vmx, pi_wakeup_list));
> > +#ifdef CONFIG_INTEL_TDX_HOST
> > +static_assert(offsetof(struct vcpu_pi, pi_desc) ==
> > +	      offsetof(struct vcpu_tdx, pi_desc));
> > +static_assert(offsetof(struct vcpu_pi, pi_wakeup_list) ==
> > +	      offsetof(struct vcpu_tdx, pi_wakeup_list));
> > +#endif
> > +
> > +static inline struct vcpu_pi *vcpu_to_pi(struct kvm_vcpu *vcpu)
> > +{
> > +	return (struct vcpu_pi *)vcpu;
> > +}
> > +
> >   static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
> >   {
> > -	return &(to_vmx(vcpu)->pi_desc);
> > +	return &vcpu_to_pi(vcpu)->pi_desc;
> >   }
> >   static int pi_try_set_control(struct pi_desc *pi_desc, u64 *pold, u64 new)
> > @@ -52,8 +73,8 @@ static int pi_try_set_control(struct pi_desc *pi_desc, u64 *pold, u64 new)
> >   void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
> >   {
> > -	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
> > -	struct vcpu_vmx *vmx = to_vmx(vcpu);
> > +	struct vcpu_pi *vcpu_pi = vcpu_to_pi(vcpu);
> > +	struct pi_desc *pi_desc = &vcpu_pi->pi_desc;
> >   	struct pi_desc old, new;
> >   	unsigned long flags;
> >   	unsigned int dest;
> > @@ -90,7 +111,7 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
> >   	 */
> >   	if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR) {
> >   		raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
> > -		list_del(&vmx->pi_wakeup_list);
> > +		list_del(&vcpu_pi->pi_wakeup_list);
> >   		raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
> >   	}
> > @@ -145,15 +166,15 @@ static bool vmx_can_use_vtd_pi(struct kvm *kvm)
> >    */
> >   static void pi_enable_wakeup_handler(struct kvm_vcpu *vcpu)
> >   {
> > -	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
> > -	struct vcpu_vmx *vmx = to_vmx(vcpu);
> > +	struct vcpu_pi *vcpu_pi = vcpu_to_pi(vcpu);
> > +	struct pi_desc *pi_desc = &vcpu_pi->pi_desc;
> >   	struct pi_desc old, new;
> >   	unsigned long flags;
> >   	local_irq_save(flags);
> >   	raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
> > -	list_add_tail(&vmx->pi_wakeup_list,
> > +	list_add_tail(&vcpu_pi->pi_wakeup_list,
> >   		      &per_cpu(wakeup_vcpus_on_cpu, vcpu->cpu));
> >   	raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
> > @@ -190,7 +211,8 @@ static bool vmx_needs_pi_wakeup(struct kvm_vcpu *vcpu)
> >   	 * notification vector is switched to the one that calls
> >   	 * back to the pi_wakeup_handler() function.
> >   	 */
> > -	return vmx_can_use_ipiv(vcpu) || vmx_can_use_vtd_pi(vcpu->kvm);
> > +	return (vmx_can_use_ipiv(vcpu) && !is_td_vcpu(vcpu)) ||
> > +		vmx_can_use_vtd_pi(vcpu->kvm);
> >   }
> >   void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
> > @@ -200,7 +222,8 @@ void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
> >   	if (!vmx_needs_pi_wakeup(vcpu))
> >   		return;
> > -	if (kvm_vcpu_is_blocking(vcpu) && !vmx_interrupt_blocked(vcpu))
> > +	if (kvm_vcpu_is_blocking(vcpu) &&
> > +	    (is_td_vcpu(vcpu) || !vmx_interrupt_blocked(vcpu)))
> >   		pi_enable_wakeup_handler(vcpu);
> >   	/*
> > diff --git a/arch/x86/kvm/vmx/posted_intr.h b/arch/x86/kvm/vmx/posted_intr.h
> > index 26992076552e..2fe8222308b2 100644
> > --- a/arch/x86/kvm/vmx/posted_intr.h
> > +++ b/arch/x86/kvm/vmx/posted_intr.h
> > @@ -94,6 +94,17 @@ static inline bool pi_test_sn(struct pi_desc *pi_desc)
> >   			(unsigned long *)&pi_desc->control);
> >   }
> > +struct vcpu_pi {
> > +	struct kvm_vcpu	vcpu;
> > +
> > +	/* Posted interrupt descriptor */
> > +	struct pi_desc pi_desc;
> > +
> > +	/* Used if this vCPU is waiting for PI notification wakeup. */
> > +	struct list_head pi_wakeup_list;
> > +	/* Until here common layout betwwn vcpu_vmx and vcpu_tdx. */
> 
> s/betwwn/between
> 
> Also, in pi_wakeup_handler(), it is still using struct vcpu_vmx, but it
> could
> be vcpu_tdx.
> Functionally it is OK, however, since you have added vcpu_pi, should it use
> vcpu_pi instead of vcpu_vmx in pi_wakeup_handler()?

Makes sense.

diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
index b66add9da0f3..5b71aef931dc 100644
--- a/arch/x86/kvm/vmx/posted_intr.c
+++ b/arch/x86/kvm/vmx/posted_intr.c
@@ -243,13 +243,13 @@ void pi_wakeup_handler(void)
        int cpu = smp_processor_id();
        struct list_head *wakeup_list = &per_cpu(wakeup_vcpus_on_cpu, cpu);
        raw_spinlock_t *spinlock = &per_cpu(wakeup_vcpus_on_cpu_lock, cpu);
-       struct vcpu_vmx *vmx;
+       struct vcpu_pi *pi;
 
        raw_spin_lock(spinlock);
-       list_for_each_entry(vmx, wakeup_list, pi_wakeup_list) {
+       list_for_each_entry(pi, wakeup_list, pi_wakeup_list) {
 
-               if (pi_test_on(&vmx->pi_desc))
-                       kvm_vcpu_wake_up(&vmx->vcpu);
+               if (pi_test_on(&pi->pi_desc))
+                       kvm_vcpu_wake_up(&pi->vcpu);
        }
Chao Gao March 28, 2024, 8:12 a.m. UTC | #3
On Mon, Feb 26, 2024 at 12:26:33AM -0800, isaku.yamahata@intel.com wrote:
>@@ -190,7 +211,8 @@ static bool vmx_needs_pi_wakeup(struct kvm_vcpu *vcpu)
> 	 * notification vector is switched to the one that calls
> 	 * back to the pi_wakeup_handler() function.
> 	 */
>-	return vmx_can_use_ipiv(vcpu) || vmx_can_use_vtd_pi(vcpu->kvm);
>+	return (vmx_can_use_ipiv(vcpu) && !is_td_vcpu(vcpu)) ||
>+		vmx_can_use_vtd_pi(vcpu->kvm);

It is better to separate this functional change from the code refactoring.

> }
> 
> void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
>@@ -200,7 +222,8 @@ void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
> 	if (!vmx_needs_pi_wakeup(vcpu))
> 		return;
> 
>-	if (kvm_vcpu_is_blocking(vcpu) && !vmx_interrupt_blocked(vcpu))
>+	if (kvm_vcpu_is_blocking(vcpu) &&
>+	    (is_td_vcpu(vcpu) || !vmx_interrupt_blocked(vcpu)))

Ditto.

This looks incorrect to me.  Here we assume interrupts are always enabled
for a TD.  But on TDVMCALL(HLT), the guest tells KVM whether hlt was called
with interrupts disabled.  KVM can just check the interrupt status passed
from the guest.

> 		pi_enable_wakeup_handler(vcpu);
>
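
A sketch of what that could look like: assume the TDVMCALL(HLT) emulation
records the guest-provided interrupt state in a vcpu_tdx field, called
interrupt_disabled_hlt here purely for illustration (it is not defined in
this patch), and that to_tdx() is the usual kvm_vcpu-to-vcpu_tdx helper.

/*
 * Sketch only: honor the interrupt state the guest passed on
 * TDVMCALL(HLT) instead of assuming interrupts are always enabled
 * for a TD.
 */
static bool pi_interrupt_blocked(struct kvm_vcpu *vcpu)
{
	if (is_td_vcpu(vcpu))
		return to_tdx(vcpu)->interrupt_disabled_hlt;
	return vmx_interrupt_blocked(vcpu);
}

vmx_vcpu_pi_put() could then use a single condition,
kvm_vcpu_is_blocking(vcpu) && !pi_interrupt_blocked(vcpu), instead of
special-casing is_td_vcpu().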
Isaku Yamahata March 28, 2024, 9:10 p.m. UTC | #4
On Thu, Mar 28, 2024 at 04:12:36PM +0800,
Chao Gao <chao.gao@intel.com> wrote:

> On Mon, Feb 26, 2024 at 12:26:33AM -0800, isaku.yamahata@intel.com wrote:
> >@@ -190,7 +211,8 @@ static bool vmx_needs_pi_wakeup(struct kvm_vcpu *vcpu)
> > 	 * notification vector is switched to the one that calls
> > 	 * back to the pi_wakeup_handler() function.
> > 	 */
> >-	return vmx_can_use_ipiv(vcpu) || vmx_can_use_vtd_pi(vcpu->kvm);
> >+	return (vmx_can_use_ipiv(vcpu) && !is_td_vcpu(vcpu)) ||
> >+		vmx_can_use_vtd_pi(vcpu->kvm);
> 
> It is better to separate this functional change from the code refactoring.

Agreed. Let's split this patch.


> > }
> > 
> > void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
> >@@ -200,7 +222,8 @@ void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
> > 	if (!vmx_needs_pi_wakeup(vcpu))
> > 		return;
> > 
> >-	if (kvm_vcpu_is_blocking(vcpu) && !vmx_interrupt_blocked(vcpu))
> >+	if (kvm_vcpu_is_blocking(vcpu) &&
> >+	    (is_td_vcpu(vcpu) || !vmx_interrupt_blocked(vcpu)))
> 
> Ditto.
> 
> This looks incorrect to me. here we assume interrupt is always enabled for TD.
> But on TDVMCALL(HLT), the guest tells KVM if hlt is called with interrupt
> disabled. KVM can just check that interrupt status passed from the guest.

That's true.  We could complicate this function and the HLT emulation, but
I don't think it's worthwhile because HLT with interrupts masked is rare:
it is essentially only used for CPU online.
Binbin Wu April 8, 2024, 3:16 a.m. UTC | #5
On 3/29/2024 5:10 AM, Isaku Yamahata wrote:
> On Thu, Mar 28, 2024 at 04:12:36PM +0800,
> Chao Gao <chao.gao@intel.com> wrote:
>
>>> }
>>>
>>> void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
>>> @@ -200,7 +222,8 @@ void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
>>> 	if (!vmx_needs_pi_wakeup(vcpu))
>>> 		return;
>>>
>>> -	if (kvm_vcpu_is_blocking(vcpu) && !vmx_interrupt_blocked(vcpu))
>>> +	if (kvm_vcpu_is_blocking(vcpu) &&
>>> +	    (is_td_vcpu(vcpu) || !vmx_interrupt_blocked(vcpu)))
>> Ditto.
>>
>> This looks incorrect to me. here we assume interrupt is always enabled for TD.
>> But on TDVMCALL(HLT), the guest tells KVM if hlt is called with interrupt
>> disabled. KVM can just check that interrupt status passed from the guest.
> That's true.  We can complicate this function and HLT emulation.  But I don't
> think it's worthwhile because HLT with interrupt masked is rare.  Only for
> CPU online.
Then, wouldn't it be better to add a comment explaining this?
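
For example, something along these lines (sketch only; the comment wording
is illustrative, not part of the patch):

	/*
	 * Treat a TD vCPU as if interrupts are always enabled here:
	 * TDVMCALL(HLT) with interrupts masked is rare (essentially only
	 * used during CPU online), so it is not special-cased.
	 */
	if (kvm_vcpu_is_blocking(vcpu) &&
	    (is_td_vcpu(vcpu) || !vmx_interrupt_blocked(vcpu)))
		pi_enable_wakeup_handler(vcpu);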

Patch

diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
index af662312fd07..b66add9da0f3 100644
--- a/arch/x86/kvm/vmx/posted_intr.c
+++ b/arch/x86/kvm/vmx/posted_intr.c
@@ -11,6 +11,7 @@ 
 #include "posted_intr.h"
 #include "trace.h"
 #include "vmx.h"
+#include "tdx.h"
 
 /*
  * Maintain a per-CPU list of vCPUs that need to be awakened by wakeup_handler()
@@ -31,9 +32,29 @@  static DEFINE_PER_CPU(struct list_head, wakeup_vcpus_on_cpu);
  */
 static DEFINE_PER_CPU(raw_spinlock_t, wakeup_vcpus_on_cpu_lock);
 
+/*
+ * The layout of the head of struct vcpu_vmx and struct vcpu_tdx must match with
+ * struct vcpu_pi.
+ */
+static_assert(offsetof(struct vcpu_pi, pi_desc) ==
+	      offsetof(struct vcpu_vmx, pi_desc));
+static_assert(offsetof(struct vcpu_pi, pi_wakeup_list) ==
+	      offsetof(struct vcpu_vmx, pi_wakeup_list));
+#ifdef CONFIG_INTEL_TDX_HOST
+static_assert(offsetof(struct vcpu_pi, pi_desc) ==
+	      offsetof(struct vcpu_tdx, pi_desc));
+static_assert(offsetof(struct vcpu_pi, pi_wakeup_list) ==
+	      offsetof(struct vcpu_tdx, pi_wakeup_list));
+#endif
+
+static inline struct vcpu_pi *vcpu_to_pi(struct kvm_vcpu *vcpu)
+{
+	return (struct vcpu_pi *)vcpu;
+}
+
 static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
 {
-	return &(to_vmx(vcpu)->pi_desc);
+	return &vcpu_to_pi(vcpu)->pi_desc;
 }
 
 static int pi_try_set_control(struct pi_desc *pi_desc, u64 *pold, u64 new)
@@ -52,8 +73,8 @@  static int pi_try_set_control(struct pi_desc *pi_desc, u64 *pold, u64 new)
 
 void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
 {
-	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct vcpu_pi *vcpu_pi = vcpu_to_pi(vcpu);
+	struct pi_desc *pi_desc = &vcpu_pi->pi_desc;
 	struct pi_desc old, new;
 	unsigned long flags;
 	unsigned int dest;
@@ -90,7 +111,7 @@  void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
 	 */
 	if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR) {
 		raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
-		list_del(&vmx->pi_wakeup_list);
+		list_del(&vcpu_pi->pi_wakeup_list);
 		raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
 	}
 
@@ -145,15 +166,15 @@  static bool vmx_can_use_vtd_pi(struct kvm *kvm)
  */
 static void pi_enable_wakeup_handler(struct kvm_vcpu *vcpu)
 {
-	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct vcpu_pi *vcpu_pi = vcpu_to_pi(vcpu);
+	struct pi_desc *pi_desc = &vcpu_pi->pi_desc;
 	struct pi_desc old, new;
 	unsigned long flags;
 
 	local_irq_save(flags);
 
 	raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
-	list_add_tail(&vmx->pi_wakeup_list,
+	list_add_tail(&vcpu_pi->pi_wakeup_list,
 		      &per_cpu(wakeup_vcpus_on_cpu, vcpu->cpu));
 	raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
 
@@ -190,7 +211,8 @@  static bool vmx_needs_pi_wakeup(struct kvm_vcpu *vcpu)
 	 * notification vector is switched to the one that calls
 	 * back to the pi_wakeup_handler() function.
 	 */
-	return vmx_can_use_ipiv(vcpu) || vmx_can_use_vtd_pi(vcpu->kvm);
+	return (vmx_can_use_ipiv(vcpu) && !is_td_vcpu(vcpu)) ||
+		vmx_can_use_vtd_pi(vcpu->kvm);
 }
 
 void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
@@ -200,7 +222,8 @@  void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
 	if (!vmx_needs_pi_wakeup(vcpu))
 		return;
 
-	if (kvm_vcpu_is_blocking(vcpu) && !vmx_interrupt_blocked(vcpu))
+	if (kvm_vcpu_is_blocking(vcpu) &&
+	    (is_td_vcpu(vcpu) || !vmx_interrupt_blocked(vcpu)))
 		pi_enable_wakeup_handler(vcpu);
 
 	/*
diff --git a/arch/x86/kvm/vmx/posted_intr.h b/arch/x86/kvm/vmx/posted_intr.h
index 26992076552e..2fe8222308b2 100644
--- a/arch/x86/kvm/vmx/posted_intr.h
+++ b/arch/x86/kvm/vmx/posted_intr.h
@@ -94,6 +94,17 @@  static inline bool pi_test_sn(struct pi_desc *pi_desc)
 			(unsigned long *)&pi_desc->control);
 }
 
+struct vcpu_pi {
+	struct kvm_vcpu	vcpu;
+
+	/* Posted interrupt descriptor */
+	struct pi_desc pi_desc;
+
+	/* Used if this vCPU is waiting for PI notification wakeup. */
+	struct list_head pi_wakeup_list;
+	/* Until here common layout between vcpu_vmx and vcpu_tdx. */
+};
+
 void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu);
 void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu);
 void pi_wakeup_handler(void);
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index a5b52aa6d153..1da58c36217c 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -584,6 +584,7 @@  int tdx_vcpu_create(struct kvm_vcpu *vcpu)
 
 	fpstate_set_confidential(&vcpu->arch.guest_fpu);
 	vcpu->arch.apic->guest_apic_protected = true;
+	INIT_LIST_HEAD(&tdx->pi_wakeup_list);
 
 	vcpu->arch.efer = EFER_SCE | EFER_LME | EFER_LMA | EFER_NX;
 
diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h
index 7f8c78f06508..eaffa7384725 100644
--- a/arch/x86/kvm/vmx/tdx.h
+++ b/arch/x86/kvm/vmx/tdx.h
@@ -4,6 +4,7 @@ 
 
 #ifdef CONFIG_INTEL_TDX_HOST
 
+#include "posted_intr.h"
 #include "pmu_intel.h"
 #include "tdx_ops.h"
 
@@ -69,6 +70,13 @@  union tdx_exit_reason {
 struct vcpu_tdx {
 	struct kvm_vcpu	vcpu;
 
+	/* Posted interrupt descriptor */
+	struct pi_desc pi_desc;
+
+	/* Used if this vCPU is waiting for PI notification wakeup. */
+	struct list_head pi_wakeup_list;
+	/* Until here same layout to struct vcpu_pi. */
+
 	unsigned long tdvpr_pa;
 	unsigned long *tdvpx_pa;
 	bool td_vcpu_created;
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 79ff54f08fee..634a9a250b95 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -235,6 +235,14 @@  struct nested_vmx {
 
 struct vcpu_vmx {
 	struct kvm_vcpu       vcpu;
+
+	/* Posted interrupt descriptor */
+	struct pi_desc pi_desc;
+
+	/* Used if this vCPU is waiting for PI notification wakeup. */
+	struct list_head pi_wakeup_list;
+	/* Until here same layout to struct vcpu_pi. */
+
 	u8                    fail;
 	u8		      x2apic_msr_bitmap_mode;
 
@@ -304,12 +312,6 @@  struct vcpu_vmx {
 
 	union vmx_exit_reason exit_reason;
 
-	/* Posted interrupt descriptor */
-	struct pi_desc pi_desc;
-
-	/* Used if this vCPU is waiting for PI notification wakeup. */
-	struct list_head pi_wakeup_list;
-
 	/* Support for a guest hypervisor (nested VMX) */
 	struct nested_vmx nested;