
[v5,7/7] VMX: Fixup PI descriptor when cpu is offline

Message ID 1476147473-30970-8-git-send-email-feng.wu@intel.com (mailing list archive)
State New, archived

Commit Message

Wu, Feng Oct. 11, 2016, 12:57 a.m. UTC
When a cpu goes offline, we need to move all the vcpus in its blocking
list to another online cpu; this patch handles that.

Signed-off-by: Feng Wu <feng.wu@intel.com>
---
v5:
- Add comments to explain why this doesn't cause a deadlock
in the ABBA deadlock scenario.

 xen/arch/x86/hvm/vmx/vmcs.c       |  1 +
 xen/arch/x86/hvm/vmx/vmx.c        | 48 +++++++++++++++++++++++++++++++++++++++
 xen/include/asm-x86/hvm/vmx/vmx.h |  1 +
 3 files changed, 50 insertions(+)

Comments

Tian, Kevin Oct. 11, 2016, 8:38 a.m. UTC | #1
> From: Wu, Feng
> Sent: Tuesday, October 11, 2016 8:58 AM
> 
> When cpu is offline, we need to move all the vcpus in its blocking
> list to another online cpu, this patch handles it.
> 
> Signed-off-by: Feng Wu <feng.wu@intel.com>
> ---
> v5:
> - Add some comments to explain why it doesn't cause deadlock
> for the ABBA deadlock scenario.
> 
>  xen/arch/x86/hvm/vmx/vmcs.c       |  1 +
>  xen/arch/x86/hvm/vmx/vmx.c        | 48 +++++++++++++++++++++++++++++++++++++++
>  xen/include/asm-x86/hvm/vmx/vmx.h |  1 +
>  3 files changed, 50 insertions(+)
> 
> diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
> index 10976bd..5dd68ca 100644
> --- a/xen/arch/x86/hvm/vmx/vmcs.c
> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
> @@ -578,6 +578,7 @@ void vmx_cpu_dead(unsigned int cpu)
>      vmx_free_vmcs(per_cpu(vmxon_region, cpu));
>      per_cpu(vmxon_region, cpu) = 0;
>      nvmx_cpu_dead(cpu);
> +    vmx_pi_desc_fixup(cpu);
>  }
> 
>  int vmx_cpu_up(void)
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index b14c84e..c71d496 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -208,6 +208,54 @@ static void vmx_pi_do_resume(struct vcpu *v)
>      vmx_pi_list_remove(v);
>  }
> 
> +void vmx_pi_desc_fixup(int cpu)
> +{
> +    unsigned int new_cpu, dest;
> +    unsigned long flags;
> +    struct arch_vmx_struct *vmx, *tmp;
> +    spinlock_t *new_lock, *old_lock = &per_cpu(vmx_pi_blocking, cpu).lock;
> +    struct list_head *blocked_vcpus = &per_cpu(vmx_pi_blocking, cpu).list;
> +
> +    if ( !iommu_intpost )
> +        return;
> +
> +    /*
> +     * We are in the context of CPU_DEAD or CPU_UP_CANCELED notification,
> +     * and it is impossible for a second CPU go down in parallel. So we
> +     * can safely acquire the old cpu's lock and then acquire the new_cpu's
> +     * lock after that.
> +     */
> +    spin_lock_irqsave(old_lock, flags);
> +
> +    list_for_each_entry_safe(vmx, tmp, blocked_vcpus, pi_blocking.list)
> +    {
> +        /*
> +         * We need to find an online cpu as the NDST of the PI descriptor, it
> +         * doesn't matter whether it is within the cpupool of the domain or
> +         * not. As long as it is online, the vCPU will be woken up once the
> +         * notification event arrives.
> +         */
> +        new_cpu = cpumask_any(&cpu_online_map);
> +        new_lock = &per_cpu(vmx_pi_blocking, new_cpu).lock;
> +
> +        spin_lock(new_lock);
> +
> +        ASSERT(vmx->pi_blocking.lock == old_lock);
> +
> +        dest = cpu_physical_id(new_cpu);
> +        write_atomic(&vmx->pi_desc.ndst,
> +                     x2apic_enabled ? dest : MASK_INSR(dest, PI_xAPIC_NDST_MASK));
> +
> +        list_move(&vmx->pi_blocking.list,
> +                  &per_cpu(vmx_pi_blocking, new_cpu).list);
> +        vmx->pi_blocking.lock = new_lock;
> +
> +        spin_unlock(new_lock);

I didn't check the whole flow... but did you suppress the notification somewhere
earlier, before the above list movement happens? Otherwise you may miss an
interrupt when the target cpu is dying...

> +    }
> +
> +    spin_unlock_irqrestore(old_lock, flags);
> +}
> +
>  /* This function is called when pcidevs_lock is held */
>  void vmx_pi_hooks_assign(struct domain *d)
>  {
> diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h
> b/xen/include/asm-x86/hvm/vmx/vmx.h
> index 4cdd9b1..9783c70 100644
> --- a/xen/include/asm-x86/hvm/vmx/vmx.h
> +++ b/xen/include/asm-x86/hvm/vmx/vmx.h
> @@ -569,6 +569,7 @@ void free_p2m_hap_data(struct p2m_domain *p2m);
>  void p2m_init_hap_data(struct p2m_domain *p2m);
> 
>  void vmx_pi_per_cpu_init(unsigned int cpu);
> +void vmx_pi_desc_fixup(int cpu);
> 
>  void vmx_pi_hooks_assign(struct domain *d);
>  void vmx_pi_hooks_deassign(struct domain *d);
> --
> 2.1.0
Wu, Feng Oct. 11, 2016, 11:46 a.m. UTC | #2
> -----Original Message-----
> From: Tian, Kevin
> Sent: Tuesday, October 11, 2016 4:38 PM
> To: Wu, Feng <feng.wu@intel.com>; xen-devel@lists.xen.org
> Cc: jbeulich@suse.com; andrew.cooper3@citrix.com;
> george.dunlap@eu.citrix.com; dario.faggioli@citrix.com
> Subject: RE: [PATCH v5 7/7] VMX: Fixup PI descriptor when cpu is offline
> 
> > From: Wu, Feng
> > Sent: Tuesday, October 11, 2016 8:58 AM
> >
> > When cpu is offline, we need to move all the vcpus in its blocking
> > list to another online cpu, this patch handles it.
> >
> > Signed-off-by: Feng Wu <feng.wu@intel.com>
> > ---
> > v5:
> > - Add some comments to explain why it doesn't cause deadlock
> > for the ABBA deadlock scenario.
> >
> >  xen/arch/x86/hvm/vmx/vmcs.c       |  1 +
> >  xen/arch/x86/hvm/vmx/vmx.c        | 48 +++++++++++++++++++++++++++++++++++++++
> >  xen/include/asm-x86/hvm/vmx/vmx.h |  1 +
> >  3 files changed, 50 insertions(+)
> >
> > diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
> > index 10976bd..5dd68ca 100644
> > --- a/xen/arch/x86/hvm/vmx/vmcs.c
> > +++ b/xen/arch/x86/hvm/vmx/vmcs.c
> > @@ -578,6 +578,7 @@ void vmx_cpu_dead(unsigned int cpu)
> >      vmx_free_vmcs(per_cpu(vmxon_region, cpu));
> >      per_cpu(vmxon_region, cpu) = 0;
> >      nvmx_cpu_dead(cpu);
> > +    vmx_pi_desc_fixup(cpu);
> >  }
> >
> >  int vmx_cpu_up(void)
> > diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> > index b14c84e..c71d496 100644
> > --- a/xen/arch/x86/hvm/vmx/vmx.c
> > +++ b/xen/arch/x86/hvm/vmx/vmx.c
> > @@ -208,6 +208,54 @@ static void vmx_pi_do_resume(struct vcpu *v)
> >      vmx_pi_list_remove(v);
> >  }
> >
> > +void vmx_pi_desc_fixup(int cpu)
> > +{
> > +    unsigned int new_cpu, dest;
> > +    unsigned long flags;
> > +    struct arch_vmx_struct *vmx, *tmp;
> > +    spinlock_t *new_lock, *old_lock = &per_cpu(vmx_pi_blocking, cpu).lock;
> > +    struct list_head *blocked_vcpus = &per_cpu(vmx_pi_blocking, cpu).list;
> > +
> > +    if ( !iommu_intpost )
> > +        return;
> > +
> > +    /*
> > +     * We are in the context of CPU_DEAD or CPU_UP_CANCELED notification,
> > +     * and it is impossible for a second CPU go down in parallel. So we
> > +     * can safely acquire the old cpu's lock and then acquire the new_cpu's
> > +     * lock after that.
> > +     */
> > +    spin_lock_irqsave(old_lock, flags);
> > +
> > +    list_for_each_entry_safe(vmx, tmp, blocked_vcpus, pi_blocking.list)
> > +    {
> > +        /*
> > +         * We need to find an online cpu as the NDST of the PI descriptor, it
> > +         * doesn't matter whether it is within the cpupool of the domain or
> > +         * not. As long as it is online, the vCPU will be woken up once the
> > +         * notification event arrives.
> > +         */
> > +        new_cpu = cpumask_any(&cpu_online_map);
> > +        new_lock = &per_cpu(vmx_pi_blocking, new_cpu).lock;
> > +
> > +        spin_lock(new_lock);
> > +
> > +        ASSERT(vmx->pi_blocking.lock == old_lock);
> > +
> > +        dest = cpu_physical_id(new_cpu);
> > +        write_atomic(&vmx->pi_desc.ndst,
> > +                     x2apic_enabled ? dest : MASK_INSR(dest, PI_xAPIC_NDST_MASK));
> > +
> > +        list_move(&vmx->pi_blocking.list,
> > +                  &per_cpu(vmx_pi_blocking, new_cpu).list);
> > +        vmx->pi_blocking.lock = new_lock;
> > +
> > +        spin_unlock(new_lock);
> 
> I didn't check the whole flow... but did you suppress notification somewhere
> earlier before above list movement happens? Otherwise you may miss an
> interrupt when the target cpu is dying...
> 

Yes, this is really a good point. The wakeup notification event will be missed
if it comes in before the "write_atomic(&vmx->pi_desc.ndst, ...)" above, so we
need to suppress it before "spin_lock_irqsave(old_lock, flags);".

Thanks,
Feng
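
[Editorial note] For illustration, here is a minimal sketch of how that suppression
could be folded into the list_for_each_entry_safe() loop of vmx_pi_desc_fixup().
It assumes Xen's existing pi_set_sn()/pi_clear_sn()/pi_test_on() helpers and
vcpu_unblock() with their usual behaviour; treat it as a sketch of the idea raised
in the review, not as the actual follow-up revision of the patch.

    list_for_each_entry_safe(vmx, tmp, blocked_vcpus, pi_blocking.list)
    {
        /*
         * Suppress notifications first, so no wakeup can be delivered
         * while NDST still points at the dying CPU.
         */
        pi_set_sn(&vmx->pi_desc);

        if ( pi_test_on(&vmx->pi_desc) )
        {
            /* A notification already arrived: wake the vCPU directly. */
            list_del(&vmx->pi_blocking.list);
            vmx->pi_blocking.lock = NULL;
            vcpu_unblock(container_of(vmx, struct vcpu, arch.hvm_vmx));
        }
        else
        {
            /* Retarget the descriptor at any online CPU, as in the patch. */
            new_cpu = cpumask_any(&cpu_online_map);
            new_lock = &per_cpu(vmx_pi_blocking, new_cpu).lock;

            spin_lock(new_lock);

            ASSERT(vmx->pi_blocking.lock == old_lock);

            dest = cpu_physical_id(new_cpu);
            write_atomic(&vmx->pi_desc.ndst,
                         x2apic_enabled ? dest
                                        : MASK_INSR(dest, PI_xAPIC_NDST_MASK));

            list_move(&vmx->pi_blocking.list,
                      &per_cpu(vmx_pi_blocking, new_cpu).list);
            vmx->pi_blocking.lock = new_lock;

            spin_unlock(new_lock);
        }

        /* Notifications can be re-enabled once NDST is valid again. */
        pi_clear_sn(&vmx->pi_desc);
    }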

Patch

diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 10976bd..5dd68ca 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -578,6 +578,7 @@  void vmx_cpu_dead(unsigned int cpu)
     vmx_free_vmcs(per_cpu(vmxon_region, cpu));
     per_cpu(vmxon_region, cpu) = 0;
     nvmx_cpu_dead(cpu);
+    vmx_pi_desc_fixup(cpu);
 }
 
 int vmx_cpu_up(void)
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index b14c84e..c71d496 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -208,6 +208,54 @@  static void vmx_pi_do_resume(struct vcpu *v)
     vmx_pi_list_remove(v);
 }
 
+void vmx_pi_desc_fixup(int cpu)
+{
+    unsigned int new_cpu, dest;
+    unsigned long flags;
+    struct arch_vmx_struct *vmx, *tmp;
+    spinlock_t *new_lock, *old_lock = &per_cpu(vmx_pi_blocking, cpu).lock;
+    struct list_head *blocked_vcpus = &per_cpu(vmx_pi_blocking, cpu).list;
+
+    if ( !iommu_intpost )
+        return;
+
+    /*
+     * We are in the context of CPU_DEAD or CPU_UP_CANCELED notification,
+     * and it is impossible for a second CPU to go down in parallel. So we
+     * can safely acquire the old cpu's lock and then acquire the new_cpu's
+     * lock after that.
+     */
+    spin_lock_irqsave(old_lock, flags);
+
+    list_for_each_entry_safe(vmx, tmp, blocked_vcpus, pi_blocking.list)
+    {
+        /*
+         * We need to find an online cpu as the NDST of the PI descriptor; it
+         * doesn't matter whether it is within the cpupool of the domain or
+         * not. As long as it is online, the vCPU will be woken up once the
+         * notification event arrives.
+         */
+        new_cpu = cpumask_any(&cpu_online_map);
+        new_lock = &per_cpu(vmx_pi_blocking, new_cpu).lock;
+
+        spin_lock(new_lock);
+
+        ASSERT(vmx->pi_blocking.lock == old_lock);
+
+        dest = cpu_physical_id(new_cpu);
+        write_atomic(&vmx->pi_desc.ndst,
+                     x2apic_enabled ? dest : MASK_INSR(dest, PI_xAPIC_NDST_MASK));
+
+        list_move(&vmx->pi_blocking.list,
+                  &per_cpu(vmx_pi_blocking, new_cpu).list);
+        vmx->pi_blocking.lock = new_lock;
+
+        spin_unlock(new_lock);
+    }
+
+    spin_unlock_irqrestore(old_lock, flags);
+}
+
 /* This function is called when pcidevs_lock is held */
 void vmx_pi_hooks_assign(struct domain *d)
 {
diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h b/xen/include/asm-x86/hvm/vmx/vmx.h
index 4cdd9b1..9783c70 100644
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -569,6 +569,7 @@  void free_p2m_hap_data(struct p2m_domain *p2m);
 void p2m_init_hap_data(struct p2m_domain *p2m);
 
 void vmx_pi_per_cpu_init(unsigned int cpu);
+void vmx_pi_desc_fixup(int cpu);
 
 void vmx_pi_hooks_assign(struct domain *d);
 void vmx_pi_hooks_deassign(struct domain *d);