@@ -23,13 +23,20 @@
*/
static DEFINE_PER_CPU(struct list_head, wakeup_vcpus_on_cpu);
/*
- * Protect the per-CPU list with a per-CPU spinlock to handle task migration.
+ * Protect the per-CPU list with two per-CPU spinlocks to handle task migration.
+ * IRQs must be disabled when taking the two locks, otherwise a deadlock will
+ * occur if a wakeup IRQ arrives and attempts to acquire the locks.
+ * The ->sched_out() path of a vCPU that is about to block takes only the
+ * "out lock", which is never taken by the wakeup IRQ handler running on the
+ * same pCPU as the ->sched_out() path.
* When a blocking vCPU is awakened _and_ migrated to a different pCPU, the
* ->sched_in() path will need to take the vCPU off the list of the _previous_
- * CPU. IRQs must be disabled when taking this lock, otherwise deadlock will
- * occur if a wakeup IRQ arrives and attempts to acquire the lock.
+ * CPU. It takes both the "in lock" and the "out lock" to guard against
+ * racing list accesses on the _previous_ CPU.
*/
-static DEFINE_PER_CPU(raw_spinlock_t, wakeup_vcpus_on_cpu_lock);
+static DEFINE_PER_CPU(raw_spinlock_t, wakeup_vcpus_on_cpu_lock_in);
+static DEFINE_PER_CPU(raw_spinlock_t, wakeup_vcpus_on_cpu_lock_out);
+
static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
{
@@ -57,7 +64,6 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
struct pi_desc old, new;
unsigned long flags;
unsigned int dest;
-
/*
* To simplify hot-plug and dynamic toggling of APICv, keep PI.NDST and
* PI.SN up-to-date even if there is no assigned device or if APICv is
@@ -89,9 +95,11 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
* current pCPU if the task was migrated.
*/
if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR) {
- raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
+ raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock_in, vcpu->cpu));
+ raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock_out, vcpu->cpu));
list_del(&vmx->pi_wakeup_list);
- raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
+ raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock_out, vcpu->cpu));
+ raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock_in, vcpu->cpu));
}
dest = cpu_physical_id(cpu);
@@ -152,10 +160,10 @@ static void pi_enable_wakeup_handler(struct kvm_vcpu *vcpu)
local_irq_save(flags);
- raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
+ raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock_out, vcpu->cpu));
list_add_tail(&vmx->pi_wakeup_list,
&per_cpu(wakeup_vcpus_on_cpu, vcpu->cpu));
- raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
+ raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock_out, vcpu->cpu));
WARN(pi_desc->sn, "PI descriptor SN field set before blocking");
@@ -219,12 +227,11 @@ void pi_wakeup_handler(void)
{
int cpu = smp_processor_id();
struct list_head *wakeup_list = &per_cpu(wakeup_vcpus_on_cpu, cpu);
- raw_spinlock_t *spinlock = &per_cpu(wakeup_vcpus_on_cpu_lock, cpu);
+ raw_spinlock_t *spinlock = &per_cpu(wakeup_vcpus_on_cpu_lock_in, cpu);
struct vcpu_vmx *vmx;
raw_spin_lock(spinlock);
list_for_each_entry(vmx, wakeup_list, pi_wakeup_list) {
-
if (pi_test_on(&vmx->pi_desc))
kvm_vcpu_wake_up(&vmx->vcpu);
}
@@ -234,7 +241,8 @@ void pi_wakeup_handler(void)
void __init pi_init_cpu(int cpu)
{
INIT_LIST_HEAD(&per_cpu(wakeup_vcpus_on_cpu, cpu));
- raw_spin_lock_init(&per_cpu(wakeup_vcpus_on_cpu_lock, cpu));
+ raw_spin_lock_init(&per_cpu(wakeup_vcpus_on_cpu_lock_in, cpu));
+ raw_spin_lock_init(&per_cpu(wakeup_vcpus_on_cpu_lock_out, cpu));
}
bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu)
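
To make the intended protocol easier to follow, below is a minimal userspace
sketch of how the three paths pair up with the two locks. It is illustrative
only: the struct layout, function names, and the singly linked list are
assumptions made for this example, not the kernel code; only the lock pairing
mirrors the patch (->sched_out() takes the "out lock", the wakeup handler takes
the "in lock", and ->sched_in() after migration takes both locks of the
previous CPU, "in" before "out"). Build with: gcc -O2 -pthread model.c

#include <pthread.h>
#include <stdio.h>

/* Illustrative stand-ins for the per-CPU list and the two per-CPU locks. */
struct vcpu {
	struct vcpu *next;		/* wakeup list linkage (model only) */
	int id;
};

struct pcpu {
	struct vcpu *wakeup_list;	/* models wakeup_vcpus_on_cpu */
	pthread_spinlock_t lock_in;	/* models wakeup_vcpus_on_cpu_lock_in */
	pthread_spinlock_t lock_out;	/* models wakeup_vcpus_on_cpu_lock_out */
};

/*
 * ->sched_out() of a vCPU about to block: add to the local CPU's list under
 * the "out lock" only.  In the kernel this runs with IRQs disabled on the
 * same pCPU as the wakeup handler, which is why the handler does not need
 * the "out lock"; this single-threaded model simply calls the paths in turn.
 */
static void sched_out_add(struct pcpu *cpu, struct vcpu *v)
{
	pthread_spin_lock(&cpu->lock_out);
	v->next = cpu->wakeup_list;
	cpu->wakeup_list = v;
	pthread_spin_unlock(&cpu->lock_out);
}

/* Wakeup IRQ handler: walk the local list under the "in lock" only. */
static void wakeup_handler_scan(struct pcpu *cpu)
{
	pthread_spin_lock(&cpu->lock_in);
	for (struct vcpu *v = cpu->wakeup_list; v; v = v->next)
		printf("wake vCPU %d\n", v->id);
	pthread_spin_unlock(&cpu->lock_in);
}

/*
 * ->sched_in() after migration: unlink the vCPU from the _previous_ CPU's
 * list while holding both of that CPU's locks ("in" before "out", matching
 * the order used in vmx_vcpu_pi_load() above), so it cannot race with either
 * the wakeup handler or ->sched_out() running there.
 */
static void sched_in_remove(struct pcpu *prev, struct vcpu *v)
{
	pthread_spin_lock(&prev->lock_in);
	pthread_spin_lock(&prev->lock_out);
	for (struct vcpu **pp = &prev->wakeup_list; *pp; pp = &(*pp)->next) {
		if (*pp == v) {
			*pp = v->next;
			break;
		}
	}
	pthread_spin_unlock(&prev->lock_out);
	pthread_spin_unlock(&prev->lock_in);
}

int main(void)
{
	struct pcpu cpu0 = { .wakeup_list = NULL };
	struct vcpu v = { .next = NULL, .id = 0 };

	pthread_spin_init(&cpu0.lock_in, PTHREAD_PROCESS_PRIVATE);
	pthread_spin_init(&cpu0.lock_out, PTHREAD_PROCESS_PRIVATE);

	sched_out_add(&cpu0, &v);	/* vCPU blocks on cpu0 */
	wakeup_handler_scan(&cpu0);	/* wakeup vector fires on cpu0 */
	sched_in_remove(&cpu0, &v);	/* vCPU scheduled in on another CPU */

	pthread_spin_destroy(&cpu0.lock_in);
	pthread_spin_destroy(&cpu0.lock_out);
	return 0;
}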