diff mbox series

[RFC,12/12] Drivers: hv: vmbus: Ensure IRQ affinity isn't set to a CPU going offline

Message ID 20240604050940.859909-13-mhklinux@outlook.com (mailing list archive)
State Not Applicable
Headers show
Series Hyper-V guests use Linux IRQs for channel interrupts | expand

Commit Message

Michael Kelley June 4, 2024, 5:09 a.m. UTC
From: Michael Kelley <mhklinux@outlook.com>

hv_synic_cleanup() currently prevents a CPU from going offline if any
VMBus channel IRQs are targeted at that CPU. However, current code has a
race in that an IRQ could be affinitized to the CPU after the check in
hv_synic_cleanup() and before the CPU is removed from cpu_online_mask.
Any channel interrupts could be lost and the channel would hang.

Fix this by adding a flag for each CPU indicating if the synic is online.
Filter the new affinity with these flags so that vmbus_irq_set_affinity()
doesn't pick a CPU where the synic is already offline.

Also add a spin lock so that vmbus_irq_set_affinity() changing the
channel target_cpu and sending the MODIFYCHANNEL message to Hyper-V
are atomic with respect to the checks made in hv_synic_cleanup().
hv_synic_cleanup() needs these operations to be atomic so that it
can correctly count the MODIFYCHANNEL messages that need to be
ack'ed by Hyper-V.

Signed-off-by: Michael Kelley <mhklinux@outlook.com>
---
 drivers/hv/connection.c   |  1 +
 drivers/hv/hv.c           | 22 ++++++++++++++++++++--
 drivers/hv/hyperv_vmbus.h |  2 ++
 drivers/hv/vmbus_drv.c    | 34 ++++++++++++++++++++++++++++------
 4 files changed, 51 insertions(+), 8 deletions(-)
diff mbox series

Patch

diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index a105eecdeec2..b44ce3d39135 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -213,6 +213,7 @@  int vmbus_connect(void)
 
 	INIT_LIST_HEAD(&vmbus_connection.chn_list);
 	mutex_init(&vmbus_connection.channel_mutex);
+	spin_lock_init(&vmbus_connection.set_affinity_lock);
 
 	/*
 	 * Setup the vmbus event connection for channel interrupt
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 76658dfc5008..89e491219129 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -338,6 +338,8 @@  int hv_synic_init(unsigned int cpu)
 {
 	hv_synic_enable_regs(cpu);
 
+	cpumask_set_cpu(cpu, &vmbus_connection.synic_online);
+
 	hv_stimer_legacy_init(cpu, VMBUS_MESSAGE_SINT);
 
 	return 0;
@@ -513,6 +515,17 @@  int hv_synic_cleanup(unsigned int cpu)
 	 * TODO: Re-bind the channels to different CPUs.
 	 */
 	mutex_lock(&vmbus_connection.channel_mutex);
+	spin_lock(&vmbus_connection.set_affinity_lock);
+
+	/*
+	 * Once the check for channels assigned to this CPU is complete, we
+	 * must not allow a channel to be assigned to this CPU. So mark
+	 * the synic as no longer online. This cpumask is checked in
+	 * vmbus_irq_set_affinity() to prevent setting the affinity of
+	 * an IRQ to such a CPU.
+	 */
+	cpumask_clear_cpu(cpu, &vmbus_connection.synic_online);
+
 	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
 		if (channel->target_cpu == cpu) {
 			channel_found = true;
@@ -527,10 +540,11 @@  int hv_synic_cleanup(unsigned int cpu)
 		if (channel_found)
 			break;
 	}
+	spin_unlock(&vmbus_connection.set_affinity_lock);
 	mutex_unlock(&vmbus_connection.channel_mutex);
 
 	if (channel_found)
-		return -EBUSY;
+		goto set_online;
 
 	/*
 	 * channel_found == false means that any channels that were previously
@@ -547,7 +561,7 @@  int hv_synic_cleanup(unsigned int cpu)
 		if (hv_synic_event_pending()) {
 			pr_err("Events pending when trying to offline CPU %d\n",
 					cpu);
-			return -EBUSY;
+			goto set_online;
 		}
 	}
 
@@ -557,4 +571,8 @@  int hv_synic_cleanup(unsigned int cpu)
 	hv_synic_disable_regs(cpu);
 
 	return 0;
+
+set_online:
+	cpumask_set_cpu(cpu, &vmbus_connection.synic_online);
+	return -EBUSY;
 }
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 571b2955b38e..92ae5af10778 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -263,6 +263,8 @@  struct vmbus_connection {
 	struct fwnode_handle *vmbus_fwnode;
 	struct irq_domain *vmbus_irq_domain;
 	struct irq_chip	vmbus_irq_chip;
+	cpumask_t synic_online;
+	spinlock_t set_affinity_lock;
 
 	/*
 	 * VM-wide counts of MODIFYCHANNEL messages sent and completed.
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 87f2f3436136..3430ad42d7ba 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1351,6 +1351,14 @@  int vmbus_irq_set_affinity(struct irq_data *data,
 		return -EINVAL;
 	}
 
+	/*
+	 * The spin lock must be held so that checking synic_online, sending
+	 * the MODIFYCHANNEL message, and setting channel->target_cpu are
+	 * atomic with respect to hv_synic_cleanup() clearing the CPU in
+	 * synic_online and doing the search.
+	 */
+	spin_lock(&vmbus_connection.set_affinity_lock);
+
 	/* Don't consider CPUs that are isolated */
 	if (housekeeping_enabled(HK_TYPE_MANAGED_IRQ))
 		cpumask_and(&tempmask, dest,
@@ -1367,30 +1375,39 @@  int vmbus_irq_set_affinity(struct irq_data *data,
 	origin_cpu = channel->target_cpu;
 	if (cpumask_test_cpu(origin_cpu, &tempmask)) {
 		target_cpu = origin_cpu;
+		spin_unlock(&vmbus_connection.set_affinity_lock);
 		goto update_effective;
 	}
 
 	/*
 	 * Pick a CPU from the new affinity mask. As a simple heuristic to
 	 * spread out the selection when the mask contains multiple CPUs,
-	 * start with whatever CPU was last selected.
+	 * start with whatever CPU was last selected. Also filter out any
+	 * CPUs where synic_online isn't set -- these CPUs are in the process
+	 * of going offline and must not have channel interrupts assigned
+	 * to them.
 	 */
+	cpumask_and(&tempmask, &tempmask, &vmbus_connection.synic_online);
 	target_cpu = cpumask_next_wrap(next_cpu, &tempmask, nr_cpu_ids, false);
-	if (target_cpu >= nr_cpu_ids)
-		return -EINVAL;
+	if (target_cpu >= nr_cpu_ids) {
+		ret = -EINVAL;
+		goto unlock;
+	}
 	next_cpu = target_cpu;
 
 	/*
 	 * Hyper-V will ignore MODIFYCHANNEL messages for "non-open" channels;
 	 * avoid sending the message and fail here for such channels.
 	 */
-	if (channel->state != CHANNEL_OPENED_STATE)
-		return -EIO;
+	if (channel->state != CHANNEL_OPENED_STATE) {
+		ret = -EIO;
+		goto unlock;
+	}
 
 	ret = vmbus_send_modifychannel(channel,
 				     hv_cpu_number_to_vp_number(target_cpu));
 	if (ret)
-		return ret;
+		goto unlock;
 
 	/*
 	 * Warning.  At this point, there is *no* guarantee that the host will
@@ -1408,6 +1425,7 @@  int vmbus_irq_set_affinity(struct irq_data *data,
 	 */
 
 	channel->target_cpu = target_cpu;
+	spin_unlock(&vmbus_connection.set_affinity_lock);
 
 	/* See init_vp_index(). */
 	if (hv_is_perf_channel(channel))
@@ -1422,6 +1440,10 @@  int vmbus_irq_set_affinity(struct irq_data *data,
 update_effective:
 	irq_data_update_effective_affinity(data, cpumask_of(target_cpu));
 	return IRQ_SET_MASK_OK;
+
+unlock:
+	spin_unlock(&vmbus_connection.set_affinity_lock);
+	return ret;
 }
 
 /*