@@ -9,6 +9,8 @@ KVM Lock Overview
The acquisition orders for mutexes are as follows:
+- cpus_read_lock() is taken outside kvm_lock
+
- kvm->lock is taken outside vcpu->mutex
- kvm->lock is taken outside kvm->slots_lock and kvm->irq_lock
@@ -216,15 +218,10 @@ time it will be set using the Dirty tracking mechanism described above.
:Type: mutex
:Arch: any
:Protects: - vm_list
-
-``kvm_count_lock``
-^^^^^^^^^^^^^^^^^^
-
-:Type: raw_spinlock_t
-:Arch: any
-:Protects: - hardware virtualization enable/disable
-:Comment: 'raw' because hardware enabling/disabling must be atomic /wrt
- migration.
+ - kvm_usage_count
+ - hardware virtualization enable/disable
+:Comment: KVM also disables CPU hotplug via cpus_read_lock() during
+ enable/disable.
``kvm->mn_invalidate_lock``
^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -288,3 +285,7 @@ time it will be set using the Dirty tracking mechanism described above.
:Type: mutex
:Arch: x86
:Protects: loading a vendor module (kvm_amd or kvm_intel)
+:Comment: Exists because using kvm_lock leads to deadlock. cpu_hotplug_lock is
+ taken outside of kvm_lock, e.g. in KVM's CPU online/offline callbacks, and
+ many operations need to take cpu_hotplug_lock when loading a vendor module,
+ e.g. updating static calls.
@@ -100,7 +100,6 @@ EXPORT_SYMBOL_GPL(halt_poll_ns_shrink);
*/
DEFINE_MUTEX(kvm_lock);
-static DEFINE_RAW_SPINLOCK(kvm_count_lock);
LIST_HEAD(vm_list);
static cpumask_var_t cpus_hardware_enabled;
@@ -5054,17 +5053,18 @@ static int kvm_online_cpu(unsigned int cpu)
* be enabled. Otherwise running VMs would encounter unrecoverable
* errors when scheduled to this CPU.
*/
- raw_spin_lock(&kvm_count_lock);
+ mutex_lock(&kvm_lock);
if (kvm_usage_count) {
WARN_ON_ONCE(atomic_read(&hardware_enable_failed));
hardware_enable_nolock(NULL);
+
if (atomic_read(&hardware_enable_failed)) {
atomic_set(&hardware_enable_failed, 0);
ret = -EIO;
}
}
- raw_spin_unlock(&kvm_count_lock);
+ mutex_unlock(&kvm_lock);
return ret;
}
@@ -5080,10 +5080,10 @@ static void hardware_disable_nolock(void *junk)
static int kvm_offline_cpu(unsigned int cpu)
{
- raw_spin_lock(&kvm_count_lock);
+ mutex_lock(&kvm_lock);
if (kvm_usage_count)
hardware_disable_nolock(NULL);
- raw_spin_unlock(&kvm_count_lock);
+ mutex_unlock(&kvm_lock);
return 0;
}
@@ -5099,9 +5099,9 @@ static void hardware_disable_all_nolock(void)
static void hardware_disable_all(void)
{
cpus_read_lock();
- raw_spin_lock(&kvm_count_lock);
+ mutex_lock(&kvm_lock);
hardware_disable_all_nolock();
- raw_spin_unlock(&kvm_count_lock);
+ mutex_unlock(&kvm_lock);
cpus_read_unlock();
}
@@ -5118,7 +5118,7 @@ static int hardware_enable_all(void)
* enable hardware multiple times.
*/
cpus_read_lock();
- raw_spin_lock(&kvm_count_lock);
+ mutex_lock(&kvm_lock);
kvm_usage_count++;
if (kvm_usage_count == 1) {
@@ -5131,7 +5131,7 @@ static int hardware_enable_all(void)
}
}
- raw_spin_unlock(&kvm_count_lock);
+ mutex_unlock(&kvm_lock);
cpus_read_unlock();
return r;
@@ -5737,6 +5737,17 @@ static void kvm_init_debug(void)
static int kvm_suspend(void)
{
+ /*
+ * Secondary CPUs and CPU hotplug are disabled across the suspend/resume
+ * callbacks, i.e. no need to acquire kvm_lock to ensure the usage count
+ * is stable. Assert that kvm_lock is not held to ensure the system
+ * isn't suspended while KVM is enabling hardware. Hardware enabling
+ * can be preempted, but the task cannot be frozen until it has dropped
+ * all locks (userspace tasks are frozen via a fake signal).
+ */
+ lockdep_assert_not_held(&kvm_lock);
+ lockdep_assert_irqs_disabled();
+
if (kvm_usage_count)
hardware_disable_nolock(NULL);
return 0;
@@ -5744,10 +5755,11 @@ static int kvm_suspend(void)
static void kvm_resume(void)
{
- if (kvm_usage_count) {
- lockdep_assert_not_held(&kvm_count_lock);
+ lockdep_assert_not_held(&kvm_lock);
+ lockdep_assert_irqs_disabled();
+
+ if (kvm_usage_count)
hardware_enable_nolock(NULL);
- }
}
static struct syscore_ops kvm_syscore_ops = {