diff mbox series

KVM: arm64: Destroy mpidr_data for 'late' vCPU creation

Message ID 20240507192912.1096658-1-oliver.upton@linux.dev (mailing list archive)
State New, archived
Headers show
Series KVM: arm64: Destroy mpidr_data for 'late' vCPU creation | expand

Commit Message

Oliver Upton May 7, 2024, 7:29 p.m. UTC
A particularly annoying userspace could create a vCPU after KVM has
computed mpidr_data for the VM, either by racing against VGIC
initialization or having a userspace irqchip.

In any case, this means mpidr_data no longer fully describes the VM, and
attempts to find the new vCPU with kvm_mpidr_to_vcpu() will fail. The
fix is to discard mpidr_data altogether, as it is only a performance
optimization and not required for correctness. In all likelihood KVM
will recompute the mappings when KVM_RUN is called on the new vCPU.

Note that reads of mpidr_data are not guarded by a lock; promote to RCU
to cope with the possibility of mpidr_data being invalidated at runtime.

Fixes: 54a8006d0b49 ("KVM: arm64: Fast-track kvm_mpidr_to_vcpu() when mpidr_data is available")
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
---
 arch/arm64/kvm/arm.c | 49 ++++++++++++++++++++++++++++++++++++--------
 1 file changed, 40 insertions(+), 9 deletions(-)


base-commit: fec50db7033ea478773b159e0e2efb135270e3b7

Comments

Marc Zyngier May 8, 2024, 6:39 a.m. UTC | #1
On Tue, 07 May 2024 20:29:12 +0100,
Oliver Upton <oliver.upton@linux.dev> wrote:
> 
> A particularly annoying userspace could create a vCPU after KVM has
> computed mpidr_data for the VM, either by racing against VGIC
> initialization or having a userspace irqchip.
> 
> In any case, this means mpidr_data no longer fully describes the VM, and
> attempts to find the new vCPU with kvm_mpidr_to_vcpu() will fail. The
> fix is to discard mpidr_data altogether, as it is only a performance
> optimization and not required for correctness. In all likelihood KVM
> will recompute the mappings when KVM_RUN is called on the new vCPU.
> 
> Note that reads of mpidr_data are not guarded by a lock; promote to RCU
> to cope with the possibility of mpidr_data being invalidated at runtime.
> 
> Fixes: 54a8006d0b49 ("KVM: arm64: Fast-track kvm_mpidr_to_vcpu() when mpidr_data is available")
> Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
> ---
>  arch/arm64/kvm/arm.c | 49 ++++++++++++++++++++++++++++++++++++--------
>  1 file changed, 40 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> index c4a0a35e02c7..0d845131a0e0 100644
> --- a/arch/arm64/kvm/arm.c
> +++ b/arch/arm64/kvm/arm.c
> @@ -195,6 +195,22 @@ void kvm_arch_create_vm_debugfs(struct kvm *kvm)
>  	kvm_sys_regs_create_debugfs(kvm);
>  }
>  
> +static void kvm_destroy_mpidr_data(struct kvm *kvm)
> +{
> +	struct kvm_mpidr_data *data;
> +
> +	mutex_lock(&kvm->arch.config_lock);
> +
> +	data = rcu_dereference_raw(kvm->arch.mpidr_data);

I'm slightly worried by this. Why can't we use the "cooked" version?
If anything I'd like to see a comment about this, as it is usually
frowned upon.

Thanks,

	M.
Oliver Upton May 8, 2024, 6:50 a.m. UTC | #2
On Wed, May 08, 2024 at 07:39:20AM +0100, Marc Zyngier wrote:
> On Tue, 07 May 2024 20:29:12 +0100,
> Oliver Upton <oliver.upton@linux.dev> wrote:
> > 
> > A particularly annoying userspace could create a vCPU after KVM has
> > computed mpidr_data for the VM, either by racing against VGIC
> > initialization or having a userspace irqchip.
> > 
> > In any case, this means mpidr_data no longer fully describes the VM, and
> > attempts to find the new vCPU with kvm_mpidr_to_vcpu() will fail. The
> > fix is to discard mpidr_data altogether, as it is only a performance
> > optimization and not required for correctness. In all likelihood KVM
> > will recompute the mappings when KVM_RUN is called on the new vCPU.
> > 
> > Note that reads of mpidr_data are not guarded by a lock; promote to RCU
> > to cope with the possibility of mpidr_data being invalidated at runtime.
> > 
> > Fixes: 54a8006d0b49 ("KVM: arm64: Fast-track kvm_mpidr_to_vcpu() when mpidr_data is available")
> > Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
> > ---
> >  arch/arm64/kvm/arm.c | 49 ++++++++++++++++++++++++++++++++++++--------
> >  1 file changed, 40 insertions(+), 9 deletions(-)
> > 
> > diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> > index c4a0a35e02c7..0d845131a0e0 100644
> > --- a/arch/arm64/kvm/arm.c
> > +++ b/arch/arm64/kvm/arm.c
> > @@ -195,6 +195,22 @@ void kvm_arch_create_vm_debugfs(struct kvm *kvm)
> >  	kvm_sys_regs_create_debugfs(kvm);
> >  }
> >  
> > +static void kvm_destroy_mpidr_data(struct kvm *kvm)
> > +{
> > +	struct kvm_mpidr_data *data;
> > +
> > +	mutex_lock(&kvm->arch.config_lock);
> > +
> > +	data = rcu_dereference_raw(kvm->arch.mpidr_data);
> 
> I'm slightly worried by this. Why can't we use the "cooked" version?
> If anything I'd like to see a comment about this, as it is usually
> frowned upon.

No reason other than my own laziness... This really should be:

	rcu_dereference_protected(kvm->arch.mpidr_data,
				  lockdep_is_held(&kvm->arch.config_lock));

since we're behind the update-side lock.
diff mbox series

Patch

diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index c4a0a35e02c7..0d845131a0e0 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -195,6 +195,22 @@  void kvm_arch_create_vm_debugfs(struct kvm *kvm)
 	kvm_sys_regs_create_debugfs(kvm);
 }
 
+static void kvm_destroy_mpidr_data(struct kvm *kvm)
+{
+	struct kvm_mpidr_data *data;
+
+	mutex_lock(&kvm->arch.config_lock);
+
+	data = rcu_dereference_raw(kvm->arch.mpidr_data);
+	if (data) {
+		rcu_assign_pointer(kvm->arch.mpidr_data, NULL);
+		synchronize_rcu();
+		kfree(data);
+	}
+
+	mutex_unlock(&kvm->arch.config_lock);
+}
+
 /**
  * kvm_arch_destroy_vm - destroy the VM data structure
  * @kvm:	pointer to the KVM struct
@@ -209,7 +225,8 @@  void kvm_arch_destroy_vm(struct kvm *kvm)
 	if (is_protected_kvm_enabled())
 		pkvm_destroy_hyp_vm(kvm);
 
-	kfree(kvm->arch.mpidr_data);
+	kvm_destroy_mpidr_data(kvm);
+
 	kfree(kvm->arch.sysreg_masks);
 	kvm_destroy_vcpus(kvm);
 
@@ -395,6 +412,13 @@  int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 
 	vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu;
 
+	/*
+	 * This vCPU may have been created after mpidr_data was initialized.
+	 * Throw out the pre-computed mappings if that is the case which forces
+	 * KVM to fall back to iteratively searching the vCPUs.
+	 */
+	kvm_destroy_mpidr_data(vcpu->kvm);
+
 	err = kvm_vgic_vcpu_init(vcpu);
 	if (err)
 		return err;
@@ -594,7 +618,8 @@  static void kvm_init_mpidr_data(struct kvm *kvm)
 
 	mutex_lock(&kvm->arch.config_lock);
 
-	if (kvm->arch.mpidr_data || atomic_read(&kvm->online_vcpus) == 1)
+	if (rcu_access_pointer(kvm->arch.mpidr_data) ||
+	    atomic_read(&kvm->online_vcpus) == 1)
 		goto out;
 
 	kvm_for_each_vcpu(c, vcpu, kvm) {
@@ -631,7 +656,7 @@  static void kvm_init_mpidr_data(struct kvm *kvm)
 		data->cmpidr_to_idx[index] = c;
 	}
 
-	kvm->arch.mpidr_data = data;
+	rcu_assign_pointer(kvm->arch.mpidr_data, data);
 out:
 	mutex_unlock(&kvm->arch.config_lock);
 }
@@ -2470,21 +2495,27 @@  static int __init init_hyp_mode(void)
 
 struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
 {
-	struct kvm_vcpu *vcpu;
+	struct kvm_vcpu *vcpu = NULL;
+	struct kvm_mpidr_data *data;
 	unsigned long i;
 
 	mpidr &= MPIDR_HWID_BITMASK;
 
-	if (kvm->arch.mpidr_data) {
-		u16 idx = kvm_mpidr_index(kvm->arch.mpidr_data, mpidr);
+	rcu_read_lock();
+	data = rcu_dereference(kvm->arch.mpidr_data);
 
-		vcpu = kvm_get_vcpu(kvm,
-				    kvm->arch.mpidr_data->cmpidr_to_idx[idx]);
+	if (data) {
+		u16 idx = kvm_mpidr_index(data, mpidr);
+
+		vcpu = kvm_get_vcpu(kvm, data->cmpidr_to_idx[idx]);
 		if (mpidr != kvm_vcpu_get_mpidr_aff(vcpu))
 			vcpu = NULL;
+	}
 
+	rcu_read_unlock();
+
+	if (vcpu)
 		return vcpu;
-	}
 
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu))