diff mbox series

[v3,05/19] KVM: arm64: vgic-debug: Use an xarray mark for debug iterator

Message ID 20240422200158.2606761-6-oliver.upton@linux.dev (mailing list archive)
State New, archived
Headers show
Series KVM: arm64: Transition to a per-ITS translation cache | expand

Commit Message

Oliver Upton April 22, 2024, 8:01 p.m. UTC
The vgic debug iterator is the final user of vgic_copy_lpi_list(), but
is a bit more complicated to transition to something else. Use a mark
in the LPI xarray to record the indices 'known' to the debug iterator.
Protect against the LPIs from being freed by associating an additional
reference with the xarray mark.

Rework iter_next() to let the xarray walk 'drive' the iteration after
visiting all of the SGIs, PPIs, and SPIs.

Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
---
 arch/arm64/kvm/vgic/vgic-debug.c | 82 +++++++++++++++++++++++---------
 arch/arm64/kvm/vgic/vgic-its.c   |  4 +-
 arch/arm64/kvm/vgic/vgic.h       |  1 +
 include/kvm/arm_vgic.h           |  2 +
 4 files changed, 64 insertions(+), 25 deletions(-)

Comments

Zenghui Yu Aug. 6, 2024, 9:23 a.m. UTC | #1
Hi Oliver,

On 2024/4/23 4:01, Oliver Upton wrote:
> The vgic debug iterator is the final user of vgic_copy_lpi_list(), but
> is a bit more complicated to transition to something else. Use a mark
> in the LPI xarray to record the indices 'known' to the debug iterator.
> Protect against the LPIs from being freed by associating an additional
> reference with the xarray mark.
> 
> Rework iter_next() to let the xarray walk 'drive' the iteration after
> visiting all of the SGIs, PPIs, and SPIs.
> 
> Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
> ---
>  arch/arm64/kvm/vgic/vgic-debug.c | 82 +++++++++++++++++++++++---------
>  arch/arm64/kvm/vgic/vgic-its.c   |  4 +-
>  arch/arm64/kvm/vgic/vgic.h       |  1 +
>  include/kvm/arm_vgic.h           |  2 +
>  4 files changed, 64 insertions(+), 25 deletions(-)
> 
> diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c
> index 389025ce7749..bcbc8c986b1d 100644
> --- a/arch/arm64/kvm/vgic/vgic-debug.c
> +++ b/arch/arm64/kvm/vgic/vgic-debug.c
> @@ -28,27 +28,65 @@ struct vgic_state_iter {
>  	int nr_lpis;
>  	int dist_id;
>  	int vcpu_id;
> -	int intid;
> +	unsigned long intid;
>  	int lpi_idx;
> -	u32 *lpi_array;
>  };
>  
> -static void iter_next(struct vgic_state_iter *iter)
> +static void iter_next(struct kvm *kvm, struct vgic_state_iter *iter)
>  {
> +	struct vgic_dist *dist = &kvm->arch.vgic;
> +
>  	if (iter->dist_id == 0) {
>  		iter->dist_id++;
>  		return;
>  	}
>  
> +	/*
> +	 * Let the xarray drive the iterator after the last SPI, as the iterator
> +	 * has exhausted the sequentially-allocated INTID space.
> +	 */
> +	if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS - 1)) {
> +		if (iter->lpi_idx < iter->nr_lpis)
> +			xa_find_after(&dist->lpi_xa, &iter->intid,
> +				      VGIC_LPI_MAX_INTID,
> +				      LPI_XA_MARK_DEBUG_ITER);
> +		iter->lpi_idx++;
> +		return;
> +	}

What's the purpose of moving the LPI handling up here?

> +
>  	iter->intid++;
>  	if (iter->intid == VGIC_NR_PRIVATE_IRQS &&
>  	    ++iter->vcpu_id < iter->nr_cpus)
>  		iter->intid = 0;

In case the guest *doesn't* have any LPI, we previously relied on the
iterator setting

	'intid = nr_spis + VGIC_NR_PRIVATE_IRQS' && 'lpi_idx = 1'

to exit the iterator. But it was broken with this refactor -- the intid
remains at 'nr_spis + VGIC_NR_PRIVATE_IRQS - 1', and vgic_debug_show()
endlessly prints the last SPI's state.

The following diff seems work for me.

diff --git a/arch/arm64/kvm/vgic/vgic-debug.c 
b/arch/arm64/kvm/vgic/vgic-debug.c
index 6faa1d16c9ce..f56f74c8cf54 100644
--- a/arch/arm64/kvm/vgic/vgic-debug.c
+++ b/arch/arm64/kvm/vgic/vgic-debug.c
@@ -41,11 +41,16 @@ static void iter_next(struct kvm *kvm, struct 
vgic_state_iter *iter)
  		return;
  	}

+	iter->intid++;
+	if (iter->intid == VGIC_NR_PRIVATE_IRQS &&
+	    ++iter->vcpu_id < iter->nr_cpus)
+		iter->intid = 0;
+
  	/*
  	 * Let the xarray drive the iterator after the last SPI, as the iterator
  	 * has exhausted the sequentially-allocated INTID space.
  	 */
-	if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS - 1)) {
+	if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS)) {
  		if (iter->lpi_idx < iter->nr_lpis)
  			xa_find_after(&dist->lpi_xa, &iter->intid,
  				      VGIC_LPI_MAX_INTID,
@@ -53,11 +58,6 @@ static void iter_next(struct kvm *kvm, struct 
vgic_state_iter *iter)
  		iter->lpi_idx++;
  		return;
  	}
-
-	iter->intid++;
-	if (iter->intid == VGIC_NR_PRIVATE_IRQS &&
-	    ++iter->vcpu_id < iter->nr_cpus)
-		iter->intid = 0;
  }

  static int iter_mark_lpis(struct kvm *kvm)
Zenghui Yu Aug. 6, 2024, 12:39 p.m. UTC | #2
On 2024/8/6 17:23, Zenghui Yu wrote:
> The following diff seems work for me.
> 
> diff --git a/arch/arm64/kvm/vgic/vgic-debug.c 
> b/arch/arm64/kvm/vgic/vgic-debug.c
> index 6faa1d16c9ce..f56f74c8cf54 100644
> --- a/arch/arm64/kvm/vgic/vgic-debug.c
> +++ b/arch/arm64/kvm/vgic/vgic-debug.c
> @@ -41,11 +41,16 @@ static void iter_next(struct kvm *kvm, struct vgic_state_iter *iter)
>  		return;
>  	}
>  
> +	iter->intid++;

[*]

> +	if (iter->intid == VGIC_NR_PRIVATE_IRQS &&
> +	    ++iter->vcpu_id < iter->nr_cpus)
> +		iter->intid = 0;
> +
>  	/*
>  	 * Let the xarray drive the iterator after the last SPI, as the iterator
>  	 * has exhausted the sequentially-allocated INTID space.
>  	 */
> -	if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS - 1)) {
> +	if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS)) {
>  		if (iter->lpi_idx < iter->nr_lpis)
>  		    xa_find_after(&dist->lpi_xa, &iter->intid,

Just noticed that it's wrong to increase intid before xa_find_after(),
which would break the LPI case. Let me have a think...

Zenghui
Zenghui Yu Aug. 6, 2024, 2:11 p.m. UTC | #3
On 2024/8/6 20:39, Zenghui Yu wrote:
> On 2024/8/6 17:23, Zenghui Yu wrote:
> > The following diff seems work for me.
> >
> > diff --git a/arch/arm64/kvm/vgic/vgic-debug.c 
> > b/arch/arm64/kvm/vgic/vgic-debug.c
> > index 6faa1d16c9ce..f56f74c8cf54 100644
> > --- a/arch/arm64/kvm/vgic/vgic-debug.c
> > +++ b/arch/arm64/kvm/vgic/vgic-debug.c
> > @@ -41,11 +41,16 @@ static void iter_next(struct kvm *kvm, struct vgic_state_iter *iter)
> >  		return;
> >  	}
> > 
> > +	iter->intid++;
> 
> [*]
> 
> > +	if (iter->intid == VGIC_NR_PRIVATE_IRQS &&
> > +	    ++iter->vcpu_id < iter->nr_cpus)
> > +		iter->intid = 0;
> > +
> >  	/*
> >  	 * Let the xarray drive the iterator after the last SPI, as the iterator
> >  	 * has exhausted the sequentially-allocated INTID space.
> >  	 */
> > -	if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS - 1)) {
> > +	if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS)) {
> >  		if (iter->lpi_idx < iter->nr_lpis)
> >  			xa_find_after(&dist->lpi_xa, &iter->intid,
> 
> Just noticed that it's wrong to increase intid before xa_find_after(),
> which would break the LPI case. Let me have a think...

So searching the LPI xarray and populating lpi_idx when the guest
doesn't have LPI is pointless. We can fix the reported issue by dealing
with the 'nr_lpis == 0' case directly, which might be the easiest
approach. Let me know what do you think.

diff --git a/arch/arm64/kvm/vgic/vgic-debug.c 
b/arch/arm64/kvm/vgic/vgic-debug.c
index bcbc8c986b1d..8177e5972ea8 100644
--- a/arch/arm64/kvm/vgic/vgic-debug.c
+++ b/arch/arm64/kvm/vgic/vgic-debug.c
@@ -45,7 +45,8 @@ static void iter_next(struct kvm *kvm, struct 
vgic_state_iter *iter)
  	 * Let the xarray drive the iterator after the last SPI, as the iterator
  	 * has exhausted the sequentially-allocated INTID space.
  	 */
-	if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS - 1)) {
+	if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS - 1) &&
+	    iter->nr_lpis) {
  		if (iter->lpi_idx < iter->nr_lpis)
  			xa_find_after(&dist->lpi_xa, &iter->intid,
  				      VGIC_LPI_MAX_INTID,
@@ -112,7 +113,7 @@ static bool end_of_vgic(struct vgic_state_iter *iter)
  	return iter->dist_id > 0 &&
  		iter->vcpu_id == iter->nr_cpus &&
  		iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS) &&
-		iter->lpi_idx > iter->nr_lpis;
+		(iter->lpi_idx > iter->nr_lpis || !iter->nr_lpis);
  }

  static void *vgic_debug_start(struct seq_file *s, loff_t *pos)

Thanks,
Zenghui
Zenghui Yu Aug. 6, 2024, 4 p.m. UTC | #4
On 2024/8/6 22:11, Zenghui Yu wrote:
> @@ -112,7 +113,7 @@ static bool end_of_vgic(struct vgic_state_iter *iter)
>  	return iter->dist_id > 0 &&
>  		iter->vcpu_id == iter->nr_cpus &&
>  		iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS) &&
> -		iter->lpi_idx > iter->nr_lpis;
> +		(iter->lpi_idx > iter->nr_lpis || !iter->nr_lpis);

And this should actually be written as:

iter->lpi_idx >= iter->nr_lpis

even in the first commit adding the LPI status in debugfs (e294cb3a6d1a)
if I understand it correctly. I will give it a bit more tests tomorrow..
Marc Zyngier Aug. 6, 2024, 4:21 p.m. UTC | #5
On Tue, 06 Aug 2024 17:00:44 +0100,
Zenghui Yu <zenghui.yu@linux.dev> wrote:
> 
> On 2024/8/6 22:11, Zenghui Yu wrote:
> > @@ -112,7 +113,7 @@ static bool end_of_vgic(struct vgic_state_iter *iter)
> >  	return iter->dist_id > 0 &&
> >  		iter->vcpu_id == iter->nr_cpus &&
> >  		iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS) &&
> > -		iter->lpi_idx > iter->nr_lpis;
> > +		(iter->lpi_idx > iter->nr_lpis || !iter->nr_lpis);
> 
> And this should actually be written as:
> 
> iter->lpi_idx >= iter->nr_lpis
> 
> even in the first commit adding the LPI status in debugfs (e294cb3a6d1a)
> if I understand it correctly. I will give it a bit more tests tomorrow..

Yup, this looks like a long-standing bug (/me pleads guilty).

Maybe worth fixing them independently in order to facilitate the
inevitable backports?

Thanks,

	M.
Zenghui Yu Aug. 7, 2024, 5:23 a.m. UTC | #6
On 2024/8/7 0:21, Marc Zyngier wrote:
> On Tue, 06 Aug 2024 17:00:44 +0100,
> Zenghui Yu <zenghui.yu@linux.dev> wrote:
> >
> > On 2024/8/6 22:11, Zenghui Yu wrote:
> > > @@ -112,7 +113,7 @@ static bool end_of_vgic(struct vgic_state_iter *iter)
> > >  	return iter->dist_id > 0 &&
> > >  		iter->vcpu_id == iter->nr_cpus &&
> > >  		iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS) &&
> > > -		iter->lpi_idx > iter->nr_lpis;
> > > +		(iter->lpi_idx > iter->nr_lpis || !iter->nr_lpis);
> >
> > And this should actually be written as:
> >
> > iter->lpi_idx >= iter->nr_lpis
> >
> > even in the first commit adding the LPI status in debugfs (e294cb3a6d1a)
> > if I understand it correctly. I will give it a bit more tests tomorrow..
> 
> Yup, this looks like a long-standing bug (/me pleads guilty).
> 
> Maybe worth fixing them independently in order to facilitate the
> inevitable backports?

I'm sorry that I misread the code again (shouldn't have sent spam late
at night :-( ).

Consider the last LPI:

|vgic_debug_next() {
|	iter_next()		// get the last valid LPI intid
|	end_of_vgic()		// lpi_idx == nr_lpis
|}

We need to go ahead to print this LPI's state and go through one more
vgic_debug_next() to exit the iterator. So there's no problem in the
current implementation for LPI, it's just that the code is a bit hard to
follow.

I've sent the "easiest approach" [*] out now.

Thanks,
Zenghui

[*] 
https://lore.kernel.org/kvmarm/20240807052024.2084-1-yuzenghui@huawei.com
diff mbox series

Patch

diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c
index 389025ce7749..bcbc8c986b1d 100644
--- a/arch/arm64/kvm/vgic/vgic-debug.c
+++ b/arch/arm64/kvm/vgic/vgic-debug.c
@@ -28,27 +28,65 @@  struct vgic_state_iter {
 	int nr_lpis;
 	int dist_id;
 	int vcpu_id;
-	int intid;
+	unsigned long intid;
 	int lpi_idx;
-	u32 *lpi_array;
 };
 
-static void iter_next(struct vgic_state_iter *iter)
+static void iter_next(struct kvm *kvm, struct vgic_state_iter *iter)
 {
+	struct vgic_dist *dist = &kvm->arch.vgic;
+
 	if (iter->dist_id == 0) {
 		iter->dist_id++;
 		return;
 	}
 
+	/*
+	 * Let the xarray drive the iterator after the last SPI, as the iterator
+	 * has exhausted the sequentially-allocated INTID space.
+	 */
+	if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS - 1)) {
+		if (iter->lpi_idx < iter->nr_lpis)
+			xa_find_after(&dist->lpi_xa, &iter->intid,
+				      VGIC_LPI_MAX_INTID,
+				      LPI_XA_MARK_DEBUG_ITER);
+		iter->lpi_idx++;
+		return;
+	}
+
 	iter->intid++;
 	if (iter->intid == VGIC_NR_PRIVATE_IRQS &&
 	    ++iter->vcpu_id < iter->nr_cpus)
 		iter->intid = 0;
+}
 
-	if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS)) {
-		if (iter->lpi_idx < iter->nr_lpis)
-			iter->intid = iter->lpi_array[iter->lpi_idx];
-		iter->lpi_idx++;
+static int iter_mark_lpis(struct kvm *kvm)
+{
+	struct vgic_dist *dist = &kvm->arch.vgic;
+	struct vgic_irq *irq;
+	unsigned long intid;
+	int nr_lpis = 0;
+
+	xa_for_each(&dist->lpi_xa, intid, irq) {
+		if (!vgic_try_get_irq_kref(irq))
+			continue;
+
+		xa_set_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER);
+		nr_lpis++;
+	}
+
+	return nr_lpis;
+}
+
+static void iter_unmark_lpis(struct kvm *kvm)
+{
+	struct vgic_dist *dist = &kvm->arch.vgic;
+	struct vgic_irq *irq;
+	unsigned long intid;
+
+	xa_for_each(&dist->lpi_xa, intid, irq) {
+		xa_clear_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER);
+		vgic_put_irq(kvm, irq);
 	}
 }
 
@@ -61,15 +99,12 @@  static void iter_init(struct kvm *kvm, struct vgic_state_iter *iter,
 
 	iter->nr_cpus = nr_cpus;
 	iter->nr_spis = kvm->arch.vgic.nr_spis;
-	if (kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
-		iter->nr_lpis = vgic_copy_lpi_list(kvm, NULL, &iter->lpi_array);
-		if (iter->nr_lpis < 0)
-			iter->nr_lpis = 0;
-	}
+	if (kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
+		iter->nr_lpis = iter_mark_lpis(kvm);
 
 	/* Fast forward to the right position if needed */
 	while (pos--)
-		iter_next(iter);
+		iter_next(kvm, iter);
 }
 
 static bool end_of_vgic(struct vgic_state_iter *iter)
@@ -114,7 +149,7 @@  static void *vgic_debug_next(struct seq_file *s, void *v, loff_t *pos)
 	struct vgic_state_iter *iter = kvm->arch.vgic.iter;
 
 	++*pos;
-	iter_next(iter);
+	iter_next(kvm, iter);
 	if (end_of_vgic(iter))
 		iter = NULL;
 	return iter;
@@ -134,13 +169,14 @@  static void vgic_debug_stop(struct seq_file *s, void *v)
 
 	mutex_lock(&kvm->arch.config_lock);
 	iter = kvm->arch.vgic.iter;
-	kfree(iter->lpi_array);
+	iter_unmark_lpis(kvm);
 	kfree(iter);
 	kvm->arch.vgic.iter = NULL;
 	mutex_unlock(&kvm->arch.config_lock);
 }
 
-static void print_dist_state(struct seq_file *s, struct vgic_dist *dist)
+static void print_dist_state(struct seq_file *s, struct vgic_dist *dist,
+			     struct vgic_state_iter *iter)
 {
 	bool v3 = dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3;
 
@@ -149,7 +185,7 @@  static void print_dist_state(struct seq_file *s, struct vgic_dist *dist)
 	seq_printf(s, "vgic_model:\t%s\n", v3 ? "GICv3" : "GICv2");
 	seq_printf(s, "nr_spis:\t%d\n", dist->nr_spis);
 	if (v3)
-		seq_printf(s, "nr_lpis:\t%d\n", atomic_read(&dist->lpi_count));
+		seq_printf(s, "nr_lpis:\t%d\n", iter->nr_lpis);
 	seq_printf(s, "enabled:\t%d\n", dist->enabled);
 	seq_printf(s, "\n");
 
@@ -236,7 +272,7 @@  static int vgic_debug_show(struct seq_file *s, void *v)
 	unsigned long flags;
 
 	if (iter->dist_id == 0) {
-		print_dist_state(s, &kvm->arch.vgic);
+		print_dist_state(s, &kvm->arch.vgic, iter);
 		return 0;
 	}
 
@@ -246,11 +282,13 @@  static int vgic_debug_show(struct seq_file *s, void *v)
 	if (iter->vcpu_id < iter->nr_cpus)
 		vcpu = kvm_get_vcpu(kvm, iter->vcpu_id);
 
+	/*
+	 * Expect this to succeed, as iter_mark_lpis() takes a reference on
+	 * every LPI to be visited.
+	 */
 	irq = vgic_get_irq(kvm, vcpu, iter->intid);
-	if (!irq) {
-		seq_printf(s, "       LPI %4d freed\n", iter->intid);
-		return 0;
-	}
+	if (WARN_ON_ONCE(!irq))
+		return -EINVAL;
 
 	raw_spin_lock_irqsave(&irq->irq_lock, flags);
 	print_irq_state(s, irq, vcpu);
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index 420a71597b78..5025ac968d27 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -316,8 +316,6 @@  static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
 	return 0;
 }
 
-#define GIC_LPI_MAX_INTID	((1 << INTERRUPT_ID_BITS_ITS) - 1)
-
 /*
  * Create a snapshot of the current LPIs targeting @vcpu, so that we can
  * enumerate those LPIs without holding any lock.
@@ -347,7 +345,7 @@  int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
 	raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
 	rcu_read_lock();
 
-	xas_for_each(&xas, irq, GIC_LPI_MAX_INTID) {
+	xas_for_each(&xas, irq, VGIC_LPI_MAX_INTID) {
 		if (i == irq_count)
 			break;
 		/* We don't need to "get" the IRQ, as we hold the list lock. */
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index 0c2b82de8fa3..e0c77e1bd9f6 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -16,6 +16,7 @@ 
 
 #define INTERRUPT_ID_BITS_SPIS	10
 #define INTERRUPT_ID_BITS_ITS	16
+#define VGIC_LPI_MAX_INTID	((1 << INTERRUPT_ID_BITS_ITS) - 1)
 #define VGIC_PRI_BITS		5
 
 #define vgic_irq_is_sgi(intid) ((intid) < VGIC_NR_SGIS)
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 47035946648e..8eb72721dac1 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -276,6 +276,8 @@  struct vgic_dist {
 
 	/* Protects the lpi_list. */
 	raw_spinlock_t		lpi_list_lock;
+
+#define LPI_XA_MARK_DEBUG_ITER	XA_MARK_0
 	struct xarray		lpi_xa;
 	atomic_t		lpi_count;