diff mbox series

KVM: arm64: vgic-v4: Move the GICv4 residency flow to be driven by vcpu_load/put

Message ID 20190903155747.219802-1-maz@kernel.org (mailing list archive)
State New, archived
Headers show
Series KVM: arm64: vgic-v4: Move the GICv4 residency flow to be driven by vcpu_load/put | expand

Commit Message

Marc Zyngier Sept. 3, 2019, 3:57 p.m. UTC
When the VHE code was reworked, a lot of the vgic stuff was moved around,
but the GICv4 residency code did stay untouched, meaning that we come
in and out of residency on each flush/sync, which is obviously suboptimal.

To address this, let's move things around a bit:

- Residency entry (flush) moves to vcpu_load
- Residency exit (sync) moves to vcpu_put
- On blocking (entry to WFI), we "put"
- On unblocking (exit from WFI), we "load"

Because these can nest (load/block/put/load/unblock/put, for example),
we now have per-VPE tracking of the residency state.

Additionally, vgic_v4_put gains a "need doorbell" parameter, which only
gets set to true when blocking because of a WFI. This allows a finer
control of the doorbell, which now also gets disabled as soon as
it gets signaled.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 drivers/irqchip/irq-gic-v4.c       |  7 +++-
 include/kvm/arm_vgic.h             |  4 +--
 include/linux/irqchip/arm-gic-v4.h |  2 ++
 virt/kvm/arm/arm.c                 | 12 ++++---
 virt/kvm/arm/vgic/vgic-v3.c        |  4 +++
 virt/kvm/arm/vgic/vgic-v4.c        | 55 ++++++++++++++----------------
 virt/kvm/arm/vgic/vgic.c           |  4 ---
 virt/kvm/arm/vgic/vgic.h           |  2 --
 8 files changed, 48 insertions(+), 42 deletions(-)

Comments

Andrew Murray Sept. 5, 2019, 1:04 p.m. UTC | #1
Hi Marc,

Some feedback below, but mostly questions to aid my understanding...

On Tue, Sep 03, 2019 at 04:57:47PM +0100, Marc Zyngier wrote:
> When the VHE code was reworked, a lot of the vgic stuff was moved around,
> but the GICv4 residency code did stay untouched, meaning that we come
> in and out of residency on each flush/sync, which is obviously suboptimal.
> 
> To address this, let's move things around a bit:
> 
> - Residency entry (flush) moves to vcpu_load
> - Residency exit (sync) moves to vcpu_put
> - On blocking (entry to WFI), we "put"
> - On unblocking (exit from WFI, we "load"
> 
> Because these can nest (load/block/put/load/unblock/put, for example),
> we now have per-VPE tracking of the residency state.
> 
> Additionally, vgic_v4_put gains a "need doorbell" parameter, which only
> gets set to true when blocking because of a WFI. This allows a finer
> control of the doorbell, which now also gets disabled as soon as
> it gets signaled.
> 
> Signed-off-by: Marc Zyngier <maz@kernel.org>
> ---
>  drivers/irqchip/irq-gic-v4.c       |  7 +++-
>  include/kvm/arm_vgic.h             |  4 +--
>  include/linux/irqchip/arm-gic-v4.h |  2 ++
>  virt/kvm/arm/arm.c                 | 12 ++++---
>  virt/kvm/arm/vgic/vgic-v3.c        |  4 +++
>  virt/kvm/arm/vgic/vgic-v4.c        | 55 ++++++++++++++----------------
>  virt/kvm/arm/vgic/vgic.c           |  4 ---
>  virt/kvm/arm/vgic/vgic.h           |  2 --
>  8 files changed, 48 insertions(+), 42 deletions(-)
> 
> diff --git a/drivers/irqchip/irq-gic-v4.c b/drivers/irqchip/irq-gic-v4.c
> index 563e87ed0766..45969927cc81 100644
> --- a/drivers/irqchip/irq-gic-v4.c
> +++ b/drivers/irqchip/irq-gic-v4.c
> @@ -141,12 +141,17 @@ static int its_send_vpe_cmd(struct its_vpe *vpe, struct its_cmd_info *info)
>  int its_schedule_vpe(struct its_vpe *vpe, bool on)
>  {
>  	struct its_cmd_info info;
> +	int ret;
>  
>  	WARN_ON(preemptible());
>  
>  	info.cmd_type = on ? SCHEDULE_VPE : DESCHEDULE_VPE;
>  
> -	return its_send_vpe_cmd(vpe, &info);
> +	ret = its_send_vpe_cmd(vpe, &info);
> +	if (!ret)
> +		vpe->resident = on;
> +

We make an assumption here that its_schedule_vpe is the only caller of
its_send_vpe_cmd where we may pass SCHEDULE_VPE. I guess this is currently
the case.

Why do we also set the residency flag for DESCHEDULE_VPE?

And by residency we mean that interrupts are delivered to VM, instead of
doorbell?

> +	return ret;
>  }
>  
>  int its_invall_vpe(struct its_vpe *vpe)
> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> index af4f09c02bf1..4dc58d7a0010 100644
> --- a/include/kvm/arm_vgic.h
> +++ b/include/kvm/arm_vgic.h
> @@ -396,7 +396,7 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int irq,
>  int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq,
>  				 struct kvm_kernel_irq_routing_entry *irq_entry);
>  
> -void kvm_vgic_v4_enable_doorbell(struct kvm_vcpu *vcpu);
> -void kvm_vgic_v4_disable_doorbell(struct kvm_vcpu *vcpu);
> +int vgic_v4_load(struct kvm_vcpu *vcpu);
> +int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db);
>  
>  #endif /* __KVM_ARM_VGIC_H */
> diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h
> index e6b155713b47..ab1396afe08a 100644
> --- a/include/linux/irqchip/arm-gic-v4.h
> +++ b/include/linux/irqchip/arm-gic-v4.h
> @@ -35,6 +35,8 @@ struct its_vpe {
>  	/* Doorbell interrupt */
>  	int			irq;
>  	irq_hw_number_t		vpe_db_lpi;
> +	/* VPE resident */
> +	bool			resident;
>  	/* VPE proxy mapping */
>  	int			vpe_proxy_event;
>  	/*
> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> index 35a069815baf..4e69268621b6 100644
> --- a/virt/kvm/arm/arm.c
> +++ b/virt/kvm/arm/arm.c
> @@ -321,20 +321,24 @@ void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
>  	/*
>  	 * If we're about to block (most likely because we've just hit a
>  	 * WFI), we need to sync back the state of the GIC CPU interface
> -	 * so that we have the lastest PMR and group enables. This ensures
> +	 * so that we have the latest PMR and group enables. This ensures
>  	 * that kvm_arch_vcpu_runnable has up-to-date data to decide
>  	 * whether we have pending interrupts.
> +	 *
> +	 * For the same reason, we want to tell GICv4 that we need
> +	 * doorbells to be signalled, should an interrupt become pending.

As I understand, and as indicated by removal of kvm_vgic_v4_enable_doorbell
below, we've now abstracted enabling the doorbell behind the concept of a
v4_put.

Why then, do we break that abstraction by adding this comment? Surely we just
want to indicate that we're done with ITS for now - do whatever you need to do.

This would have made more sense to me if the comment above was removed in this
patch rather than added.

>  	 */
>  	preempt_disable();
>  	kvm_vgic_vmcr_sync(vcpu);
> +	vgic_v4_put(vcpu, true);
>  	preempt_enable();
> -
> -	kvm_vgic_v4_enable_doorbell(vcpu);
>  }
>  
>  void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
>  {
> -	kvm_vgic_v4_disable_doorbell(vcpu);
> +	preempt_disable();
> +	vgic_v4_load(vcpu);
> +	preempt_enable();
>  }
>  
>  int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
> diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
> index 8d69f007dd0c..48307a9eb1d8 100644
> --- a/virt/kvm/arm/vgic/vgic-v3.c
> +++ b/virt/kvm/arm/vgic/vgic-v3.c
> @@ -664,6 +664,8 @@ void vgic_v3_load(struct kvm_vcpu *vcpu)
>  
>  	if (has_vhe())
>  		__vgic_v3_activate_traps(vcpu);
> +
> +	WARN_ON(vgic_v4_load(vcpu));
>  }
>  
>  void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu)
> @@ -676,6 +678,8 @@ void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu)
>  
>  void vgic_v3_put(struct kvm_vcpu *vcpu)
>  {
> +	WARN_ON(vgic_v4_put(vcpu, false));
> +
>  	vgic_v3_vmcr_sync(vcpu);
>  
>  	kvm_call_hyp(__vgic_v3_save_aprs, vcpu);
> diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c
> index 477af6aebb97..3a8a28854b13 100644
> --- a/virt/kvm/arm/vgic/vgic-v4.c
> +++ b/virt/kvm/arm/vgic/vgic-v4.c
> @@ -85,6 +85,10 @@ static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info)
>  {
>  	struct kvm_vcpu *vcpu = info;
>  
> +	/* We got the message, no need to fire again */
> +	if (!irqd_irq_disabled(&irq_to_desc(irq)->irq_data))
> +		disable_irq_nosync(irq);
> +
>  	vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last = true;
>  	kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
>  	kvm_vcpu_kick(vcpu);

This is because the doorbell will fire each time any guest device interrupts,
however we only need to tell the guest just once that something has happened
right?

> @@ -192,20 +196,30 @@ void vgic_v4_teardown(struct kvm *kvm)
>  	its_vm->vpes = NULL;
>  }
>  
> -int vgic_v4_sync_hwstate(struct kvm_vcpu *vcpu)
> +int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db)
>  {
> -	if (!vgic_supports_direct_msis(vcpu->kvm))
> +	struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
> +	struct irq_desc *desc = irq_to_desc(vpe->irq);
> +
> +	if (!vgic_supports_direct_msis(vcpu->kvm) || !vpe->resident)
>  		return 0;

Are we using !vpe->resident to avoid pointlessly doing work we've
already done?

>  
> -	return its_schedule_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe, false);
> +	/*
> +	 * If blocking, a doorbell is required. Undo the nested
> +	 * disable_irq() calls...
> +	 */
> +	while (need_db && irqd_irq_disabled(&desc->irq_data))
> +		enable_irq(vpe->irq);
> +
> +	return its_schedule_vpe(vpe, false);
>  }
>  
> -int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu)
> +int vgic_v4_load(struct kvm_vcpu *vcpu)
>  {
> -	int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq;
> +	struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
>  	int err;
>  
> -	if (!vgic_supports_direct_msis(vcpu->kvm))
> +	if (!vgic_supports_direct_msis(vcpu->kvm) || vpe->resident)
>  		return 0;
>  
>  	/*
> @@ -214,11 +228,14 @@ int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu)
>  	 * doc in drivers/irqchip/irq-gic-v4.c to understand how this
>  	 * turns into a VMOVP command at the ITS level.
>  	 */
> -	err = irq_set_affinity(irq, cpumask_of(smp_processor_id()));
> +	err = irq_set_affinity(vpe->irq, cpumask_of(smp_processor_id()));
>  	if (err)
>  		return err;
>  
> -	err = its_schedule_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe, true);
> +	/* Disabled the doorbell, as we're about to enter the guest */
> +	disable_irq(vpe->irq);
> +
> +	err = its_schedule_vpe(vpe, true);
>  	if (err)
>  		return err;

Given that the doorbell corresponds with vpe residency, it could make sense
to add a helper here that calls its_schedule_vpe and [disable,enable]_irq.
Though I see that vgic_v3_put calls vgic_v4_put with need_db=false. I wonder
what effect setting that to true would be for vgic_v3_put? Could it be known
that v3 won't set need_db to true?

Thanks,

Andrew Murray

>  
> @@ -226,9 +243,7 @@ int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu)
>  	 * Now that the VPE is resident, let's get rid of a potential
>  	 * doorbell interrupt that would still be pending.
>  	 */
> -	err = irq_set_irqchip_state(irq, IRQCHIP_STATE_PENDING, false);
> -
> -	return err;
> +	return irq_set_irqchip_state(vpe->irq, IRQCHIP_STATE_PENDING, false);
>  }
>  
>  static struct vgic_its *vgic_get_its(struct kvm *kvm,
> @@ -335,21 +350,3 @@ int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int virq,
>  	mutex_unlock(&its->its_lock);
>  	return ret;
>  }
> -
> -void kvm_vgic_v4_enable_doorbell(struct kvm_vcpu *vcpu)
> -{
> -	if (vgic_supports_direct_msis(vcpu->kvm)) {
> -		int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq;
> -		if (irq)
> -			enable_irq(irq);
> -	}
> -}
> -
> -void kvm_vgic_v4_disable_doorbell(struct kvm_vcpu *vcpu)
> -{
> -	if (vgic_supports_direct_msis(vcpu->kvm)) {
> -		int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq;
> -		if (irq)
> -			disable_irq(irq);
> -	}
> -}
> diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
> index 45a870cb63f5..99b02ca730a8 100644
> --- a/virt/kvm/arm/vgic/vgic.c
> +++ b/virt/kvm/arm/vgic/vgic.c
> @@ -857,8 +857,6 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
>  {
>  	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
>  
> -	WARN_ON(vgic_v4_sync_hwstate(vcpu));
> -
>  	/* An empty ap_list_head implies used_lrs == 0 */
>  	if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
>  		return;
> @@ -882,8 +880,6 @@ static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
>  /* Flush our emulation state into the GIC hardware before entering the guest. */
>  void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
>  {
> -	WARN_ON(vgic_v4_flush_hwstate(vcpu));
> -
>  	/*
>  	 * If there are no virtual interrupts active or pending for this
>  	 * VCPU, then there is no work to do and we can bail out without
> diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
> index 83066a81b16a..c7fefd6b1c80 100644
> --- a/virt/kvm/arm/vgic/vgic.h
> +++ b/virt/kvm/arm/vgic/vgic.h
> @@ -316,7 +316,5 @@ void vgic_its_invalidate_cache(struct kvm *kvm);
>  bool vgic_supports_direct_msis(struct kvm *kvm);
>  int vgic_v4_init(struct kvm *kvm);
>  void vgic_v4_teardown(struct kvm *kvm);
> -int vgic_v4_sync_hwstate(struct kvm_vcpu *vcpu);
> -int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu);
>  
>  #endif
> -- 
> 2.20.1
>
Marc Zyngier Sept. 5, 2019, 1:26 p.m. UTC | #2
On 05/09/2019 14:04, Andrew Murray wrote:
> Hi Marc,
> 
> Some feedback below, but mostly questions to aid my understanding...
> 
> On Tue, Sep 03, 2019 at 04:57:47PM +0100, Marc Zyngier wrote:
>> When the VHE code was reworked, a lot of the vgic stuff was moved around,
>> but the GICv4 residency code did stay untouched, meaning that we come
>> in and out of residency on each flush/sync, which is obviously suboptimal.
>>
>> To address this, let's move things around a bit:
>>
>> - Residency entry (flush) moves to vcpu_load
>> - Residency exit (sync) moves to vcpu_put
>> - On blocking (entry to WFI), we "put"
>> - On unblocking (exit from WFI, we "load"
>>
>> Because these can nest (load/block/put/load/unblock/put, for example),
>> we now have per-VPE tracking of the residency state.
>>
>> Additionally, vgic_v4_put gains a "need doorbell" parameter, which only
>> gets set to true when blocking because of a WFI. This allows a finer
>> control of the doorbell, which now also gets disabled as soon as
>> it gets signaled.
>>
>> Signed-off-by: Marc Zyngier <maz@kernel.org>
>> ---
>>  drivers/irqchip/irq-gic-v4.c       |  7 +++-
>>  include/kvm/arm_vgic.h             |  4 +--
>>  include/linux/irqchip/arm-gic-v4.h |  2 ++
>>  virt/kvm/arm/arm.c                 | 12 ++++---
>>  virt/kvm/arm/vgic/vgic-v3.c        |  4 +++
>>  virt/kvm/arm/vgic/vgic-v4.c        | 55 ++++++++++++++----------------
>>  virt/kvm/arm/vgic/vgic.c           |  4 ---
>>  virt/kvm/arm/vgic/vgic.h           |  2 --
>>  8 files changed, 48 insertions(+), 42 deletions(-)
>>
>> diff --git a/drivers/irqchip/irq-gic-v4.c b/drivers/irqchip/irq-gic-v4.c
>> index 563e87ed0766..45969927cc81 100644
>> --- a/drivers/irqchip/irq-gic-v4.c
>> +++ b/drivers/irqchip/irq-gic-v4.c
>> @@ -141,12 +141,17 @@ static int its_send_vpe_cmd(struct its_vpe *vpe, struct its_cmd_info *info)
>>  int its_schedule_vpe(struct its_vpe *vpe, bool on)
>>  {
>>  	struct its_cmd_info info;
>> +	int ret;
>>  
>>  	WARN_ON(preemptible());
>>  
>>  	info.cmd_type = on ? SCHEDULE_VPE : DESCHEDULE_VPE;
>>  
>> -	return its_send_vpe_cmd(vpe, &info);
>> +	ret = its_send_vpe_cmd(vpe, &info);
>> +	if (!ret)
>> +		vpe->resident = on;
>> +
> 
> We make an assumption here that its_schedule_vpe is the only caller of
> its_send_vpe_cmd where we may pass SCHEDULE_VPE. I guess this is currently
> the case.

It is, and it is intended to stay that way.

> Why do we also set the residency flag for DESCHEDULE_VPE?

We don't.

> And by residency we mean that interrupts are delivered to VM, instead of
> doorbell?

Interrupts are always delivered to the VPE, whether it is resident or
not. Residency is defined as the VPE that is currently programmed in the
redistributor (by virtue of programming the VPROPBASER and VPENDBASER
registers).

> 
>> +	return ret;
>>  }
>>  
>>  int its_invall_vpe(struct its_vpe *vpe)
>> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
>> index af4f09c02bf1..4dc58d7a0010 100644
>> --- a/include/kvm/arm_vgic.h
>> +++ b/include/kvm/arm_vgic.h
>> @@ -396,7 +396,7 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int irq,
>>  int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq,
>>  				 struct kvm_kernel_irq_routing_entry *irq_entry);
>>  
>> -void kvm_vgic_v4_enable_doorbell(struct kvm_vcpu *vcpu);
>> -void kvm_vgic_v4_disable_doorbell(struct kvm_vcpu *vcpu);
>> +int vgic_v4_load(struct kvm_vcpu *vcpu);
>> +int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db);
>>  
>>  #endif /* __KVM_ARM_VGIC_H */
>> diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h
>> index e6b155713b47..ab1396afe08a 100644
>> --- a/include/linux/irqchip/arm-gic-v4.h
>> +++ b/include/linux/irqchip/arm-gic-v4.h
>> @@ -35,6 +35,8 @@ struct its_vpe {
>>  	/* Doorbell interrupt */
>>  	int			irq;
>>  	irq_hw_number_t		vpe_db_lpi;
>> +	/* VPE resident */
>> +	bool			resident;
>>  	/* VPE proxy mapping */
>>  	int			vpe_proxy_event;
>>  	/*
>> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
>> index 35a069815baf..4e69268621b6 100644
>> --- a/virt/kvm/arm/arm.c
>> +++ b/virt/kvm/arm/arm.c
>> @@ -321,20 +321,24 @@ void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
>>  	/*
>>  	 * If we're about to block (most likely because we've just hit a
>>  	 * WFI), we need to sync back the state of the GIC CPU interface
>> -	 * so that we have the lastest PMR and group enables. This ensures
>> +	 * so that we have the latest PMR and group enables. This ensures
>>  	 * that kvm_arch_vcpu_runnable has up-to-date data to decide
>>  	 * whether we have pending interrupts.
>> +	 *
>> +	 * For the same reason, we want to tell GICv4 that we need
>> +	 * doorbells to be signalled, should an interrupt become pending.
> 
> As I understand, and as indicated by removal of kvm_vgic_v4_enable_doorbell
> below, we've now abstracted enabling the doorbell behind the concept of a
> v4_put.
> 
> Why then, do we break that abstraction by adding this comment? Surely we just
> want to indicate that we're done with ITS for now - do whatever you need to do.

Well, I don't think you can realistically pretend that KVM doesn't know
about the intricacies of GICv4. They are intimately linked.

> This would have made more sense to me if the comment above was removed in this
> patch rather than added.

I disagree. The very reason we do a put on GICv4 is to get a doorbell.
If we didn't need one, we'd just let schedule() do a non
doorbell-generating vcpu_put.

>>  	 */
>>  	preempt_disable();
>>  	kvm_vgic_vmcr_sync(vcpu);
>> +	vgic_v4_put(vcpu, true);
>>  	preempt_enable();
>> -
>> -	kvm_vgic_v4_enable_doorbell(vcpu);
>>  }
>>  
>>  void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
>>  {
>> -	kvm_vgic_v4_disable_doorbell(vcpu);
>> +	preempt_disable();
>> +	vgic_v4_load(vcpu);
>> +	preempt_enable();
>>  }
>>  
>>  int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
>> diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
>> index 8d69f007dd0c..48307a9eb1d8 100644
>> --- a/virt/kvm/arm/vgic/vgic-v3.c
>> +++ b/virt/kvm/arm/vgic/vgic-v3.c
>> @@ -664,6 +664,8 @@ void vgic_v3_load(struct kvm_vcpu *vcpu)
>>  
>>  	if (has_vhe())
>>  		__vgic_v3_activate_traps(vcpu);
>> +
>> +	WARN_ON(vgic_v4_load(vcpu));
>>  }
>>  
>>  void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu)
>> @@ -676,6 +678,8 @@ void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu)
>>  
>>  void vgic_v3_put(struct kvm_vcpu *vcpu)
>>  {
>> +	WARN_ON(vgic_v4_put(vcpu, false));
>> +
>>  	vgic_v3_vmcr_sync(vcpu);
>>  
>>  	kvm_call_hyp(__vgic_v3_save_aprs, vcpu);
>> diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c
>> index 477af6aebb97..3a8a28854b13 100644
>> --- a/virt/kvm/arm/vgic/vgic-v4.c
>> +++ b/virt/kvm/arm/vgic/vgic-v4.c
>> @@ -85,6 +85,10 @@ static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info)
>>  {
>>  	struct kvm_vcpu *vcpu = info;
>>  
>> +	/* We got the message, no need to fire again */
>> +	if (!irqd_irq_disabled(&irq_to_desc(irq)->irq_data))
>> +		disable_irq_nosync(irq);
>> +
>>  	vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last = true;
>>  	kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
>>  	kvm_vcpu_kick(vcpu);
> 
> This is because the doorbell will fire each time any guest device interrupts,
> however we only need to tell the guest just once that something has happened
> right?

Not for any guest interrupt. Only for VLPIs. And yes, there is no need
to get multiple doorbells. Once you got one, you know you're runnable
and don't need to be told another 50k times...

> 
>> @@ -192,20 +196,30 @@ void vgic_v4_teardown(struct kvm *kvm)
>>  	its_vm->vpes = NULL;
>>  }
>>  
>> -int vgic_v4_sync_hwstate(struct kvm_vcpu *vcpu)
>> +int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db)
>>  {
>> -	if (!vgic_supports_direct_msis(vcpu->kvm))
>> +	struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
>> +	struct irq_desc *desc = irq_to_desc(vpe->irq);
>> +
>> +	if (!vgic_supports_direct_msis(vcpu->kvm) || !vpe->resident)
>>  		return 0;
> 
> Are we using !vpe->resident to avoid pointlessly doing work we've
> already done?

And also to avoid corrupting the state that we've saved by re-reading
what could potentially be an invalid state.

> 
>>  
>> -	return its_schedule_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe, false);
>> +	/*
>> +	 * If blocking, a doorbell is required. Undo the nested
>> +	 * disable_irq() calls...
>> +	 */
>> +	while (need_db && irqd_irq_disabled(&desc->irq_data))
>> +		enable_irq(vpe->irq);
>> +
>> +	return its_schedule_vpe(vpe, false);
>>  }
>>  
>> -int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu)
>> +int vgic_v4_load(struct kvm_vcpu *vcpu)
>>  {
>> -	int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq;
>> +	struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
>>  	int err;
>>  
>> -	if (!vgic_supports_direct_msis(vcpu->kvm))
>> +	if (!vgic_supports_direct_msis(vcpu->kvm) || vpe->resident)
>>  		return 0;
>>  
>>  	/*
>> @@ -214,11 +228,14 @@ int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu)
>>  	 * doc in drivers/irqchip/irq-gic-v4.c to understand how this
>>  	 * turns into a VMOVP command at the ITS level.
>>  	 */
>> -	err = irq_set_affinity(irq, cpumask_of(smp_processor_id()));
>> +	err = irq_set_affinity(vpe->irq, cpumask_of(smp_processor_id()));
>>  	if (err)
>>  		return err;
>>  
>> -	err = its_schedule_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe, true);
>> +	/* Disabled the doorbell, as we're about to enter the guest */
>> +	disable_irq(vpe->irq);
>> +
>> +	err = its_schedule_vpe(vpe, true);
>>  	if (err)
>>  		return err;
> 
> Given that the doorbell corresponds with vpe residency, it could make sense
> to add a helper here that calls its_schedule_vpe and [disable,enable]_irq.
> Though I see that vgic_v3_put calls vgic_v4_put with need_db=false. I wonder
> what effect setting that to true would be for vgic_v3_put? Could it be known
> that v3 won't set need_db to true?

There is no doorbells for GICv3.

	M.
Zenghui Yu Sept. 17, 2019, 8:10 a.m. UTC | #3
Hi Marc,

I've run this patch on my box and got the following messages:

---8<

[ 2258.490030] BUG: sleeping function called from invalid context at 
kernel/irq/manage.c:138
[ 2258.490034] in_atomic(): 1, irqs_disabled(): 0, pid: 59278, name: CPU 
0/KVM
[ 2258.490039] CPU: 32 PID: 59278 Comm: CPU 0/KVM Kdump: loaded Tainted: 
G        W         5.3.0+ #26
[ 2258.490041] Hardware name: Huawei TaiShan 2280 /BC11SPCD, BIOS 1.58 
10/29/2018
[ 2258.490043] Call trace:
[ 2258.490056]  dump_backtrace+0x0/0x188
[ 2258.490060]  show_stack+0x24/0x30
[ 2258.490066]  dump_stack+0xb0/0xf4
[ 2258.490072]  ___might_sleep+0x10c/0x130
[ 2258.490074]  __might_sleep+0x58/0x90
[ 2258.490078]  synchronize_irq+0x58/0xd8
[ 2258.490079]  disable_irq+0x2c/0x38
[ 2258.490083]  vgic_v4_load+0x9c/0xc0
[ 2258.490084]  vgic_v3_load+0x94/0x170
[ 2258.490088]  kvm_vgic_load+0x3c/0x60
[ 2258.490092]  kvm_arch_vcpu_load+0xd4/0x1d0
[ 2258.490095]  vcpu_load+0x50/0x70
[ 2258.490097]  kvm_arch_vcpu_ioctl_run+0x94/0x978
[ 2258.490098]  kvm_vcpu_ioctl+0x3d8/0xa28
[ 2258.490104]  do_vfs_ioctl+0xc4/0x8e8
[ 2258.490106]  ksys_ioctl+0x8c/0xa0
[ 2258.490108]  __arm64_sys_ioctl+0x28/0x58
[ 2258.490112]  el0_svc_common.constprop.0+0x7c/0x188
[ 2258.490114]  el0_svc_handler+0x34/0xb8
[ 2258.490117]  el0_svc+0x8/0xc
[ 2259.497070] BUG: sleeping function called from invalid context at 
kernel/irq/manage.c:138
[ 2259.497077] in_atomic(): 1, irqs_disabled(): 0, pid: 59278, name: CPU 
0/KVM
[ 2259.497082] CPU: 33 PID: 59278 Comm: CPU 0/KVM Kdump: loaded Tainted: 
G        W         5.3.0+ #26
[ 2259.497083] Hardware name: Huawei TaiShan 2280 /BC11SPCD, BIOS 1.58 
10/29/2018
[ 2259.497086] Call trace:
[ 2259.497098]  dump_backtrace+0x0/0x188
[ 2259.497101]  show_stack+0x24/0x30
[ 2259.497109]  dump_stack+0xb0/0xf4
[ 2259.497115]  ___might_sleep+0x10c/0x130
[ 2259.497117]  __might_sleep+0x58/0x90
[ 2259.497120]  synchronize_irq+0x58/0xd8
[ 2259.497122]  disable_irq+0x2c/0x38
[ 2259.497126]  vgic_v4_load+0x9c/0xc0
[ 2259.497127]  vgic_v3_load+0x94/0x170
[ 2259.497130]  kvm_vgic_load+0x3c/0x60
[ 2259.497134]  kvm_arch_vcpu_load+0xd4/0x1d0
[ 2259.497137]  kvm_sched_in+0x30/0x40
[ 2259.497139]  finish_task_switch+0x134/0x258
[ 2259.497142]  __schedule+0x33c/0x780
[ 2259.497144]  schedule+0x48/0xd8
[ 2259.497147]  kvm_vcpu_block+0xb8/0x390
[ 2259.497148]  kvm_handle_wfx+0xa0/0x230
[ 2259.497150]  handle_exit+0x14c/0x1c8
[ 2259.497152]  kvm_arch_vcpu_ioctl_run+0x354/0x978
[ 2259.497154]  kvm_vcpu_ioctl+0x3d8/0xa28
[ 2259.497161]  do_vfs_ioctl+0xc4/0x8e8
[ 2259.497163]  ksys_ioctl+0x8c/0xa0
[ 2259.497165]  __arm64_sys_ioctl+0x28/0x58
[ 2259.497168]  el0_svc_common.constprop.0+0x7c/0x188
[ 2259.497171]  el0_svc_handler+0x34/0xb8
[ 2259.497175]  el0_svc+0x8/0xc


The logic of disabling the doorbell interrupt in vgic_v4_load() might
need a fix?


Thanks,
zenghui

On 2019/9/3 23:57, Marc Zyngier wrote:
> When the VHE code was reworked, a lot of the vgic stuff was moved around,
> but the GICv4 residency code did stay untouched, meaning that we come
> in and out of residency on each flush/sync, which is obviously suboptimal.
> 
> To address this, let's move things around a bit:
> 
> - Residency entry (flush) moves to vcpu_load
> - Residency exit (sync) moves to vcpu_put
> - On blocking (entry to WFI), we "put"
> - On unblocking (exit from WFI, we "load"
> 
> Because these can nest (load/block/put/load/unblock/put, for example),
> we now have per-VPE tracking of the residency state.
> 
> Additionally, vgic_v4_put gains a "need doorbell" parameter, which only
> gets set to true when blocking because of a WFI. This allows a finer
> control of the doorbell, which now also gets disabled as soon as
> it gets signaled.
> 
> Signed-off-by: Marc Zyngier <maz@kernel.org>
> ---
>   drivers/irqchip/irq-gic-v4.c       |  7 +++-
>   include/kvm/arm_vgic.h             |  4 +--
>   include/linux/irqchip/arm-gic-v4.h |  2 ++
>   virt/kvm/arm/arm.c                 | 12 ++++---
>   virt/kvm/arm/vgic/vgic-v3.c        |  4 +++
>   virt/kvm/arm/vgic/vgic-v4.c        | 55 ++++++++++++++----------------
>   virt/kvm/arm/vgic/vgic.c           |  4 ---
>   virt/kvm/arm/vgic/vgic.h           |  2 --
>   8 files changed, 48 insertions(+), 42 deletions(-)
> 
> diff --git a/drivers/irqchip/irq-gic-v4.c b/drivers/irqchip/irq-gic-v4.c
> index 563e87ed0766..45969927cc81 100644
> --- a/drivers/irqchip/irq-gic-v4.c
> +++ b/drivers/irqchip/irq-gic-v4.c
> @@ -141,12 +141,17 @@ static int its_send_vpe_cmd(struct its_vpe *vpe, struct its_cmd_info *info)
>   int its_schedule_vpe(struct its_vpe *vpe, bool on)
>   {
>   	struct its_cmd_info info;
> +	int ret;
>   
>   	WARN_ON(preemptible());
>   
>   	info.cmd_type = on ? SCHEDULE_VPE : DESCHEDULE_VPE;
>   
> -	return its_send_vpe_cmd(vpe, &info);
> +	ret = its_send_vpe_cmd(vpe, &info);
> +	if (!ret)
> +		vpe->resident = on;
> +
> +	return ret;
>   }
>   
>   int its_invall_vpe(struct its_vpe *vpe)
> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> index af4f09c02bf1..4dc58d7a0010 100644
> --- a/include/kvm/arm_vgic.h
> +++ b/include/kvm/arm_vgic.h
> @@ -396,7 +396,7 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int irq,
>   int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq,
>   				 struct kvm_kernel_irq_routing_entry *irq_entry);
>   
> -void kvm_vgic_v4_enable_doorbell(struct kvm_vcpu *vcpu);
> -void kvm_vgic_v4_disable_doorbell(struct kvm_vcpu *vcpu);
> +int vgic_v4_load(struct kvm_vcpu *vcpu);
> +int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db);
>   
>   #endif /* __KVM_ARM_VGIC_H */
> diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h
> index e6b155713b47..ab1396afe08a 100644
> --- a/include/linux/irqchip/arm-gic-v4.h
> +++ b/include/linux/irqchip/arm-gic-v4.h
> @@ -35,6 +35,8 @@ struct its_vpe {
>   	/* Doorbell interrupt */
>   	int			irq;
>   	irq_hw_number_t		vpe_db_lpi;
> +	/* VPE resident */
> +	bool			resident;
>   	/* VPE proxy mapping */
>   	int			vpe_proxy_event;
>   	/*
> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> index 35a069815baf..4e69268621b6 100644
> --- a/virt/kvm/arm/arm.c
> +++ b/virt/kvm/arm/arm.c
> @@ -321,20 +321,24 @@ void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
>   	/*
>   	 * If we're about to block (most likely because we've just hit a
>   	 * WFI), we need to sync back the state of the GIC CPU interface
> -	 * so that we have the lastest PMR and group enables. This ensures
> +	 * so that we have the latest PMR and group enables. This ensures
>   	 * that kvm_arch_vcpu_runnable has up-to-date data to decide
>   	 * whether we have pending interrupts.
> +	 *
> +	 * For the same reason, we want to tell GICv4 that we need
> +	 * doorbells to be signalled, should an interrupt become pending.
>   	 */
>   	preempt_disable();
>   	kvm_vgic_vmcr_sync(vcpu);
> +	vgic_v4_put(vcpu, true);
>   	preempt_enable();
> -
> -	kvm_vgic_v4_enable_doorbell(vcpu);
>   }
>   
>   void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
>   {
> -	kvm_vgic_v4_disable_doorbell(vcpu);
> +	preempt_disable();
> +	vgic_v4_load(vcpu);
> +	preempt_enable();
>   }
>   
>   int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
> diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
> index 8d69f007dd0c..48307a9eb1d8 100644
> --- a/virt/kvm/arm/vgic/vgic-v3.c
> +++ b/virt/kvm/arm/vgic/vgic-v3.c
> @@ -664,6 +664,8 @@ void vgic_v3_load(struct kvm_vcpu *vcpu)
>   
>   	if (has_vhe())
>   		__vgic_v3_activate_traps(vcpu);
> +
> +	WARN_ON(vgic_v4_load(vcpu));
>   }
>   
>   void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu)
> @@ -676,6 +678,8 @@ void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu)
>   
>   void vgic_v3_put(struct kvm_vcpu *vcpu)
>   {
> +	WARN_ON(vgic_v4_put(vcpu, false));
> +
>   	vgic_v3_vmcr_sync(vcpu);
>   
>   	kvm_call_hyp(__vgic_v3_save_aprs, vcpu);
> diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c
> index 477af6aebb97..3a8a28854b13 100644
> --- a/virt/kvm/arm/vgic/vgic-v4.c
> +++ b/virt/kvm/arm/vgic/vgic-v4.c
> @@ -85,6 +85,10 @@ static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info)
>   {
>   	struct kvm_vcpu *vcpu = info;
>   
> +	/* We got the message, no need to fire again */
> +	if (!irqd_irq_disabled(&irq_to_desc(irq)->irq_data))
> +		disable_irq_nosync(irq);
> +
>   	vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last = true;
>   	kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
>   	kvm_vcpu_kick(vcpu);
> @@ -192,20 +196,30 @@ void vgic_v4_teardown(struct kvm *kvm)
>   	its_vm->vpes = NULL;
>   }
>   
> -int vgic_v4_sync_hwstate(struct kvm_vcpu *vcpu)
> +int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db)
>   {
> -	if (!vgic_supports_direct_msis(vcpu->kvm))
> +	struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
> +	struct irq_desc *desc = irq_to_desc(vpe->irq);
> +
> +	if (!vgic_supports_direct_msis(vcpu->kvm) || !vpe->resident)
>   		return 0;
>   
> -	return its_schedule_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe, false);
> +	/*
> +	 * If blocking, a doorbell is required. Undo the nested
> +	 * disable_irq() calls...
> +	 */
> +	while (need_db && irqd_irq_disabled(&desc->irq_data))
> +		enable_irq(vpe->irq);
> +
> +	return its_schedule_vpe(vpe, false);
>   }
>   
> -int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu)
> +int vgic_v4_load(struct kvm_vcpu *vcpu)
>   {
> -	int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq;
> +	struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
>   	int err;
>   
> -	if (!vgic_supports_direct_msis(vcpu->kvm))
> +	if (!vgic_supports_direct_msis(vcpu->kvm) || vpe->resident)
>   		return 0;
>   
>   	/*
> @@ -214,11 +228,14 @@ int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu)
>   	 * doc in drivers/irqchip/irq-gic-v4.c to understand how this
>   	 * turns into a VMOVP command at the ITS level.
>   	 */
> -	err = irq_set_affinity(irq, cpumask_of(smp_processor_id()));
> +	err = irq_set_affinity(vpe->irq, cpumask_of(smp_processor_id()));
>   	if (err)
>   		return err;
>   
> -	err = its_schedule_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe, true);
> +	/* Disable the doorbell, as we're about to enter the guest */
> +	disable_irq(vpe->irq);
> +
> +	err = its_schedule_vpe(vpe, true);
>   	if (err)
>   		return err;
>   
> @@ -226,9 +243,7 @@ int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu)
>   	 * Now that the VPE is resident, let's get rid of a potential
>   	 * doorbell interrupt that would still be pending.
>   	 */
> -	err = irq_set_irqchip_state(irq, IRQCHIP_STATE_PENDING, false);
> -
> -	return err;
> +	return irq_set_irqchip_state(vpe->irq, IRQCHIP_STATE_PENDING, false);
>   }
>   
>   static struct vgic_its *vgic_get_its(struct kvm *kvm,
> @@ -335,21 +350,3 @@ int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int virq,
>   	mutex_unlock(&its->its_lock);
>   	return ret;
>   }
> -
> -void kvm_vgic_v4_enable_doorbell(struct kvm_vcpu *vcpu)
> -{
> -	if (vgic_supports_direct_msis(vcpu->kvm)) {
> -		int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq;
> -		if (irq)
> -			enable_irq(irq);
> -	}
> -}
> -
> -void kvm_vgic_v4_disable_doorbell(struct kvm_vcpu *vcpu)
> -{
> -	if (vgic_supports_direct_msis(vcpu->kvm)) {
> -		int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq;
> -		if (irq)
> -			disable_irq(irq);
> -	}
> -}
> diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
> index 45a870cb63f5..99b02ca730a8 100644
> --- a/virt/kvm/arm/vgic/vgic.c
> +++ b/virt/kvm/arm/vgic/vgic.c
> @@ -857,8 +857,6 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
>   {
>   	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
>   
> -	WARN_ON(vgic_v4_sync_hwstate(vcpu));
> -
>   	/* An empty ap_list_head implies used_lrs == 0 */
>   	if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
>   		return;
> @@ -882,8 +880,6 @@ static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
>   /* Flush our emulation state into the GIC hardware before entering the guest. */
>   void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
>   {
> -	WARN_ON(vgic_v4_flush_hwstate(vcpu));
> -
>   	/*
>   	 * If there are no virtual interrupts active or pending for this
>   	 * VCPU, then there is no work to do and we can bail out without
> diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
> index 83066a81b16a..c7fefd6b1c80 100644
> --- a/virt/kvm/arm/vgic/vgic.h
> +++ b/virt/kvm/arm/vgic/vgic.h
> @@ -316,7 +316,5 @@ void vgic_its_invalidate_cache(struct kvm *kvm);
>   bool vgic_supports_direct_msis(struct kvm *kvm);
>   int vgic_v4_init(struct kvm *kvm);
>   void vgic_v4_teardown(struct kvm *kvm);
> -int vgic_v4_sync_hwstate(struct kvm_vcpu *vcpu);
> -int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu);
>   
>   #endif
>
Marc Zyngier Sept. 17, 2019, 8:35 a.m. UTC | #4
Hi Zenghui,

On 17/09/2019 09:10, Zenghui Yu wrote:
> Hi Marc,
> 
> I've run this patch on my box and got the following messages:
> 
> ---8<
> 
> [ 2258.490030] BUG: sleeping function called from invalid context at 
> kernel/irq/manage.c:138
> [ 2258.490034] in_atomic(): 1, irqs_disabled(): 0, pid: 59278, name: CPU 
> 0/KVM
> [ 2258.490039] CPU: 32 PID: 59278 Comm: CPU 0/KVM Kdump: loaded Tainted: 
> G        W         5.3.0+ #26
> [ 2258.490041] Hardware name: Huawei TaiShan 2280 /BC11SPCD, BIOS 1.58 
> 10/29/2018
> [ 2258.490043] Call trace:
> [ 2258.490056]  dump_backtrace+0x0/0x188
> [ 2258.490060]  show_stack+0x24/0x30
> [ 2258.490066]  dump_stack+0xb0/0xf4
> [ 2258.490072]  ___might_sleep+0x10c/0x130
> [ 2258.490074]  __might_sleep+0x58/0x90
> [ 2258.490078]  synchronize_irq+0x58/0xd8
> [ 2258.490079]  disable_irq+0x2c/0x38
> [ 2258.490083]  vgic_v4_load+0x9c/0xc0
> [ 2258.490084]  vgic_v3_load+0x94/0x170
> [ 2258.490088]  kvm_vgic_load+0x3c/0x60
> [ 2258.490092]  kvm_arch_vcpu_load+0xd4/0x1d0
> [ 2258.490095]  vcpu_load+0x50/0x70
> [ 2258.490097]  kvm_arch_vcpu_ioctl_run+0x94/0x978
> [ 2258.490098]  kvm_vcpu_ioctl+0x3d8/0xa28
> [ 2258.490104]  do_vfs_ioctl+0xc4/0x8e8
> [ 2258.490106]  ksys_ioctl+0x8c/0xa0
> [ 2258.490108]  __arm64_sys_ioctl+0x28/0x58
> [ 2258.490112]  el0_svc_common.constprop.0+0x7c/0x188
> [ 2258.490114]  el0_svc_handler+0x34/0xb8
> [ 2258.490117]  el0_svc+0x8/0xc
> [ 2259.497070] BUG: sleeping function called from invalid context at 
> kernel/irq/manage.c:138

Thanks for reporting this.

[...]

> The logic of disabling the doorbell interrupt in vgic_v4_load() might
> need a fix?

The logic itself looks OK, but doing a full-blown disable_irq() is both
counterproductive (if we race against a doorbell, there is not much we
can do about it and waiting for it to end is pointless) and wrong
(despite the comment that this can be called in IRQ context, it is
pretty unsafe to do so).

Can you try turning it into a disable_irq_nosync() and let me know if
that helps?

Thanks,

	M.
Zenghui Yu Sept. 17, 2019, 9:31 a.m. UTC | #5
On 2019/9/17 16:35, Marc Zyngier wrote:
> Hi Zenghui,
> 
> On 17/09/2019 09:10, Zenghui Yu wrote:
>> Hi Marc,
>>
>> I've run this patch on my box and got the following messages:
>>
>> ---8<
>>
>> [ 2258.490030] BUG: sleeping function called from invalid context at
>> kernel/irq/manage.c:138
>> [ 2258.490034] in_atomic(): 1, irqs_disabled(): 0, pid: 59278, name: CPU
>> 0/KVM
>> [ 2258.490039] CPU: 32 PID: 59278 Comm: CPU 0/KVM Kdump: loaded Tainted:
>> G        W         5.3.0+ #26
>> [ 2258.490041] Hardware name: Huawei TaiShan 2280 /BC11SPCD, BIOS 1.58
>> 10/29/2018
>> [ 2258.490043] Call trace:
>> [ 2258.490056]  dump_backtrace+0x0/0x188
>> [ 2258.490060]  show_stack+0x24/0x30
>> [ 2258.490066]  dump_stack+0xb0/0xf4
>> [ 2258.490072]  ___might_sleep+0x10c/0x130
>> [ 2258.490074]  __might_sleep+0x58/0x90
>> [ 2258.490078]  synchronize_irq+0x58/0xd8
>> [ 2258.490079]  disable_irq+0x2c/0x38
>> [ 2258.490083]  vgic_v4_load+0x9c/0xc0
>> [ 2258.490084]  vgic_v3_load+0x94/0x170
>> [ 2258.490088]  kvm_vgic_load+0x3c/0x60
>> [ 2258.490092]  kvm_arch_vcpu_load+0xd4/0x1d0
>> [ 2258.490095]  vcpu_load+0x50/0x70
>> [ 2258.490097]  kvm_arch_vcpu_ioctl_run+0x94/0x978
>> [ 2258.490098]  kvm_vcpu_ioctl+0x3d8/0xa28
>> [ 2258.490104]  do_vfs_ioctl+0xc4/0x8e8
>> [ 2258.490106]  ksys_ioctl+0x8c/0xa0
>> [ 2258.490108]  __arm64_sys_ioctl+0x28/0x58
>> [ 2258.490112]  el0_svc_common.constprop.0+0x7c/0x188
>> [ 2258.490114]  el0_svc_handler+0x34/0xb8
>> [ 2258.490117]  el0_svc+0x8/0xc
>> [ 2259.497070] BUG: sleeping function called from invalid context at
>> kernel/irq/manage.c:138
> 
> Thanks for reporting this.
> 
> [...]
> 
>> The logic of disabling the doorbell interrupt in vgic_v4_load() might
>> need a fix?
> 
> The logic itself looks OK, but doing a full blown disable_irq() is both
> counter productive (if we race against a doorbell, there is not much we
> can do about it and waiting for it to end is pointless) and wrong
> (despite the comment that this can be called in IRQ context, it is
> pretty unsafe to do so).
> 
> Can you try turning it into a disable_irq_nosync() and let me know if
> that helps?

Yes, the above BUG messages disappear with disable_irq_nosync().

But this time I got the following WARNING:


[  921.004322] ======================================================
[  921.010489] WARNING: possible circular locking dependency detected
[  921.016657] 5.3.0+ #27 Not tainted
[  921.020132] ------------------------------------------------------
[  921.026299] CPU 1/KVM/816 is trying to acquire lock:
[  921.031250] ffff002fb42b35b0 (&irq_desc_lock_class){-.-.}, at: 
__irq_get_desc_lock+0x60/0xa0
[  921.039684]
                but task is already holding lock:
[  921.045503] ffff002fbbb07258 (&rq->lock){-.-.}, at: __schedule+0xd4/0x988
[  921.052283]
                which lock already depends on the new lock.

[  921.060445]
                the existing dependency chain (in reverse order) is:
[  921.067913]
                -> #3 (&rq->lock){-.-.}:
[  921.072955]        lock_acquire+0xd4/0x268
[  921.077041]        _raw_spin_lock+0x44/0x58
[  921.081212]        task_fork_fair+0x54/0x160
[  921.085471]        sched_fork+0xfc/0x238
[  921.089383]        copy_process+0x474/0x1738
[  921.093639]        _do_fork+0x70/0x6e0
[  921.097376]        kernel_thread+0x70/0x98
[  921.101459]        rest_init+0x34/0x278
[  921.105286]        arch_call_rest_init+0x14/0x1c
[  921.109891]        start_kernel+0x548/0x574
[  921.114060]
                -> #2 (&p->pi_lock){-.-.}:
[  921.119275]        lock_acquire+0xd4/0x268
[  921.123360]        _raw_spin_lock_irqsave+0x60/0x80
[  921.128225]        try_to_wake_up+0x60/0xbf0
[  921.132483]        wake_up_process+0x28/0x38
[  921.136739]        __up.isra.0+0x58/0x68
[  921.140649]        up+0x64/0x80
[  921.143777]        __up_console_sem+0x60/0xa8
[  921.148121]        console_unlock+0x31c/0x5f0
[  921.152465]        vprintk_emit+0x28c/0x438
[  921.156637]        dev_vprintk_emit+0x1d8/0x218
[  921.161157]        dev_printk_emit+0x84/0xa8
[  921.165414]        __dev_printk+0x78/0xa0
[  921.169411]        _dev_info+0x7c/0xa0
[  921.173148]        hub_port_init+0xa5c/0xb68
[  921.177405]        hub_port_connect+0x2f0/0xa08
[  921.181923]        port_event+0x548/0x828
[  921.185920]        hub_event+0x20c/0x418
[  921.189831]        process_one_work+0x24c/0x700
[  921.194349]        worker_thread+0x4c/0x448
[  921.198519]        kthread+0x130/0x138
[  921.202256]        ret_from_fork+0x10/0x18
[  921.206338]
                -> #1 ((console_sem).lock){-.-.}:
[  921.212160]        lock_acquire+0xd4/0x268
[  921.216244]        _raw_spin_lock_irqsave+0x60/0x80
[  921.221110]        down_trylock+0x20/0x50
[  921.225106]        __down_trylock_console_sem+0x50/0xe0
[  921.230320]        console_trylock+0x20/0x88
[  921.234577]        vprintk_emit+0x18c/0x438
[  921.238747]        vprintk_default+0x54/0x90
[  921.243004]        vprintk_func+0xe4/0x268
[  921.247087]        printk+0x74/0x94
[  921.250564]        show_interrupts+0x4dc/0x4f8
[  921.254997]        seq_read+0x2b4/0x4e0
[  921.258820]        proc_reg_read+0x94/0xe8
[  921.262905]        __vfs_read+0x48/0x80
[  921.266729]        vfs_read+0xa0/0x160
[  921.270465]        ksys_read+0x74/0xf8
[  921.274202]        __arm64_sys_read+0x24/0x30
[  921.278547]        el0_svc_common.constprop.0+0x80/0x1b8
[  921.283846]        el0_svc_handler+0x34/0xb8
[  921.288102]        el0_svc+0x8/0xc
[  921.291491]
                -> #0 (&irq_desc_lock_class){-.-.}:
[  921.297486]        check_prev_add+0xac/0x9f8
[  921.301743]        __lock_acquire+0x1164/0x12b8
[  921.306260]        lock_acquire+0xd4/0x268
[  921.310344]        _raw_spin_lock_irqsave+0x60/0x80
[  921.315209]        __irq_get_desc_lock+0x60/0xa0
[  921.319814]        irq_set_vcpu_affinity+0x48/0xc8
[  921.324592]        its_schedule_vpe+0x68/0xb0
[  921.328937]        vgic_v4_put+0x80/0xa8
[  921.332846]        vgic_v3_put+0x24/0xf0
[  921.336756]        kvm_vgic_put+0x3c/0x60
[  921.340754]        kvm_arch_vcpu_put+0x38/0x60
[  921.345184]        kvm_sched_out+0x38/0x48
[  921.349267]        __schedule+0x5a4/0x988
[  921.353263]        schedule+0x40/0xc8
[  921.356912]        kvm_arch_vcpu_ioctl_run+0x130/0xb08
[  921.362037]        kvm_vcpu_ioctl+0x3e0/0xb08
[  921.366381]        do_vfs_ioctl+0xc4/0x890
[  921.370464]        ksys_ioctl+0x8c/0xa0
[  921.374287]        __arm64_sys_ioctl+0x28/0x38
[  921.378717]        el0_svc_common.constprop.0+0x80/0x1b8
[  921.384016]        el0_svc_handler+0x34/0xb8
[  921.388272]        el0_svc+0x8/0xc
[  921.391660]
                other info that might help us debug this:

[  921.399649] Chain exists of:
                  &irq_desc_lock_class --> &p->pi_lock --> &rq->lock

[  921.409984]  Possible unsafe locking scenario:

[  921.415889]        CPU0                    CPU1
[  921.420405]        ----                    ----
[  921.424921]   lock(&rq->lock);
[  921.427962]                                lock(&p->pi_lock);
[  921.433694]                                lock(&rq->lock);
[  921.439253]   lock(&irq_desc_lock_class);
[  921.443249]
                 *** DEADLOCK ***

[  921.449155] 2 locks held by CPU 1/KVM/816:
[  921.453237]  #0: ffff002fa3862aa8 (&vcpu->mutex){+.+.}, at: 
kvm_vcpu_ioctl+0x80/0xb08
[  921.461055]  #1: ffff002fbbb07258 (&rq->lock){-.-.}, at: 
__schedule+0xd4/0x988
[  921.468265]
                stack backtrace:
[  921.472610] CPU: 24 PID: 816 Comm: CPU 1/KVM Kdump: loaded Not 
tainted 5.3.0+ #27
[  921.480165] Hardware name: Huawei TaiShan 2280 /BC11SPCD, BIOS 1.58 
10/29/2018
[  921.487372] Call trace:
[  921.489806]  dump_backtrace+0x0/0x188
[  921.493455]  show_stack+0x24/0x30
[  921.496757]  dump_stack+0xcc/0x134
[  921.500146]  print_circular_bug.isra.20+0x204/0x2d8
[  921.505011]  check_noncircular+0x130/0x1c0
[  921.509094]  check_prev_add+0xac/0x9f8
[  921.512829]  __lock_acquire+0x1164/0x12b8
[  921.516825]  lock_acquire+0xd4/0x268
[  921.520388]  _raw_spin_lock_irqsave+0x60/0x80
[  921.524732]  __irq_get_desc_lock+0x60/0xa0
[  921.528815]  irq_set_vcpu_affinity+0x48/0xc8
[  921.533071]  its_schedule_vpe+0x68/0xb0
[  921.536894]  vgic_v4_put+0x80/0xa8
[  921.540282]  vgic_v3_put+0x24/0xf0
[  921.543671]  kvm_vgic_put+0x3c/0x60
[  921.547147]  kvm_arch_vcpu_put+0x38/0x60
[  921.551057]  kvm_sched_out+0x38/0x48
[  921.554618]  __schedule+0x5a4/0x988
[  921.558094]  schedule+0x40/0xc8
[  921.561222]  kvm_arch_vcpu_ioctl_run+0x130/0xb08
[  921.565826]  kvm_vcpu_ioctl+0x3e0/0xb08
[  921.569649]  do_vfs_ioctl+0xc4/0x890
[  921.573211]  ksys_ioctl+0x8c/0xa0
[  921.576513]  __arm64_sys_ioctl+0x28/0x38
[  921.580423]  el0_svc_common.constprop.0+0x80/0x1b8
[  921.585201]  el0_svc_handler+0x34/0xb8
[  921.588937]  el0_svc+0x8/0xc



Thanks,
zenghui
Zenghui Yu Sept. 17, 2019, 10:17 a.m. UTC | #6
Hi Marc,

On 2019/9/17 17:31, Zenghui Yu wrote:
> 
> But this time I got the following WARNING:

Please ignore it. I think this is mostly caused by my local buggy
patch... Sorry for the noise.


Zenghui

> 
> [  921.004322] ======================================================
> [  921.010489] WARNING: possible circular locking dependency detected
> [  921.016657] 5.3.0+ #27 Not tainted
> [  921.020132] ------------------------------------------------------
> [  921.026299] CPU 1/KVM/816 is trying to acquire lock:
> [  921.031250] ffff002fb42b35b0 (&irq_desc_lock_class){-.-.}, at: 
> __irq_get_desc_lock+0x60/0xa0
> [  921.039684]
>                 but task is already holding lock:
> [  921.045503] ffff002fbbb07258 (&rq->lock){-.-.}, at: 
> __schedule+0xd4/0x988
> [  921.052283]
>                 which lock already depends on the new lock.
> 
> [  921.060445]
>                 the existing dependency chain (in reverse order) is:
> [  921.067913]
>                 -> #3 (&rq->lock){-.-.}:
> [  921.072955]        lock_acquire+0xd4/0x268
> [  921.077041]        _raw_spin_lock+0x44/0x58
> [  921.081212]        task_fork_fair+0x54/0x160
> [  921.085471]        sched_fork+0xfc/0x238
> [  921.089383]        copy_process+0x474/0x1738
> [  921.093639]        _do_fork+0x70/0x6e0
> [  921.097376]        kernel_thread+0x70/0x98
> [  921.101459]        rest_init+0x34/0x278
> [  921.105286]        arch_call_rest_init+0x14/0x1c
> [  921.109891]        start_kernel+0x548/0x574
> [  921.114060]
>                 -> #2 (&p->pi_lock){-.-.}:
> [  921.119275]        lock_acquire+0xd4/0x268
> [  921.123360]        _raw_spin_lock_irqsave+0x60/0x80
> [  921.128225]        try_to_wake_up+0x60/0xbf0
> [  921.132483]        wake_up_process+0x28/0x38
> [  921.136739]        __up.isra.0+0x58/0x68
> [  921.140649]        up+0x64/0x80
> [  921.143777]        __up_console_sem+0x60/0xa8
> [  921.148121]        console_unlock+0x31c/0x5f0
> [  921.152465]        vprintk_emit+0x28c/0x438
> [  921.156637]        dev_vprintk_emit+0x1d8/0x218
> [  921.161157]        dev_printk_emit+0x84/0xa8
> [  921.165414]        __dev_printk+0x78/0xa0
> [  921.169411]        _dev_info+0x7c/0xa0
> [  921.173148]        hub_port_init+0xa5c/0xb68
> [  921.177405]        hub_port_connect+0x2f0/0xa08
> [  921.181923]        port_event+0x548/0x828
> [  921.185920]        hub_event+0x20c/0x418
> [  921.189831]        process_one_work+0x24c/0x700
> [  921.194349]        worker_thread+0x4c/0x448
> [  921.198519]        kthread+0x130/0x138
> [  921.202256]        ret_from_fork+0x10/0x18
> [  921.206338]
>                 -> #1 ((console_sem).lock){-.-.}:
> [  921.212160]        lock_acquire+0xd4/0x268
> [  921.216244]        _raw_spin_lock_irqsave+0x60/0x80
> [  921.221110]        down_trylock+0x20/0x50
> [  921.225106]        __down_trylock_console_sem+0x50/0xe0
> [  921.230320]        console_trylock+0x20/0x88
> [  921.234577]        vprintk_emit+0x18c/0x438
> [  921.238747]        vprintk_default+0x54/0x90
> [  921.243004]        vprintk_func+0xe4/0x268
> [  921.247087]        printk+0x74/0x94
> [  921.250564]        show_interrupts+0x4dc/0x4f8
> [  921.254997]        seq_read+0x2b4/0x4e0
> [  921.258820]        proc_reg_read+0x94/0xe8
> [  921.262905]        __vfs_read+0x48/0x80
> [  921.266729]        vfs_read+0xa0/0x160
> [  921.270465]        ksys_read+0x74/0xf8
> [  921.274202]        __arm64_sys_read+0x24/0x30
> [  921.278547]        el0_svc_common.constprop.0+0x80/0x1b8
> [  921.283846]        el0_svc_handler+0x34/0xb8
> [  921.288102]        el0_svc+0x8/0xc
> [  921.291491]
>                 -> #0 (&irq_desc_lock_class){-.-.}:
> [  921.297486]        check_prev_add+0xac/0x9f8
> [  921.301743]        __lock_acquire+0x1164/0x12b8
> [  921.306260]        lock_acquire+0xd4/0x268
> [  921.310344]        _raw_spin_lock_irqsave+0x60/0x80
> [  921.315209]        __irq_get_desc_lock+0x60/0xa0
> [  921.319814]        irq_set_vcpu_affinity+0x48/0xc8
> [  921.324592]        its_schedule_vpe+0x68/0xb0
> [  921.328937]        vgic_v4_put+0x80/0xa8
> [  921.332846]        vgic_v3_put+0x24/0xf0
> [  921.336756]        kvm_vgic_put+0x3c/0x60
> [  921.340754]        kvm_arch_vcpu_put+0x38/0x60
> [  921.345184]        kvm_sched_out+0x38/0x48
> [  921.349267]        __schedule+0x5a4/0x988
> [  921.353263]        schedule+0x40/0xc8
> [  921.356912]        kvm_arch_vcpu_ioctl_run+0x130/0xb08
> [  921.362037]        kvm_vcpu_ioctl+0x3e0/0xb08
> [  921.366381]        do_vfs_ioctl+0xc4/0x890
> [  921.370464]        ksys_ioctl+0x8c/0xa0
> [  921.374287]        __arm64_sys_ioctl+0x28/0x38
> [  921.378717]        el0_svc_common.constprop.0+0x80/0x1b8
> [  921.384016]        el0_svc_handler+0x34/0xb8
> [  921.388272]        el0_svc+0x8/0xc
> [  921.391660]
>                 other info that might help us debug this:
> 
> [  921.399649] Chain exists of:
>                   &irq_desc_lock_class --> &p->pi_lock --> &rq->lock
> 
> [  921.409984]  Possible unsafe locking scenario:
> 
> [  921.415889]        CPU0                    CPU1
> [  921.420405]        ----                    ----
> [  921.424921]   lock(&rq->lock);
> [  921.427962]                                lock(&p->pi_lock);
> [  921.433694]                                lock(&rq->lock);
> [  921.439253]   lock(&irq_desc_lock_class);
> [  921.443249]
>                  *** DEADLOCK ***
> 
> [  921.449155] 2 locks held by CPU 1/KVM/816:
> [  921.453237]  #0: ffff002fa3862aa8 (&vcpu->mutex){+.+.}, at: 
> kvm_vcpu_ioctl+0x80/0xb08
> [  921.461055]  #1: ffff002fbbb07258 (&rq->lock){-.-.}, at: 
> __schedule+0xd4/0x988
> [  921.468265]
>                 stack backtrace:
> [  921.472610] CPU: 24 PID: 816 Comm: CPU 1/KVM Kdump: loaded Not 
> tainted 5.3.0+ #27
> [  921.480165] Hardware name: Huawei TaiShan 2280 /BC11SPCD, BIOS 1.58 
> 10/29/2018
> [  921.487372] Call trace:
> [  921.489806]  dump_backtrace+0x0/0x188
> [  921.493455]  show_stack+0x24/0x30
> [  921.496757]  dump_stack+0xcc/0x134
> [  921.500146]  print_circular_bug.isra.20+0x204/0x2d8
> [  921.505011]  check_noncircular+0x130/0x1c0
> [  921.509094]  check_prev_add+0xac/0x9f8
> [  921.512829]  __lock_acquire+0x1164/0x12b8
> [  921.516825]  lock_acquire+0xd4/0x268
> [  921.520388]  _raw_spin_lock_irqsave+0x60/0x80
> [  921.524732]  __irq_get_desc_lock+0x60/0xa0
> [  921.528815]  irq_set_vcpu_affinity+0x48/0xc8
> [  921.533071]  its_schedule_vpe+0x68/0xb0
> [  921.536894]  vgic_v4_put+0x80/0xa8
> [  921.540282]  vgic_v3_put+0x24/0xf0
> [  921.543671]  kvm_vgic_put+0x3c/0x60
> [  921.547147]  kvm_arch_vcpu_put+0x38/0x60
> [  921.551057]  kvm_sched_out+0x38/0x48
> [  921.554618]  __schedule+0x5a4/0x988
> [  921.558094]  schedule+0x40/0xc8
> [  921.561222]  kvm_arch_vcpu_ioctl_run+0x130/0xb08
> [  921.565826]  kvm_vcpu_ioctl+0x3e0/0xb08
> [  921.569649]  do_vfs_ioctl+0xc4/0x890
> [  921.573211]  ksys_ioctl+0x8c/0xa0
> [  921.576513]  __arm64_sys_ioctl+0x28/0x38
> [  921.580423]  el0_svc_common.constprop.0+0x80/0x1b8
> [  921.585201]  el0_svc_handler+0x34/0xb8
> [  921.588937]  el0_svc+0x8/0xc
> 
> 
> 
> Thanks,
> zenghui
> 
> 
> .
Marc Zyngier Sept. 17, 2019, 1:14 p.m. UTC | #7
On 17/09/2019 11:17, Zenghui Yu wrote:
> Hi Marc,
> 
> On 2019/9/17 17:31, Zenghui Yu wrote:
>>
>> But this time I got the following WARNING:
> 
> Please ignore it. I think this is mostly caused by my local buggy
> patch... Sorry for the noise.

Right. I couldn't quite figure out how this could happen with the
current state of the code...

	M.
diff mbox series

Patch

diff --git a/drivers/irqchip/irq-gic-v4.c b/drivers/irqchip/irq-gic-v4.c
index 563e87ed0766..45969927cc81 100644
--- a/drivers/irqchip/irq-gic-v4.c
+++ b/drivers/irqchip/irq-gic-v4.c
@@ -141,12 +141,17 @@  static int its_send_vpe_cmd(struct its_vpe *vpe, struct its_cmd_info *info)
 int its_schedule_vpe(struct its_vpe *vpe, bool on)
 {
 	struct its_cmd_info info;
+	int ret;
 
 	WARN_ON(preemptible());
 
 	info.cmd_type = on ? SCHEDULE_VPE : DESCHEDULE_VPE;
 
-	return its_send_vpe_cmd(vpe, &info);
+	ret = its_send_vpe_cmd(vpe, &info);
+	if (!ret)
+		vpe->resident = on;
+
+	return ret;
 }
 
 int its_invall_vpe(struct its_vpe *vpe)
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index af4f09c02bf1..4dc58d7a0010 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -396,7 +396,7 @@  int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int irq,
 int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq,
 				 struct kvm_kernel_irq_routing_entry *irq_entry);
 
-void kvm_vgic_v4_enable_doorbell(struct kvm_vcpu *vcpu);
-void kvm_vgic_v4_disable_doorbell(struct kvm_vcpu *vcpu);
+int vgic_v4_load(struct kvm_vcpu *vcpu);
+int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db);
 
 #endif /* __KVM_ARM_VGIC_H */
diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h
index e6b155713b47..ab1396afe08a 100644
--- a/include/linux/irqchip/arm-gic-v4.h
+++ b/include/linux/irqchip/arm-gic-v4.h
@@ -35,6 +35,8 @@  struct its_vpe {
 	/* Doorbell interrupt */
 	int			irq;
 	irq_hw_number_t		vpe_db_lpi;
+	/* VPE resident */
+	bool			resident;
 	/* VPE proxy mapping */
 	int			vpe_proxy_event;
 	/*
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 35a069815baf..4e69268621b6 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -321,20 +321,24 @@  void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
 	/*
 	 * If we're about to block (most likely because we've just hit a
 	 * WFI), we need to sync back the state of the GIC CPU interface
-	 * so that we have the lastest PMR and group enables. This ensures
+	 * so that we have the latest PMR and group enables. This ensures
 	 * that kvm_arch_vcpu_runnable has up-to-date data to decide
 	 * whether we have pending interrupts.
+	 *
+	 * For the same reason, we want to tell GICv4 that we need
+	 * doorbells to be signalled, should an interrupt become pending.
 	 */
 	preempt_disable();
 	kvm_vgic_vmcr_sync(vcpu);
+	vgic_v4_put(vcpu, true);
 	preempt_enable();
-
-	kvm_vgic_v4_enable_doorbell(vcpu);
 }
 
 void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
 {
-	kvm_vgic_v4_disable_doorbell(vcpu);
+	preempt_disable();
+	vgic_v4_load(vcpu);
+	preempt_enable();
 }
 
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index 8d69f007dd0c..48307a9eb1d8 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -664,6 +664,8 @@  void vgic_v3_load(struct kvm_vcpu *vcpu)
 
 	if (has_vhe())
 		__vgic_v3_activate_traps(vcpu);
+
+	WARN_ON(vgic_v4_load(vcpu));
 }
 
 void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu)
@@ -676,6 +678,8 @@  void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu)
 
 void vgic_v3_put(struct kvm_vcpu *vcpu)
 {
+	WARN_ON(vgic_v4_put(vcpu, false));
+
 	vgic_v3_vmcr_sync(vcpu);
 
 	kvm_call_hyp(__vgic_v3_save_aprs, vcpu);
diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c
index 477af6aebb97..3a8a28854b13 100644
--- a/virt/kvm/arm/vgic/vgic-v4.c
+++ b/virt/kvm/arm/vgic/vgic-v4.c
@@ -85,6 +85,10 @@  static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info)
 {
 	struct kvm_vcpu *vcpu = info;
 
+	/* We got the message, no need to fire again */
+	if (!irqd_irq_disabled(&irq_to_desc(irq)->irq_data))
+		disable_irq_nosync(irq);
+
 	vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last = true;
 	kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
 	kvm_vcpu_kick(vcpu);
@@ -192,20 +196,30 @@  void vgic_v4_teardown(struct kvm *kvm)
 	its_vm->vpes = NULL;
 }
 
-int vgic_v4_sync_hwstate(struct kvm_vcpu *vcpu)
+int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db)
 {
-	if (!vgic_supports_direct_msis(vcpu->kvm))
+	struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
+	struct irq_desc *desc = irq_to_desc(vpe->irq);
+
+	if (!vgic_supports_direct_msis(vcpu->kvm) || !vpe->resident)
 		return 0;
 
-	return its_schedule_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe, false);
+	/*
+	 * If blocking, a doorbell is required. Undo the nested
+	 * disable_irq() calls...
+	 */
+	while (need_db && irqd_irq_disabled(&desc->irq_data))
+		enable_irq(vpe->irq);
+
+	return its_schedule_vpe(vpe, false);
 }
 
-int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu)
+int vgic_v4_load(struct kvm_vcpu *vcpu)
 {
-	int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq;
+	struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
 	int err;
 
-	if (!vgic_supports_direct_msis(vcpu->kvm))
+	if (!vgic_supports_direct_msis(vcpu->kvm) || vpe->resident)
 		return 0;
 
 	/*
@@ -214,11 +228,14 @@  int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu)
 	 * doc in drivers/irqchip/irq-gic-v4.c to understand how this
 	 * turns into a VMOVP command at the ITS level.
 	 */
-	err = irq_set_affinity(irq, cpumask_of(smp_processor_id()));
+	err = irq_set_affinity(vpe->irq, cpumask_of(smp_processor_id()));
 	if (err)
 		return err;
 
-	err = its_schedule_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe, true);
+	/* Disable the doorbell, as we're about to enter the guest */
+	disable_irq(vpe->irq);
+
+	err = its_schedule_vpe(vpe, true);
 	if (err)
 		return err;
 
@@ -226,9 +243,7 @@  int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu)
 	 * Now that the VPE is resident, let's get rid of a potential
 	 * doorbell interrupt that would still be pending.
 	 */
-	err = irq_set_irqchip_state(irq, IRQCHIP_STATE_PENDING, false);
-
-	return err;
+	return irq_set_irqchip_state(vpe->irq, IRQCHIP_STATE_PENDING, false);
 }
 
 static struct vgic_its *vgic_get_its(struct kvm *kvm,
@@ -335,21 +350,3 @@  int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int virq,
 	mutex_unlock(&its->its_lock);
 	return ret;
 }
-
-void kvm_vgic_v4_enable_doorbell(struct kvm_vcpu *vcpu)
-{
-	if (vgic_supports_direct_msis(vcpu->kvm)) {
-		int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq;
-		if (irq)
-			enable_irq(irq);
-	}
-}
-
-void kvm_vgic_v4_disable_doorbell(struct kvm_vcpu *vcpu)
-{
-	if (vgic_supports_direct_msis(vcpu->kvm)) {
-		int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq;
-		if (irq)
-			disable_irq(irq);
-	}
-}
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index 45a870cb63f5..99b02ca730a8 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -857,8 +857,6 @@  void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 
-	WARN_ON(vgic_v4_sync_hwstate(vcpu));
-
 	/* An empty ap_list_head implies used_lrs == 0 */
 	if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
 		return;
@@ -882,8 +880,6 @@  static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
 /* Flush our emulation state into the GIC hardware before entering the guest. */
 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
 {
-	WARN_ON(vgic_v4_flush_hwstate(vcpu));
-
 	/*
 	 * If there are no virtual interrupts active or pending for this
 	 * VCPU, then there is no work to do and we can bail out without
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 83066a81b16a..c7fefd6b1c80 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -316,7 +316,5 @@  void vgic_its_invalidate_cache(struct kvm *kvm);
 bool vgic_supports_direct_msis(struct kvm *kvm);
 int vgic_v4_init(struct kvm *kvm);
 void vgic_v4_teardown(struct kvm *kvm);
-int vgic_v4_sync_hwstate(struct kvm_vcpu *vcpu);
-int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu);
 
 #endif