diff mbox series

[v2,2/2] KVM: arm64: Delay the execution of the polling on the GICR_VPENDBASER.Dirty bit

Message ID 20201128141857.983-3-lushenming@huawei.com (mailing list archive)
State New, archived
Headers show
Series KVM: arm64: Optimize the wait for the completion of the VPT analysis | expand

Commit Message

Shenming Lu Nov. 28, 2020, 2:18 p.m. UTC
In order to further reduce the impact of the wait delay of the
VPT analysis, we can delay the execution of the polling on the
GICR_VPENDBASER.Dirty bit (calling it from kvm_vgic_flush_hwstate(),
which corresponds to the vPE being resident), so that the GIC and
the CPU can work in parallel on the entry path.

Signed-off-by: Shenming Lu <lushenming@huawei.com>
---
 arch/arm64/kvm/vgic/vgic-v4.c      | 16 ++++++++++++++++
 arch/arm64/kvm/vgic/vgic.c         |  3 +++
 drivers/irqchip/irq-gic-v3-its.c   | 16 ++++++++++++----
 drivers/irqchip/irq-gic-v4.c       | 11 +++++++++++
 include/kvm/arm_vgic.h             |  3 +++
 include/linux/irqchip/arm-gic-v4.h |  4 ++++
 6 files changed, 49 insertions(+), 4 deletions(-)

Comments

Marc Zyngier Nov. 30, 2020, 11:22 a.m. UTC | #1
On 2020-11-28 14:18, Shenming Lu wrote:
> In order to further reduce the impact of the wait delay of the
> VPT analysis, we can delay the execution of the polling on the
> GICR_VPENDBASER.Dirty bit (call it from kvm_vgic_flush_hwstate()
> corresponding to vPE resident), let the GIC and the CPU work in
> parallel on the entry path.
> 
> Signed-off-by: Shenming Lu <lushenming@huawei.com>
> ---
>  arch/arm64/kvm/vgic/vgic-v4.c      | 16 ++++++++++++++++
>  arch/arm64/kvm/vgic/vgic.c         |  3 +++
>  drivers/irqchip/irq-gic-v3-its.c   | 16 ++++++++++++----
>  drivers/irqchip/irq-gic-v4.c       | 11 +++++++++++
>  include/kvm/arm_vgic.h             |  3 +++
>  include/linux/irqchip/arm-gic-v4.h |  4 ++++
>  6 files changed, 49 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/arm64/kvm/vgic/vgic-v4.c 
> b/arch/arm64/kvm/vgic/vgic-v4.c
> index b5fa73c9fd35..b0da74809187 100644
> --- a/arch/arm64/kvm/vgic/vgic-v4.c
> +++ b/arch/arm64/kvm/vgic/vgic-v4.c
> @@ -353,6 +353,22 @@ int vgic_v4_load(struct kvm_vcpu *vcpu)
>  	return err;
>  }
> 
> +void vgic_v4_commit(struct kvm_vcpu *vcpu)
> +{
> +	struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
> +
> +	/*
> +	 * No need to wait for the vPE to be ready across a shallow guest
> +	 * exit, as only a vcpu_put will invalidate it.
> +	 */
> +	if (vpe->vpe_ready)
> +		return;
> +
> +	its_commit_vpe(vpe);
> +
> +	vpe->vpe_ready = true;

This should be written as:

if (!ready)
      commit();

with ready being driven by the commit() call itself.

> +}
> +
>  static struct vgic_its *vgic_get_its(struct kvm *kvm,
>  				     struct kvm_kernel_irq_routing_entry *irq_entry)
>  {
> diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
> index c3643b7f101b..1c597c9885fa 100644
> --- a/arch/arm64/kvm/vgic/vgic.c
> +++ b/arch/arm64/kvm/vgic/vgic.c
> @@ -915,6 +915,9 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
> 
>  	if (can_access_vgic_from_kernel())
>  		vgic_restore_state(vcpu);
> +
> +	if (vgic_supports_direct_msis(vcpu->kvm))
> +		vgic_v4_commit(vcpu);
>  }
> 
>  void kvm_vgic_load(struct kvm_vcpu *vcpu)
> diff --git a/drivers/irqchip/irq-gic-v3-its.c 
> b/drivers/irqchip/irq-gic-v3-its.c
> index 22f427135c6b..f30aba14933e 100644
> --- a/drivers/irqchip/irq-gic-v3-its.c
> +++ b/drivers/irqchip/irq-gic-v3-its.c
> @@ -3842,8 +3842,6 @@ static void its_vpe_schedule(struct its_vpe *vpe)
>  	val |= vpe->idai ? GICR_VPENDBASER_IDAI : 0;
>  	val |= GICR_VPENDBASER_Valid;
>  	gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER);
> -
> -	its_wait_vpt_parse_complete();
>  }
> 
>  static void its_vpe_deschedule(struct its_vpe *vpe)
> @@ -3855,6 +3853,8 @@ static void its_vpe_deschedule(struct its_vpe 
> *vpe)
> 
>  	vpe->idai = !!(val & GICR_VPENDBASER_IDAI);
>  	vpe->pending_last = !!(val & GICR_VPENDBASER_PendingLast);
> +
> +	vpe->vpe_ready = false;

This should be set from the its_make_vpe_non_resident() call.

>  }
> 
>  static void its_vpe_invall(struct its_vpe *vpe)
> @@ -3891,6 +3891,10 @@ static int its_vpe_set_vcpu_affinity(struct
> irq_data *d, void *vcpu_info)
>  		its_vpe_deschedule(vpe);
>  		return 0;
> 
> +	case COMMIT_VPE:
> +		its_wait_vpt_parse_complete();
> +		return 0;
> +
>  	case INVALL_VPE:
>  		its_vpe_invall(vpe);
>  		return 0;
> @@ -4052,8 +4056,6 @@ static void its_vpe_4_1_schedule(struct its_vpe 
> *vpe,
>  	val |= FIELD_PREP(GICR_VPENDBASER_4_1_VPEID, vpe->vpe_id);
> 
>  	gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER);
> -
> -	its_wait_vpt_parse_complete();
>  }
> 
>  static void its_vpe_4_1_deschedule(struct its_vpe *vpe,
> @@ -4091,6 +4093,8 @@ static void its_vpe_4_1_deschedule(struct its_vpe 
> *vpe,
>  					    GICR_VPENDBASER_PendingLast);
>  		vpe->pending_last = true;
>  	}
> +
> +	vpe->vpe_ready = false;
>  }
> 
>  static void its_vpe_4_1_invall(struct its_vpe *vpe)
> @@ -4128,6 +4132,10 @@ static int its_vpe_4_1_set_vcpu_affinity(struct
> irq_data *d, void *vcpu_info)
>  		its_vpe_4_1_deschedule(vpe, info);
>  		return 0;
> 
> +	case COMMIT_VPE:
> +		its_wait_vpt_parse_complete();
> +		return 0;
> +
>  	case INVALL_VPE:
>  		its_vpe_4_1_invall(vpe);
>  		return 0;
> diff --git a/drivers/irqchip/irq-gic-v4.c 
> b/drivers/irqchip/irq-gic-v4.c
> index 0c18714ae13e..6cea71a4e68b 100644
> --- a/drivers/irqchip/irq-gic-v4.c
> +++ b/drivers/irqchip/irq-gic-v4.c
> @@ -258,6 +258,17 @@ int its_make_vpe_resident(struct its_vpe *vpe,
> bool g0en, bool g1en)
>  	return ret;
>  }
> 
> +int its_commit_vpe(struct its_vpe *vpe)
> +{
> +	struct its_cmd_info info = {
> +		.cmd_type = COMMIT_VPE,
> +	};
> +
> +	WARN_ON(preemptible());
> +
> +	return its_send_vpe_cmd(vpe, &info);
> +}
> +
>  int its_invall_vpe(struct its_vpe *vpe)
>  {
>  	struct its_cmd_info info = {
> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> index a8d8fdcd3723..f2170df6cf7c 100644
> --- a/include/kvm/arm_vgic.h
> +++ b/include/kvm/arm_vgic.h
> @@ -401,7 +401,10 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, 
> int irq,
>  int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq,
>  				 struct kvm_kernel_irq_routing_entry *irq_entry);
> 
> +void vgic_v4_commit(struct kvm_vcpu *vcpu);
> +
>  int vgic_v4_load(struct kvm_vcpu *vcpu);
> +

Spurious new lines.

>  int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db);
> 
>  #endif /* __KVM_ARM_VGIC_H */
> diff --git a/include/linux/irqchip/arm-gic-v4.h
> b/include/linux/irqchip/arm-gic-v4.h
> index 6976b8331b60..936d88e482a9 100644
> --- a/include/linux/irqchip/arm-gic-v4.h
> +++ b/include/linux/irqchip/arm-gic-v4.h
> @@ -75,6 +75,8 @@ struct its_vpe {
>  	u16			vpe_id;
>  	/* Pending VLPIs on schedule out? */
>  	bool			pending_last;
> +	/* VPT parse complete */
> +	bool			vpe_ready;
>  };
> 
>  /*
> @@ -104,6 +106,7 @@ enum its_vcpu_info_cmd_type {
>  	PROP_UPDATE_AND_INV_VLPI,
>  	SCHEDULE_VPE,
>  	DESCHEDULE_VPE,
> +	COMMIT_VPE,
>  	INVALL_VPE,
>  	PROP_UPDATE_VSGI,
>  };
> @@ -129,6 +132,7 @@ int its_alloc_vcpu_irqs(struct its_vm *vm);
>  void its_free_vcpu_irqs(struct its_vm *vm);
>  int its_make_vpe_resident(struct its_vpe *vpe, bool g0en, bool g1en);
>  int its_make_vpe_non_resident(struct its_vpe *vpe, bool db);
> +int its_commit_vpe(struct its_vpe *vpe);
>  int its_invall_vpe(struct its_vpe *vpe);
>  int its_map_vlpi(int irq, struct its_vlpi_map *map);
>  int its_get_vlpi(int irq, struct its_vlpi_map *map);

In order to speed up the respin round-trip, I've taken the liberty
of refactoring this patch myself. Please have a look at [1] and let
me know if you're OK with it.

Thanks,

         M.

[1] 
https://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git/commit/?h=kvm-arm64/misc-5.11&id=57e3cebd022fbc035dcf190ac789fd2ffc747f5b
Shenming Lu Nov. 30, 2020, 12:12 p.m. UTC | #2
On 2020/11/30 19:22, Marc Zyngier wrote:
> On 2020-11-28 14:18, Shenming Lu wrote:
>> In order to further reduce the impact of the wait delay of the
>> VPT analysis, we can delay the execution of the polling on the
>> GICR_VPENDBASER.Dirty bit (call it from kvm_vgic_flush_hwstate()
>> corresponding to vPE resident), let the GIC and the CPU work in
>> parallel on the entry path.
>>
>> Signed-off-by: Shenming Lu <lushenming@huawei.com>
>> ---
>>  arch/arm64/kvm/vgic/vgic-v4.c      | 16 ++++++++++++++++
>>  arch/arm64/kvm/vgic/vgic.c         |  3 +++
>>  drivers/irqchip/irq-gic-v3-its.c   | 16 ++++++++++++----
>>  drivers/irqchip/irq-gic-v4.c       | 11 +++++++++++
>>  include/kvm/arm_vgic.h             |  3 +++
>>  include/linux/irqchip/arm-gic-v4.h |  4 ++++
>>  6 files changed, 49 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c
>> index b5fa73c9fd35..b0da74809187 100644
>> --- a/arch/arm64/kvm/vgic/vgic-v4.c
>> +++ b/arch/arm64/kvm/vgic/vgic-v4.c
>> @@ -353,6 +353,22 @@ int vgic_v4_load(struct kvm_vcpu *vcpu)
>>      return err;
>>  }
>>
>> +void vgic_v4_commit(struct kvm_vcpu *vcpu)
>> +{
>> +    struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
>> +
>> +    /*
>> +     * No need to wait for the vPE to be ready across a shallow guest
>> +     * exit, as only a vcpu_put will invalidate it.
>> +     */
>> +    if (vpe->vpe_ready)
>> +        return;
>> +
>> +    its_commit_vpe(vpe);
>> +
>> +    vpe->vpe_ready = true;
> 
> This should be written as:
> 
> if (!ready)
>      commit();
> 
> and ready being driven by the commit() call itself.
> 
>> +}
>> +
>>  static struct vgic_its *vgic_get_its(struct kvm *kvm,
>>                       struct kvm_kernel_irq_routing_entry *irq_entry)
>>  {
>> diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
>> index c3643b7f101b..1c597c9885fa 100644
>> --- a/arch/arm64/kvm/vgic/vgic.c
>> +++ b/arch/arm64/kvm/vgic/vgic.c
>> @@ -915,6 +915,9 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
>>
>>      if (can_access_vgic_from_kernel())
>>          vgic_restore_state(vcpu);
>> +
>> +    if (vgic_supports_direct_msis(vcpu->kvm))
>> +        vgic_v4_commit(vcpu);
>>  }
>>
>>  void kvm_vgic_load(struct kvm_vcpu *vcpu)
>> diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
>> index 22f427135c6b..f30aba14933e 100644
>> --- a/drivers/irqchip/irq-gic-v3-its.c
>> +++ b/drivers/irqchip/irq-gic-v3-its.c
>> @@ -3842,8 +3842,6 @@ static void its_vpe_schedule(struct its_vpe *vpe)
>>      val |= vpe->idai ? GICR_VPENDBASER_IDAI : 0;
>>      val |= GICR_VPENDBASER_Valid;
>>      gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER);
>> -
>> -    its_wait_vpt_parse_complete();
>>  }
>>
>>  static void its_vpe_deschedule(struct its_vpe *vpe)
>> @@ -3855,6 +3853,8 @@ static void its_vpe_deschedule(struct its_vpe *vpe)
>>
>>      vpe->idai = !!(val & GICR_VPENDBASER_IDAI);
>>      vpe->pending_last = !!(val & GICR_VPENDBASER_PendingLast);
>> +
>> +    vpe->vpe_ready = false;
> 
> This should be set from the its_make_vpe_non_resident() call.
> 
>>  }
>>
>>  static void its_vpe_invall(struct its_vpe *vpe)
>> @@ -3891,6 +3891,10 @@ static int its_vpe_set_vcpu_affinity(struct
>> irq_data *d, void *vcpu_info)
>>          its_vpe_deschedule(vpe);
>>          return 0;
>>
>> +    case COMMIT_VPE:
>> +        its_wait_vpt_parse_complete();
>> +        return 0;
>> +
>>      case INVALL_VPE:
>>          its_vpe_invall(vpe);
>>          return 0;
>> @@ -4052,8 +4056,6 @@ static void its_vpe_4_1_schedule(struct its_vpe *vpe,
>>      val |= FIELD_PREP(GICR_VPENDBASER_4_1_VPEID, vpe->vpe_id);
>>
>>      gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER);
>> -
>> -    its_wait_vpt_parse_complete();
>>  }
>>
>>  static void its_vpe_4_1_deschedule(struct its_vpe *vpe,
>> @@ -4091,6 +4093,8 @@ static void its_vpe_4_1_deschedule(struct its_vpe *vpe,
>>                          GICR_VPENDBASER_PendingLast);
>>          vpe->pending_last = true;
>>      }
>> +
>> +    vpe->vpe_ready = false;
>>  }
>>
>>  static void its_vpe_4_1_invall(struct its_vpe *vpe)
>> @@ -4128,6 +4132,10 @@ static int its_vpe_4_1_set_vcpu_affinity(struct
>> irq_data *d, void *vcpu_info)
>>          its_vpe_4_1_deschedule(vpe, info);
>>          return 0;
>>
>> +    case COMMIT_VPE:
>> +        its_wait_vpt_parse_complete();
>> +        return 0;
>> +
>>      case INVALL_VPE:
>>          its_vpe_4_1_invall(vpe);
>>          return 0;
>> diff --git a/drivers/irqchip/irq-gic-v4.c b/drivers/irqchip/irq-gic-v4.c
>> index 0c18714ae13e..6cea71a4e68b 100644
>> --- a/drivers/irqchip/irq-gic-v4.c
>> +++ b/drivers/irqchip/irq-gic-v4.c
>> @@ -258,6 +258,17 @@ int its_make_vpe_resident(struct its_vpe *vpe,
>> bool g0en, bool g1en)
>>      return ret;
>>  }
>>
>> +int its_commit_vpe(struct its_vpe *vpe)
>> +{
>> +    struct its_cmd_info info = {
>> +        .cmd_type = COMMIT_VPE,
>> +    };
>> +
>> +    WARN_ON(preemptible());
>> +
>> +    return its_send_vpe_cmd(vpe, &info);
>> +}
>> +
>>  int its_invall_vpe(struct its_vpe *vpe)
>>  {
>>      struct its_cmd_info info = {
>> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
>> index a8d8fdcd3723..f2170df6cf7c 100644
>> --- a/include/kvm/arm_vgic.h
>> +++ b/include/kvm/arm_vgic.h
>> @@ -401,7 +401,10 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int irq,
>>  int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq,
>>                   struct kvm_kernel_irq_routing_entry *irq_entry);
>>
>> +void vgic_v4_commit(struct kvm_vcpu *vcpu);
>> +
>>  int vgic_v4_load(struct kvm_vcpu *vcpu);
>> +
> 
> Spurious new lines.
> 
>>  int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db);
>>
>>  #endif /* __KVM_ARM_VGIC_H */
>> diff --git a/include/linux/irqchip/arm-gic-v4.h
>> b/include/linux/irqchip/arm-gic-v4.h
>> index 6976b8331b60..936d88e482a9 100644
>> --- a/include/linux/irqchip/arm-gic-v4.h
>> +++ b/include/linux/irqchip/arm-gic-v4.h
>> @@ -75,6 +75,8 @@ struct its_vpe {
>>      u16            vpe_id;
>>      /* Pending VLPIs on schedule out? */
>>      bool            pending_last;
>> +    /* VPT parse complete */
>> +    bool            vpe_ready;
>>  };
>>
>>  /*
>> @@ -104,6 +106,7 @@ enum its_vcpu_info_cmd_type {
>>      PROP_UPDATE_AND_INV_VLPI,
>>      SCHEDULE_VPE,
>>      DESCHEDULE_VPE,
>> +    COMMIT_VPE,
>>      INVALL_VPE,
>>      PROP_UPDATE_VSGI,
>>  };
>> @@ -129,6 +132,7 @@ int its_alloc_vcpu_irqs(struct its_vm *vm);
>>  void its_free_vcpu_irqs(struct its_vm *vm);
>>  int its_make_vpe_resident(struct its_vpe *vpe, bool g0en, bool g1en);
>>  int its_make_vpe_non_resident(struct its_vpe *vpe, bool db);
>> +int its_commit_vpe(struct its_vpe *vpe);
>>  int its_invall_vpe(struct its_vpe *vpe);
>>  int its_map_vlpi(int irq, struct its_vlpi_map *map);
>>  int its_get_vlpi(int irq, struct its_vlpi_map *map);
> 
> In order to speed up the respin round-trip, I've taken the liberty
> to refactor this patch myself. Please have a look at [1] and let
> me know if you're OK with it.

I have looked at it and am OK with it.

By the way, will the first patch (setting delay_us to 1) be picked up
together with this one?

Thanks,
Shenming

> 
> Thanks,
> 
>         M.
> 
> [1] https://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git/commit/?h=kvm-arm64/misc-5.11&id=57e3cebd022fbc035dcf190ac789fd2ffc747f5b
Marc Zyngier Nov. 30, 2020, 12:28 p.m. UTC | #3
On 2020-11-30 12:12, Shenming Lu wrote:
> On 2020/11/30 19:22, Marc Zyngier wrote:
>> On 2020-11-28 14:18, Shenming Lu wrote:
>>> In order to further reduce the impact of the wait delay of the
>>> VPT analysis, we can delay the execution of the polling on the
>>> GICR_VPENDBASER.Dirty bit (call it from kvm_vgic_flush_hwstate()
>>> corresponding to vPE resident), let the GIC and the CPU work in
>>> parallel on the entry path.
>>> 
>>> Signed-off-by: Shenming Lu <lushenming@huawei.com>
>>> ---
>>>  arch/arm64/kvm/vgic/vgic-v4.c      | 16 ++++++++++++++++
>>>  arch/arm64/kvm/vgic/vgic.c         |  3 +++
>>>  drivers/irqchip/irq-gic-v3-its.c   | 16 ++++++++++++----
>>>  drivers/irqchip/irq-gic-v4.c       | 11 +++++++++++
>>>  include/kvm/arm_vgic.h             |  3 +++
>>>  include/linux/irqchip/arm-gic-v4.h |  4 ++++
>>>  6 files changed, 49 insertions(+), 4 deletions(-)
>>> 
>>> diff --git a/arch/arm64/kvm/vgic/vgic-v4.c 
>>> b/arch/arm64/kvm/vgic/vgic-v4.c
>>> index b5fa73c9fd35..b0da74809187 100644
>>> --- a/arch/arm64/kvm/vgic/vgic-v4.c
>>> +++ b/arch/arm64/kvm/vgic/vgic-v4.c
>>> @@ -353,6 +353,22 @@ int vgic_v4_load(struct kvm_vcpu *vcpu)
>>>      return err;
>>>  }
>>> 
>>> +void vgic_v4_commit(struct kvm_vcpu *vcpu)
>>> +{
>>> +    struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
>>> +
>>> +    /*
>>> +     * No need to wait for the vPE to be ready across a shallow 
>>> guest
>>> +     * exit, as only a vcpu_put will invalidate it.
>>> +     */
>>> +    if (vpe->vpe_ready)
>>> +        return;
>>> +
>>> +    its_commit_vpe(vpe);
>>> +
>>> +    vpe->vpe_ready = true;
>> 
>> This should be written as:
>> 
>> if (!ready)
>>      commit();
>> 
>> and ready being driven by the commit() call itself.
>> 
>>> +}
>>> +
>>>  static struct vgic_its *vgic_get_its(struct kvm *kvm,
>>>                       struct kvm_kernel_irq_routing_entry *irq_entry)
>>>  {
>>> diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
>>> index c3643b7f101b..1c597c9885fa 100644
>>> --- a/arch/arm64/kvm/vgic/vgic.c
>>> +++ b/arch/arm64/kvm/vgic/vgic.c
>>> @@ -915,6 +915,9 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu 
>>> *vcpu)
>>> 
>>>      if (can_access_vgic_from_kernel())
>>>          vgic_restore_state(vcpu);
>>> +
>>> +    if (vgic_supports_direct_msis(vcpu->kvm))
>>> +        vgic_v4_commit(vcpu);
>>>  }
>>> 
>>>  void kvm_vgic_load(struct kvm_vcpu *vcpu)
>>> diff --git a/drivers/irqchip/irq-gic-v3-its.c 
>>> b/drivers/irqchip/irq-gic-v3-its.c
>>> index 22f427135c6b..f30aba14933e 100644
>>> --- a/drivers/irqchip/irq-gic-v3-its.c
>>> +++ b/drivers/irqchip/irq-gic-v3-its.c
>>> @@ -3842,8 +3842,6 @@ static void its_vpe_schedule(struct its_vpe 
>>> *vpe)
>>>      val |= vpe->idai ? GICR_VPENDBASER_IDAI : 0;
>>>      val |= GICR_VPENDBASER_Valid;
>>>      gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER);
>>> -
>>> -    its_wait_vpt_parse_complete();
>>>  }
>>> 
>>>  static void its_vpe_deschedule(struct its_vpe *vpe)
>>> @@ -3855,6 +3853,8 @@ static void its_vpe_deschedule(struct its_vpe 
>>> *vpe)
>>> 
>>>      vpe->idai = !!(val & GICR_VPENDBASER_IDAI);
>>>      vpe->pending_last = !!(val & GICR_VPENDBASER_PendingLast);
>>> +
>>> +    vpe->vpe_ready = false;
>> 
>> This should be set from the its_make_vpe_non_resident() call.
>> 
>>>  }
>>> 
>>>  static void its_vpe_invall(struct its_vpe *vpe)
>>> @@ -3891,6 +3891,10 @@ static int its_vpe_set_vcpu_affinity(struct
>>> irq_data *d, void *vcpu_info)
>>>          its_vpe_deschedule(vpe);
>>>          return 0;
>>> 
>>> +    case COMMIT_VPE:
>>> +        its_wait_vpt_parse_complete();
>>> +        return 0;
>>> +
>>>      case INVALL_VPE:
>>>          its_vpe_invall(vpe);
>>>          return 0;
>>> @@ -4052,8 +4056,6 @@ static void its_vpe_4_1_schedule(struct its_vpe 
>>> *vpe,
>>>      val |= FIELD_PREP(GICR_VPENDBASER_4_1_VPEID, vpe->vpe_id);
>>> 
>>>      gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER);
>>> -
>>> -    its_wait_vpt_parse_complete();
>>>  }
>>> 
>>>  static void its_vpe_4_1_deschedule(struct its_vpe *vpe,
>>> @@ -4091,6 +4093,8 @@ static void its_vpe_4_1_deschedule(struct 
>>> its_vpe *vpe,
>>>                          GICR_VPENDBASER_PendingLast);
>>>          vpe->pending_last = true;
>>>      }
>>> +
>>> +    vpe->vpe_ready = false;
>>>  }
>>> 
>>>  static void its_vpe_4_1_invall(struct its_vpe *vpe)
>>> @@ -4128,6 +4132,10 @@ static int 
>>> its_vpe_4_1_set_vcpu_affinity(struct
>>> irq_data *d, void *vcpu_info)
>>>          its_vpe_4_1_deschedule(vpe, info);
>>>          return 0;
>>> 
>>> +    case COMMIT_VPE:
>>> +        its_wait_vpt_parse_complete();
>>> +        return 0;
>>> +
>>>      case INVALL_VPE:
>>>          its_vpe_4_1_invall(vpe);
>>>          return 0;
>>> diff --git a/drivers/irqchip/irq-gic-v4.c 
>>> b/drivers/irqchip/irq-gic-v4.c
>>> index 0c18714ae13e..6cea71a4e68b 100644
>>> --- a/drivers/irqchip/irq-gic-v4.c
>>> +++ b/drivers/irqchip/irq-gic-v4.c
>>> @@ -258,6 +258,17 @@ int its_make_vpe_resident(struct its_vpe *vpe,
>>> bool g0en, bool g1en)
>>>      return ret;
>>>  }
>>> 
>>> +int its_commit_vpe(struct its_vpe *vpe)
>>> +{
>>> +    struct its_cmd_info info = {
>>> +        .cmd_type = COMMIT_VPE,
>>> +    };
>>> +
>>> +    WARN_ON(preemptible());
>>> +
>>> +    return its_send_vpe_cmd(vpe, &info);
>>> +}
>>> +
>>>  int its_invall_vpe(struct its_vpe *vpe)
>>>  {
>>>      struct its_cmd_info info = {
>>> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
>>> index a8d8fdcd3723..f2170df6cf7c 100644
>>> --- a/include/kvm/arm_vgic.h
>>> +++ b/include/kvm/arm_vgic.h
>>> @@ -401,7 +401,10 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, 
>>> int irq,
>>>  int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq,
>>>                   struct kvm_kernel_irq_routing_entry *irq_entry);
>>> 
>>> +void vgic_v4_commit(struct kvm_vcpu *vcpu);
>>> +
>>>  int vgic_v4_load(struct kvm_vcpu *vcpu);
>>> +
>> 
>> Spurious new lines.
>> 
>>>  int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db);
>>> 
>>>  #endif /* __KVM_ARM_VGIC_H */
>>> diff --git a/include/linux/irqchip/arm-gic-v4.h
>>> b/include/linux/irqchip/arm-gic-v4.h
>>> index 6976b8331b60..936d88e482a9 100644
>>> --- a/include/linux/irqchip/arm-gic-v4.h
>>> +++ b/include/linux/irqchip/arm-gic-v4.h
>>> @@ -75,6 +75,8 @@ struct its_vpe {
>>>      u16            vpe_id;
>>>      /* Pending VLPIs on schedule out? */
>>>      bool            pending_last;
>>> +    /* VPT parse complete */
>>> +    bool            vpe_ready;
>>>  };
>>> 
>>>  /*
>>> @@ -104,6 +106,7 @@ enum its_vcpu_info_cmd_type {
>>>      PROP_UPDATE_AND_INV_VLPI,
>>>      SCHEDULE_VPE,
>>>      DESCHEDULE_VPE,
>>> +    COMMIT_VPE,
>>>      INVALL_VPE,
>>>      PROP_UPDATE_VSGI,
>>>  };
>>> @@ -129,6 +132,7 @@ int its_alloc_vcpu_irqs(struct its_vm *vm);
>>>  void its_free_vcpu_irqs(struct its_vm *vm);
>>>  int its_make_vpe_resident(struct its_vpe *vpe, bool g0en, bool 
>>> g1en);
>>>  int its_make_vpe_non_resident(struct its_vpe *vpe, bool db);
>>> +int its_commit_vpe(struct its_vpe *vpe);
>>>  int its_invall_vpe(struct its_vpe *vpe);
>>>  int its_map_vlpi(int irq, struct its_vlpi_map *map);
>>>  int its_get_vlpi(int irq, struct its_vlpi_map *map);
>> 
>> In order to speed up the respin round-trip, I've taken the liberty
>> to refactor this patch myself. Please have a look at [1] and let
>> me know if you're OK with it.
> 
> I have looked at it and am OK.
> 
> By the way, will the first patch (set the delay_us to 1) be picked up
> together?

I'll route it via the irqchip tree.

Thanks,

         M.
diff mbox series

Patch

diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c
index b5fa73c9fd35..b0da74809187 100644
--- a/arch/arm64/kvm/vgic/vgic-v4.c
+++ b/arch/arm64/kvm/vgic/vgic-v4.c
@@ -353,6 +353,22 @@  int vgic_v4_load(struct kvm_vcpu *vcpu)
 	return err;
 }
 
+void vgic_v4_commit(struct kvm_vcpu *vcpu)
+{
+	struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
+
+	/*
+	 * No need to wait for the vPE to be ready across a shallow guest
+	 * exit, as only a vcpu_put will invalidate it.
+	 */
+	if (vpe->vpe_ready)
+		return;
+
+	its_commit_vpe(vpe);
+
+	vpe->vpe_ready = true;
+}
+
 static struct vgic_its *vgic_get_its(struct kvm *kvm,
 				     struct kvm_kernel_irq_routing_entry *irq_entry)
 {
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index c3643b7f101b..1c597c9885fa 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -915,6 +915,9 @@  void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
 
 	if (can_access_vgic_from_kernel())
 		vgic_restore_state(vcpu);
+
+	if (vgic_supports_direct_msis(vcpu->kvm))
+		vgic_v4_commit(vcpu);
 }
 
 void kvm_vgic_load(struct kvm_vcpu *vcpu)
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 22f427135c6b..f30aba14933e 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -3842,8 +3842,6 @@  static void its_vpe_schedule(struct its_vpe *vpe)
 	val |= vpe->idai ? GICR_VPENDBASER_IDAI : 0;
 	val |= GICR_VPENDBASER_Valid;
 	gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER);
-
-	its_wait_vpt_parse_complete();
 }
 
 static void its_vpe_deschedule(struct its_vpe *vpe)
@@ -3855,6 +3853,8 @@  static void its_vpe_deschedule(struct its_vpe *vpe)
 
 	vpe->idai = !!(val & GICR_VPENDBASER_IDAI);
 	vpe->pending_last = !!(val & GICR_VPENDBASER_PendingLast);
+
+	vpe->vpe_ready = false;
 }
 
 static void its_vpe_invall(struct its_vpe *vpe)
@@ -3891,6 +3891,10 @@  static int its_vpe_set_vcpu_affinity(struct irq_data *d, void *vcpu_info)
 		its_vpe_deschedule(vpe);
 		return 0;
 
+	case COMMIT_VPE:
+		its_wait_vpt_parse_complete();
+		return 0;
+
 	case INVALL_VPE:
 		its_vpe_invall(vpe);
 		return 0;
@@ -4052,8 +4056,6 @@  static void its_vpe_4_1_schedule(struct its_vpe *vpe,
 	val |= FIELD_PREP(GICR_VPENDBASER_4_1_VPEID, vpe->vpe_id);
 
 	gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER);
-
-	its_wait_vpt_parse_complete();
 }
 
 static void its_vpe_4_1_deschedule(struct its_vpe *vpe,
@@ -4091,6 +4093,8 @@  static void its_vpe_4_1_deschedule(struct its_vpe *vpe,
 					    GICR_VPENDBASER_PendingLast);
 		vpe->pending_last = true;
 	}
+
+	vpe->vpe_ready = false;
 }
 
 static void its_vpe_4_1_invall(struct its_vpe *vpe)
@@ -4128,6 +4132,10 @@  static int its_vpe_4_1_set_vcpu_affinity(struct irq_data *d, void *vcpu_info)
 		its_vpe_4_1_deschedule(vpe, info);
 		return 0;
 
+	case COMMIT_VPE:
+		its_wait_vpt_parse_complete();
+		return 0;
+
 	case INVALL_VPE:
 		its_vpe_4_1_invall(vpe);
 		return 0;
diff --git a/drivers/irqchip/irq-gic-v4.c b/drivers/irqchip/irq-gic-v4.c
index 0c18714ae13e..6cea71a4e68b 100644
--- a/drivers/irqchip/irq-gic-v4.c
+++ b/drivers/irqchip/irq-gic-v4.c
@@ -258,6 +258,17 @@  int its_make_vpe_resident(struct its_vpe *vpe, bool g0en, bool g1en)
 	return ret;
 }
 
+int its_commit_vpe(struct its_vpe *vpe)
+{
+	struct its_cmd_info info = {
+		.cmd_type = COMMIT_VPE,
+	};
+
+	WARN_ON(preemptible());
+
+	return its_send_vpe_cmd(vpe, &info);
+}
+
 int its_invall_vpe(struct its_vpe *vpe)
 {
 	struct its_cmd_info info = {
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index a8d8fdcd3723..f2170df6cf7c 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -401,7 +401,10 @@  int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int irq,
 int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq,
 				 struct kvm_kernel_irq_routing_entry *irq_entry);
 
+void vgic_v4_commit(struct kvm_vcpu *vcpu);
+
 int vgic_v4_load(struct kvm_vcpu *vcpu);
+
 int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db);
 
 #endif /* __KVM_ARM_VGIC_H */
diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h
index 6976b8331b60..936d88e482a9 100644
--- a/include/linux/irqchip/arm-gic-v4.h
+++ b/include/linux/irqchip/arm-gic-v4.h
@@ -75,6 +75,8 @@  struct its_vpe {
 	u16			vpe_id;
 	/* Pending VLPIs on schedule out? */
 	bool			pending_last;
+	/* VPT parse complete */
+	bool			vpe_ready;
 };
 
 /*
@@ -104,6 +106,7 @@  enum its_vcpu_info_cmd_type {
 	PROP_UPDATE_AND_INV_VLPI,
 	SCHEDULE_VPE,
 	DESCHEDULE_VPE,
+	COMMIT_VPE,
 	INVALL_VPE,
 	PROP_UPDATE_VSGI,
 };
@@ -129,6 +132,7 @@  int its_alloc_vcpu_irqs(struct its_vm *vm);
 void its_free_vcpu_irqs(struct its_vm *vm);
 int its_make_vpe_resident(struct its_vpe *vpe, bool g0en, bool g1en);
 int its_make_vpe_non_resident(struct its_vpe *vpe, bool db);
+int its_commit_vpe(struct its_vpe *vpe);
 int its_invall_vpe(struct its_vpe *vpe);
 int its_map_vlpi(int irq, struct its_vlpi_map *map);
 int its_get_vlpi(int irq, struct its_vlpi_map *map);