Message ID | 20201128141857.983-3-lushenming@huawei.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM: arm64: Optimize the wait for the completion of the VPT analysis | expand |
On 2020-11-28 14:18, Shenming Lu wrote: > In order to further reduce the impact of the wait delay of the > VPT analysis, we can delay the execution of the polling on the > GICR_VPENDBASER.Dirty bit (call it from kvm_vgic_flush_hwstate() > corresponding to vPE resident), let the GIC and the CPU work in > parallel on the entry path. > > Signed-off-by: Shenming Lu <lushenming@huawei.com> > --- > arch/arm64/kvm/vgic/vgic-v4.c | 16 ++++++++++++++++ > arch/arm64/kvm/vgic/vgic.c | 3 +++ > drivers/irqchip/irq-gic-v3-its.c | 16 ++++++++++++---- > drivers/irqchip/irq-gic-v4.c | 11 +++++++++++ > include/kvm/arm_vgic.h | 3 +++ > include/linux/irqchip/arm-gic-v4.h | 4 ++++ > 6 files changed, 49 insertions(+), 4 deletions(-) > > diff --git a/arch/arm64/kvm/vgic/vgic-v4.c > b/arch/arm64/kvm/vgic/vgic-v4.c > index b5fa73c9fd35..b0da74809187 100644 > --- a/arch/arm64/kvm/vgic/vgic-v4.c > +++ b/arch/arm64/kvm/vgic/vgic-v4.c > @@ -353,6 +353,22 @@ int vgic_v4_load(struct kvm_vcpu *vcpu) > return err; > } > > +void vgic_v4_commit(struct kvm_vcpu *vcpu) > +{ > + struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; > + > + /* > + * No need to wait for the vPE to be ready across a shallow guest > + * exit, as only a vcpu_put will invalidate it. > + */ > + if (vpe->vpe_ready) > + return; > + > + its_commit_vpe(vpe); > + > + vpe->vpe_ready = true; This should be written as: if (!ready) commit(); and ready being driven by the commit() call itself. 
> +} > + > static struct vgic_its *vgic_get_its(struct kvm *kvm, > struct kvm_kernel_irq_routing_entry *irq_entry) > { > diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c > index c3643b7f101b..1c597c9885fa 100644 > --- a/arch/arm64/kvm/vgic/vgic.c > +++ b/arch/arm64/kvm/vgic/vgic.c > @@ -915,6 +915,9 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) > > if (can_access_vgic_from_kernel()) > vgic_restore_state(vcpu); > + > + if (vgic_supports_direct_msis(vcpu->kvm)) > + vgic_v4_commit(vcpu); > } > > void kvm_vgic_load(struct kvm_vcpu *vcpu) > diff --git a/drivers/irqchip/irq-gic-v3-its.c > b/drivers/irqchip/irq-gic-v3-its.c > index 22f427135c6b..f30aba14933e 100644 > --- a/drivers/irqchip/irq-gic-v3-its.c > +++ b/drivers/irqchip/irq-gic-v3-its.c > @@ -3842,8 +3842,6 @@ static void its_vpe_schedule(struct its_vpe *vpe) > val |= vpe->idai ? GICR_VPENDBASER_IDAI : 0; > val |= GICR_VPENDBASER_Valid; > gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); > - > - its_wait_vpt_parse_complete(); > } > > static void its_vpe_deschedule(struct its_vpe *vpe) > @@ -3855,6 +3853,8 @@ static void its_vpe_deschedule(struct its_vpe > *vpe) > > vpe->idai = !!(val & GICR_VPENDBASER_IDAI); > vpe->pending_last = !!(val & GICR_VPENDBASER_PendingLast); > + > + vpe->vpe_ready = false; This should be set from the its_make_vpe_non_resident() call. 
> } > > static void its_vpe_invall(struct its_vpe *vpe) > @@ -3891,6 +3891,10 @@ static int its_vpe_set_vcpu_affinity(struct > irq_data *d, void *vcpu_info) > its_vpe_deschedule(vpe); > return 0; > > + case COMMIT_VPE: > + its_wait_vpt_parse_complete(); > + return 0; > + > case INVALL_VPE: > its_vpe_invall(vpe); > return 0; > @@ -4052,8 +4056,6 @@ static void its_vpe_4_1_schedule(struct its_vpe > *vpe, > val |= FIELD_PREP(GICR_VPENDBASER_4_1_VPEID, vpe->vpe_id); > > gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); > - > - its_wait_vpt_parse_complete(); > } > > static void its_vpe_4_1_deschedule(struct its_vpe *vpe, > @@ -4091,6 +4093,8 @@ static void its_vpe_4_1_deschedule(struct its_vpe > *vpe, > GICR_VPENDBASER_PendingLast); > vpe->pending_last = true; > } > + > + vpe->vpe_ready = false; > } > > static void its_vpe_4_1_invall(struct its_vpe *vpe) > @@ -4128,6 +4132,10 @@ static int its_vpe_4_1_set_vcpu_affinity(struct > irq_data *d, void *vcpu_info) > its_vpe_4_1_deschedule(vpe, info); > return 0; > > + case COMMIT_VPE: > + its_wait_vpt_parse_complete(); > + return 0; > + > case INVALL_VPE: > its_vpe_4_1_invall(vpe); > return 0; > diff --git a/drivers/irqchip/irq-gic-v4.c > b/drivers/irqchip/irq-gic-v4.c > index 0c18714ae13e..6cea71a4e68b 100644 > --- a/drivers/irqchip/irq-gic-v4.c > +++ b/drivers/irqchip/irq-gic-v4.c > @@ -258,6 +258,17 @@ int its_make_vpe_resident(struct its_vpe *vpe, > bool g0en, bool g1en) > return ret; > } > > +int its_commit_vpe(struct its_vpe *vpe) > +{ > + struct its_cmd_info info = { > + .cmd_type = COMMIT_VPE, > + }; > + > + WARN_ON(preemptible()); > + > + return its_send_vpe_cmd(vpe, &info); > +} > + > int its_invall_vpe(struct its_vpe *vpe) > { > struct its_cmd_info info = { > diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h > index a8d8fdcd3723..f2170df6cf7c 100644 > --- a/include/kvm/arm_vgic.h > +++ b/include/kvm/arm_vgic.h > @@ -401,7 +401,10 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, > int irq, > int 
kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq, > struct kvm_kernel_irq_routing_entry *irq_entry); > > +void vgic_v4_commit(struct kvm_vcpu *vcpu); > + > int vgic_v4_load(struct kvm_vcpu *vcpu); > + Spurious new lines. > int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db); > > #endif /* __KVM_ARM_VGIC_H */ > diff --git a/include/linux/irqchip/arm-gic-v4.h > b/include/linux/irqchip/arm-gic-v4.h > index 6976b8331b60..936d88e482a9 100644 > --- a/include/linux/irqchip/arm-gic-v4.h > +++ b/include/linux/irqchip/arm-gic-v4.h > @@ -75,6 +75,8 @@ struct its_vpe { > u16 vpe_id; > /* Pending VLPIs on schedule out? */ > bool pending_last; > + /* VPT parse complete */ > + bool vpe_ready; > }; > > /* > @@ -104,6 +106,7 @@ enum its_vcpu_info_cmd_type { > PROP_UPDATE_AND_INV_VLPI, > SCHEDULE_VPE, > DESCHEDULE_VPE, > + COMMIT_VPE, > INVALL_VPE, > PROP_UPDATE_VSGI, > }; > @@ -129,6 +132,7 @@ int its_alloc_vcpu_irqs(struct its_vm *vm); > void its_free_vcpu_irqs(struct its_vm *vm); > int its_make_vpe_resident(struct its_vpe *vpe, bool g0en, bool g1en); > int its_make_vpe_non_resident(struct its_vpe *vpe, bool db); > +int its_commit_vpe(struct its_vpe *vpe); > int its_invall_vpe(struct its_vpe *vpe); > int its_map_vlpi(int irq, struct its_vlpi_map *map); > int its_get_vlpi(int irq, struct its_vlpi_map *map); In order to speed up the respin round-trip, I've taken the liberty to refactor this patch myself. Please have a look at [1] and let me know if you're OK with it. Thanks, M. [1] https://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git/commit/?h=kvm-arm64/misc-5.11&id=57e3cebd022fbc035dcf190ac789fd2ffc747f5b
On 2020/11/30 19:22, Marc Zyngier wrote: > On 2020-11-28 14:18, Shenming Lu wrote: >> In order to further reduce the impact of the wait delay of the >> VPT analysis, we can delay the execution of the polling on the >> GICR_VPENDBASER.Dirty bit (call it from kvm_vgic_flush_hwstate() >> corresponding to vPE resident), let the GIC and the CPU work in >> parallel on the entry path. >> >> Signed-off-by: Shenming Lu <lushenming@huawei.com> >> --- >> arch/arm64/kvm/vgic/vgic-v4.c | 16 ++++++++++++++++ >> arch/arm64/kvm/vgic/vgic.c | 3 +++ >> drivers/irqchip/irq-gic-v3-its.c | 16 ++++++++++++---- >> drivers/irqchip/irq-gic-v4.c | 11 +++++++++++ >> include/kvm/arm_vgic.h | 3 +++ >> include/linux/irqchip/arm-gic-v4.h | 4 ++++ >> 6 files changed, 49 insertions(+), 4 deletions(-) >> >> diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c >> index b5fa73c9fd35..b0da74809187 100644 >> --- a/arch/arm64/kvm/vgic/vgic-v4.c >> +++ b/arch/arm64/kvm/vgic/vgic-v4.c >> @@ -353,6 +353,22 @@ int vgic_v4_load(struct kvm_vcpu *vcpu) >> return err; >> } >> >> +void vgic_v4_commit(struct kvm_vcpu *vcpu) >> +{ >> + struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; >> + >> + /* >> + * No need to wait for the vPE to be ready across a shallow guest >> + * exit, as only a vcpu_put will invalidate it. >> + */ >> + if (vpe->vpe_ready) >> + return; >> + >> + its_commit_vpe(vpe); >> + >> + vpe->vpe_ready = true; > > This should be written as: > > if (!ready) > commit(); > > and ready being driven by the commit() call itself. 
> >> +} >> + >> static struct vgic_its *vgic_get_its(struct kvm *kvm, >> struct kvm_kernel_irq_routing_entry *irq_entry) >> { >> diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c >> index c3643b7f101b..1c597c9885fa 100644 >> --- a/arch/arm64/kvm/vgic/vgic.c >> +++ b/arch/arm64/kvm/vgic/vgic.c >> @@ -915,6 +915,9 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) >> >> if (can_access_vgic_from_kernel()) >> vgic_restore_state(vcpu); >> + >> + if (vgic_supports_direct_msis(vcpu->kvm)) >> + vgic_v4_commit(vcpu); >> } >> >> void kvm_vgic_load(struct kvm_vcpu *vcpu) >> diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c >> index 22f427135c6b..f30aba14933e 100644 >> --- a/drivers/irqchip/irq-gic-v3-its.c >> +++ b/drivers/irqchip/irq-gic-v3-its.c >> @@ -3842,8 +3842,6 @@ static void its_vpe_schedule(struct its_vpe *vpe) >> val |= vpe->idai ? GICR_VPENDBASER_IDAI : 0; >> val |= GICR_VPENDBASER_Valid; >> gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); >> - >> - its_wait_vpt_parse_complete(); >> } >> >> static void its_vpe_deschedule(struct its_vpe *vpe) >> @@ -3855,6 +3853,8 @@ static void its_vpe_deschedule(struct its_vpe *vpe) >> >> vpe->idai = !!(val & GICR_VPENDBASER_IDAI); >> vpe->pending_last = !!(val & GICR_VPENDBASER_PendingLast); >> + >> + vpe->vpe_ready = false; > > This should be set from the its_make_vpe_non_resident() call. 
> >> } >> >> static void its_vpe_invall(struct its_vpe *vpe) >> @@ -3891,6 +3891,10 @@ static int its_vpe_set_vcpu_affinity(struct >> irq_data *d, void *vcpu_info) >> its_vpe_deschedule(vpe); >> return 0; >> >> + case COMMIT_VPE: >> + its_wait_vpt_parse_complete(); >> + return 0; >> + >> case INVALL_VPE: >> its_vpe_invall(vpe); >> return 0; >> @@ -4052,8 +4056,6 @@ static void its_vpe_4_1_schedule(struct its_vpe *vpe, >> val |= FIELD_PREP(GICR_VPENDBASER_4_1_VPEID, vpe->vpe_id); >> >> gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); >> - >> - its_wait_vpt_parse_complete(); >> } >> >> static void its_vpe_4_1_deschedule(struct its_vpe *vpe, >> @@ -4091,6 +4093,8 @@ static void its_vpe_4_1_deschedule(struct its_vpe *vpe, >> GICR_VPENDBASER_PendingLast); >> vpe->pending_last = true; >> } >> + >> + vpe->vpe_ready = false; >> } >> >> static void its_vpe_4_1_invall(struct its_vpe *vpe) >> @@ -4128,6 +4132,10 @@ static int its_vpe_4_1_set_vcpu_affinity(struct >> irq_data *d, void *vcpu_info) >> its_vpe_4_1_deschedule(vpe, info); >> return 0; >> >> + case COMMIT_VPE: >> + its_wait_vpt_parse_complete(); >> + return 0; >> + >> case INVALL_VPE: >> its_vpe_4_1_invall(vpe); >> return 0; >> diff --git a/drivers/irqchip/irq-gic-v4.c b/drivers/irqchip/irq-gic-v4.c >> index 0c18714ae13e..6cea71a4e68b 100644 >> --- a/drivers/irqchip/irq-gic-v4.c >> +++ b/drivers/irqchip/irq-gic-v4.c >> @@ -258,6 +258,17 @@ int its_make_vpe_resident(struct its_vpe *vpe, >> bool g0en, bool g1en) >> return ret; >> } >> >> +int its_commit_vpe(struct its_vpe *vpe) >> +{ >> + struct its_cmd_info info = { >> + .cmd_type = COMMIT_VPE, >> + }; >> + >> + WARN_ON(preemptible()); >> + >> + return its_send_vpe_cmd(vpe, &info); >> +} >> + >> int its_invall_vpe(struct its_vpe *vpe) >> { >> struct its_cmd_info info = { >> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h >> index a8d8fdcd3723..f2170df6cf7c 100644 >> --- a/include/kvm/arm_vgic.h >> +++ b/include/kvm/arm_vgic.h >> @@ -401,7 +401,10 
@@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int irq, >> int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq, >> struct kvm_kernel_irq_routing_entry *irq_entry); >> >> +void vgic_v4_commit(struct kvm_vcpu *vcpu); >> + >> int vgic_v4_load(struct kvm_vcpu *vcpu); >> + > > Spurious new lines. > >> int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db); >> >> #endif /* __KVM_ARM_VGIC_H */ >> diff --git a/include/linux/irqchip/arm-gic-v4.h >> b/include/linux/irqchip/arm-gic-v4.h >> index 6976b8331b60..936d88e482a9 100644 >> --- a/include/linux/irqchip/arm-gic-v4.h >> +++ b/include/linux/irqchip/arm-gic-v4.h >> @@ -75,6 +75,8 @@ struct its_vpe { >> u16 vpe_id; >> /* Pending VLPIs on schedule out? */ >> bool pending_last; >> + /* VPT parse complete */ >> + bool vpe_ready; >> }; >> >> /* >> @@ -104,6 +106,7 @@ enum its_vcpu_info_cmd_type { >> PROP_UPDATE_AND_INV_VLPI, >> SCHEDULE_VPE, >> DESCHEDULE_VPE, >> + COMMIT_VPE, >> INVALL_VPE, >> PROP_UPDATE_VSGI, >> }; >> @@ -129,6 +132,7 @@ int its_alloc_vcpu_irqs(struct its_vm *vm); >> void its_free_vcpu_irqs(struct its_vm *vm); >> int its_make_vpe_resident(struct its_vpe *vpe, bool g0en, bool g1en); >> int its_make_vpe_non_resident(struct its_vpe *vpe, bool db); >> +int its_commit_vpe(struct its_vpe *vpe); >> int its_invall_vpe(struct its_vpe *vpe); >> int its_map_vlpi(int irq, struct its_vlpi_map *map); >> int its_get_vlpi(int irq, struct its_vlpi_map *map); > > In order to speed up the respin round-trip, I've taken the liberty > to refactor this patch myself. Please have a look at [1] and let > me know if you're OK with it. I have looked at it and am OK. By the way, will the first patch (set the delay_us to 1) be picked up together? Thanks, Shenming > > Thanks, > > M. > > [1] https://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git/commit/?h=kvm-arm64/misc-5.11&id=57e3cebd022fbc035dcf190ac789fd2ffc747f5b
On 2020-11-30 12:12, Shenming Lu wrote: > On 2020/11/30 19:22, Marc Zyngier wrote: >> On 2020-11-28 14:18, Shenming Lu wrote: >>> In order to further reduce the impact of the wait delay of the >>> VPT analysis, we can delay the execution of the polling on the >>> GICR_VPENDBASER.Dirty bit (call it from kvm_vgic_flush_hwstate() >>> corresponding to vPE resident), let the GIC and the CPU work in >>> parallel on the entry path. >>> >>> Signed-off-by: Shenming Lu <lushenming@huawei.com> >>> --- >>> arch/arm64/kvm/vgic/vgic-v4.c | 16 ++++++++++++++++ >>> arch/arm64/kvm/vgic/vgic.c | 3 +++ >>> drivers/irqchip/irq-gic-v3-its.c | 16 ++++++++++++---- >>> drivers/irqchip/irq-gic-v4.c | 11 +++++++++++ >>> include/kvm/arm_vgic.h | 3 +++ >>> include/linux/irqchip/arm-gic-v4.h | 4 ++++ >>> 6 files changed, 49 insertions(+), 4 deletions(-) >>> >>> diff --git a/arch/arm64/kvm/vgic/vgic-v4.c >>> b/arch/arm64/kvm/vgic/vgic-v4.c >>> index b5fa73c9fd35..b0da74809187 100644 >>> --- a/arch/arm64/kvm/vgic/vgic-v4.c >>> +++ b/arch/arm64/kvm/vgic/vgic-v4.c >>> @@ -353,6 +353,22 @@ int vgic_v4_load(struct kvm_vcpu *vcpu) >>> return err; >>> } >>> >>> +void vgic_v4_commit(struct kvm_vcpu *vcpu) >>> +{ >>> + struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; >>> + >>> + /* >>> + * No need to wait for the vPE to be ready across a shallow >>> guest >>> + * exit, as only a vcpu_put will invalidate it. >>> + */ >>> + if (vpe->vpe_ready) >>> + return; >>> + >>> + its_commit_vpe(vpe); >>> + >>> + vpe->vpe_ready = true; >> >> This should be written as: >> >> if (!ready) >> commit(); >> >> and ready being driven by the commit() call itself. 
>> >>> +} >>> + >>> static struct vgic_its *vgic_get_its(struct kvm *kvm, >>> struct kvm_kernel_irq_routing_entry *irq_entry) >>> { >>> diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c >>> index c3643b7f101b..1c597c9885fa 100644 >>> --- a/arch/arm64/kvm/vgic/vgic.c >>> +++ b/arch/arm64/kvm/vgic/vgic.c >>> @@ -915,6 +915,9 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu >>> *vcpu) >>> >>> if (can_access_vgic_from_kernel()) >>> vgic_restore_state(vcpu); >>> + >>> + if (vgic_supports_direct_msis(vcpu->kvm)) >>> + vgic_v4_commit(vcpu); >>> } >>> >>> void kvm_vgic_load(struct kvm_vcpu *vcpu) >>> diff --git a/drivers/irqchip/irq-gic-v3-its.c >>> b/drivers/irqchip/irq-gic-v3-its.c >>> index 22f427135c6b..f30aba14933e 100644 >>> --- a/drivers/irqchip/irq-gic-v3-its.c >>> +++ b/drivers/irqchip/irq-gic-v3-its.c >>> @@ -3842,8 +3842,6 @@ static void its_vpe_schedule(struct its_vpe >>> *vpe) >>> val |= vpe->idai ? GICR_VPENDBASER_IDAI : 0; >>> val |= GICR_VPENDBASER_Valid; >>> gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); >>> - >>> - its_wait_vpt_parse_complete(); >>> } >>> >>> static void its_vpe_deschedule(struct its_vpe *vpe) >>> @@ -3855,6 +3853,8 @@ static void its_vpe_deschedule(struct its_vpe >>> *vpe) >>> >>> vpe->idai = !!(val & GICR_VPENDBASER_IDAI); >>> vpe->pending_last = !!(val & GICR_VPENDBASER_PendingLast); >>> + >>> + vpe->vpe_ready = false; >> >> This should be set from the its_make_vpe_non_resident() call. 
>> >>> } >>> >>> static void its_vpe_invall(struct its_vpe *vpe) >>> @@ -3891,6 +3891,10 @@ static int its_vpe_set_vcpu_affinity(struct >>> irq_data *d, void *vcpu_info) >>> its_vpe_deschedule(vpe); >>> return 0; >>> >>> + case COMMIT_VPE: >>> + its_wait_vpt_parse_complete(); >>> + return 0; >>> + >>> case INVALL_VPE: >>> its_vpe_invall(vpe); >>> return 0; >>> @@ -4052,8 +4056,6 @@ static void its_vpe_4_1_schedule(struct its_vpe >>> *vpe, >>> val |= FIELD_PREP(GICR_VPENDBASER_4_1_VPEID, vpe->vpe_id); >>> >>> gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); >>> - >>> - its_wait_vpt_parse_complete(); >>> } >>> >>> static void its_vpe_4_1_deschedule(struct its_vpe *vpe, >>> @@ -4091,6 +4093,8 @@ static void its_vpe_4_1_deschedule(struct >>> its_vpe *vpe, >>> GICR_VPENDBASER_PendingLast); >>> vpe->pending_last = true; >>> } >>> + >>> + vpe->vpe_ready = false; >>> } >>> >>> static void its_vpe_4_1_invall(struct its_vpe *vpe) >>> @@ -4128,6 +4132,10 @@ static int >>> its_vpe_4_1_set_vcpu_affinity(struct >>> irq_data *d, void *vcpu_info) >>> its_vpe_4_1_deschedule(vpe, info); >>> return 0; >>> >>> + case COMMIT_VPE: >>> + its_wait_vpt_parse_complete(); >>> + return 0; >>> + >>> case INVALL_VPE: >>> its_vpe_4_1_invall(vpe); >>> return 0; >>> diff --git a/drivers/irqchip/irq-gic-v4.c >>> b/drivers/irqchip/irq-gic-v4.c >>> index 0c18714ae13e..6cea71a4e68b 100644 >>> --- a/drivers/irqchip/irq-gic-v4.c >>> +++ b/drivers/irqchip/irq-gic-v4.c >>> @@ -258,6 +258,17 @@ int its_make_vpe_resident(struct its_vpe *vpe, >>> bool g0en, bool g1en) >>> return ret; >>> } >>> >>> +int its_commit_vpe(struct its_vpe *vpe) >>> +{ >>> + struct its_cmd_info info = { >>> + .cmd_type = COMMIT_VPE, >>> + }; >>> + >>> + WARN_ON(preemptible()); >>> + >>> + return its_send_vpe_cmd(vpe, &info); >>> +} >>> + >>> int its_invall_vpe(struct its_vpe *vpe) >>> { >>> struct its_cmd_info info = { >>> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h >>> index a8d8fdcd3723..f2170df6cf7c 
100644 >>> --- a/include/kvm/arm_vgic.h >>> +++ b/include/kvm/arm_vgic.h >>> @@ -401,7 +401,10 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, >>> int irq, >>> int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq, >>> struct kvm_kernel_irq_routing_entry *irq_entry); >>> >>> +void vgic_v4_commit(struct kvm_vcpu *vcpu); >>> + >>> int vgic_v4_load(struct kvm_vcpu *vcpu); >>> + >> >> Spurious new lines. >> >>> int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db); >>> >>> #endif /* __KVM_ARM_VGIC_H */ >>> diff --git a/include/linux/irqchip/arm-gic-v4.h >>> b/include/linux/irqchip/arm-gic-v4.h >>> index 6976b8331b60..936d88e482a9 100644 >>> --- a/include/linux/irqchip/arm-gic-v4.h >>> +++ b/include/linux/irqchip/arm-gic-v4.h >>> @@ -75,6 +75,8 @@ struct its_vpe { >>> u16 vpe_id; >>> /* Pending VLPIs on schedule out? */ >>> bool pending_last; >>> + /* VPT parse complete */ >>> + bool vpe_ready; >>> }; >>> >>> /* >>> @@ -104,6 +106,7 @@ enum its_vcpu_info_cmd_type { >>> PROP_UPDATE_AND_INV_VLPI, >>> SCHEDULE_VPE, >>> DESCHEDULE_VPE, >>> + COMMIT_VPE, >>> INVALL_VPE, >>> PROP_UPDATE_VSGI, >>> }; >>> @@ -129,6 +132,7 @@ int its_alloc_vcpu_irqs(struct its_vm *vm); >>> void its_free_vcpu_irqs(struct its_vm *vm); >>> int its_make_vpe_resident(struct its_vpe *vpe, bool g0en, bool >>> g1en); >>> int its_make_vpe_non_resident(struct its_vpe *vpe, bool db); >>> +int its_commit_vpe(struct its_vpe *vpe); >>> int its_invall_vpe(struct its_vpe *vpe); >>> int its_map_vlpi(int irq, struct its_vlpi_map *map); >>> int its_get_vlpi(int irq, struct its_vlpi_map *map); >> >> In order to speed up the respin round-trip, I've taken the liberty >> to refactor this patch myself. Please have a look at [1] and let >> me know if you're OK with it. > > I have looked at it and am OK. > > By the way, will the first patch (set the delay_us to 1) be picked up > together? I'll route it via the irqchip tree. Thanks, M.
diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c index b5fa73c9fd35..b0da74809187 100644 --- a/arch/arm64/kvm/vgic/vgic-v4.c +++ b/arch/arm64/kvm/vgic/vgic-v4.c @@ -353,6 +353,22 @@ int vgic_v4_load(struct kvm_vcpu *vcpu) return err; } +void vgic_v4_commit(struct kvm_vcpu *vcpu) +{ + struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; + + /* + * No need to wait for the vPE to be ready across a shallow guest + * exit, as only a vcpu_put will invalidate it. + */ + if (vpe->vpe_ready) + return; + + its_commit_vpe(vpe); + + vpe->vpe_ready = true; +} + static struct vgic_its *vgic_get_its(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *irq_entry) { diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index c3643b7f101b..1c597c9885fa 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -915,6 +915,9 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) if (can_access_vgic_from_kernel()) vgic_restore_state(vcpu); + + if (vgic_supports_direct_msis(vcpu->kvm)) + vgic_v4_commit(vcpu); } void kvm_vgic_load(struct kvm_vcpu *vcpu) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 22f427135c6b..f30aba14933e 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -3842,8 +3842,6 @@ static void its_vpe_schedule(struct its_vpe *vpe) val |= vpe->idai ? 
GICR_VPENDBASER_IDAI : 0; val |= GICR_VPENDBASER_Valid; gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); - - its_wait_vpt_parse_complete(); } static void its_vpe_deschedule(struct its_vpe *vpe) @@ -3855,6 +3853,8 @@ static void its_vpe_deschedule(struct its_vpe *vpe) vpe->idai = !!(val & GICR_VPENDBASER_IDAI); vpe->pending_last = !!(val & GICR_VPENDBASER_PendingLast); + + vpe->vpe_ready = false; } static void its_vpe_invall(struct its_vpe *vpe) @@ -3891,6 +3891,10 @@ static int its_vpe_set_vcpu_affinity(struct irq_data *d, void *vcpu_info) its_vpe_deschedule(vpe); return 0; + case COMMIT_VPE: + its_wait_vpt_parse_complete(); + return 0; + case INVALL_VPE: its_vpe_invall(vpe); return 0; @@ -4052,8 +4056,6 @@ static void its_vpe_4_1_schedule(struct its_vpe *vpe, val |= FIELD_PREP(GICR_VPENDBASER_4_1_VPEID, vpe->vpe_id); gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); - - its_wait_vpt_parse_complete(); } static void its_vpe_4_1_deschedule(struct its_vpe *vpe, @@ -4091,6 +4093,8 @@ static void its_vpe_4_1_deschedule(struct its_vpe *vpe, GICR_VPENDBASER_PendingLast); vpe->pending_last = true; } + + vpe->vpe_ready = false; } static void its_vpe_4_1_invall(struct its_vpe *vpe) @@ -4128,6 +4132,10 @@ static int its_vpe_4_1_set_vcpu_affinity(struct irq_data *d, void *vcpu_info) its_vpe_4_1_deschedule(vpe, info); return 0; + case COMMIT_VPE: + its_wait_vpt_parse_complete(); + return 0; + case INVALL_VPE: its_vpe_4_1_invall(vpe); return 0; diff --git a/drivers/irqchip/irq-gic-v4.c b/drivers/irqchip/irq-gic-v4.c index 0c18714ae13e..6cea71a4e68b 100644 --- a/drivers/irqchip/irq-gic-v4.c +++ b/drivers/irqchip/irq-gic-v4.c @@ -258,6 +258,17 @@ int its_make_vpe_resident(struct its_vpe *vpe, bool g0en, bool g1en) return ret; } +int its_commit_vpe(struct its_vpe *vpe) +{ + struct its_cmd_info info = { + .cmd_type = COMMIT_VPE, + }; + + WARN_ON(preemptible()); + + return its_send_vpe_cmd(vpe, &info); +} + int its_invall_vpe(struct its_vpe *vpe) { struct its_cmd_info 
info = { diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index a8d8fdcd3723..f2170df6cf7c 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -401,7 +401,10 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int irq, int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq, struct kvm_kernel_irq_routing_entry *irq_entry); +void vgic_v4_commit(struct kvm_vcpu *vcpu); + int vgic_v4_load(struct kvm_vcpu *vcpu); + int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db); #endif /* __KVM_ARM_VGIC_H */ diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index 6976b8331b60..936d88e482a9 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -75,6 +75,8 @@ struct its_vpe { u16 vpe_id; /* Pending VLPIs on schedule out? */ bool pending_last; + /* VPT parse complete */ + bool vpe_ready; }; /* @@ -104,6 +106,7 @@ enum its_vcpu_info_cmd_type { PROP_UPDATE_AND_INV_VLPI, SCHEDULE_VPE, DESCHEDULE_VPE, + COMMIT_VPE, INVALL_VPE, PROP_UPDATE_VSGI, }; @@ -129,6 +132,7 @@ int its_alloc_vcpu_irqs(struct its_vm *vm); void its_free_vcpu_irqs(struct its_vm *vm); int its_make_vpe_resident(struct its_vpe *vpe, bool g0en, bool g1en); int its_make_vpe_non_resident(struct its_vpe *vpe, bool db); +int its_commit_vpe(struct its_vpe *vpe); int its_invall_vpe(struct its_vpe *vpe); int its_map_vlpi(int irq, struct its_vlpi_map *map); int its_get_vlpi(int irq, struct its_vlpi_map *map);
In order to further reduce the impact of the wait delay of the VPT analysis, we can delay the execution of the polling on the GICR_VPENDBASER.Dirty bit (calling it from kvm_vgic_flush_hwstate(), which corresponds to the vPE being resident), letting the GIC and the CPU work in parallel on the entry path. Signed-off-by: Shenming Lu <lushenming@huawei.com> --- arch/arm64/kvm/vgic/vgic-v4.c | 16 ++++++++++++++++ arch/arm64/kvm/vgic/vgic.c | 3 +++ drivers/irqchip/irq-gic-v3-its.c | 16 ++++++++++++---- drivers/irqchip/irq-gic-v4.c | 11 +++++++++++ include/kvm/arm_vgic.h | 3 +++ include/linux/irqchip/arm-gic-v4.h | 4 ++++ 6 files changed, 49 insertions(+), 4 deletions(-)