Message ID | 20171012104141.26902-9-christoffer.dall@linaro.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Thu, Oct 12, 2017 at 12:41:12PM +0200, Christoffer Dall wrote: > Avoid saving the guest VFP registers and restoring the host VFP > registers on every exit from the VM. Only when we're about to run > userspace or other threads in the kernel do we really have to switch the > state back to the host state. > > We still initially configure the VFP registers to trap when entering the > VM, but the difference is that we now leave the guest state in the > hardware registers while running the VM. running the host. > > Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org> > --- > arch/arm64/include/asm/kvm_emulate.h | 5 ++++ > arch/arm64/include/asm/kvm_host.h | 3 +++ > arch/arm64/kernel/asm-offsets.c | 1 + > arch/arm64/kvm/hyp/entry.S | 3 +++ > arch/arm64/kvm/hyp/switch.c | 47 +++++++++++------------------------- > arch/arm64/kvm/hyp/sysreg-sr.c | 21 +++++++++++++--- > 6 files changed, 44 insertions(+), 36 deletions(-) > > diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h > index 1fbfe96..630dd60 100644 > --- a/arch/arm64/include/asm/kvm_emulate.h > +++ b/arch/arm64/include/asm/kvm_emulate.h > @@ -56,6 +56,11 @@ static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu) > return (unsigned long *)&vcpu->arch.hcr_el2; > } > > +static inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) > +{ > + return (!(vcpu->arch.hcr_el2 & HCR_RW)); nit: no need for the outer (). 
> +} > + > static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) > { > return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc; > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h > index 7d3bfa7..5e09eb9 100644 > --- a/arch/arm64/include/asm/kvm_host.h > +++ b/arch/arm64/include/asm/kvm_host.h > @@ -210,6 +210,9 @@ struct kvm_vcpu_arch { > /* Guest debug state */ > u64 debug_flags; > > + /* 1 if the guest VFP state is loaded into the hardware */ > + u64 guest_vfp_loaded; > + Is there a chance we'll want other flags like this? Should we just make this a lazy state flags field with the (currently only) flag VFP? If not, then a bool would be nicer, although I see below the u64 was chosen in order for the 'str' to be used. > /* > * We maintain more than a single set of debug registers to support > * debugging the guest from the host and to maintain separate host and > diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c > index 612021d..9946732 100644 > --- a/arch/arm64/kernel/asm-offsets.c > +++ b/arch/arm64/kernel/asm-offsets.c > @@ -133,6 +133,7 @@ int main(void) > DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs)); > DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs)); > DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs)); > + DEFINE(VCPU_GUEST_VFP_LOADED, offsetof(struct kvm_vcpu, arch.guest_vfp_loaded)); > DEFINE(VCPU_FPEXC32_EL2, offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2])); > DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); > DEFINE(HOST_CONTEXT_VCPU, offsetof(struct kvm_cpu_context, __hyp_running_vcpu)); > diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S > index 76cd48f..b3e7191 100644 > --- a/arch/arm64/kvm/hyp/entry.S > +++ b/arch/arm64/kvm/hyp/entry.S > @@ -185,6 +185,9 @@ alternative_endif > add x0, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS) > bl __fpsimd_restore_state > > + mov x0, #1 > + str x0, [x3, 
#VCPU_GUEST_VFP_LOADED] > + > // Skip restoring fpexc32 for AArch64 guests > mrs x1, hcr_el2 > tbnz x1, #HCR_RW_SHIFT, 1f > diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c > index 7703d63..ef05c59 100644 > --- a/arch/arm64/kvm/hyp/switch.c > +++ b/arch/arm64/kvm/hyp/switch.c > @@ -23,43 +23,31 @@ > #include <asm/kvm_hyp.h> > #include <asm/fpsimd.h> > > -static bool __hyp_text __fpsimd_enabled_nvhe(void) > -{ > - return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP); > -} > - > -static bool __hyp_text __fpsimd_enabled_vhe(void) > -{ > - return !!(read_sysreg(cpacr_el1) & CPACR_EL1_FPEN); > -} > - > -static hyp_alternate_select(__fpsimd_is_enabled, > - __fpsimd_enabled_nvhe, __fpsimd_enabled_vhe, > - ARM64_HAS_VIRT_HOST_EXTN); > - > -bool __hyp_text __fpsimd_enabled(void) > -{ > - return __fpsimd_is_enabled()(); > -} > - > -static void __hyp_text __activate_traps_vhe(void) > +static void __hyp_text __activate_traps_vhe(struct kvm_vcpu *vcpu) > { > u64 val; > > val = read_sysreg(cpacr_el1); > val |= CPACR_EL1_TTA; > - val &= ~CPACR_EL1_FPEN; > + if (vcpu->arch.guest_vfp_loaded) > + val |= CPACR_EL1_FPEN; > + else > + val &= ~CPACR_EL1_FPEN; > write_sysreg(val, cpacr_el1); > > write_sysreg(__kvm_hyp_vector, vbar_el1); > } > > -static void __hyp_text __activate_traps_nvhe(void) > +static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu) > { > u64 val; > > val = CPTR_EL2_DEFAULT; > - val |= CPTR_EL2_TTA | CPTR_EL2_TFP; > + val |= CPTR_EL2_TTA; > + if (vcpu->arch.guest_vfp_loaded) > + val &= ~CPTR_EL2_TFP; > + else > + val |= CPTR_EL2_TFP; > write_sysreg(val, cptr_el2); > } > > @@ -81,7 +69,8 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) > * it will cause an exception. 
> */ > val = vcpu->arch.hcr_el2; > - if (!(val & HCR_RW) && system_supports_fpsimd()) { > + if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd() && > + !vcpu->arch.guest_vfp_loaded) { > write_sysreg(1 << 30, fpexc32_el2); > isb(); > } > @@ -97,7 +86,7 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) > write_sysreg(0, pmselr_el0); > write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); > write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); > - __activate_traps_arch()(); > + __activate_traps_arch()(vcpu); > } > > static void __hyp_text __deactivate_traps_vhe(void) > @@ -273,7 +262,6 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) > { > struct kvm_cpu_context *host_ctxt; > struct kvm_cpu_context *guest_ctxt; > - bool fp_enabled; > u64 exit_code; > > vcpu = kern_hyp_va(vcpu); > @@ -355,8 +343,6 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) > /* 0 falls through to be handled out of EL2 */ > } > > - fp_enabled = __fpsimd_enabled(); > - > __sysreg_save_guest_state(guest_ctxt); > __sysreg32_save_state(vcpu); > __timer_disable_traps(vcpu); > @@ -367,11 +353,6 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) > > __sysreg_restore_host_state(host_ctxt); > > - if (fp_enabled) { > - __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs); > - __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs); > - } > - > __debug_save_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt); > /* > * This must come after restoring the host sysregs, since a non-VHE > diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c > index b7438c8..c4a3714 100644 > --- a/arch/arm64/kvm/hyp/sysreg-sr.c > +++ b/arch/arm64/kvm/hyp/sysreg-sr.c > @@ -19,6 +19,7 @@ > #include <linux/kvm_host.h> > > #include <asm/kvm_asm.h> > +#include <asm/kvm_emulate.h> > #include <asm/kvm_hyp.h> > > /* Yes, this does nothing, on purpose */ > @@ -137,6 +138,11 @@ void __hyp_text __sysreg_restore_guest_state(struct kvm_cpu_context *ctxt) > __sysreg_restore_common_state(ctxt); 
> } > > +static void __hyp_text __fpsimd32_save_state(struct kvm_cpu_context *ctxt) > +{ > + ctxt->sys_regs[FPEXC32_EL2] = read_sysreg(fpexc32_el2); > +} > + > void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu) > { > u64 *spsr, *sysreg; > @@ -155,9 +161,6 @@ void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu) > sysreg[DACR32_EL2] = read_sysreg(dacr32_el2); > sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2); > > - if (__fpsimd_enabled()) > - sysreg[FPEXC32_EL2] = read_sysreg(fpexc32_el2); > - > if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) > sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2); > } > @@ -209,4 +212,16 @@ void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu) > */ > void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) > { > + struct kvm_cpu_context *host_ctxt = vcpu->arch.host_cpu_context; > + struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; > + > + /* Restore host FP/SIMD state */ > + if (vcpu->arch.guest_vfp_loaded) { > + if (vcpu_el1_is_32bit(vcpu)) > + kvm_call_hyp(__fpsimd32_save_state, > + kern_hyp_va(guest_ctxt)); nit: might be nice to use {} since we need two lines. > + __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs); > + __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs); > + vcpu->arch.guest_vfp_loaded = 0; > + } > } > -- > 2.9.0 > Otherwise, Reviewed-by: Andrew Jones <drjones@redhat.com>
On Thu, Oct 12, 2017 at 12:41:12PM +0200, Christoffer Dall wrote: > Avoid saving the guest VFP registers and restoring the host VFP > registers on every exit from the VM. Only when we're about to run > userspace or other threads in the kernel do we really have to switch the > state back to the host state. Rik van Riel's recently posted patch "[PATCH v2 0/2] x86,kvm: move qemu/guest FPU switching out to kvm_arch_vcpu_ioctl_run" indicates that for x86 they only need to swap guest and userspace VFP registers before exiting VCPU_RUN to userspace, not for running other threads. I imagine that's the same for ARM as well. If so, then I think this hunk > @@ -209,4 +212,16 @@ void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu) > */ > void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) > { > + struct kvm_cpu_context *host_ctxt = vcpu->arch.host_cpu_context; > + struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; > + > + /* Restore host FP/SIMD state */ > + if (vcpu->arch.guest_vfp_loaded) { > + if (vcpu_el1_is_32bit(vcpu)) > + kvm_call_hyp(__fpsimd32_save_state, > + kern_hyp_va(guest_ctxt)); > + __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs); > + __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs); > + vcpu->arch.guest_vfp_loaded = 0; > + } > } could be moved to the return of kvm_arch_vcpu_ioctl_run(). Thanks, drew
Hi Christoffer, On Thu, Oct 12, 2017 at 12:41:12PM +0200, Christoffer Dall wrote: > Avoid saving the guest VFP registers and restoring the host VFP > registers on every exit from the VM. Only when we're about to run > userspace or other threads in the kernel do we really have to switch the > state back to the host state. > > We still initially configure the VFP registers to trap when entering the > VM, but the difference is that we now leave the guest state in the > hardware registers while running the VM. > > Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org> > --- > arch/arm64/include/asm/kvm_emulate.h | 5 ++++ > arch/arm64/include/asm/kvm_host.h | 3 +++ > arch/arm64/kernel/asm-offsets.c | 1 + > arch/arm64/kvm/hyp/entry.S | 3 +++ > arch/arm64/kvm/hyp/switch.c | 47 +++++++++++------------------------- > arch/arm64/kvm/hyp/sysreg-sr.c | 21 +++++++++++++--- > 6 files changed, 44 insertions(+), 36 deletions(-) > > diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h > index 1fbfe96..630dd60 100644 > --- a/arch/arm64/include/asm/kvm_emulate.h > +++ b/arch/arm64/include/asm/kvm_emulate.h > @@ -56,6 +56,11 @@ static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu) > return (unsigned long *)&vcpu->arch.hcr_el2; > } > > +static inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) > +{ > + return (!(vcpu->arch.hcr_el2 & HCR_RW)); > +} > + > static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) > { > return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc; > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h > index 7d3bfa7..5e09eb9 100644 > --- a/arch/arm64/include/asm/kvm_host.h > +++ b/arch/arm64/include/asm/kvm_host.h > @@ -210,6 +210,9 @@ struct kvm_vcpu_arch { > /* Guest debug state */ > u64 debug_flags; > > + /* 1 if the guest VFP state is loaded into the hardware */ > + u64 guest_vfp_loaded; May it be just u8/bool? 
Yury > + > /* > * We maintain more than a single set of debug registers to support > * debugging the guest from the host and to maintain separate host and > diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c > index 612021d..9946732 100644 > --- a/arch/arm64/kernel/asm-offsets.c > +++ b/arch/arm64/kernel/asm-offsets.c > @@ -133,6 +133,7 @@ int main(void) > DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs)); > DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs)); > DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs)); > + DEFINE(VCPU_GUEST_VFP_LOADED, offsetof(struct kvm_vcpu, arch.guest_vfp_loaded)); > DEFINE(VCPU_FPEXC32_EL2, offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2])); > DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); > DEFINE(HOST_CONTEXT_VCPU, offsetof(struct kvm_cpu_context, __hyp_running_vcpu)); > diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S > index 76cd48f..b3e7191 100644 > --- a/arch/arm64/kvm/hyp/entry.S > +++ b/arch/arm64/kvm/hyp/entry.S > @@ -185,6 +185,9 @@ alternative_endif > add x0, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS) > bl __fpsimd_restore_state > > + mov x0, #1 > + str x0, [x3, #VCPU_GUEST_VFP_LOADED] > + > // Skip restoring fpexc32 for AArch64 guests > mrs x1, hcr_el2 > tbnz x1, #HCR_RW_SHIFT, 1f > diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c > index 7703d63..ef05c59 100644 > --- a/arch/arm64/kvm/hyp/switch.c > +++ b/arch/arm64/kvm/hyp/switch.c > @@ -23,43 +23,31 @@ > #include <asm/kvm_hyp.h> > #include <asm/fpsimd.h> > > -static bool __hyp_text __fpsimd_enabled_nvhe(void) > -{ > - return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP); > -} > - > -static bool __hyp_text __fpsimd_enabled_vhe(void) > -{ > - return !!(read_sysreg(cpacr_el1) & CPACR_EL1_FPEN); > -} > - > -static hyp_alternate_select(__fpsimd_is_enabled, > - __fpsimd_enabled_nvhe, __fpsimd_enabled_vhe, > - ARM64_HAS_VIRT_HOST_EXTN); > - > -bool 
__hyp_text __fpsimd_enabled(void) > -{ > - return __fpsimd_is_enabled()(); > -} > - > -static void __hyp_text __activate_traps_vhe(void) > +static void __hyp_text __activate_traps_vhe(struct kvm_vcpu *vcpu) > { > u64 val; > > val = read_sysreg(cpacr_el1); > val |= CPACR_EL1_TTA; > - val &= ~CPACR_EL1_FPEN; > + if (vcpu->arch.guest_vfp_loaded) > + val |= CPACR_EL1_FPEN; > + else > + val &= ~CPACR_EL1_FPEN; > write_sysreg(val, cpacr_el1); > > write_sysreg(__kvm_hyp_vector, vbar_el1); > } > > -static void __hyp_text __activate_traps_nvhe(void) > +static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu) > { > u64 val; > > val = CPTR_EL2_DEFAULT; > - val |= CPTR_EL2_TTA | CPTR_EL2_TFP; > + val |= CPTR_EL2_TTA; > + if (vcpu->arch.guest_vfp_loaded) > + val &= ~CPTR_EL2_TFP; > + else > + val |= CPTR_EL2_TFP; > write_sysreg(val, cptr_el2); > } > > @@ -81,7 +69,8 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) > * it will cause an exception. > */ > val = vcpu->arch.hcr_el2; > - if (!(val & HCR_RW) && system_supports_fpsimd()) { > + if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd() && > + !vcpu->arch.guest_vfp_loaded) { > write_sysreg(1 << 30, fpexc32_el2); > isb(); > } > @@ -97,7 +86,7 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) > write_sysreg(0, pmselr_el0); > write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); > write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); > - __activate_traps_arch()(); > + __activate_traps_arch()(vcpu); > } > > static void __hyp_text __deactivate_traps_vhe(void) > @@ -273,7 +262,6 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) > { > struct kvm_cpu_context *host_ctxt; > struct kvm_cpu_context *guest_ctxt; > - bool fp_enabled; > u64 exit_code; > > vcpu = kern_hyp_va(vcpu); > @@ -355,8 +343,6 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) > /* 0 falls through to be handled out of EL2 */ > } > > - fp_enabled = __fpsimd_enabled(); > - > __sysreg_save_guest_state(guest_ctxt); > 
__sysreg32_save_state(vcpu); > __timer_disable_traps(vcpu); > @@ -367,11 +353,6 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) > > __sysreg_restore_host_state(host_ctxt); > > - if (fp_enabled) { > - __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs); > - __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs); > - } > - > __debug_save_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt); > /* > * This must come after restoring the host sysregs, since a non-VHE > diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c > index b7438c8..c4a3714 100644 > --- a/arch/arm64/kvm/hyp/sysreg-sr.c > +++ b/arch/arm64/kvm/hyp/sysreg-sr.c > @@ -19,6 +19,7 @@ > #include <linux/kvm_host.h> > > #include <asm/kvm_asm.h> > +#include <asm/kvm_emulate.h> > #include <asm/kvm_hyp.h> > > /* Yes, this does nothing, on purpose */ > @@ -137,6 +138,11 @@ void __hyp_text __sysreg_restore_guest_state(struct kvm_cpu_context *ctxt) > __sysreg_restore_common_state(ctxt); > } > > +static void __hyp_text __fpsimd32_save_state(struct kvm_cpu_context *ctxt) > +{ > + ctxt->sys_regs[FPEXC32_EL2] = read_sysreg(fpexc32_el2); > +} > + > void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu) > { > u64 *spsr, *sysreg; > @@ -155,9 +161,6 @@ void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu) > sysreg[DACR32_EL2] = read_sysreg(dacr32_el2); > sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2); > > - if (__fpsimd_enabled()) > - sysreg[FPEXC32_EL2] = read_sysreg(fpexc32_el2); > - > if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) > sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2); > } > @@ -209,4 +212,16 @@ void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu) > */ > void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) > { > + struct kvm_cpu_context *host_ctxt = vcpu->arch.host_cpu_context; > + struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; > + > + /* Restore host FP/SIMD state */ > + if (vcpu->arch.guest_vfp_loaded) { > + if (vcpu_el1_is_32bit(vcpu)) > + 
kvm_call_hyp(__fpsimd32_save_state, > + kern_hyp_va(guest_ctxt)); > + __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs); > + __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs); > + vcpu->arch.guest_vfp_loaded = 0; > + } > } > -- > 2.9.0
Hi Yury, On Sat, Nov 25, 2017 at 10:52:21AM +0300, Yury Norov wrote: > > On Thu, Oct 12, 2017 at 12:41:12PM +0200, Christoffer Dall wrote: > > Avoid saving the guest VFP registers and restoring the host VFP > > registers on every exit from the VM. Only when we're about to run > > userspace or other threads in the kernel do we really have to switch the > > state back to the host state. > > > > We still initially configure the VFP registers to trap when entering the > > VM, but the difference is that we now leave the guest state in the > > hardware registers while running the VM. > > > > Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org> > > --- > > arch/arm64/include/asm/kvm_emulate.h | 5 ++++ > > arch/arm64/include/asm/kvm_host.h | 3 +++ > > arch/arm64/kernel/asm-offsets.c | 1 + > > arch/arm64/kvm/hyp/entry.S | 3 +++ > > arch/arm64/kvm/hyp/switch.c | 47 +++++++++++------------------------- > > arch/arm64/kvm/hyp/sysreg-sr.c | 21 +++++++++++++--- > > 6 files changed, 44 insertions(+), 36 deletions(-) > > > > diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h > > index 1fbfe96..630dd60 100644 > > --- a/arch/arm64/include/asm/kvm_emulate.h > > +++ b/arch/arm64/include/asm/kvm_emulate.h > > @@ -56,6 +56,11 @@ static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu) > > return (unsigned long *)&vcpu->arch.hcr_el2; > > } > > > > +static inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) > > +{ > > + return (!(vcpu->arch.hcr_el2 & HCR_RW)); > > +} > > + > > static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) > > { > > return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc; > > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h > > index 7d3bfa7..5e09eb9 100644 > > --- a/arch/arm64/include/asm/kvm_host.h > > +++ b/arch/arm64/include/asm/kvm_host.h > > @@ -210,6 +210,9 @@ struct kvm_vcpu_arch { > > /* Guest debug state */ > > u64 debug_flags; > > > > + /* 1 if the guest VFP 
state is loaded into the hardware */ > > + u64 guest_vfp_loaded; > > May it be just u8/bool? > This particular field is accessed from assembly code, and I'm not sure what guarantees the compiler makes in terms of how a u8/bool is allocated with respect to padding and alignment, and I think that's why we've been using u64 fields in the past. I don't actually remember the details, but I'd rather err on the side of caution than try to save a few bytes. However, if someone can convince me there's a completely safe way to do this, then I'm happy to change it. Thanks, -Christoffer
Hi Drew, On Wed, Nov 15, 2017 at 05:04:40PM +0100, Andrew Jones wrote: > On Thu, Oct 12, 2017 at 12:41:12PM +0200, Christoffer Dall wrote: > > Avoid saving the guest VFP registers and restoring the host VFP > > registers on every exit from the VM. Only when we're about to run > > userspace or other threads in the kernel do we really have to switch the > > state back to the host state. > > Rik van Riel's recently post patch "[PATCH v2 0/2] x86,kvm: move qemu/guest > FPU switching out to kvm_arch_vcpu_ioctl_run" indicates that for x86 they > only need to swap guest and userspace VFP registers before exiting VCPU_RUN > to userspace, not for running other threads. I imagine that's the same for > ARM as well. > > If so, then I think this hunk > > > @@ -209,4 +212,16 @@ void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu) > > */ > > void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) > > { > > + struct kvm_cpu_context *host_ctxt = vcpu->arch.host_cpu_context; > > + struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; > > + > > + /* Restore host FP/SIMD state */ > > + if (vcpu->arch.guest_vfp_loaded) { > > + if (vcpu_el1_is_32bit(vcpu)) > > + kvm_call_hyp(__fpsimd32_save_state, > > + kern_hyp_va(guest_ctxt)); > > + __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs); > > + __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs); > > + vcpu->arch.guest_vfp_loaded = 0; > > + } > > } > > could be moved to the return of kvm_arch_vcpu_ioctl_run(). > That sounds cool. I'll keep this patch as it is now, and look at Rik's patches and post a follow up later, does that sound ok? Thanks, -Christoffer
On Tue, Nov 07, 2017 at 02:15:50PM +0100, Andrew Jones wrote: > On Thu, Oct 12, 2017 at 12:41:12PM +0200, Christoffer Dall wrote: > > Avoid saving the guest VFP registers and restoring the host VFP > > registers on every exit from the VM. Only when we're about to run > > userspace or other threads in the kernel do we really have to switch the > > state back to the host state. > > > > We still initially configure the VFP registers to trap when entering the > > VM, but the difference is that we now leave the guest state in the > > hardware registers while running the VM. > > running the host. > I actually did mean the VM, but I should clarify to mean as long as we're running the VCPU on this physical CPU, even if we trap to the host. > > > > Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org> > > --- > > arch/arm64/include/asm/kvm_emulate.h | 5 ++++ > > arch/arm64/include/asm/kvm_host.h | 3 +++ > > arch/arm64/kernel/asm-offsets.c | 1 + > > arch/arm64/kvm/hyp/entry.S | 3 +++ > > arch/arm64/kvm/hyp/switch.c | 47 +++++++++++------------------------- > > arch/arm64/kvm/hyp/sysreg-sr.c | 21 +++++++++++++--- > > 6 files changed, 44 insertions(+), 36 deletions(-) > > > > diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h > > index 1fbfe96..630dd60 100644 > > --- a/arch/arm64/include/asm/kvm_emulate.h > > +++ b/arch/arm64/include/asm/kvm_emulate.h > > @@ -56,6 +56,11 @@ static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu) > > return (unsigned long *)&vcpu->arch.hcr_el2; > > } > > > > +static inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) > > +{ > > + return (!(vcpu->arch.hcr_el2 & HCR_RW)); > > nit: no need for the outer (). 
> > > +} > > + > > static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) > > { > > return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc; > > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h > > index 7d3bfa7..5e09eb9 100644 > > --- a/arch/arm64/include/asm/kvm_host.h > > +++ b/arch/arm64/include/asm/kvm_host.h > > @@ -210,6 +210,9 @@ struct kvm_vcpu_arch { > > /* Guest debug state */ > > u64 debug_flags; > > > > + /* 1 if the guest VFP state is loaded into the hardware */ > > + u64 guest_vfp_loaded; > > + > > Is there a chance we'll want other flags like this? Should we just make > this a lazy state flags field with the (currently only) flag VFP? If not, > then a bool would be nicer, although I see below the u64 was chosen in > order for the 'str' to be used. > See my reply to Yury. In terms of merging flags I thought about merging it with the debug flags, but I didn't think it would look very nice, and I couldn't come up with a name for the variable that would describe the logic. Honestly, I didn't care about the few extra bytes per CPU, and much prefer clarity, but it may make sense to combine this with for example the sysreg and timer state later, I'll have a look. 
> > /* > > * We maintain more than a single set of debug registers to support > > * debugging the guest from the host and to maintain separate host and > > diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c > > index 612021d..9946732 100644 > > --- a/arch/arm64/kernel/asm-offsets.c > > +++ b/arch/arm64/kernel/asm-offsets.c > > @@ -133,6 +133,7 @@ int main(void) > > DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs)); > > DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs)); > > DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs)); > > + DEFINE(VCPU_GUEST_VFP_LOADED, offsetof(struct kvm_vcpu, arch.guest_vfp_loaded)); > > DEFINE(VCPU_FPEXC32_EL2, offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2])); > > DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); > > DEFINE(HOST_CONTEXT_VCPU, offsetof(struct kvm_cpu_context, __hyp_running_vcpu)); > > diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S > > index 76cd48f..b3e7191 100644 > > --- a/arch/arm64/kvm/hyp/entry.S > > +++ b/arch/arm64/kvm/hyp/entry.S > > @@ -185,6 +185,9 @@ alternative_endif > > add x0, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS) > > bl __fpsimd_restore_state > > > > + mov x0, #1 > > + str x0, [x3, #VCPU_GUEST_VFP_LOADED] > > + > > // Skip restoring fpexc32 for AArch64 guests > > mrs x1, hcr_el2 > > tbnz x1, #HCR_RW_SHIFT, 1f > > diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c > > index 7703d63..ef05c59 100644 > > --- a/arch/arm64/kvm/hyp/switch.c > > +++ b/arch/arm64/kvm/hyp/switch.c > > @@ -23,43 +23,31 @@ > > #include <asm/kvm_hyp.h> > > #include <asm/fpsimd.h> > > > > -static bool __hyp_text __fpsimd_enabled_nvhe(void) > > -{ > > - return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP); > > -} > > - > > -static bool __hyp_text __fpsimd_enabled_vhe(void) > > -{ > > - return !!(read_sysreg(cpacr_el1) & CPACR_EL1_FPEN); > > -} > > - > > -static hyp_alternate_select(__fpsimd_is_enabled, > > - 
__fpsimd_enabled_nvhe, __fpsimd_enabled_vhe, > > - ARM64_HAS_VIRT_HOST_EXTN); > > - > > -bool __hyp_text __fpsimd_enabled(void) > > -{ > > - return __fpsimd_is_enabled()(); > > -} > > - > > -static void __hyp_text __activate_traps_vhe(void) > > +static void __hyp_text __activate_traps_vhe(struct kvm_vcpu *vcpu) > > { > > u64 val; > > > > val = read_sysreg(cpacr_el1); > > val |= CPACR_EL1_TTA; > > - val &= ~CPACR_EL1_FPEN; > > + if (vcpu->arch.guest_vfp_loaded) > > + val |= CPACR_EL1_FPEN; > > + else > > + val &= ~CPACR_EL1_FPEN; > > write_sysreg(val, cpacr_el1); > > > > write_sysreg(__kvm_hyp_vector, vbar_el1); > > } > > > > -static void __hyp_text __activate_traps_nvhe(void) > > +static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu) > > { > > u64 val; > > > > val = CPTR_EL2_DEFAULT; > > - val |= CPTR_EL2_TTA | CPTR_EL2_TFP; > > + val |= CPTR_EL2_TTA; > > + if (vcpu->arch.guest_vfp_loaded) > > + val &= ~CPTR_EL2_TFP; > > + else > > + val |= CPTR_EL2_TFP; > > write_sysreg(val, cptr_el2); > > } > > > > @@ -81,7 +69,8 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) > > * it will cause an exception. 
> > */ > > val = vcpu->arch.hcr_el2; > > - if (!(val & HCR_RW) && system_supports_fpsimd()) { > > + if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd() && > > + !vcpu->arch.guest_vfp_loaded) { > > write_sysreg(1 << 30, fpexc32_el2); > > isb(); > > } > > @@ -97,7 +86,7 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) > > write_sysreg(0, pmselr_el0); > > write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); > > write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); > > - __activate_traps_arch()(); > > + __activate_traps_arch()(vcpu); > > } > > > > static void __hyp_text __deactivate_traps_vhe(void) > > @@ -273,7 +262,6 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) > > { > > struct kvm_cpu_context *host_ctxt; > > struct kvm_cpu_context *guest_ctxt; > > - bool fp_enabled; > > u64 exit_code; > > > > vcpu = kern_hyp_va(vcpu); > > @@ -355,8 +343,6 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) > > /* 0 falls through to be handled out of EL2 */ > > } > > > > - fp_enabled = __fpsimd_enabled(); > > - > > __sysreg_save_guest_state(guest_ctxt); > > __sysreg32_save_state(vcpu); > > __timer_disable_traps(vcpu); > > @@ -367,11 +353,6 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) > > > > __sysreg_restore_host_state(host_ctxt); > > > > - if (fp_enabled) { > > - __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs); > > - __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs); > > - } > > - > > __debug_save_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt); > > /* > > * This must come after restoring the host sysregs, since a non-VHE > > diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c > > index b7438c8..c4a3714 100644 > > --- a/arch/arm64/kvm/hyp/sysreg-sr.c > > +++ b/arch/arm64/kvm/hyp/sysreg-sr.c > > @@ -19,6 +19,7 @@ > > #include <linux/kvm_host.h> > > > > #include <asm/kvm_asm.h> > > +#include <asm/kvm_emulate.h> > > #include <asm/kvm_hyp.h> > > > > /* Yes, this does nothing, on purpose */ > > @@ -137,6 
+138,11 @@ void __hyp_text __sysreg_restore_guest_state(struct kvm_cpu_context *ctxt) > > __sysreg_restore_common_state(ctxt); > > } > > > > +static void __hyp_text __fpsimd32_save_state(struct kvm_cpu_context *ctxt) > > +{ > > + ctxt->sys_regs[FPEXC32_EL2] = read_sysreg(fpexc32_el2); > > +} > > + > > void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu) > > { > > u64 *spsr, *sysreg; > > @@ -155,9 +161,6 @@ void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu) > > sysreg[DACR32_EL2] = read_sysreg(dacr32_el2); > > sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2); > > > > - if (__fpsimd_enabled()) > > - sysreg[FPEXC32_EL2] = read_sysreg(fpexc32_el2); > > - > > if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) > > sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2); > > } > > @@ -209,4 +212,16 @@ void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu) > > */ > > void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) > > { > > + struct kvm_cpu_context *host_ctxt = vcpu->arch.host_cpu_context; > > + struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; > > + > > + /* Restore host FP/SIMD state */ > > + if (vcpu->arch.guest_vfp_loaded) { > > + if (vcpu_el1_is_32bit(vcpu)) > > + kvm_call_hyp(__fpsimd32_save_state, > > + kern_hyp_va(guest_ctxt)); > > nit: might be nice to use {} since we need two lines. > sure. > > + __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs); > > + __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs); > > + vcpu->arch.guest_vfp_loaded = 0; > > + } > > } > > -- > > 2.9.0 > > > > Otherwise, > > Reviewed-by: Andrew Jones <drjones@redhat.com> Thanks, -Christoffer
On Sun, Nov 26, 2017 at 05:17:16PM +0100, Christoffer Dall wrote: > Hi Yury, > > On Sat, Nov 25, 2017 at 10:52:21AM +0300, Yury Norov wrote: > > > > On Thu, Oct 12, 2017 at 12:41:12PM +0200, Christoffer Dall wrote: > > > Avoid saving the guest VFP registers and restoring the host VFP > > > registers on every exit from the VM. Only when we're about to run > > > userspace or other threads in the kernel do we really have to switch the > > > state back to the host state. > > > > > > We still initially configure the VFP registers to trap when entering the > > > VM, but the difference is that we now leave the guest state in the > > > hardware registers while running the VM. > > > > > > Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org> > > > --- > > > arch/arm64/include/asm/kvm_emulate.h | 5 ++++ > > > arch/arm64/include/asm/kvm_host.h | 3 +++ > > > arch/arm64/kernel/asm-offsets.c | 1 + > > > arch/arm64/kvm/hyp/entry.S | 3 +++ > > > arch/arm64/kvm/hyp/switch.c | 47 +++++++++++------------------------- > > > arch/arm64/kvm/hyp/sysreg-sr.c | 21 +++++++++++++--- > > > 6 files changed, 44 insertions(+), 36 deletions(-) > > > > > > diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h > > > index 1fbfe96..630dd60 100644 > > > --- a/arch/arm64/include/asm/kvm_emulate.h > > > +++ b/arch/arm64/include/asm/kvm_emulate.h > > > @@ -56,6 +56,11 @@ static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu) > > > return (unsigned long *)&vcpu->arch.hcr_el2; > > > } > > > > > > +static inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) > > > +{ > > > + return (!(vcpu->arch.hcr_el2 & HCR_RW)); > > > +} > > > + > > > static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) > > > { > > > return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc; > > > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h > > > index 7d3bfa7..5e09eb9 100644 > > > --- a/arch/arm64/include/asm/kvm_host.h > > > +++ 
b/arch/arm64/include/asm/kvm_host.h > > > @@ -210,6 +210,9 @@ struct kvm_vcpu_arch { > > > /* Guest debug state */ > > > u64 debug_flags; > > > > > > + /* 1 if the guest VFP state is loaded into the hardware */ > > > + u64 guest_vfp_loaded; > > > > May it be just u8/bool? > > > This particular field is accessed from assembly code, and I'm not sure > what guarantees the compiler makes in terms of how a u8/bool is > allocated with respect to padding and alignment, and I think that's why > we've been using u64 fields in the past. > > I don't actually remember the details, but I'd rather err on the side of > caution than trying to save a few bytes. However, if someone can > convince me there's a completely safe way to do this, then I'm happy to > change it. 'strb w0, [x3, #VCPU_GUEST_VFP_LOADED]' would work. See C6.6.181 STRB (register) in ARM64 ARM. The only thing I would recommend is to reorder fields in kvm_vcpu_arch to avoid unneeded holes in the structure. It already spends 10 bytes for nothing in 3 holes. Yury
On Sun, Nov 26, 2017 at 09:58:52PM +0300, Yury Norov wrote: > On Sun, Nov 26, 2017 at 05:17:16PM +0100, Christoffer Dall wrote: > > Hi Yury, > > > > On Sat, Nov 25, 2017 at 10:52:21AM +0300, Yury Norov wrote: > > > > > > On Thu, Oct 12, 2017 at 12:41:12PM +0200, Christoffer Dall wrote: > > > > Avoid saving the guest VFP registers and restoring the host VFP > > > > registers on every exit from the VM. Only when we're about to run > > > > userspace or other threads in the kernel do we really have to switch the > > > > state back to the host state. > > > > > > > > We still initially configure the VFP registers to trap when entering the > > > > VM, but the difference is that we now leave the guest state in the > > > > hardware registers while running the VM. > > > > > > > > Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org> > > > > --- > > > > arch/arm64/include/asm/kvm_emulate.h | 5 ++++ > > > > arch/arm64/include/asm/kvm_host.h | 3 +++ > > > > arch/arm64/kernel/asm-offsets.c | 1 + > > > > arch/arm64/kvm/hyp/entry.S | 3 +++ > > > > arch/arm64/kvm/hyp/switch.c | 47 +++++++++++------------------------- > > > > arch/arm64/kvm/hyp/sysreg-sr.c | 21 +++++++++++++--- > > > > 6 files changed, 44 insertions(+), 36 deletions(-) > > > > > > > > diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h > > > > index 1fbfe96..630dd60 100644 > > > > --- a/arch/arm64/include/asm/kvm_emulate.h > > > > +++ b/arch/arm64/include/asm/kvm_emulate.h > > > > @@ -56,6 +56,11 @@ static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu) > > > > return (unsigned long *)&vcpu->arch.hcr_el2; > > > > } > > > > > > > > +static inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) > > > > +{ > > > > + return (!(vcpu->arch.hcr_el2 & HCR_RW)); > > > > +} > > > > + > > > > static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) > > > > { > > > > return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc; > > > > diff --git 
a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h > > > > index 7d3bfa7..5e09eb9 100644 > > > > --- a/arch/arm64/include/asm/kvm_host.h > > > > +++ b/arch/arm64/include/asm/kvm_host.h > > > > @@ -210,6 +210,9 @@ struct kvm_vcpu_arch { > > > > /* Guest debug state */ > > > > u64 debug_flags; > > > > > > > > + /* 1 if the guest VFP state is loaded into the hardware */ > > > > + u64 guest_vfp_loaded; > > > > > > May it be just u8/bool? > > > > > This particular field is accessed from assembly code, and I'm not sure > > what guarantees the compiler makes in terms of how a u8/bool is > > allocated with respect to padding and alignment, and I think that's why > > we've been using u64 fields in the past. > > > > I don't actually remember the details, but I'd rather err on the side of > > caution than trying to save a few bytes. However, if someone can > > convince me there's a completely safe way to do this, then I'm happy to > > change it. > > 'strb w0, [x3, #VCPU_GUEST_VFP_LOADED]' would work. See > C6.6.181 STRB (register) in ARM64 ARM. I'm well aware of this instruction. Thank you though. The concern was that we haven't done this in the past. I think that was because the size of a _Bool is not well-defined and we really didn't care about a handful of bytes when talking about vcpu structures. Really. A u8 should work though, but probably this will all be moot if I combine the flags into a single field. > > The only thing I would recommend is to reorder fields in kvm_vcpu_arch > to avoid unneeded holes in the structure. It already spends 10 bytes for > nothing in 3 holes. > Patches are welcome. -Christoffer
On Sun, Nov 26, 2017 at 05:17:56PM +0100, Christoffer Dall wrote: > Hi Drew, > > On Wed, Nov 15, 2017 at 05:04:40PM +0100, Andrew Jones wrote: > > On Thu, Oct 12, 2017 at 12:41:12PM +0200, Christoffer Dall wrote: > > > Avoid saving the guest VFP registers and restoring the host VFP > > > registers on every exit from the VM. Only when we're about to run > > > userspace or other threads in the kernel do we really have to switch the > > > state back to the host state. > > > > Rik van Riel's recently posted patch "[PATCH v2 0/2] x86,kvm: move qemu/guest > > FPU switching out to kvm_arch_vcpu_ioctl_run" indicates that for x86 they > > only need to swap guest and userspace VFP registers before exiting VCPU_RUN > > to userspace, not for running other threads. I imagine that's the same for > > ARM as well. > > > > If so, then I think this hunk > > > > > @@ -209,4 +212,16 @@ void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu) > > > */ > > > void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) > > > { > > > + struct kvm_cpu_context *host_ctxt = vcpu->arch.host_cpu_context; > > > + struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; > > > + > > > + /* Restore host FP/SIMD state */ > > > + if (vcpu->arch.guest_vfp_loaded) { > > > + if (vcpu_el1_is_32bit(vcpu)) > > > + kvm_call_hyp(__fpsimd32_save_state, > > > + kern_hyp_va(guest_ctxt)); > > > + __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs); > > > + __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs); > > > + vcpu->arch.guest_vfp_loaded = 0; > > > + } > > > } > > > > could be moved to the return of kvm_arch_vcpu_ioctl_run(). > > > That sounds cool. > > I'll keep this patch as it is now, and look at Rik's patches and post a > follow up later, does that sound ok? Works for me. I'm guessing there's also going to be some overlap with Dave Martin's SVE work. So whichever series goes second can probably be the one to put the most consideration into it. Thanks, drew
On 26/11/17 18:58, Yury Norov wrote: > On Sun, Nov 26, 2017 at 05:17:16PM +0100, Christoffer Dall wrote: >> Hi Yury, >> >> On Sat, Nov 25, 2017 at 10:52:21AM +0300, Yury Norov wrote: >>> >>> On Thu, Oct 12, 2017 at 12:41:12PM +0200, Christoffer Dall wrote: >>>> Avoid saving the guest VFP registers and restoring the host VFP >>>> registers on every exit from the VM. Only when we're about to run >>>> userspace or other threads in the kernel do we really have to switch the >>>> state back to the host state. >>>> >>>> We still initially configure the VFP registers to trap when entering the >>>> VM, but the difference is that we now leave the guest state in the >>>> hardware registers while running the VM. >>>> >>>> Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org> >>>> --- >>>> arch/arm64/include/asm/kvm_emulate.h | 5 ++++ >>>> arch/arm64/include/asm/kvm_host.h | 3 +++ >>>> arch/arm64/kernel/asm-offsets.c | 1 + >>>> arch/arm64/kvm/hyp/entry.S | 3 +++ >>>> arch/arm64/kvm/hyp/switch.c | 47 +++++++++++------------------------- >>>> arch/arm64/kvm/hyp/sysreg-sr.c | 21 +++++++++++++--- >>>> 6 files changed, 44 insertions(+), 36 deletions(-) >>>> >>>> diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h >>>> index 1fbfe96..630dd60 100644 >>>> --- a/arch/arm64/include/asm/kvm_emulate.h >>>> +++ b/arch/arm64/include/asm/kvm_emulate.h >>>> @@ -56,6 +56,11 @@ static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu) >>>> return (unsigned long *)&vcpu->arch.hcr_el2; >>>> } >>>> >>>> +static inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) >>>> +{ >>>> + return (!(vcpu->arch.hcr_el2 & HCR_RW)); >>>> +} >>>> + >>>> static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) >>>> { >>>> return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc; >>>> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h >>>> index 7d3bfa7..5e09eb9 100644 >>>> --- a/arch/arm64/include/asm/kvm_host.h >>>> +++ 
b/arch/arm64/include/asm/kvm_host.h >>>> @@ -210,6 +210,9 @@ struct kvm_vcpu_arch { >>>> /* Guest debug state */ >>>> u64 debug_flags; >>>> >>>> + /* 1 if the guest VFP state is loaded into the hardware */ >>>> + u64 guest_vfp_loaded; >>> >>> May it be just u8/bool? >>> >> This particular field is accessed from assembly code, and I'm not sure >> what guarantees the compiler makes in terms of how a u8/bool is >> allocated with respect to padding and alignment, and I think that's why >> we've been using u64 fields in the past. >> >> I don't actually remember the details, but I'd rather err on the side of >> caution than trying to save a few bytes. However, if someone can >> convince me there's a completely safe way to do this, then I'm happy to >> change it. > > 'strb w0, [x3, #VCPU_GUEST_VFP_LOADED]' would work. See > C6.6.181 STRB (register) in ARM64 ARM. > > The only thing I would recommend is to reorder fields in kvm_vcpu_arch > to avoid unneeded holes in the structure. It already spends 10 bytes for > nothing in 3 holes. Terrifying. How many vcpus are you going to run before this becomes a real bottleneck? KVM on a 6502? ;-) Now, when it comes to reordering fields, please keep in mind that the order of the fields in the structure does matter. We want the hottest fields grouped together so that they are fetched in the same cache line. Thanks, M.
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 1fbfe96..630dd60 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -56,6 +56,11 @@ static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu) return (unsigned long *)&vcpu->arch.hcr_el2; } +static inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) +{ + return (!(vcpu->arch.hcr_el2 & HCR_RW)); +} + static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) { return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 7d3bfa7..5e09eb9 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -210,6 +210,9 @@ struct kvm_vcpu_arch { /* Guest debug state */ u64 debug_flags; + /* 1 if the guest VFP state is loaded into the hardware */ + u64 guest_vfp_loaded; + /* * We maintain more than a single set of debug registers to support * debugging the guest from the host and to maintain separate host and diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 612021d..9946732 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -133,6 +133,7 @@ int main(void) DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs)); DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs)); DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs)); + DEFINE(VCPU_GUEST_VFP_LOADED, offsetof(struct kvm_vcpu, arch.guest_vfp_loaded)); DEFINE(VCPU_FPEXC32_EL2, offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2])); DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); DEFINE(HOST_CONTEXT_VCPU, offsetof(struct kvm_cpu_context, __hyp_running_vcpu)); diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index 76cd48f..b3e7191 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -185,6 +185,9 @@ alternative_endif 
add x0, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS) bl __fpsimd_restore_state + mov x0, #1 + str x0, [x3, #VCPU_GUEST_VFP_LOADED] + // Skip restoring fpexc32 for AArch64 guests mrs x1, hcr_el2 tbnz x1, #HCR_RW_SHIFT, 1f diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 7703d63..ef05c59 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -23,43 +23,31 @@ #include <asm/kvm_hyp.h> #include <asm/fpsimd.h> -static bool __hyp_text __fpsimd_enabled_nvhe(void) -{ - return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP); -} - -static bool __hyp_text __fpsimd_enabled_vhe(void) -{ - return !!(read_sysreg(cpacr_el1) & CPACR_EL1_FPEN); -} - -static hyp_alternate_select(__fpsimd_is_enabled, - __fpsimd_enabled_nvhe, __fpsimd_enabled_vhe, - ARM64_HAS_VIRT_HOST_EXTN); - -bool __hyp_text __fpsimd_enabled(void) -{ - return __fpsimd_is_enabled()(); -} - -static void __hyp_text __activate_traps_vhe(void) +static void __hyp_text __activate_traps_vhe(struct kvm_vcpu *vcpu) { u64 val; val = read_sysreg(cpacr_el1); val |= CPACR_EL1_TTA; - val &= ~CPACR_EL1_FPEN; + if (vcpu->arch.guest_vfp_loaded) + val |= CPACR_EL1_FPEN; + else + val &= ~CPACR_EL1_FPEN; write_sysreg(val, cpacr_el1); write_sysreg(__kvm_hyp_vector, vbar_el1); } -static void __hyp_text __activate_traps_nvhe(void) +static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu) { u64 val; val = CPTR_EL2_DEFAULT; - val |= CPTR_EL2_TTA | CPTR_EL2_TFP; + val |= CPTR_EL2_TTA; + if (vcpu->arch.guest_vfp_loaded) + val &= ~CPTR_EL2_TFP; + else + val |= CPTR_EL2_TFP; write_sysreg(val, cptr_el2); } @@ -81,7 +69,8 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) * it will cause an exception. 
*/ val = vcpu->arch.hcr_el2; - if (!(val & HCR_RW) && system_supports_fpsimd()) { + if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd() && + !vcpu->arch.guest_vfp_loaded) { write_sysreg(1 << 30, fpexc32_el2); isb(); } @@ -97,7 +86,7 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) write_sysreg(0, pmselr_el0); write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); - __activate_traps_arch()(); + __activate_traps_arch()(vcpu); } static void __hyp_text __deactivate_traps_vhe(void) @@ -273,7 +262,6 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *host_ctxt; struct kvm_cpu_context *guest_ctxt; - bool fp_enabled; u64 exit_code; vcpu = kern_hyp_va(vcpu); @@ -355,8 +343,6 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) /* 0 falls through to be handled out of EL2 */ } - fp_enabled = __fpsimd_enabled(); - __sysreg_save_guest_state(guest_ctxt); __sysreg32_save_state(vcpu); __timer_disable_traps(vcpu); @@ -367,11 +353,6 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) __sysreg_restore_host_state(host_ctxt); - if (fp_enabled) { - __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs); - __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs); - } - __debug_save_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt); /* * This must come after restoring the host sysregs, since a non-VHE diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index b7438c8..c4a3714 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -19,6 +19,7 @@ #include <linux/kvm_host.h> #include <asm/kvm_asm.h> +#include <asm/kvm_emulate.h> #include <asm/kvm_hyp.h> /* Yes, this does nothing, on purpose */ @@ -137,6 +138,11 @@ void __hyp_text __sysreg_restore_guest_state(struct kvm_cpu_context *ctxt) __sysreg_restore_common_state(ctxt); } +static void __hyp_text __fpsimd32_save_state(struct kvm_cpu_context *ctxt) +{ + ctxt->sys_regs[FPEXC32_EL2] = 
read_sysreg(fpexc32_el2); +} + void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu) { u64 *spsr, *sysreg; @@ -155,9 +161,6 @@ void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu) sysreg[DACR32_EL2] = read_sysreg(dacr32_el2); sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2); - if (__fpsimd_enabled()) - sysreg[FPEXC32_EL2] = read_sysreg(fpexc32_el2); - if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2); } @@ -209,4 +212,16 @@ void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu) */ void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) { + struct kvm_cpu_context *host_ctxt = vcpu->arch.host_cpu_context; + struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; + + /* Restore host FP/SIMD state */ + if (vcpu->arch.guest_vfp_loaded) { + if (vcpu_el1_is_32bit(vcpu)) + kvm_call_hyp(__fpsimd32_save_state, + kern_hyp_va(guest_ctxt)); + __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs); + __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs); + vcpu->arch.guest_vfp_loaded = 0; + } }
Avoid saving the guest VFP registers and restoring the host VFP registers on every exit from the VM. Only when we're about to run userspace or other threads in the kernel do we really have to switch the state back to the host state. We still initially configure the VFP registers to trap when entering the VM, but the difference is that we now leave the guest state in the hardware registers while running the VM. Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org> --- arch/arm64/include/asm/kvm_emulate.h | 5 ++++ arch/arm64/include/asm/kvm_host.h | 3 +++ arch/arm64/kernel/asm-offsets.c | 1 + arch/arm64/kvm/hyp/entry.S | 3 +++ arch/arm64/kvm/hyp/switch.c | 47 +++++++++++------------------------- arch/arm64/kvm/hyp/sysreg-sr.c | 21 +++++++++++++--- 6 files changed, 44 insertions(+), 36 deletions(-)