Message ID | 20190621093843.220980-16-marc.zyngier@arm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM: arm64: ARMv8.3 Nested Virtualization support | expand |
On 06/21/2019 10:37 AM, Marc Zyngier wrote: > Extract the direct HW accessors for later reuse. > > Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> > --- > arch/arm64/kvm/sys_regs.c | 247 +++++++++++++++++++++----------------- > 1 file changed, 139 insertions(+), 108 deletions(-) > > diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c > index 2b8734f75a09..e181359adadf 100644 > --- a/arch/arm64/kvm/sys_regs.c > +++ b/arch/arm64/kvm/sys_regs.c > @@ -182,99 +182,161 @@ const struct el2_sysreg_map *find_el2_sysreg(const struct el2_sysreg_map *map, > return entry; > } > > +static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) > +{ > + /* > + * System registers listed in the switch are not saved on every > + * exit from the guest but are only saved on vcpu_put. > + * > + * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but > + * should never be listed below, because the guest cannot modify its > + * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's > + * thread when emulating cross-VCPU communication. > + */ > + switch (reg) { > + case CSSELR_EL1: *val = read_sysreg_s(SYS_CSSELR_EL1); break; > + case SCTLR_EL1: *val = read_sysreg_s(SYS_SCTLR_EL12); break; > + case ACTLR_EL1: *val = read_sysreg_s(SYS_ACTLR_EL1); break; > + case CPACR_EL1: *val = read_sysreg_s(SYS_CPACR_EL12); break; > + case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break; > + case TTBR1_EL1: *val = read_sysreg_s(SYS_TTBR1_EL12); break; > + case TCR_EL1: *val = read_sysreg_s(SYS_TCR_EL12); break; > + case ESR_EL1: *val = read_sysreg_s(SYS_ESR_EL12); break; > + case AFSR0_EL1: *val = read_sysreg_s(SYS_AFSR0_EL12); break; > + case AFSR1_EL1: *val = read_sysreg_s(SYS_AFSR1_EL12); break; > + case FAR_EL1: *val = read_sysreg_s(SYS_FAR_EL12); break; > + case MAIR_EL1: *val = read_sysreg_s(SYS_MAIR_EL12); break; > + case VBAR_EL1: *val = read_sysreg_s(SYS_VBAR_EL12); break; > + case CONTEXTIDR_EL1: *val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break; > + case TPIDR_EL0: *val = read_sysreg_s(SYS_TPIDR_EL0); break; > + case TPIDRRO_EL0: *val = read_sysreg_s(SYS_TPIDRRO_EL0); break; > + case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1); break; > + case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break; > + case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break; > + case PAR_EL1: *val = read_sysreg_s(SYS_PAR_EL1); break; > + case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break; > + case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break; > + case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break; > + default: return false; > + } > + > + return true; > +} > + > +static bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) > +{ > + /* > + * System registers listed in the switch are not restored on every > + * entry to the guest but are only restored on vcpu_load. > + * > + * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but > + * should never be listed below, because the the MPIDR should only be > + * set once, before running the VCPU, and never changed later. > + */ > + switch (reg) { > + case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); break; > + case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break; > + case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); break; > + case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break; > + case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break; > + case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break; > + case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break; > + case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break; > + case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break; > + case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break; > + case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); break; > + case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); break; > + case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); break; > + case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break; > + case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); break; > + case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); break; > + case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break; > + case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break; > + case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break; > + case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break; > + case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break; > + case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break; > + case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break; > + default: return false; > + } > + > + return true; > +} > + > u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) > { > - u64 val; > + u64 val = 0x8badf00d8badf00d; > > if (!vcpu->arch.sysregs_loaded_on_cpu) > - goto immediate_read; > + goto memory_read; > > if (unlikely(sysreg_is_el2(reg))) { > const struct el2_sysreg_map *el2_reg; > > if (!is_hyp_ctxt(vcpu)) > - goto immediate_read; > + goto memory_read; > > switch (reg) { > + case ELR_EL2: > + return read_sysreg_el1(SYS_ELR); Hmmm, This change feels a bit out of place. Also, patch 13 added ELR_EL2 and SP_EL2 to the switch cases for physical sysreg accesses. Now ELR_EL2 is moved out of the main switch cases and SP_EL2 is completely omitted. I'd say either patch 13 needs to be reworked or there is a separate patch that should be extracted from this patch to have an intermediate state, or the commit message on this patch should be more detailed. Cheers,
On 6/21/19 10:37 AM, Marc Zyngier wrote: > Extract the direct HW accessors for later reuse. > > Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> > --- > arch/arm64/kvm/sys_regs.c | 247 +++++++++++++++++++++----------------- > 1 file changed, 139 insertions(+), 108 deletions(-) > > diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c > index 2b8734f75a09..e181359adadf 100644 > --- a/arch/arm64/kvm/sys_regs.c > +++ b/arch/arm64/kvm/sys_regs.c > @@ -182,99 +182,161 @@ const struct el2_sysreg_map *find_el2_sysreg(const struct el2_sysreg_map *map, > return entry; > } > > +static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) > +{ > + /* > + * System registers listed in the switch are not saved on every > + * exit from the guest but are only saved on vcpu_put. > + * > + * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but > + * should never be listed below, because the guest cannot modify its > + * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's > + * thread when emulating cross-VCPU communication. > + */ > + switch (reg) { > + case CSSELR_EL1: *val = read_sysreg_s(SYS_CSSELR_EL1); break; > + case SCTLR_EL1: *val = read_sysreg_s(SYS_SCTLR_EL12); break; > + case ACTLR_EL1: *val = read_sysreg_s(SYS_ACTLR_EL1); break; > + case CPACR_EL1: *val = read_sysreg_s(SYS_CPACR_EL12); break; > + case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break; > + case TTBR1_EL1: *val = read_sysreg_s(SYS_TTBR1_EL12); break; > + case TCR_EL1: *val = read_sysreg_s(SYS_TCR_EL12); break; > + case ESR_EL1: *val = read_sysreg_s(SYS_ESR_EL12); break; > + case AFSR0_EL1: *val = read_sysreg_s(SYS_AFSR0_EL12); break; > + case AFSR1_EL1: *val = read_sysreg_s(SYS_AFSR1_EL12); break; > + case FAR_EL1: *val = read_sysreg_s(SYS_FAR_EL12); break; > + case MAIR_EL1: *val = read_sysreg_s(SYS_MAIR_EL12); break; > + case VBAR_EL1: *val = read_sysreg_s(SYS_VBAR_EL12); break; > + case CONTEXTIDR_EL1: *val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break; > + case TPIDR_EL0: *val = read_sysreg_s(SYS_TPIDR_EL0); break; > + case TPIDRRO_EL0: *val = read_sysreg_s(SYS_TPIDRRO_EL0); break; > + case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1); break; > + case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break; > + case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break; > + case PAR_EL1: *val = read_sysreg_s(SYS_PAR_EL1); break; > + case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break; > + case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break; > + case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break; > + default: return false; > + } > + > + return true; > +} > + > +static bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) > +{ > + /* > + * System registers listed in the switch are not restored on every > + * entry to the guest but are only restored on vcpu_load. > + * > + * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but > + * should never be listed below, because the the MPIDR should only be > + * set once, before running the VCPU, and never changed later. > + */ > + switch (reg) { > + case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); break; > + case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break; > + case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); break; > + case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break; > + case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break; > + case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break; > + case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break; > + case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break; > + case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break; > + case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break; > + case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); break; > + case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); break; > + case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); break; > + case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break; > + case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); break; > + case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); break; > + case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break; > + case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break; > + case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break; > + case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break; > + case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break; > + case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break; > + case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break; > + default: return false; > + } > + > + return true; > +} > + > u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) > { > - u64 val; > + u64 val = 0x8badf00d8badf00d; > > if (!vcpu->arch.sysregs_loaded_on_cpu) > - goto immediate_read; > + goto memory_read; > > if (unlikely(sysreg_is_el2(reg))) { > const struct el2_sysreg_map *el2_reg; > > if (!is_hyp_ctxt(vcpu)) > - goto immediate_read; > + goto memory_read; > > switch (reg) { > + case ELR_EL2: > + return read_sysreg_el1(SYS_ELR); > case SPSR_EL2: > val = read_sysreg_el1(SYS_SPSR); > return __fixup_spsr_el2_read(&vcpu->arch.ctxt, val); > } > > el2_reg = find_el2_sysreg(nested_sysreg_map, reg); > - if (el2_reg) { > - /* > - * If this register does not have an EL1 counterpart, > - * then read the stored EL2 version. > - */ > - if (el2_reg->mapping == __INVALID_SYSREG__) > - goto immediate_read; > - > - /* Get the current version of the EL1 counterpart. */ > - reg = el2_reg->mapping; > - } > - } else { > - /* EL1 register can't be on the CPU if the guest is in vEL2. */ > - if (unlikely(is_hyp_ctxt(vcpu))) > - goto immediate_read; > + BUG_ON(!el2_reg); > + > + /* > + * If this register does not have an EL1 counterpart, > + * then read the stored EL2 version. > + */ > + if (el2_reg->mapping == __INVALID_SYSREG__) > + goto memory_read; > + > + if (!vcpu_el2_e2h_is_set(vcpu) && > + el2_reg->translate) > + goto memory_read; Nit: the condition can be written on one line. This condition wasn't present in patch 13 which introduced EL2 register handling, and I'm struggling to understand what it does. As I understand the code, this condition basically translates into: - if the register is one of SCTLR_EL2, TTBR0_EL2, CPTR_EL2 or TCR_EL2, then read it from memory. - if the register is an EL2 register whose value is written unmodified to the corresponding EL1 register, then read the corresponding EL1 register and return that value. Looking at vcpu_write_sys_reg, the values for the EL2 registers are always saved in memory. The guest is a non-vhe guest, so writes to EL1 registers shouldn't be reflected in the corresponding EL2 register. I think it's safe to always return the value from memory. I tried testing this with the following patch: diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 1235a88ec575..27d39bb9564d 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -290,6 +290,9 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) el2_reg = find_el2_sysreg(nested_sysreg_map, reg); BUG_ON(!el2_reg); + if (!vcpu_el2_e2h_is_set(vcpu)) + goto memory_read; + /* * If this register does not have an EL1 counterpart, * then read the stored EL2 version. @@ -297,10 +300,6 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) if (el2_reg->mapping == __INVALID_SYSREG__) goto memory_read; - if (!vcpu_el2_e2h_is_set(vcpu) && - el2_reg->translate) - goto memory_read; - /* Get the current version of the EL1 counterpart. */ reg = el2_reg->mapping; WARN_ON(!__vcpu_read_sys_reg_from_cpu(reg, &val)); I know it's not conclusive, but I was able to boot a L2 guest under a L1 non-vhe hypervisor. > + > + /* Get the current version of the EL1 counterpart. */ > + reg = el2_reg->mapping; > + WARN_ON(!__vcpu_read_sys_reg_from_cpu(reg, &val)); > + return val; > } > > - /* > - * System registers listed in the switch are not saved on every > - * exit from the guest but are only saved on vcpu_put. > - * > - * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but > - * should never be listed below, because the guest cannot modify its > - * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's > - * thread when emulating cross-VCPU communication. > - */ > - switch (reg) { > - case CSSELR_EL1: return read_sysreg_s(SYS_CSSELR_EL1); > - case SCTLR_EL1: return read_sysreg_s(SYS_SCTLR_EL12); > - case ACTLR_EL1: return read_sysreg_s(SYS_ACTLR_EL1); > - case CPACR_EL1: return read_sysreg_s(SYS_CPACR_EL12); > - case TTBR0_EL1: return read_sysreg_s(SYS_TTBR0_EL12); > - case TTBR1_EL1: return read_sysreg_s(SYS_TTBR1_EL12); > - case TCR_EL1: return read_sysreg_s(SYS_TCR_EL12); > - case ESR_EL1: return read_sysreg_s(SYS_ESR_EL12); > - case AFSR0_EL1: return read_sysreg_s(SYS_AFSR0_EL12); > - case AFSR1_EL1: return read_sysreg_s(SYS_AFSR1_EL12); > - case FAR_EL1: return read_sysreg_s(SYS_FAR_EL12); > - case MAIR_EL1: return read_sysreg_s(SYS_MAIR_EL12); > - case VBAR_EL1: return read_sysreg_s(SYS_VBAR_EL12); > - case CONTEXTIDR_EL1: return read_sysreg_s(SYS_CONTEXTIDR_EL12); > - case TPIDR_EL0: return read_sysreg_s(SYS_TPIDR_EL0); > - case TPIDRRO_EL0: return read_sysreg_s(SYS_TPIDRRO_EL0); > - case TPIDR_EL1: return read_sysreg_s(SYS_TPIDR_EL1); > - case AMAIR_EL1: return read_sysreg_s(SYS_AMAIR_EL12); > - case CNTKCTL_EL1: return read_sysreg_s(SYS_CNTKCTL_EL12); > - case PAR_EL1: return read_sysreg_s(SYS_PAR_EL1); > - case DACR32_EL2: return read_sysreg_s(SYS_DACR32_EL2); > - case IFSR32_EL2: return read_sysreg_s(SYS_IFSR32_EL2); > - case DBGVCR32_EL2: return read_sysreg_s(SYS_DBGVCR32_EL2); > - case SP_EL2: return read_sysreg(sp_el1); > - case ELR_EL2: return read_sysreg_el1(SYS_ELR); > - } > + /* EL1 register can't be on the CPU if the guest is in vEL2. */ > + if (unlikely(is_hyp_ctxt(vcpu))) > + goto memory_read; > + > + if (__vcpu_read_sys_reg_from_cpu(reg, &val)) > + return val; > > -immediate_read: > +memory_read: > return __vcpu_sys_reg(vcpu, reg); > } > > void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) > { > if (!vcpu->arch.sysregs_loaded_on_cpu) > - goto immediate_write; > + goto memory_write; > > if (unlikely(sysreg_is_el2(reg))) { > const struct el2_sysreg_map *el2_reg; > > if (!is_hyp_ctxt(vcpu)) > - goto immediate_write; > + goto memory_write; > > - /* Store the EL2 version in the sysregs array. */ > + /* > + * Always store a copy of the write to memory to avoid having > + * to reverse-translate virtual EL2 system registers for a > + * non-VHE guest hypervisor. > + */ > __vcpu_sys_reg(vcpu, reg) = val; > > switch (reg) { > + case ELR_EL2: > + write_sysreg_el1(val, SYS_ELR); > + return; > case SPSR_EL2: > val = __fixup_spsr_el2_write(&vcpu->arch.ctxt, val); > write_sysreg_el1(val, SYS_SPSR); > @@ -282,61 +344,30 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) > } > > el2_reg = find_el2_sysreg(nested_sysreg_map, reg); > - if (el2_reg) { > - /* Does this register have an EL1 counterpart? */ > - if (el2_reg->mapping == __INVALID_SYSREG__) > - return; > + WARN(!el2_reg, "reg: %d\n", reg); > > - if (!vcpu_el2_e2h_is_set(vcpu) && > - el2_reg->translate) > - val = el2_reg->translate(val); > + /* Does this register have an EL1 counterpart? */ > + if (el2_reg->mapping == __INVALID_SYSREG__) > + goto memory_write; > > - /* Redirect this to the EL1 version of the register. */ > - reg = el2_reg->mapping; > - } > - } else { > - /* EL1 register can't be on the CPU if the guest is in vEL2. */ > - if (unlikely(is_hyp_ctxt(vcpu))) > - goto immediate_write; > - } > + if (!vcpu_el2_e2h_is_set(vcpu) && > + el2_reg->translate) > + val = el2_reg->translate(val); > > - /* > - * System registers listed in the switch are not restored on every > - * entry to the guest but are only restored on vcpu_load. > - * > - * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but > - * should never be listed below, because the the MPIDR should only be > - * set once, before running the VCPU, and never changed later. > - */ > - switch (reg) { > - case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); return; > - case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); return; > - case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); return; > - case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); return; > - case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); return; > - case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); return; > - case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); return; > - case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); return; > - case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); return; > - case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); return; > - case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); return; > - case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); return; > - case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); return; > - case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12); return; > - case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); return; > - case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); return; > - case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); return; > - case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); return; > - case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); return; > - case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); return; > - case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); return; > - case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); return; > - case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); return; > - case SP_EL2: write_sysreg(val, sp_el1); return; > - case ELR_EL2: write_sysreg_el1(val, SYS_ELR); return; > + /* Redirect this to the EL1 version of the register. */ > + reg = el2_reg->mapping; > + WARN_ON(!__vcpu_write_sys_reg_to_cpu(val, reg)); > + return; > } > > -immediate_write: > + /* EL1 register can't be on the CPU if the guest is in vEL2. */ > + if (unlikely(is_hyp_ctxt(vcpu))) > + goto memory_write; > + > + if (__vcpu_write_sys_reg_to_cpu(val, reg)) > + return; > + > +memory_write: > __vcpu_sys_reg(vcpu, reg) = val; > } >
On 24/06/2019 16:07, Julien Thierry wrote: > > > On 06/21/2019 10:37 AM, Marc Zyngier wrote: >> Extract the direct HW accessors for later reuse. >> >> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> >> --- >> arch/arm64/kvm/sys_regs.c | 247 +++++++++++++++++++++----------------- >> 1 file changed, 139 insertions(+), 108 deletions(-) >> >> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c >> index 2b8734f75a09..e181359adadf 100644 >> --- a/arch/arm64/kvm/sys_regs.c >> +++ b/arch/arm64/kvm/sys_regs.c >> @@ -182,99 +182,161 @@ const struct el2_sysreg_map *find_el2_sysreg(const struct el2_sysreg_map *map, >> return entry; >> } >> >> +static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) >> +{ >> + /* >> + * System registers listed in the switch are not saved on every >> + * exit from the guest but are only saved on vcpu_put. >> + * >> + * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but >> + * should never be listed below, because the guest cannot modify its >> + * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's >> + * thread when emulating cross-VCPU communication. >> + */ >> + switch (reg) { >> + case CSSELR_EL1: *val = read_sysreg_s(SYS_CSSELR_EL1); break; >> + case SCTLR_EL1: *val = read_sysreg_s(SYS_SCTLR_EL12); break; >> + case ACTLR_EL1: *val = read_sysreg_s(SYS_ACTLR_EL1); break; >> + case CPACR_EL1: *val = read_sysreg_s(SYS_CPACR_EL12); break; >> + case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break; >> + case TTBR1_EL1: *val = read_sysreg_s(SYS_TTBR1_EL12); break; >> + case TCR_EL1: *val = read_sysreg_s(SYS_TCR_EL12); break; >> + case ESR_EL1: *val = read_sysreg_s(SYS_ESR_EL12); break; >> + case AFSR0_EL1: *val = read_sysreg_s(SYS_AFSR0_EL12); break; >> + case AFSR1_EL1: *val = read_sysreg_s(SYS_AFSR1_EL12); break; >> + case FAR_EL1: *val = read_sysreg_s(SYS_FAR_EL12); break; >> + case MAIR_EL1: *val = read_sysreg_s(SYS_MAIR_EL12); break; >> + case VBAR_EL1: *val = read_sysreg_s(SYS_VBAR_EL12); break; >> + case CONTEXTIDR_EL1: *val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break; >> + case TPIDR_EL0: *val = read_sysreg_s(SYS_TPIDR_EL0); break; >> + case TPIDRRO_EL0: *val = read_sysreg_s(SYS_TPIDRRO_EL0); break; >> + case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1); break; >> + case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break; >> + case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break; >> + case PAR_EL1: *val = read_sysreg_s(SYS_PAR_EL1); break; >> + case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break; >> + case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break; >> + case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break; >> + default: return false; >> + } >> + >> + return true; >> +} >> + >> +static bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) >> +{ >> + /* >> + * System registers listed in the switch are not restored on every >> + * entry to the guest but are only restored on vcpu_load. >> + * >> + * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but >> + * should never be listed below, because the the MPIDR should only be >> + * set once, before running the VCPU, and never changed later. >> + */ >> + switch (reg) { >> + case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); break; >> + case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break; >> + case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); break; >> + case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break; >> + case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break; >> + case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break; >> + case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break; >> + case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break; >> + case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break; >> + case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break; >> + case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); break; >> + case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); break; >> + case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); break; >> + case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break; >> + case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); break; >> + case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); break; >> + case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break; >> + case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break; >> + case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break; >> + case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break; >> + case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break; >> + case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break; >> + case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break; >> + default: return false; >> + } >> + >> + return true; >> +} >> + >> u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) >> { >> - u64 val; >> + u64 val = 0x8badf00d8badf00d; >> >> if (!vcpu->arch.sysregs_loaded_on_cpu) >> - goto immediate_read; >> + goto memory_read; >> >> if (unlikely(sysreg_is_el2(reg))) { >> const struct el2_sysreg_map *el2_reg; >> >> if (!is_hyp_ctxt(vcpu)) >> - goto immediate_read; >> + goto memory_read; >> >> switch (reg) { >> + case ELR_EL2: >> + return read_sysreg_el1(SYS_ELR); > > Hmmm, This change feels a bit out of place. > > Also, patch 13 added ELR_EL2 and SP_EL2 to the switch cases for physical > sysreg accesses. Now ELR_EL2 is moved out of the main switch cases and > SP_EL2 is completely omitted. > > I'd say either patch 13 needs to be reworked or there is a separate > patch that should be extracted from this patch to have an intermediate > state, or the commit message on this patch should be more detailed. Yeah, I wanted to actually squash most of this patch into #13, and got sidetracked. Definitely room for improvement. I may even bring in the rework I mentioned against #13. Thanks, M.
On 27/06/2019 10:21, Alexandru Elisei wrote: > On 6/21/19 10:37 AM, Marc Zyngier wrote: >> Extract the direct HW accessors for later reuse. >> >> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> >> --- >> arch/arm64/kvm/sys_regs.c | 247 +++++++++++++++++++++----------------- >> 1 file changed, 139 insertions(+), 108 deletions(-) >> >> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c >> index 2b8734f75a09..e181359adadf 100644 >> --- a/arch/arm64/kvm/sys_regs.c >> +++ b/arch/arm64/kvm/sys_regs.c >> @@ -182,99 +182,161 @@ const struct el2_sysreg_map *find_el2_sysreg(const struct el2_sysreg_map *map, >> return entry; >> } >> >> +static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) >> +{ >> + /* >> + * System registers listed in the switch are not saved on every >> + * exit from the guest but are only saved on vcpu_put. >> + * >> + * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but >> + * should never be listed below, because the guest cannot modify its >> + * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's >> + * thread when emulating cross-VCPU communication. >> + */ >> + switch (reg) { >> + case CSSELR_EL1: *val = read_sysreg_s(SYS_CSSELR_EL1); break; >> + case SCTLR_EL1: *val = read_sysreg_s(SYS_SCTLR_EL12); break; >> + case ACTLR_EL1: *val = read_sysreg_s(SYS_ACTLR_EL1); break; >> + case CPACR_EL1: *val = read_sysreg_s(SYS_CPACR_EL12); break; >> + case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break; >> + case TTBR1_EL1: *val = read_sysreg_s(SYS_TTBR1_EL12); break; >> + case TCR_EL1: *val = read_sysreg_s(SYS_TCR_EL12); break; >> + case ESR_EL1: *val = read_sysreg_s(SYS_ESR_EL12); break; >> + case AFSR0_EL1: *val = read_sysreg_s(SYS_AFSR0_EL12); break; >> + case AFSR1_EL1: *val = read_sysreg_s(SYS_AFSR1_EL12); break; >> + case FAR_EL1: *val = read_sysreg_s(SYS_FAR_EL12); break; >> + case MAIR_EL1: *val = read_sysreg_s(SYS_MAIR_EL12); break; >> + case VBAR_EL1: *val = read_sysreg_s(SYS_VBAR_EL12); break; >> + case CONTEXTIDR_EL1: *val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break; >> + case TPIDR_EL0: *val = read_sysreg_s(SYS_TPIDR_EL0); break; >> + case TPIDRRO_EL0: *val = read_sysreg_s(SYS_TPIDRRO_EL0); break; >> + case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1); break; >> + case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break; >> + case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break; >> + case PAR_EL1: *val = read_sysreg_s(SYS_PAR_EL1); break; >> + case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break; >> + case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break; >> + case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break; >> + default: return false; >> + } >> + >> + return true; >> +} >> + >> +static bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) >> +{ >> + /* >> + * System registers listed in the switch are not restored on every >> + * entry to the guest but are only restored on vcpu_load. >> + * >> + * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but >> + * should never be listed below, because the the MPIDR should only be >> + * set once, before running the VCPU, and never changed later. >> + */ >> + switch (reg) { >> + case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); break; >> + case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break; >> + case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); break; >> + case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break; >> + case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break; >> + case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break; >> + case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break; >> + case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break; >> + case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break; >> + case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break; >> + case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); break; >> + case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); break; >> + case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); break; >> + case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break; >> + case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); break; >> + case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); break; >> + case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break; >> + case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break; >> + case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break; >> + case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break; >> + case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break; >> + case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break; >> + case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break; >> + default: return false; >> + } >> + >> + return true; >> +} >> + >> u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) >> { >> - u64 val; >> + u64 val = 0x8badf00d8badf00d; >> >> if (!vcpu->arch.sysregs_loaded_on_cpu) >> - goto immediate_read; >> + goto memory_read; >> >> if (unlikely(sysreg_is_el2(reg))) { >> const struct el2_sysreg_map *el2_reg; >> >> if (!is_hyp_ctxt(vcpu)) >> - goto immediate_read; >> + goto memory_read; >> >> switch (reg) { >> + case ELR_EL2: >> + return read_sysreg_el1(SYS_ELR); >> case SPSR_EL2: >> val = read_sysreg_el1(SYS_SPSR); >> return __fixup_spsr_el2_read(&vcpu->arch.ctxt, val); >> } >> >> el2_reg = find_el2_sysreg(nested_sysreg_map, reg); >> - if (el2_reg) { >> - /* >> - * If this register does not have an EL1 counterpart, >> - * then read the stored EL2 version. >> - */ >> - if (el2_reg->mapping == __INVALID_SYSREG__) >> - goto immediate_read; >> - >> - /* Get the current version of the EL1 counterpart. */ >> - reg = el2_reg->mapping; >> - } >> - } else { >> - /* EL1 register can't be on the CPU if the guest is in vEL2. */ >> - if (unlikely(is_hyp_ctxt(vcpu))) >> - goto immediate_read; >> + BUG_ON(!el2_reg); >> + >> + /* >> + * If this register does not have an EL1 counterpart, >> + * then read the stored EL2 version. >> + */ >> + if (el2_reg->mapping == __INVALID_SYSREG__) >> + goto memory_read; >> + >> + if (!vcpu_el2_e2h_is_set(vcpu) && >> + el2_reg->translate) >> + goto memory_read; > > Nit: the condition can be written on one line. > > This condition wasn't present in patch 13 which introduced EL2 register > handling, and I'm struggling to understand what it does. As I understand the > code, this condition basically translates into: > > - if the register is one of SCTLR_EL2, TTBR0_EL2, CPTR_EL2 or TCR_EL2, then read > it from memory. > > - if the register is an EL2 register whose value is written unmodified to the > corresponding EL1 register, then read the corresponding EL1 register and return > that value. > > Looking at vcpu_write_sys_reg, the values for the EL2 registers are always saved > in memory. The guest is a non-vhe guest, so writes to EL1 registers shouldn't be > reflected in the corresponding EL2 register. I think it's safe to always return > the value from memory. > > I tried testing this with the following patch: > > diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c > index 1235a88ec575..27d39bb9564d 100644 > --- a/arch/arm64/kvm/sys_regs.c > +++ b/arch/arm64/kvm/sys_regs.c > @@ -290,6 +290,9 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) > el2_reg = find_el2_sysreg(nested_sysreg_map, reg); > BUG_ON(!el2_reg); > > + if (!vcpu_el2_e2h_is_set(vcpu)) > + goto memory_read; > + > /* > * If this register does not have an EL1 counterpart, > * then read the stored EL2 version. > @@ -297,10 +300,6 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) > if (el2_reg->mapping == __INVALID_SYSREG__) > goto memory_read; > > - if (!vcpu_el2_e2h_is_set(vcpu) && > - el2_reg->translate) > - goto memory_read; > - > /* Get the current version of the EL1 counterpart. */ > reg = el2_reg->mapping; > WARN_ON(!__vcpu_read_sys_reg_from_cpu(reg, &val)); > > I know it's not conclusive, but I was able to boot a L2 guest under a L1 non-vhe > hypervisor. And now you can't properly handle the terrible ARMv8.3 business of SPSR_EL1 being changed behind your back if you get an exception at vEL2 to vEL2 on non-VHE. To handle this, you need both the live system register and the memory backup (see __fixup_spsr_el2_read and co). More generally, some registers can be modified behind your back. That's ELR, SPSR, FAR, ESR. Anything related to taking an exception. No, this can't be observed with KVM because we don't allow exception to be taken at EL2 in the absence of RAS errors. Thanks, M.
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 2b8734f75a09..e181359adadf 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -182,99 +182,161 @@ const struct el2_sysreg_map *find_el2_sysreg(const struct el2_sysreg_map *map, return entry; } +static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) +{ + /* + * System registers listed in the switch are not saved on every + * exit from the guest but are only saved on vcpu_put. + * + * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but + * should never be listed below, because the guest cannot modify its + * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's + * thread when emulating cross-VCPU communication. + */ + switch (reg) { + case CSSELR_EL1: *val = read_sysreg_s(SYS_CSSELR_EL1); break; + case SCTLR_EL1: *val = read_sysreg_s(SYS_SCTLR_EL12); break; + case ACTLR_EL1: *val = read_sysreg_s(SYS_ACTLR_EL1); break; + case CPACR_EL1: *val = read_sysreg_s(SYS_CPACR_EL12); break; + case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break; + case TTBR1_EL1: *val = read_sysreg_s(SYS_TTBR1_EL12); break; + case TCR_EL1: *val = read_sysreg_s(SYS_TCR_EL12); break; + case ESR_EL1: *val = read_sysreg_s(SYS_ESR_EL12); break; + case AFSR0_EL1: *val = read_sysreg_s(SYS_AFSR0_EL12); break; + case AFSR1_EL1: *val = read_sysreg_s(SYS_AFSR1_EL12); break; + case FAR_EL1: *val = read_sysreg_s(SYS_FAR_EL12); break; + case MAIR_EL1: *val = read_sysreg_s(SYS_MAIR_EL12); break; + case VBAR_EL1: *val = read_sysreg_s(SYS_VBAR_EL12); break; + case CONTEXTIDR_EL1: *val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break; + case TPIDR_EL0: *val = read_sysreg_s(SYS_TPIDR_EL0); break; + case TPIDRRO_EL0: *val = read_sysreg_s(SYS_TPIDRRO_EL0); break; + case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1); break; + case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break; + case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break; + case PAR_EL1: *val = read_sysreg_s(SYS_PAR_EL1); break; + case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break; + case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break; + case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break; + default: return false; + } + + return true; +} + +static bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) +{ + /* + * System registers listed in the switch are not restored on every + * entry to the guest but are only restored on vcpu_load. + * + * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but + * should never be listed below, because the the MPIDR should only be + * set once, before running the VCPU, and never changed later. + */ + switch (reg) { + case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); break; + case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break; + case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); break; + case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break; + case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break; + case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break; + case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break; + case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break; + case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break; + case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break; + case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); break; + case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); break; + case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); break; + case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break; + case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); break; + case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); break; + case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break; + case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break; + case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break; + case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break; + case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break; + case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break; + case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break; + default: return false; + } + + return true; +} + u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) { - u64 val; + u64 val = 0x8badf00d8badf00d; if (!vcpu->arch.sysregs_loaded_on_cpu) - goto immediate_read; + goto memory_read; if (unlikely(sysreg_is_el2(reg))) { const struct el2_sysreg_map *el2_reg; if (!is_hyp_ctxt(vcpu)) - goto immediate_read; + goto memory_read; switch (reg) { + case ELR_EL2: + return read_sysreg_el1(SYS_ELR); case SPSR_EL2: val = read_sysreg_el1(SYS_SPSR); return __fixup_spsr_el2_read(&vcpu->arch.ctxt, val); } el2_reg = find_el2_sysreg(nested_sysreg_map, reg); - if (el2_reg) { - /* - * If this register does not have an EL1 counterpart, - * then read the stored EL2 version. - */ - if (el2_reg->mapping == __INVALID_SYSREG__) - goto immediate_read; - - /* Get the current version of the EL1 counterpart. */ - reg = el2_reg->mapping; - } - } else { - /* EL1 register can't be on the CPU if the guest is in vEL2. */ - if (unlikely(is_hyp_ctxt(vcpu))) - goto immediate_read; + BUG_ON(!el2_reg); + + /* + * If this register does not have an EL1 counterpart, + * then read the stored EL2 version. + */ + if (el2_reg->mapping == __INVALID_SYSREG__) + goto memory_read; + + if (!vcpu_el2_e2h_is_set(vcpu) && + el2_reg->translate) + goto memory_read; + + /* Get the current version of the EL1 counterpart. */ + reg = el2_reg->mapping; + WARN_ON(!__vcpu_read_sys_reg_from_cpu(reg, &val)); + return val; } - /* - * System registers listed in the switch are not saved on every - * exit from the guest but are only saved on vcpu_put. - * - * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but - * should never be listed below, because the guest cannot modify its - * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's - * thread when emulating cross-VCPU communication. - */ - switch (reg) { - case CSSELR_EL1: return read_sysreg_s(SYS_CSSELR_EL1); - case SCTLR_EL1: return read_sysreg_s(SYS_SCTLR_EL12); - case ACTLR_EL1: return read_sysreg_s(SYS_ACTLR_EL1); - case CPACR_EL1: return read_sysreg_s(SYS_CPACR_EL12); - case TTBR0_EL1: return read_sysreg_s(SYS_TTBR0_EL12); - case TTBR1_EL1: return read_sysreg_s(SYS_TTBR1_EL12); - case TCR_EL1: return read_sysreg_s(SYS_TCR_EL12); - case ESR_EL1: return read_sysreg_s(SYS_ESR_EL12); - case AFSR0_EL1: return read_sysreg_s(SYS_AFSR0_EL12); - case AFSR1_EL1: return read_sysreg_s(SYS_AFSR1_EL12); - case FAR_EL1: return read_sysreg_s(SYS_FAR_EL12); - case MAIR_EL1: return read_sysreg_s(SYS_MAIR_EL12); - case VBAR_EL1: return read_sysreg_s(SYS_VBAR_EL12); - case CONTEXTIDR_EL1: return read_sysreg_s(SYS_CONTEXTIDR_EL12); - case TPIDR_EL0: return read_sysreg_s(SYS_TPIDR_EL0); - case TPIDRRO_EL0: return read_sysreg_s(SYS_TPIDRRO_EL0); - case TPIDR_EL1: return read_sysreg_s(SYS_TPIDR_EL1); - case AMAIR_EL1: return read_sysreg_s(SYS_AMAIR_EL12); - case CNTKCTL_EL1: return read_sysreg_s(SYS_CNTKCTL_EL12); - case PAR_EL1: return read_sysreg_s(SYS_PAR_EL1); - case DACR32_EL2: return read_sysreg_s(SYS_DACR32_EL2); - case IFSR32_EL2: return read_sysreg_s(SYS_IFSR32_EL2); - case DBGVCR32_EL2: return read_sysreg_s(SYS_DBGVCR32_EL2); - case SP_EL2: return read_sysreg(sp_el1); - case ELR_EL2: return read_sysreg_el1(SYS_ELR); - } + /* EL1 register can't be on the CPU if the guest is in vEL2. */ + if (unlikely(is_hyp_ctxt(vcpu))) + goto memory_read; + + if (__vcpu_read_sys_reg_from_cpu(reg, &val)) + return val; -immediate_read: +memory_read: return __vcpu_sys_reg(vcpu, reg); } void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) { if (!vcpu->arch.sysregs_loaded_on_cpu) - goto immediate_write; + goto memory_write; if (unlikely(sysreg_is_el2(reg))) { const struct el2_sysreg_map *el2_reg; if (!is_hyp_ctxt(vcpu)) - goto immediate_write; + goto memory_write; - /* Store the EL2 version in the sysregs array. */ + /* + * Always store a copy of the write to memory to avoid having + * to reverse-translate virtual EL2 system registers for a + * non-VHE guest hypervisor. + */ __vcpu_sys_reg(vcpu, reg) = val; switch (reg) { + case ELR_EL2: + write_sysreg_el1(val, SYS_ELR); + return; case SPSR_EL2: val = __fixup_spsr_el2_write(&vcpu->arch.ctxt, val); write_sysreg_el1(val, SYS_SPSR); @@ -282,61 +344,30 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) } el2_reg = find_el2_sysreg(nested_sysreg_map, reg); - if (el2_reg) { - /* Does this register have an EL1 counterpart? */ - if (el2_reg->mapping == __INVALID_SYSREG__) - return; + WARN(!el2_reg, "reg: %d\n", reg); - if (!vcpu_el2_e2h_is_set(vcpu) && - el2_reg->translate) - val = el2_reg->translate(val); + /* Does this register have an EL1 counterpart? */ + if (el2_reg->mapping == __INVALID_SYSREG__) + goto memory_write; - /* Redirect this to the EL1 version of the register. */ - reg = el2_reg->mapping; - } - } else { - /* EL1 register can't be on the CPU if the guest is in vEL2. */ - if (unlikely(is_hyp_ctxt(vcpu))) - goto immediate_write; - } + if (!vcpu_el2_e2h_is_set(vcpu) && + el2_reg->translate) + val = el2_reg->translate(val); - /* - * System registers listed in the switch are not restored on every - * entry to the guest but are only restored on vcpu_load. - * - * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but - * should never be listed below, because the the MPIDR should only be - * set once, before running the VCPU, and never changed later. - */ - switch (reg) { - case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); return; - case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); return; - case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); return; - case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); return; - case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); return; - case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); return; - case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); return; - case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); return; - case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); return; - case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); return; - case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); return; - case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); return; - case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); return; - case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12); return; - case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); return; - case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); return; - case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); return; - case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); return; - case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); return; - case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); return; - case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); return; - case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); return; - case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); return; - case SP_EL2: write_sysreg(val, sp_el1); return; - case ELR_EL2: write_sysreg_el1(val, SYS_ELR); return; + /* Redirect this to the EL1 version of the register. */ + reg = el2_reg->mapping; + WARN_ON(!__vcpu_write_sys_reg_to_cpu(val, reg)); + return; } -immediate_write: + /* EL1 register can't be on the CPU if the guest is in vEL2. */ + if (unlikely(is_hyp_ctxt(vcpu))) + goto memory_write; + + if (__vcpu_write_sys_reg_to_cpu(val, reg)) + return; + +memory_write: __vcpu_sys_reg(vcpu, reg) = val; }
Extract the direct HW accessors for later reuse. Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> --- arch/arm64/kvm/sys_regs.c | 247 +++++++++++++++++++++----------------- 1 file changed, 139 insertions(+), 108 deletions(-)