diff mbox series

[15/59] KVM: arm64: nv: Refactor vcpu_{read,write}_sys_reg

Message ID 20190621093843.220980-16-marc.zyngier@arm.com (mailing list archive)
State New, archived
Headers show
Series KVM: arm64: ARMv8.3 Nested Virtualization support | expand

Commit Message

Marc Zyngier June 21, 2019, 9:37 a.m. UTC
Extract the direct HW accessors for later reuse.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/kvm/sys_regs.c | 247 +++++++++++++++++++++-----------------
 1 file changed, 139 insertions(+), 108 deletions(-)

Comments

Julien Thierry June 24, 2019, 3:07 p.m. UTC | #1
On 06/21/2019 10:37 AM, Marc Zyngier wrote:
> Extract the direct HW accessors for later reuse.
> 
> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> ---
>  arch/arm64/kvm/sys_regs.c | 247 +++++++++++++++++++++-----------------
>  1 file changed, 139 insertions(+), 108 deletions(-)
> 
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 2b8734f75a09..e181359adadf 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -182,99 +182,161 @@ const struct el2_sysreg_map *find_el2_sysreg(const struct el2_sysreg_map *map,
>  	return entry;
>  }
>  
> +static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
> +{
> +	/*
> +	 * System registers listed in the switch are not saved on every
> +	 * exit from the guest but are only saved on vcpu_put.
> +	 *
> +	 * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
> +	 * should never be listed below, because the guest cannot modify its
> +	 * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's
> +	 * thread when emulating cross-VCPU communication.
> +	 */
> +	switch (reg) {
> +	case CSSELR_EL1:	*val = read_sysreg_s(SYS_CSSELR_EL1);	break;
> +	case SCTLR_EL1:		*val = read_sysreg_s(SYS_SCTLR_EL12);	break;
> +	case ACTLR_EL1:		*val = read_sysreg_s(SYS_ACTLR_EL1);	break;
> +	case CPACR_EL1:		*val = read_sysreg_s(SYS_CPACR_EL12);	break;
> +	case TTBR0_EL1:		*val = read_sysreg_s(SYS_TTBR0_EL12);	break;
> +	case TTBR1_EL1:		*val = read_sysreg_s(SYS_TTBR1_EL12);	break;
> +	case TCR_EL1:		*val = read_sysreg_s(SYS_TCR_EL12);	break;
> +	case ESR_EL1:		*val = read_sysreg_s(SYS_ESR_EL12);	break;
> +	case AFSR0_EL1:		*val = read_sysreg_s(SYS_AFSR0_EL12);	break;
> +	case AFSR1_EL1:		*val = read_sysreg_s(SYS_AFSR1_EL12);	break;
> +	case FAR_EL1:		*val = read_sysreg_s(SYS_FAR_EL12);	break;
> +	case MAIR_EL1:		*val = read_sysreg_s(SYS_MAIR_EL12);	break;
> +	case VBAR_EL1:		*val = read_sysreg_s(SYS_VBAR_EL12);	break;
> +	case CONTEXTIDR_EL1:	*val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break;
> +	case TPIDR_EL0:		*val = read_sysreg_s(SYS_TPIDR_EL0);	break;
> +	case TPIDRRO_EL0:	*val = read_sysreg_s(SYS_TPIDRRO_EL0);	break;
> +	case TPIDR_EL1:		*val = read_sysreg_s(SYS_TPIDR_EL1);	break;
> +	case AMAIR_EL1:		*val = read_sysreg_s(SYS_AMAIR_EL12);	break;
> +	case CNTKCTL_EL1:	*val = read_sysreg_s(SYS_CNTKCTL_EL12);	break;
> +	case PAR_EL1:		*val = read_sysreg_s(SYS_PAR_EL1);	break;
> +	case DACR32_EL2:	*val = read_sysreg_s(SYS_DACR32_EL2);	break;
> +	case IFSR32_EL2:	*val = read_sysreg_s(SYS_IFSR32_EL2);	break;
> +	case DBGVCR32_EL2:	*val = read_sysreg_s(SYS_DBGVCR32_EL2);	break;
> +	default:		return false;
> +	}
> +
> +	return true;
> +}
> +
> +static bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
> +{
> +	/*
> +	 * System registers listed in the switch are not restored on every
> +	 * entry to the guest but are only restored on vcpu_load.
> +	 *
> +	 * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
> +	 * should never be listed below, because the the MPIDR should only be
> +	 * set once, before running the VCPU, and never changed later.
> +	 */
> +	switch (reg) {
> +	case CSSELR_EL1:	write_sysreg_s(val, SYS_CSSELR_EL1);	break;
> +	case SCTLR_EL1:		write_sysreg_s(val, SYS_SCTLR_EL12);	break;
> +	case ACTLR_EL1:		write_sysreg_s(val, SYS_ACTLR_EL1);	break;
> +	case CPACR_EL1:		write_sysreg_s(val, SYS_CPACR_EL12);	break;
> +	case TTBR0_EL1:		write_sysreg_s(val, SYS_TTBR0_EL12);	break;
> +	case TTBR1_EL1:		write_sysreg_s(val, SYS_TTBR1_EL12);	break;
> +	case TCR_EL1:		write_sysreg_s(val, SYS_TCR_EL12);	break;
> +	case ESR_EL1:		write_sysreg_s(val, SYS_ESR_EL12);	break;
> +	case AFSR0_EL1:		write_sysreg_s(val, SYS_AFSR0_EL12);	break;
> +	case AFSR1_EL1:		write_sysreg_s(val, SYS_AFSR1_EL12);	break;
> +	case FAR_EL1:		write_sysreg_s(val, SYS_FAR_EL12);	break;
> +	case MAIR_EL1:		write_sysreg_s(val, SYS_MAIR_EL12);	break;
> +	case VBAR_EL1:		write_sysreg_s(val, SYS_VBAR_EL12);	break;
> +	case CONTEXTIDR_EL1:	write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break;
> +	case TPIDR_EL0:		write_sysreg_s(val, SYS_TPIDR_EL0);	break;
> +	case TPIDRRO_EL0:	write_sysreg_s(val, SYS_TPIDRRO_EL0);	break;
> +	case TPIDR_EL1:		write_sysreg_s(val, SYS_TPIDR_EL1);	break;
> +	case AMAIR_EL1:		write_sysreg_s(val, SYS_AMAIR_EL12);	break;
> +	case CNTKCTL_EL1:	write_sysreg_s(val, SYS_CNTKCTL_EL12);	break;
> +	case PAR_EL1:		write_sysreg_s(val, SYS_PAR_EL1);	break;
> +	case DACR32_EL2:	write_sysreg_s(val, SYS_DACR32_EL2);	break;
> +	case IFSR32_EL2:	write_sysreg_s(val, SYS_IFSR32_EL2);	break;
> +	case DBGVCR32_EL2:	write_sysreg_s(val, SYS_DBGVCR32_EL2);	break;
> +	default:		return false;
> +	}
> +
> +	return true;
> +}
> +
>  u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
>  {
> -	u64 val;
> +	u64 val = 0x8badf00d8badf00d;
>  
>  	if (!vcpu->arch.sysregs_loaded_on_cpu)
> -		goto immediate_read;
> +		goto memory_read;
>  
>  	if (unlikely(sysreg_is_el2(reg))) {
>  		const struct el2_sysreg_map *el2_reg;
>  
>  		if (!is_hyp_ctxt(vcpu))
> -			goto immediate_read;
> +			goto memory_read;
>  
>  		switch (reg) {
> +		case ELR_EL2:
> +			return read_sysreg_el1(SYS_ELR);

Hmmm, This change feels a bit out of place.

Also, patch 13 added ELR_EL2 and SP_EL2 to the switch cases for physical
sysreg accesses. Now ELR_EL2 is moved out of the main switch cases and
SP_EL2 is completely omitted.

I'd say either patch 13 needs to be reworked or there is a separate
patch that should be extracted from this patch to have an intermediate
state, or the commit message on this patch should be more detailed.

Cheers,
Alexandru Elisei June 27, 2019, 9:21 a.m. UTC | #2
On 6/21/19 10:37 AM, Marc Zyngier wrote:
> Extract the direct HW accessors for later reuse.
>
> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> ---
>  arch/arm64/kvm/sys_regs.c | 247 +++++++++++++++++++++-----------------
>  1 file changed, 139 insertions(+), 108 deletions(-)
>
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 2b8734f75a09..e181359adadf 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -182,99 +182,161 @@ const struct el2_sysreg_map *find_el2_sysreg(const struct el2_sysreg_map *map,
>  	return entry;
>  }
>  
> +static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
> +{
> +	/*
> +	 * System registers listed in the switch are not saved on every
> +	 * exit from the guest but are only saved on vcpu_put.
> +	 *
> +	 * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
> +	 * should never be listed below, because the guest cannot modify its
> +	 * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's
> +	 * thread when emulating cross-VCPU communication.
> +	 */
> +	switch (reg) {
> +	case CSSELR_EL1:	*val = read_sysreg_s(SYS_CSSELR_EL1);	break;
> +	case SCTLR_EL1:		*val = read_sysreg_s(SYS_SCTLR_EL12);	break;
> +	case ACTLR_EL1:		*val = read_sysreg_s(SYS_ACTLR_EL1);	break;
> +	case CPACR_EL1:		*val = read_sysreg_s(SYS_CPACR_EL12);	break;
> +	case TTBR0_EL1:		*val = read_sysreg_s(SYS_TTBR0_EL12);	break;
> +	case TTBR1_EL1:		*val = read_sysreg_s(SYS_TTBR1_EL12);	break;
> +	case TCR_EL1:		*val = read_sysreg_s(SYS_TCR_EL12);	break;
> +	case ESR_EL1:		*val = read_sysreg_s(SYS_ESR_EL12);	break;
> +	case AFSR0_EL1:		*val = read_sysreg_s(SYS_AFSR0_EL12);	break;
> +	case AFSR1_EL1:		*val = read_sysreg_s(SYS_AFSR1_EL12);	break;
> +	case FAR_EL1:		*val = read_sysreg_s(SYS_FAR_EL12);	break;
> +	case MAIR_EL1:		*val = read_sysreg_s(SYS_MAIR_EL12);	break;
> +	case VBAR_EL1:		*val = read_sysreg_s(SYS_VBAR_EL12);	break;
> +	case CONTEXTIDR_EL1:	*val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break;
> +	case TPIDR_EL0:		*val = read_sysreg_s(SYS_TPIDR_EL0);	break;
> +	case TPIDRRO_EL0:	*val = read_sysreg_s(SYS_TPIDRRO_EL0);	break;
> +	case TPIDR_EL1:		*val = read_sysreg_s(SYS_TPIDR_EL1);	break;
> +	case AMAIR_EL1:		*val = read_sysreg_s(SYS_AMAIR_EL12);	break;
> +	case CNTKCTL_EL1:	*val = read_sysreg_s(SYS_CNTKCTL_EL12);	break;
> +	case PAR_EL1:		*val = read_sysreg_s(SYS_PAR_EL1);	break;
> +	case DACR32_EL2:	*val = read_sysreg_s(SYS_DACR32_EL2);	break;
> +	case IFSR32_EL2:	*val = read_sysreg_s(SYS_IFSR32_EL2);	break;
> +	case DBGVCR32_EL2:	*val = read_sysreg_s(SYS_DBGVCR32_EL2);	break;
> +	default:		return false;
> +	}
> +
> +	return true;
> +}
> +
> +static bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
> +{
> +	/*
> +	 * System registers listed in the switch are not restored on every
> +	 * entry to the guest but are only restored on vcpu_load.
> +	 *
> +	 * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
> +	 * should never be listed below, because the the MPIDR should only be
> +	 * set once, before running the VCPU, and never changed later.
> +	 */
> +	switch (reg) {
> +	case CSSELR_EL1:	write_sysreg_s(val, SYS_CSSELR_EL1);	break;
> +	case SCTLR_EL1:		write_sysreg_s(val, SYS_SCTLR_EL12);	break;
> +	case ACTLR_EL1:		write_sysreg_s(val, SYS_ACTLR_EL1);	break;
> +	case CPACR_EL1:		write_sysreg_s(val, SYS_CPACR_EL12);	break;
> +	case TTBR0_EL1:		write_sysreg_s(val, SYS_TTBR0_EL12);	break;
> +	case TTBR1_EL1:		write_sysreg_s(val, SYS_TTBR1_EL12);	break;
> +	case TCR_EL1:		write_sysreg_s(val, SYS_TCR_EL12);	break;
> +	case ESR_EL1:		write_sysreg_s(val, SYS_ESR_EL12);	break;
> +	case AFSR0_EL1:		write_sysreg_s(val, SYS_AFSR0_EL12);	break;
> +	case AFSR1_EL1:		write_sysreg_s(val, SYS_AFSR1_EL12);	break;
> +	case FAR_EL1:		write_sysreg_s(val, SYS_FAR_EL12);	break;
> +	case MAIR_EL1:		write_sysreg_s(val, SYS_MAIR_EL12);	break;
> +	case VBAR_EL1:		write_sysreg_s(val, SYS_VBAR_EL12);	break;
> +	case CONTEXTIDR_EL1:	write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break;
> +	case TPIDR_EL0:		write_sysreg_s(val, SYS_TPIDR_EL0);	break;
> +	case TPIDRRO_EL0:	write_sysreg_s(val, SYS_TPIDRRO_EL0);	break;
> +	case TPIDR_EL1:		write_sysreg_s(val, SYS_TPIDR_EL1);	break;
> +	case AMAIR_EL1:		write_sysreg_s(val, SYS_AMAIR_EL12);	break;
> +	case CNTKCTL_EL1:	write_sysreg_s(val, SYS_CNTKCTL_EL12);	break;
> +	case PAR_EL1:		write_sysreg_s(val, SYS_PAR_EL1);	break;
> +	case DACR32_EL2:	write_sysreg_s(val, SYS_DACR32_EL2);	break;
> +	case IFSR32_EL2:	write_sysreg_s(val, SYS_IFSR32_EL2);	break;
> +	case DBGVCR32_EL2:	write_sysreg_s(val, SYS_DBGVCR32_EL2);	break;
> +	default:		return false;
> +	}
> +
> +	return true;
> +}
> +
>  u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
>  {
> -	u64 val;
> +	u64 val = 0x8badf00d8badf00d;
>  
>  	if (!vcpu->arch.sysregs_loaded_on_cpu)
> -		goto immediate_read;
> +		goto memory_read;
>  
>  	if (unlikely(sysreg_is_el2(reg))) {
>  		const struct el2_sysreg_map *el2_reg;
>  
>  		if (!is_hyp_ctxt(vcpu))
> -			goto immediate_read;
> +			goto memory_read;
>  
>  		switch (reg) {
> +		case ELR_EL2:
> +			return read_sysreg_el1(SYS_ELR);
>  		case SPSR_EL2:
>  			val = read_sysreg_el1(SYS_SPSR);
>  			return __fixup_spsr_el2_read(&vcpu->arch.ctxt, val);
>  		}
>  
>  		el2_reg = find_el2_sysreg(nested_sysreg_map, reg);
> -		if (el2_reg) {
> -			/*
> -			 * If this register does not have an EL1 counterpart,
> -			 * then read the stored EL2 version.
> -			 */
> -			if (el2_reg->mapping == __INVALID_SYSREG__)
> -				goto immediate_read;
> -
> -			/* Get the current version of the EL1 counterpart. */
> -			reg = el2_reg->mapping;
> -		}
> -	} else {
> -		/* EL1 register can't be on the CPU if the guest is in vEL2. */
> -		if (unlikely(is_hyp_ctxt(vcpu)))
> -			goto immediate_read;
> +		BUG_ON(!el2_reg);
> +
> +		/*
> +		 * If this register does not have an EL1 counterpart,
> +		 * then read the stored EL2 version.
> +		 */
> +		if (el2_reg->mapping == __INVALID_SYSREG__)
> +			goto memory_read;
> +
> +		if (!vcpu_el2_e2h_is_set(vcpu) &&
> +		    el2_reg->translate)
> +			goto memory_read;

Nit: the condition can be written on one line.

This condition wasn't present in patch 13 which introduced EL2 register
handling, and I'm struggling to understand what it does. As I understand the
code, this condition basically translates into:

- if the register is one of SCTLR_EL2, TTBR0_EL2, CPTR_EL2 or TCR_EL2, then read
it from memory.

- if the register is an EL2 register whose value is written unmodified to the
corresponding EL1 register, then read the corresponding EL1 register and return
that value.

Looking at vcpu_write_sys_reg, the values for the EL2 registers are always saved
in memory. The guest is a non-vhe guest, so writes to EL1 registers shouldn't be
reflected in the corresponding EL2 register. I think it's safe to always return
the value from memory.

I tried testing this with the following patch:

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 1235a88ec575..27d39bb9564d 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -290,6 +290,9 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
                el2_reg = find_el2_sysreg(nested_sysreg_map, reg);
                BUG_ON(!el2_reg);
 
+               if (!vcpu_el2_e2h_is_set(vcpu))
+                       goto memory_read;
+
                /*
                 * If this register does not have an EL1 counterpart,
                 * then read the stored EL2 version.
@@ -297,10 +300,6 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
                if (el2_reg->mapping == __INVALID_SYSREG__)
                        goto memory_read;
 
-               if (!vcpu_el2_e2h_is_set(vcpu) &&
-                   el2_reg->translate)
-                       goto memory_read;
-
                /* Get the current version of the EL1 counterpart. */
                reg = el2_reg->mapping;
                WARN_ON(!__vcpu_read_sys_reg_from_cpu(reg, &val));

I know it's not conclusive, but I was able to boot a L2 guest under a L1 non-vhe
hypervisor.

> +
> +		/* Get the current version of the EL1 counterpart. */
> +		reg = el2_reg->mapping;
> +		WARN_ON(!__vcpu_read_sys_reg_from_cpu(reg, &val));
> +		return val;
>  	}
>  
> -	/*
> -	 * System registers listed in the switch are not saved on every
> -	 * exit from the guest but are only saved on vcpu_put.
> -	 *
> -	 * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
> -	 * should never be listed below, because the guest cannot modify its
> -	 * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's
> -	 * thread when emulating cross-VCPU communication.
> -	 */
> -	switch (reg) {
> -	case CSSELR_EL1:	return read_sysreg_s(SYS_CSSELR_EL1);
> -	case SCTLR_EL1:		return read_sysreg_s(SYS_SCTLR_EL12);
> -	case ACTLR_EL1:		return read_sysreg_s(SYS_ACTLR_EL1);
> -	case CPACR_EL1:		return read_sysreg_s(SYS_CPACR_EL12);
> -	case TTBR0_EL1:		return read_sysreg_s(SYS_TTBR0_EL12);
> -	case TTBR1_EL1:		return read_sysreg_s(SYS_TTBR1_EL12);
> -	case TCR_EL1:		return read_sysreg_s(SYS_TCR_EL12);
> -	case ESR_EL1:		return read_sysreg_s(SYS_ESR_EL12);
> -	case AFSR0_EL1:		return read_sysreg_s(SYS_AFSR0_EL12);
> -	case AFSR1_EL1:		return read_sysreg_s(SYS_AFSR1_EL12);
> -	case FAR_EL1:		return read_sysreg_s(SYS_FAR_EL12);
> -	case MAIR_EL1:		return read_sysreg_s(SYS_MAIR_EL12);
> -	case VBAR_EL1:		return read_sysreg_s(SYS_VBAR_EL12);
> -	case CONTEXTIDR_EL1:	return read_sysreg_s(SYS_CONTEXTIDR_EL12);
> -	case TPIDR_EL0:		return read_sysreg_s(SYS_TPIDR_EL0);
> -	case TPIDRRO_EL0:	return read_sysreg_s(SYS_TPIDRRO_EL0);
> -	case TPIDR_EL1:		return read_sysreg_s(SYS_TPIDR_EL1);
> -	case AMAIR_EL1:		return read_sysreg_s(SYS_AMAIR_EL12);
> -	case CNTKCTL_EL1:	return read_sysreg_s(SYS_CNTKCTL_EL12);
> -	case PAR_EL1:		return read_sysreg_s(SYS_PAR_EL1);
> -	case DACR32_EL2:	return read_sysreg_s(SYS_DACR32_EL2);
> -	case IFSR32_EL2:	return read_sysreg_s(SYS_IFSR32_EL2);
> -	case DBGVCR32_EL2:	return read_sysreg_s(SYS_DBGVCR32_EL2);
> -	case SP_EL2:		return read_sysreg(sp_el1);
> -	case ELR_EL2:		return read_sysreg_el1(SYS_ELR);
> -	}
> +	/* EL1 register can't be on the CPU if the guest is in vEL2. */
> +	if (unlikely(is_hyp_ctxt(vcpu)))
> +		goto memory_read;
> +
> +	if (__vcpu_read_sys_reg_from_cpu(reg, &val))
> +		return val;
>  
> -immediate_read:
> +memory_read:
>  	return __vcpu_sys_reg(vcpu, reg);
>  }
>  
>  void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
>  {
>  	if (!vcpu->arch.sysregs_loaded_on_cpu)
> -		goto immediate_write;
> +		goto memory_write;
>  
>  	if (unlikely(sysreg_is_el2(reg))) {
>  		const struct el2_sysreg_map *el2_reg;
>  
>  		if (!is_hyp_ctxt(vcpu))
> -			goto immediate_write;
> +			goto memory_write;
>  
> -		/* Store the EL2 version in the sysregs array. */
> +		/*
> +		 * Always store a copy of the write to memory to avoid having
> +		 * to reverse-translate virtual EL2 system registers for a
> +		 * non-VHE guest hypervisor.
> +		 */
>  		__vcpu_sys_reg(vcpu, reg) = val;
>  
>  		switch (reg) {
> +		case ELR_EL2:
> +			write_sysreg_el1(val, SYS_ELR);
> +			return;
>  		case SPSR_EL2:
>  			val = __fixup_spsr_el2_write(&vcpu->arch.ctxt, val);
>  			write_sysreg_el1(val, SYS_SPSR);
> @@ -282,61 +344,30 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
>  		}
>  
>  		el2_reg = find_el2_sysreg(nested_sysreg_map, reg);
> -		if (el2_reg) {
> -			/* Does this register have an EL1 counterpart? */
> -			if (el2_reg->mapping == __INVALID_SYSREG__)
> -				return;
> +		WARN(!el2_reg, "reg: %d\n", reg);
>  
> -			if (!vcpu_el2_e2h_is_set(vcpu) &&
> -			    el2_reg->translate)
> -				val = el2_reg->translate(val);
> +		/* Does this register have an EL1 counterpart? */
> +		if (el2_reg->mapping == __INVALID_SYSREG__)
> +			goto memory_write;
>  
> -			/* Redirect this to the EL1 version of the register. */
> -			reg = el2_reg->mapping;
> -		}
> -	} else {
> -		/* EL1 register can't be on the CPU if the guest is in vEL2. */
> -		if (unlikely(is_hyp_ctxt(vcpu)))
> -			goto immediate_write;
> -	}
> +		if (!vcpu_el2_e2h_is_set(vcpu) &&
> +		    el2_reg->translate)
> +			val = el2_reg->translate(val);
>  
> -	/*
> -	 * System registers listed in the switch are not restored on every
> -	 * entry to the guest but are only restored on vcpu_load.
> -	 *
> -	 * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
> -	 * should never be listed below, because the the MPIDR should only be
> -	 * set once, before running the VCPU, and never changed later.
> -	 */
> -	switch (reg) {
> -	case CSSELR_EL1:	write_sysreg_s(val, SYS_CSSELR_EL1);	return;
> -	case SCTLR_EL1:		write_sysreg_s(val, SYS_SCTLR_EL12);	return;
> -	case ACTLR_EL1:		write_sysreg_s(val, SYS_ACTLR_EL1);	return;
> -	case CPACR_EL1:		write_sysreg_s(val, SYS_CPACR_EL12);	return;
> -	case TTBR0_EL1:		write_sysreg_s(val, SYS_TTBR0_EL12);	return;
> -	case TTBR1_EL1:		write_sysreg_s(val, SYS_TTBR1_EL12);	return;
> -	case TCR_EL1:		write_sysreg_s(val, SYS_TCR_EL12);	return;
> -	case ESR_EL1:		write_sysreg_s(val, SYS_ESR_EL12);	return;
> -	case AFSR0_EL1:		write_sysreg_s(val, SYS_AFSR0_EL12);	return;
> -	case AFSR1_EL1:		write_sysreg_s(val, SYS_AFSR1_EL12);	return;
> -	case FAR_EL1:		write_sysreg_s(val, SYS_FAR_EL12);	return;
> -	case MAIR_EL1:		write_sysreg_s(val, SYS_MAIR_EL12);	return;
> -	case VBAR_EL1:		write_sysreg_s(val, SYS_VBAR_EL12);	return;
> -	case CONTEXTIDR_EL1:	write_sysreg_s(val, SYS_CONTEXTIDR_EL12); return;
> -	case TPIDR_EL0:		write_sysreg_s(val, SYS_TPIDR_EL0);	return;
> -	case TPIDRRO_EL0:	write_sysreg_s(val, SYS_TPIDRRO_EL0);	return;
> -	case TPIDR_EL1:		write_sysreg_s(val, SYS_TPIDR_EL1);	return;
> -	case AMAIR_EL1:		write_sysreg_s(val, SYS_AMAIR_EL12);	return;
> -	case CNTKCTL_EL1:	write_sysreg_s(val, SYS_CNTKCTL_EL12);	return;
> -	case PAR_EL1:		write_sysreg_s(val, SYS_PAR_EL1);	return;
> -	case DACR32_EL2:	write_sysreg_s(val, SYS_DACR32_EL2);	return;
> -	case IFSR32_EL2:	write_sysreg_s(val, SYS_IFSR32_EL2);	return;
> -	case DBGVCR32_EL2:	write_sysreg_s(val, SYS_DBGVCR32_EL2);	return;
> -	case SP_EL2:		write_sysreg(val, sp_el1);		return;
> -	case ELR_EL2:		write_sysreg_el1(val, SYS_ELR);		return;
> +		/* Redirect this to the EL1 version of the register. */
> +		reg = el2_reg->mapping;
> +		WARN_ON(!__vcpu_write_sys_reg_to_cpu(val, reg));
> +		return;
>  	}
>  
> -immediate_write:
> +	/* EL1 register can't be on the CPU if the guest is in vEL2. */
> +	if (unlikely(is_hyp_ctxt(vcpu)))
> +		goto memory_write;
> +
> +	if (__vcpu_write_sys_reg_to_cpu(val, reg))
> +		return;
> +
> +memory_write:
>  	 __vcpu_sys_reg(vcpu, reg) = val;
>  }
>
Marc Zyngier July 3, 2019, 1:09 p.m. UTC | #3
On 24/06/2019 16:07, Julien Thierry wrote:
> 
> 
> On 06/21/2019 10:37 AM, Marc Zyngier wrote:
>> Extract the direct HW accessors for later reuse.
>>
>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
>> ---
>>  arch/arm64/kvm/sys_regs.c | 247 +++++++++++++++++++++-----------------
>>  1 file changed, 139 insertions(+), 108 deletions(-)
>>
>> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
>> index 2b8734f75a09..e181359adadf 100644
>> --- a/arch/arm64/kvm/sys_regs.c
>> +++ b/arch/arm64/kvm/sys_regs.c
>> @@ -182,99 +182,161 @@ const struct el2_sysreg_map *find_el2_sysreg(const struct el2_sysreg_map *map,
>>  	return entry;
>>  }
>>  
>> +static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
>> +{
>> +	/*
>> +	 * System registers listed in the switch are not saved on every
>> +	 * exit from the guest but are only saved on vcpu_put.
>> +	 *
>> +	 * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
>> +	 * should never be listed below, because the guest cannot modify its
>> +	 * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's
>> +	 * thread when emulating cross-VCPU communication.
>> +	 */
>> +	switch (reg) {
>> +	case CSSELR_EL1:	*val = read_sysreg_s(SYS_CSSELR_EL1);	break;
>> +	case SCTLR_EL1:		*val = read_sysreg_s(SYS_SCTLR_EL12);	break;
>> +	case ACTLR_EL1:		*val = read_sysreg_s(SYS_ACTLR_EL1);	break;
>> +	case CPACR_EL1:		*val = read_sysreg_s(SYS_CPACR_EL12);	break;
>> +	case TTBR0_EL1:		*val = read_sysreg_s(SYS_TTBR0_EL12);	break;
>> +	case TTBR1_EL1:		*val = read_sysreg_s(SYS_TTBR1_EL12);	break;
>> +	case TCR_EL1:		*val = read_sysreg_s(SYS_TCR_EL12);	break;
>> +	case ESR_EL1:		*val = read_sysreg_s(SYS_ESR_EL12);	break;
>> +	case AFSR0_EL1:		*val = read_sysreg_s(SYS_AFSR0_EL12);	break;
>> +	case AFSR1_EL1:		*val = read_sysreg_s(SYS_AFSR1_EL12);	break;
>> +	case FAR_EL1:		*val = read_sysreg_s(SYS_FAR_EL12);	break;
>> +	case MAIR_EL1:		*val = read_sysreg_s(SYS_MAIR_EL12);	break;
>> +	case VBAR_EL1:		*val = read_sysreg_s(SYS_VBAR_EL12);	break;
>> +	case CONTEXTIDR_EL1:	*val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break;
>> +	case TPIDR_EL0:		*val = read_sysreg_s(SYS_TPIDR_EL0);	break;
>> +	case TPIDRRO_EL0:	*val = read_sysreg_s(SYS_TPIDRRO_EL0);	break;
>> +	case TPIDR_EL1:		*val = read_sysreg_s(SYS_TPIDR_EL1);	break;
>> +	case AMAIR_EL1:		*val = read_sysreg_s(SYS_AMAIR_EL12);	break;
>> +	case CNTKCTL_EL1:	*val = read_sysreg_s(SYS_CNTKCTL_EL12);	break;
>> +	case PAR_EL1:		*val = read_sysreg_s(SYS_PAR_EL1);	break;
>> +	case DACR32_EL2:	*val = read_sysreg_s(SYS_DACR32_EL2);	break;
>> +	case IFSR32_EL2:	*val = read_sysreg_s(SYS_IFSR32_EL2);	break;
>> +	case DBGVCR32_EL2:	*val = read_sysreg_s(SYS_DBGVCR32_EL2);	break;
>> +	default:		return false;
>> +	}
>> +
>> +	return true;
>> +}
>> +
>> +static bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
>> +{
>> +	/*
>> +	 * System registers listed in the switch are not restored on every
>> +	 * entry to the guest but are only restored on vcpu_load.
>> +	 *
>> +	 * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
>> +	 * should never be listed below, because the the MPIDR should only be
>> +	 * set once, before running the VCPU, and never changed later.
>> +	 */
>> +	switch (reg) {
>> +	case CSSELR_EL1:	write_sysreg_s(val, SYS_CSSELR_EL1);	break;
>> +	case SCTLR_EL1:		write_sysreg_s(val, SYS_SCTLR_EL12);	break;
>> +	case ACTLR_EL1:		write_sysreg_s(val, SYS_ACTLR_EL1);	break;
>> +	case CPACR_EL1:		write_sysreg_s(val, SYS_CPACR_EL12);	break;
>> +	case TTBR0_EL1:		write_sysreg_s(val, SYS_TTBR0_EL12);	break;
>> +	case TTBR1_EL1:		write_sysreg_s(val, SYS_TTBR1_EL12);	break;
>> +	case TCR_EL1:		write_sysreg_s(val, SYS_TCR_EL12);	break;
>> +	case ESR_EL1:		write_sysreg_s(val, SYS_ESR_EL12);	break;
>> +	case AFSR0_EL1:		write_sysreg_s(val, SYS_AFSR0_EL12);	break;
>> +	case AFSR1_EL1:		write_sysreg_s(val, SYS_AFSR1_EL12);	break;
>> +	case FAR_EL1:		write_sysreg_s(val, SYS_FAR_EL12);	break;
>> +	case MAIR_EL1:		write_sysreg_s(val, SYS_MAIR_EL12);	break;
>> +	case VBAR_EL1:		write_sysreg_s(val, SYS_VBAR_EL12);	break;
>> +	case CONTEXTIDR_EL1:	write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break;
>> +	case TPIDR_EL0:		write_sysreg_s(val, SYS_TPIDR_EL0);	break;
>> +	case TPIDRRO_EL0:	write_sysreg_s(val, SYS_TPIDRRO_EL0);	break;
>> +	case TPIDR_EL1:		write_sysreg_s(val, SYS_TPIDR_EL1);	break;
>> +	case AMAIR_EL1:		write_sysreg_s(val, SYS_AMAIR_EL12);	break;
>> +	case CNTKCTL_EL1:	write_sysreg_s(val, SYS_CNTKCTL_EL12);	break;
>> +	case PAR_EL1:		write_sysreg_s(val, SYS_PAR_EL1);	break;
>> +	case DACR32_EL2:	write_sysreg_s(val, SYS_DACR32_EL2);	break;
>> +	case IFSR32_EL2:	write_sysreg_s(val, SYS_IFSR32_EL2);	break;
>> +	case DBGVCR32_EL2:	write_sysreg_s(val, SYS_DBGVCR32_EL2);	break;
>> +	default:		return false;
>> +	}
>> +
>> +	return true;
>> +}
>> +
>>  u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
>>  {
>> -	u64 val;
>> +	u64 val = 0x8badf00d8badf00d;
>>  
>>  	if (!vcpu->arch.sysregs_loaded_on_cpu)
>> -		goto immediate_read;
>> +		goto memory_read;
>>  
>>  	if (unlikely(sysreg_is_el2(reg))) {
>>  		const struct el2_sysreg_map *el2_reg;
>>  
>>  		if (!is_hyp_ctxt(vcpu))
>> -			goto immediate_read;
>> +			goto memory_read;
>>  
>>  		switch (reg) {
>> +		case ELR_EL2:
>> +			return read_sysreg_el1(SYS_ELR);
> 
> Hmmm, This change feels a bit out of place.
> 
> Also, patch 13 added ELR_EL2 and SP_EL2 to the switch cases for physical
> sysreg accesses. Now ELR_EL2 is moved out of the main switch cases and
> SP_EL2 is completely omitted.
> 
> I'd say either patch 13 needs to be reworked or there is a separate
> patch that should be extracted from this patch to have an intermediate
> state, or the commit message on this patch should be more detailed.

Yeah, I wanted to actually squash most of this patch into #13, and got
sidetracked. Definitely room for improvement. I may even bring in the
rework I mentioned against #13.

Thanks,

	M.
Marc Zyngier July 4, 2019, 3:15 p.m. UTC | #4
On 27/06/2019 10:21, Alexandru Elisei wrote:
> On 6/21/19 10:37 AM, Marc Zyngier wrote:
>> Extract the direct HW accessors for later reuse.
>>
>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
>> ---
>>  arch/arm64/kvm/sys_regs.c | 247 +++++++++++++++++++++-----------------
>>  1 file changed, 139 insertions(+), 108 deletions(-)
>>
>> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
>> index 2b8734f75a09..e181359adadf 100644
>> --- a/arch/arm64/kvm/sys_regs.c
>> +++ b/arch/arm64/kvm/sys_regs.c
>> @@ -182,99 +182,161 @@ const struct el2_sysreg_map *find_el2_sysreg(const struct el2_sysreg_map *map,
>>  	return entry;
>>  }
>>  
>> +static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
>> +{
>> +	/*
>> +	 * System registers listed in the switch are not saved on every
>> +	 * exit from the guest but are only saved on vcpu_put.
>> +	 *
>> +	 * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
>> +	 * should never be listed below, because the guest cannot modify its
>> +	 * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's
>> +	 * thread when emulating cross-VCPU communication.
>> +	 */
>> +	switch (reg) {
>> +	case CSSELR_EL1:	*val = read_sysreg_s(SYS_CSSELR_EL1);	break;
>> +	case SCTLR_EL1:		*val = read_sysreg_s(SYS_SCTLR_EL12);	break;
>> +	case ACTLR_EL1:		*val = read_sysreg_s(SYS_ACTLR_EL1);	break;
>> +	case CPACR_EL1:		*val = read_sysreg_s(SYS_CPACR_EL12);	break;
>> +	case TTBR0_EL1:		*val = read_sysreg_s(SYS_TTBR0_EL12);	break;
>> +	case TTBR1_EL1:		*val = read_sysreg_s(SYS_TTBR1_EL12);	break;
>> +	case TCR_EL1:		*val = read_sysreg_s(SYS_TCR_EL12);	break;
>> +	case ESR_EL1:		*val = read_sysreg_s(SYS_ESR_EL12);	break;
>> +	case AFSR0_EL1:		*val = read_sysreg_s(SYS_AFSR0_EL12);	break;
>> +	case AFSR1_EL1:		*val = read_sysreg_s(SYS_AFSR1_EL12);	break;
>> +	case FAR_EL1:		*val = read_sysreg_s(SYS_FAR_EL12);	break;
>> +	case MAIR_EL1:		*val = read_sysreg_s(SYS_MAIR_EL12);	break;
>> +	case VBAR_EL1:		*val = read_sysreg_s(SYS_VBAR_EL12);	break;
>> +	case CONTEXTIDR_EL1:	*val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break;
>> +	case TPIDR_EL0:		*val = read_sysreg_s(SYS_TPIDR_EL0);	break;
>> +	case TPIDRRO_EL0:	*val = read_sysreg_s(SYS_TPIDRRO_EL0);	break;
>> +	case TPIDR_EL1:		*val = read_sysreg_s(SYS_TPIDR_EL1);	break;
>> +	case AMAIR_EL1:		*val = read_sysreg_s(SYS_AMAIR_EL12);	break;
>> +	case CNTKCTL_EL1:	*val = read_sysreg_s(SYS_CNTKCTL_EL12);	break;
>> +	case PAR_EL1:		*val = read_sysreg_s(SYS_PAR_EL1);	break;
>> +	case DACR32_EL2:	*val = read_sysreg_s(SYS_DACR32_EL2);	break;
>> +	case IFSR32_EL2:	*val = read_sysreg_s(SYS_IFSR32_EL2);	break;
>> +	case DBGVCR32_EL2:	*val = read_sysreg_s(SYS_DBGVCR32_EL2);	break;
>> +	default:		return false;
>> +	}
>> +
>> +	return true;
>> +}
>> +
>> +static bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
>> +{
>> +	/*
>> +	 * System registers listed in the switch are not restored on every
>> +	 * entry to the guest but are only restored on vcpu_load.
>> +	 *
>> +	 * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
>> +	 * should never be listed below, because the the MPIDR should only be
>> +	 * set once, before running the VCPU, and never changed later.
>> +	 */
>> +	switch (reg) {
>> +	case CSSELR_EL1:	write_sysreg_s(val, SYS_CSSELR_EL1);	break;
>> +	case SCTLR_EL1:		write_sysreg_s(val, SYS_SCTLR_EL12);	break;
>> +	case ACTLR_EL1:		write_sysreg_s(val, SYS_ACTLR_EL1);	break;
>> +	case CPACR_EL1:		write_sysreg_s(val, SYS_CPACR_EL12);	break;
>> +	case TTBR0_EL1:		write_sysreg_s(val, SYS_TTBR0_EL12);	break;
>> +	case TTBR1_EL1:		write_sysreg_s(val, SYS_TTBR1_EL12);	break;
>> +	case TCR_EL1:		write_sysreg_s(val, SYS_TCR_EL12);	break;
>> +	case ESR_EL1:		write_sysreg_s(val, SYS_ESR_EL12);	break;
>> +	case AFSR0_EL1:		write_sysreg_s(val, SYS_AFSR0_EL12);	break;
>> +	case AFSR1_EL1:		write_sysreg_s(val, SYS_AFSR1_EL12);	break;
>> +	case FAR_EL1:		write_sysreg_s(val, SYS_FAR_EL12);	break;
>> +	case MAIR_EL1:		write_sysreg_s(val, SYS_MAIR_EL12);	break;
>> +	case VBAR_EL1:		write_sysreg_s(val, SYS_VBAR_EL12);	break;
>> +	case CONTEXTIDR_EL1:	write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break;
>> +	case TPIDR_EL0:		write_sysreg_s(val, SYS_TPIDR_EL0);	break;
>> +	case TPIDRRO_EL0:	write_sysreg_s(val, SYS_TPIDRRO_EL0);	break;
>> +	case TPIDR_EL1:		write_sysreg_s(val, SYS_TPIDR_EL1);	break;
>> +	case AMAIR_EL1:		write_sysreg_s(val, SYS_AMAIR_EL12);	break;
>> +	case CNTKCTL_EL1:	write_sysreg_s(val, SYS_CNTKCTL_EL12);	break;
>> +	case PAR_EL1:		write_sysreg_s(val, SYS_PAR_EL1);	break;
>> +	case DACR32_EL2:	write_sysreg_s(val, SYS_DACR32_EL2);	break;
>> +	case IFSR32_EL2:	write_sysreg_s(val, SYS_IFSR32_EL2);	break;
>> +	case DBGVCR32_EL2:	write_sysreg_s(val, SYS_DBGVCR32_EL2);	break;
>> +	default:		return false;
>> +	}
>> +
>> +	return true;
>> +}
>> +
>>  u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
>>  {
>> -	u64 val;
>> +	u64 val = 0x8badf00d8badf00d;
>>  
>>  	if (!vcpu->arch.sysregs_loaded_on_cpu)
>> -		goto immediate_read;
>> +		goto memory_read;
>>  
>>  	if (unlikely(sysreg_is_el2(reg))) {
>>  		const struct el2_sysreg_map *el2_reg;
>>  
>>  		if (!is_hyp_ctxt(vcpu))
>> -			goto immediate_read;
>> +			goto memory_read;
>>  
>>  		switch (reg) {
>> +		case ELR_EL2:
>> +			return read_sysreg_el1(SYS_ELR);
>>  		case SPSR_EL2:
>>  			val = read_sysreg_el1(SYS_SPSR);
>>  			return __fixup_spsr_el2_read(&vcpu->arch.ctxt, val);
>>  		}
>>  
>>  		el2_reg = find_el2_sysreg(nested_sysreg_map, reg);
>> -		if (el2_reg) {
>> -			/*
>> -			 * If this register does not have an EL1 counterpart,
>> -			 * then read the stored EL2 version.
>> -			 */
>> -			if (el2_reg->mapping == __INVALID_SYSREG__)
>> -				goto immediate_read;
>> -
>> -			/* Get the current version of the EL1 counterpart. */
>> -			reg = el2_reg->mapping;
>> -		}
>> -	} else {
>> -		/* EL1 register can't be on the CPU if the guest is in vEL2. */
>> -		if (unlikely(is_hyp_ctxt(vcpu)))
>> -			goto immediate_read;
>> +		BUG_ON(!el2_reg);
>> +
>> +		/*
>> +		 * If this register does not have an EL1 counterpart,
>> +		 * then read the stored EL2 version.
>> +		 */
>> +		if (el2_reg->mapping == __INVALID_SYSREG__)
>> +			goto memory_read;
>> +
>> +		if (!vcpu_el2_e2h_is_set(vcpu) &&
>> +		    el2_reg->translate)
>> +			goto memory_read;
> 
> Nit: the condition can be written on one line.
> 
> This condition wasn't present in patch 13 which introduced EL2 register
> handling, and I'm struggling to understand what it does. As I understand the
> code, this condition basically translates into:
> 
> - if the register is one of SCTLR_EL2, TTBR0_EL2, CPTR_EL2 or TCR_EL2, then read
> it from memory.
> 
> - if the register is an EL2 register whose value is written unmodified to the
> corresponding EL1 register, then read the corresponding EL1 register and return
> that value.
> 
> Looking at vcpu_write_sys_reg, the values for the EL2 registers are always saved
> in memory. The guest is a non-vhe guest, so writes to EL1 registers shouldn't be
> reflected in the corresponding EL2 register. I think it's safe to always return
> the value from memory.
> 
> I tried testing this with the following patch:
> 
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 1235a88ec575..27d39bb9564d 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -290,6 +290,9 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
>                 el2_reg = find_el2_sysreg(nested_sysreg_map, reg);
>                 BUG_ON(!el2_reg);
>  
> +               if (!vcpu_el2_e2h_is_set(vcpu))
> +                       goto memory_read;
> +
>                 /*
>                  * If this register does not have an EL1 counterpart,
>                  * then read the stored EL2 version.
> @@ -297,10 +300,6 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
>                 if (el2_reg->mapping == __INVALID_SYSREG__)
>                         goto memory_read;
>  
> -               if (!vcpu_el2_e2h_is_set(vcpu) &&
> -                   el2_reg->translate)
> -                       goto memory_read;
> -
>                 /* Get the current version of the EL1 counterpart. */
>                 reg = el2_reg->mapping;
>                 WARN_ON(!__vcpu_read_sys_reg_from_cpu(reg, &val));
> 
> I know it's not conclusive, but I was able to boot a L2 guest under a L1 non-vhe
> hypervisor.

And now you can't properly handle the terrible ARMv8.3 business of
SPSR_EL1 being changed behind your back if you get an exception at vEL2
to vEL2 on non-VHE. To handle this, you need both the live system
register and the memory backup (see __fixup_spsr_el2_read and co).

More generally, some registers can be modified behind your back. That's
ELR, SPSR, FAR, ESR.  Anything related to taking an exception. No, this
can't be observed with KVM because we don't allow exception to be taken
at EL2 in the absence of RAS errors.

Thanks,

	M.
diff mbox series

Patch

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 2b8734f75a09..e181359adadf 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -182,99 +182,161 @@  const struct el2_sysreg_map *find_el2_sysreg(const struct el2_sysreg_map *map,
 	return entry;
 }
 
+static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
+{
+	/*
+	 * System registers listed in the switch are not saved on every
+	 * exit from the guest but are only saved on vcpu_put.
+	 *
+	 * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
+	 * should never be listed below, because the guest cannot modify its
+	 * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's
+	 * thread when emulating cross-VCPU communication.
+	 */
+	switch (reg) {
+	case CSSELR_EL1:	*val = read_sysreg_s(SYS_CSSELR_EL1);	break;
+	case SCTLR_EL1:		*val = read_sysreg_s(SYS_SCTLR_EL12);	break;
+	case ACTLR_EL1:		*val = read_sysreg_s(SYS_ACTLR_EL1);	break;
+	case CPACR_EL1:		*val = read_sysreg_s(SYS_CPACR_EL12);	break;
+	case TTBR0_EL1:		*val = read_sysreg_s(SYS_TTBR0_EL12);	break;
+	case TTBR1_EL1:		*val = read_sysreg_s(SYS_TTBR1_EL12);	break;
+	case TCR_EL1:		*val = read_sysreg_s(SYS_TCR_EL12);	break;
+	case ESR_EL1:		*val = read_sysreg_s(SYS_ESR_EL12);	break;
+	case AFSR0_EL1:		*val = read_sysreg_s(SYS_AFSR0_EL12);	break;
+	case AFSR1_EL1:		*val = read_sysreg_s(SYS_AFSR1_EL12);	break;
+	case FAR_EL1:		*val = read_sysreg_s(SYS_FAR_EL12);	break;
+	case MAIR_EL1:		*val = read_sysreg_s(SYS_MAIR_EL12);	break;
+	case VBAR_EL1:		*val = read_sysreg_s(SYS_VBAR_EL12);	break;
+	case CONTEXTIDR_EL1:	*val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break;
+	case TPIDR_EL0:		*val = read_sysreg_s(SYS_TPIDR_EL0);	break;
+	case TPIDRRO_EL0:	*val = read_sysreg_s(SYS_TPIDRRO_EL0);	break;
+	case TPIDR_EL1:		*val = read_sysreg_s(SYS_TPIDR_EL1);	break;
+	case AMAIR_EL1:		*val = read_sysreg_s(SYS_AMAIR_EL12);	break;
+	case CNTKCTL_EL1:	*val = read_sysreg_s(SYS_CNTKCTL_EL12);	break;
+	case PAR_EL1:		*val = read_sysreg_s(SYS_PAR_EL1);	break;
+	case DACR32_EL2:	*val = read_sysreg_s(SYS_DACR32_EL2);	break;
+	case IFSR32_EL2:	*val = read_sysreg_s(SYS_IFSR32_EL2);	break;
+	case DBGVCR32_EL2:	*val = read_sysreg_s(SYS_DBGVCR32_EL2);	break;
+	default:		return false;
+	}
+
+	return true;
+}
+
+static bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
+{
+	/*
+	 * System registers listed in the switch are not restored on every
+	 * entry to the guest but are only restored on vcpu_load.
+	 *
+	 * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
+	 * should never be listed below, because the the MPIDR should only be
+	 * set once, before running the VCPU, and never changed later.
+	 */
+	switch (reg) {
+	case CSSELR_EL1:	write_sysreg_s(val, SYS_CSSELR_EL1);	break;
+	case SCTLR_EL1:		write_sysreg_s(val, SYS_SCTLR_EL12);	break;
+	case ACTLR_EL1:		write_sysreg_s(val, SYS_ACTLR_EL1);	break;
+	case CPACR_EL1:		write_sysreg_s(val, SYS_CPACR_EL12);	break;
+	case TTBR0_EL1:		write_sysreg_s(val, SYS_TTBR0_EL12);	break;
+	case TTBR1_EL1:		write_sysreg_s(val, SYS_TTBR1_EL12);	break;
+	case TCR_EL1:		write_sysreg_s(val, SYS_TCR_EL12);	break;
+	case ESR_EL1:		write_sysreg_s(val, SYS_ESR_EL12);	break;
+	case AFSR0_EL1:		write_sysreg_s(val, SYS_AFSR0_EL12);	break;
+	case AFSR1_EL1:		write_sysreg_s(val, SYS_AFSR1_EL12);	break;
+	case FAR_EL1:		write_sysreg_s(val, SYS_FAR_EL12);	break;
+	case MAIR_EL1:		write_sysreg_s(val, SYS_MAIR_EL12);	break;
+	case VBAR_EL1:		write_sysreg_s(val, SYS_VBAR_EL12);	break;
+	case CONTEXTIDR_EL1:	write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break;
+	case TPIDR_EL0:		write_sysreg_s(val, SYS_TPIDR_EL0);	break;
+	case TPIDRRO_EL0:	write_sysreg_s(val, SYS_TPIDRRO_EL0);	break;
+	case TPIDR_EL1:		write_sysreg_s(val, SYS_TPIDR_EL1);	break;
+	case AMAIR_EL1:		write_sysreg_s(val, SYS_AMAIR_EL12);	break;
+	case CNTKCTL_EL1:	write_sysreg_s(val, SYS_CNTKCTL_EL12);	break;
+	case PAR_EL1:		write_sysreg_s(val, SYS_PAR_EL1);	break;
+	case DACR32_EL2:	write_sysreg_s(val, SYS_DACR32_EL2);	break;
+	case IFSR32_EL2:	write_sysreg_s(val, SYS_IFSR32_EL2);	break;
+	case DBGVCR32_EL2:	write_sysreg_s(val, SYS_DBGVCR32_EL2);	break;
+	default:		return false;
+	}
+
+	return true;
+}
+
 u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
 {
-	u64 val;
+	u64 val = 0x8badf00d8badf00d;
 
 	if (!vcpu->arch.sysregs_loaded_on_cpu)
-		goto immediate_read;
+		goto memory_read;
 
 	if (unlikely(sysreg_is_el2(reg))) {
 		const struct el2_sysreg_map *el2_reg;
 
 		if (!is_hyp_ctxt(vcpu))
-			goto immediate_read;
+			goto memory_read;
 
 		switch (reg) {
+		case ELR_EL2:
+			return read_sysreg_el1(SYS_ELR);
 		case SPSR_EL2:
 			val = read_sysreg_el1(SYS_SPSR);
 			return __fixup_spsr_el2_read(&vcpu->arch.ctxt, val);
 		}
 
 		el2_reg = find_el2_sysreg(nested_sysreg_map, reg);
-		if (el2_reg) {
-			/*
-			 * If this register does not have an EL1 counterpart,
-			 * then read the stored EL2 version.
-			 */
-			if (el2_reg->mapping == __INVALID_SYSREG__)
-				goto immediate_read;
-
-			/* Get the current version of the EL1 counterpart. */
-			reg = el2_reg->mapping;
-		}
-	} else {
-		/* EL1 register can't be on the CPU if the guest is in vEL2. */
-		if (unlikely(is_hyp_ctxt(vcpu)))
-			goto immediate_read;
+		BUG_ON(!el2_reg);
+
+		/*
+		 * If this register does not have an EL1 counterpart,
+		 * then read the stored EL2 version.
+		 */
+		if (el2_reg->mapping == __INVALID_SYSREG__)
+			goto memory_read;
+
+		if (!vcpu_el2_e2h_is_set(vcpu) &&
+		    el2_reg->translate)
+			goto memory_read;
+
+		/* Get the current version of the EL1 counterpart. */
+		reg = el2_reg->mapping;
+		WARN_ON(!__vcpu_read_sys_reg_from_cpu(reg, &val));
+		return val;
 	}
 
-	/*
-	 * System registers listed in the switch are not saved on every
-	 * exit from the guest but are only saved on vcpu_put.
-	 *
-	 * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
-	 * should never be listed below, because the guest cannot modify its
-	 * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's
-	 * thread when emulating cross-VCPU communication.
-	 */
-	switch (reg) {
-	case CSSELR_EL1:	return read_sysreg_s(SYS_CSSELR_EL1);
-	case SCTLR_EL1:		return read_sysreg_s(SYS_SCTLR_EL12);
-	case ACTLR_EL1:		return read_sysreg_s(SYS_ACTLR_EL1);
-	case CPACR_EL1:		return read_sysreg_s(SYS_CPACR_EL12);
-	case TTBR0_EL1:		return read_sysreg_s(SYS_TTBR0_EL12);
-	case TTBR1_EL1:		return read_sysreg_s(SYS_TTBR1_EL12);
-	case TCR_EL1:		return read_sysreg_s(SYS_TCR_EL12);
-	case ESR_EL1:		return read_sysreg_s(SYS_ESR_EL12);
-	case AFSR0_EL1:		return read_sysreg_s(SYS_AFSR0_EL12);
-	case AFSR1_EL1:		return read_sysreg_s(SYS_AFSR1_EL12);
-	case FAR_EL1:		return read_sysreg_s(SYS_FAR_EL12);
-	case MAIR_EL1:		return read_sysreg_s(SYS_MAIR_EL12);
-	case VBAR_EL1:		return read_sysreg_s(SYS_VBAR_EL12);
-	case CONTEXTIDR_EL1:	return read_sysreg_s(SYS_CONTEXTIDR_EL12);
-	case TPIDR_EL0:		return read_sysreg_s(SYS_TPIDR_EL0);
-	case TPIDRRO_EL0:	return read_sysreg_s(SYS_TPIDRRO_EL0);
-	case TPIDR_EL1:		return read_sysreg_s(SYS_TPIDR_EL1);
-	case AMAIR_EL1:		return read_sysreg_s(SYS_AMAIR_EL12);
-	case CNTKCTL_EL1:	return read_sysreg_s(SYS_CNTKCTL_EL12);
-	case PAR_EL1:		return read_sysreg_s(SYS_PAR_EL1);
-	case DACR32_EL2:	return read_sysreg_s(SYS_DACR32_EL2);
-	case IFSR32_EL2:	return read_sysreg_s(SYS_IFSR32_EL2);
-	case DBGVCR32_EL2:	return read_sysreg_s(SYS_DBGVCR32_EL2);
-	case SP_EL2:		return read_sysreg(sp_el1);
-	case ELR_EL2:		return read_sysreg_el1(SYS_ELR);
-	}
+	/* EL1 register can't be on the CPU if the guest is in vEL2. */
+	if (unlikely(is_hyp_ctxt(vcpu)))
+		goto memory_read;
+
+	if (__vcpu_read_sys_reg_from_cpu(reg, &val))
+		return val;
 
-immediate_read:
+memory_read:
 	return __vcpu_sys_reg(vcpu, reg);
 }
 
 void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
 {
 	if (!vcpu->arch.sysregs_loaded_on_cpu)
-		goto immediate_write;
+		goto memory_write;
 
 	if (unlikely(sysreg_is_el2(reg))) {
 		const struct el2_sysreg_map *el2_reg;
 
 		if (!is_hyp_ctxt(vcpu))
-			goto immediate_write;
+			goto memory_write;
 
-		/* Store the EL2 version in the sysregs array. */
+		/*
+		 * Always store a copy of the write to memory to avoid having
+		 * to reverse-translate virtual EL2 system registers for a
+		 * non-VHE guest hypervisor.
+		 */
 		__vcpu_sys_reg(vcpu, reg) = val;
 
 		switch (reg) {
+		case ELR_EL2:
+			write_sysreg_el1(val, SYS_ELR);
+			return;
 		case SPSR_EL2:
 			val = __fixup_spsr_el2_write(&vcpu->arch.ctxt, val);
 			write_sysreg_el1(val, SYS_SPSR);
@@ -282,61 +344,30 @@  void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
 		}
 
 		el2_reg = find_el2_sysreg(nested_sysreg_map, reg);
-		if (el2_reg) {
-			/* Does this register have an EL1 counterpart? */
-			if (el2_reg->mapping == __INVALID_SYSREG__)
-				return;
+		WARN(!el2_reg, "reg: %d\n", reg);
 
-			if (!vcpu_el2_e2h_is_set(vcpu) &&
-			    el2_reg->translate)
-				val = el2_reg->translate(val);
+		/* Does this register have an EL1 counterpart? */
+		if (el2_reg->mapping == __INVALID_SYSREG__)
+			goto memory_write;
 
-			/* Redirect this to the EL1 version of the register. */
-			reg = el2_reg->mapping;
-		}
-	} else {
-		/* EL1 register can't be on the CPU if the guest is in vEL2. */
-		if (unlikely(is_hyp_ctxt(vcpu)))
-			goto immediate_write;
-	}
+		if (!vcpu_el2_e2h_is_set(vcpu) &&
+		    el2_reg->translate)
+			val = el2_reg->translate(val);
 
-	/*
-	 * System registers listed in the switch are not restored on every
-	 * entry to the guest but are only restored on vcpu_load.
-	 *
-	 * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
-	 * should never be listed below, because the the MPIDR should only be
-	 * set once, before running the VCPU, and never changed later.
-	 */
-	switch (reg) {
-	case CSSELR_EL1:	write_sysreg_s(val, SYS_CSSELR_EL1);	return;
-	case SCTLR_EL1:		write_sysreg_s(val, SYS_SCTLR_EL12);	return;
-	case ACTLR_EL1:		write_sysreg_s(val, SYS_ACTLR_EL1);	return;
-	case CPACR_EL1:		write_sysreg_s(val, SYS_CPACR_EL12);	return;
-	case TTBR0_EL1:		write_sysreg_s(val, SYS_TTBR0_EL12);	return;
-	case TTBR1_EL1:		write_sysreg_s(val, SYS_TTBR1_EL12);	return;
-	case TCR_EL1:		write_sysreg_s(val, SYS_TCR_EL12);	return;
-	case ESR_EL1:		write_sysreg_s(val, SYS_ESR_EL12);	return;
-	case AFSR0_EL1:		write_sysreg_s(val, SYS_AFSR0_EL12);	return;
-	case AFSR1_EL1:		write_sysreg_s(val, SYS_AFSR1_EL12);	return;
-	case FAR_EL1:		write_sysreg_s(val, SYS_FAR_EL12);	return;
-	case MAIR_EL1:		write_sysreg_s(val, SYS_MAIR_EL12);	return;
-	case VBAR_EL1:		write_sysreg_s(val, SYS_VBAR_EL12);	return;
-	case CONTEXTIDR_EL1:	write_sysreg_s(val, SYS_CONTEXTIDR_EL12); return;
-	case TPIDR_EL0:		write_sysreg_s(val, SYS_TPIDR_EL0);	return;
-	case TPIDRRO_EL0:	write_sysreg_s(val, SYS_TPIDRRO_EL0);	return;
-	case TPIDR_EL1:		write_sysreg_s(val, SYS_TPIDR_EL1);	return;
-	case AMAIR_EL1:		write_sysreg_s(val, SYS_AMAIR_EL12);	return;
-	case CNTKCTL_EL1:	write_sysreg_s(val, SYS_CNTKCTL_EL12);	return;
-	case PAR_EL1:		write_sysreg_s(val, SYS_PAR_EL1);	return;
-	case DACR32_EL2:	write_sysreg_s(val, SYS_DACR32_EL2);	return;
-	case IFSR32_EL2:	write_sysreg_s(val, SYS_IFSR32_EL2);	return;
-	case DBGVCR32_EL2:	write_sysreg_s(val, SYS_DBGVCR32_EL2);	return;
-	case SP_EL2:		write_sysreg(val, sp_el1);		return;
-	case ELR_EL2:		write_sysreg_el1(val, SYS_ELR);		return;
+		/* Redirect this to the EL1 version of the register. */
+		reg = el2_reg->mapping;
+		WARN_ON(!__vcpu_write_sys_reg_to_cpu(val, reg));
+		return;
 	}
 
-immediate_write:
+	/* EL1 register can't be on the CPU if the guest is in vEL2. */
+	if (unlikely(is_hyp_ctxt(vcpu)))
+		goto memory_write;
+
+	if (__vcpu_write_sys_reg_to_cpu(val, reg))
+		return;
+
+memory_write:
 	 __vcpu_sys_reg(vcpu, reg) = val;
 }