Message ID | 20140818203604.4654.99905.stgit@joelaarch64.amd.com (mailing list archive) |
---|---|
State | New, archived |
On Mon, Aug 18, 2014 at 03:36:04PM -0500, Joel Schopp wrote:
> The current VTTBR_BADDR_MASK only masks 39 bits, which is broken on current
> systems. Rather than just add a bit, it seems like a good time to also set
> things at run time instead of compile time to accommodate more hardware.
>
> This patch sets TCR_EL2.PS, VTCR_EL2.T0SZ and vttbr_baddr_mask at runtime,
> not compile time.
>
> In ARMv8, the EL2 physical address size (TCR_EL2.PS) and stage2 input
> address size (VTCR_EL2.T0SZ) cannot be determined at compile time since
> they depend on hardware capability.
>
> According to Table D4-23 and Table D4-25 in the ARM DDI 0487A.b document,
> vttbr_x is calculated using different fixed values with consideration
> of T0SZ, granule size and the level of translation tables. Therefore,
> vttbr_baddr_mask should be determined dynamically.
>
> Changes since v4:
>  More minor cleanups from review
>  Moved some functions into headers
>
> Changes since v3:
>  Another rebase
>  Addressed minor comments from v2
>
> Changes since v2:
>  Rebased on https://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git next branch
>
> Changes since v1:
>  Rebased fix on Jungseok Lee's patch https://lkml.org/lkml/2014/5/12/189 to
>  provide better long term fix. Updated that patch to log error instead of
>  silently fail on unaligned vttbr.
>
> Cc: Christoffer Dall <christoffer.dall@linaro.org>
> Cc: Sungjinn Chung <sungjinn.chung@samsung.com>
> Signed-off-by: Jungseok Lee <jays.lee@samsung.com>
> Signed-off-by: Joel Schopp <joel.schopp@amd.com>
> ---
>  arch/arm/include/asm/kvm_mmu.h   |   12 ++++++
>  arch/arm/kvm/arm.c               |   17 +++++++-
>  arch/arm64/include/asm/kvm_arm.h |   17 +-------
>  arch/arm64/include/asm/kvm_mmu.h |   78 ++++++++++++++++++++++++++++++++++++++
>  arch/arm64/kvm/hyp-init.S        |   20 +++++++---
>  5 files changed, 122 insertions(+), 22 deletions(-)
>
> [full diff trimmed; the complete patch body appears at the end of this page]
>
> +#else /* 4K pages */
> +static inline int t0sz_to_vttbr_x(int t0sz)
> +{
> +	if (t0sz < 16 || t0sz > 33) {
> +		kvm_err("Cannot support %d-bit address space\n", 64 - t0sz);
> +		return 0;

why not just return an error code directly here? These are now
returning 0 on error which usually means everything was ok...

> +	}
> +	return 37 - t0sz;
> +}
> [...]
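The error-code variant Christoffer is suggesting would look roughly like the sketch below (4K-page branch shown; this exact code is not in the thread, and the caller must then treat negative returns as errors rather than testing for 0):

```c
/* Sketch: return -EINVAL instead of 0, so failure is unmistakable. */
static inline int t0sz_to_vttbr_x(int t0sz)
{
	if (t0sz < 16 || t0sz > 33) {
		kvm_err("Cannot support %d-bit address space\n", 64 - t0sz);
		return -EINVAL;
	}
	return 37 - t0sz;
}

/* The caller propagates the error instead of checking for 0: */
static inline int set_vttbr_baddr_mask(void)
{
	int t0sz = VTCR_EL2_T0SZ(kvm_get_phys_addr_shift());
	int vttbr_x = t0sz_to_vttbr_x(t0sz);

	if (vttbr_x < 0)
		return vttbr_x;
	vttbr_baddr_mask = (((1LLU << (48 - vttbr_x)) - 1) << (vttbr_x - 1));
	return 0;
}
```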
On 08/19/2014 07:24 AM, Christoffer Dall wrote:
> On Mon, Aug 18, 2014 at 03:36:04PM -0500, Joel Schopp wrote:
>> [full patch quoted]
>>
>> +#else /* 4K pages */
>> +static inline int t0sz_to_vttbr_x(int t0sz)
>> +{
>> +	if (t0sz < 16 || t0sz > 33) {
>> +		kvm_err("Cannot support %d-bit address space\n", 64 - t0sz);
>> +		return 0;
> why not just return an error code directly here? These are now
> returning 0 on error which usually means everything was ok...
The return is a value, not just an error code; returning an error would
overload that value. 0 just seemed like a convenient invalid value to
check, since a vttbr_x of 0 is invalid, but a negative error code would
be equally invalid. If this is the only issue it doesn't seem worth
respinning the patch for, but I'll change it to -EINVAL if for some
reason a v6 is needed.
On Tue, Aug 19, 2014 at 09:23:57AM -0500, Joel Schopp wrote:
> On 08/19/2014 07:24 AM, Christoffer Dall wrote:
>> [quoted patch trimmed]
>> why not just return an error code directly here? These are now
>> returning 0 on error which usually means everything was ok...
> The return is a value, not just an error code; returning an error would
> overload that value. 0 just seemed like a convenient invalid value to
> check, since a vttbr_x of 0 is invalid, but a negative error code would
> be equally invalid. If this is the only issue it doesn't seem worth
> respinning the patch for, but I'll change it to -EINVAL if for some
> reason a v6 is needed.

Have you given up on doing the alignment check with the proper size on
the pgd allocation for this patch?

-Christoffer
> Have you given up on doing the alignment check with the proper size on
> the pgd allocation for this patch?
Yes, I'd rather leave the extra check out of this patch. If I were
changing the pgd allocation code I would make sure to add a check, or if
there were a static check there now I would update it for the dynamic
value from the hardware, but it seems unrelated to add several checks to
other parts of the code beyond those already in the patch. I did leave
the functions in the headers so that checks like this can be added when
someone updates the code for other reasons, say, 4-level page tables.

-Joel
On Tue, Aug 19, 2014 at 09:49:07AM -0500, Joel Schopp wrote:
>> Have you given up on doing the alignment check with the proper size on
>> the pgd allocation for this patch?
> Yes, I'd rather leave the extra check out of this patch. [...]

hmmm, the point is that we need to ensure that we have a properly
aligned allocated PGD, that's what this patch currently addresses, and as
you pointed out, the BUG_ON() just before trying to run a VM is not the
nicest solution - we should really be dealing with this properly at
allocation time.

But, if you don't have time to look at that, then ok, I'll have to pick
it up myself.

However, you are hinting that we do not support 4 levels of page tables,
yet you do allow the t0sz_to_vttbr_x function to pass even when using
t0sz values only supported under 4 levels of page tables... What is
the rationale for that?

-Christoffer
> However, you are hinting that we do not support 4 levels of page tables,
> yet you do allow the t0sz_to_vttbr_x function to pass even when using
> t0sz values only supported under 4 levels of page tables... What is
> the rationale for that?
Minimizing merge conflicts. I figure both are going in within the next
month or two.
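For reference, the allocation-time check Christoffer is asking about might look something like this hypothetical helper (the name and call site are assumptions, not code from the thread):

```c
/*
 * Hypothetical sketch: verify at allocation time that the stage-2 pgd's
 * physical address can be encoded in VTTBR.BADDR, so a misaligned
 * allocation fails cleanly instead of hitting the BUG_ON() later in
 * update_vttbr().
 */
static int kvm_check_stage2_pgd_alignment(pgd_t *pgd, u64 baddr_mask)
{
	phys_addr_t pgd_phys = virt_to_phys(pgd);

	if (pgd_phys & ~baddr_mask)
		return -EINVAL;	/* caller frees the pgd and propagates */

	return 0;
}
```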
Hi Joel,

On Mon, Aug 18 2014 at 9:36:04 pm BST, Joel Schopp <joel.schopp@amd.com> wrote:
> The current VTTBR_BADDR_MASK only masks 39 bits, which is broken on current
> systems. [...]
>
> diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
> [...]
> +static inline int kvm_get_phys_addr_shift(void)
> +{
> +	return KVM_PHYS_SHIFT;
> +}
> +
> +static inline int set_vttbr_baddr_mask(void)
> +{
> +	vttbr_baddr_mask = VTTBR_BADDR_MASK;

Have you tried compiling this?

Apart from the obvious missing definition of the variable, I'm not fond
of functions with side-effects hidden in an include file. What is wrong
with just returning the mask and letting the common code set it?

> +	return 0;
> +}
> [...]
> diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
> [...]
> +#ifdef CONFIG_ARM64_64K_PAGES
> +static inline int t0sz_to_vttbr_x(int t0sz)
> +{
> +	if (t0sz < 16 || t0sz > 34) {
> +		kvm_err("Cannot support %d-bit address space\n", 64 - t0sz);
> +		return 0;

0 is definitely a bad value for something that is an error
case. Consider -EINVAL instead.

Also, what if we're in a range that only deals with more levels of page
tables than the kernel can deal with (remember we use the kernel page
table accessors)? See the new ARM64_VA_BITS and ARM64_PGTABLE_LEVELS
symbols that are now available, and use them to validate the range you
have.

> +	}
> +
> +	return 38 - t0sz;
> +}
> +#else /* 4K pages */
> +static inline int t0sz_to_vttbr_x(int t0sz)
> +{
> +	if (t0sz < 16 || t0sz > 33) {
> +		kvm_err("Cannot support %d-bit address space\n", 64 - t0sz);
> +		return 0;

Same here.

> +	}
> +	return 37 - t0sz;
> +}
> +#endif
> [...]
> +static u64 vttbr_baddr_mask;

Now every compilation unit that includes kvm_mmu.h has an instance of
this variable. I doubt that it is the intended effect.

> +
> +/**
> + * set_vttbr_baddr_mask - set mask value for vttbr base address
> + [...]
> + */
> +static inline int set_vttbr_baddr_mask(void)
> +{
> +	int t0sz, vttbr_x;
> +
> +	t0sz = VTCR_EL2_T0SZ(kvm_get_phys_addr_shift());
> +	vttbr_x = t0sz_to_vttbr_x(t0sz);
> +	if (!vttbr_x)
> +		return -EINVAL;
> +	vttbr_baddr_mask = (((1LLU << (48 - vttbr_x)) - 1) << (vttbr_x - 1));

I think this can now be written as GENMASK_ULL(48, (vttbr_x - 1)).

> +
> +	return 0;
> +}
> [...]
> diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
> [...]
> 	mrs	x5, ID_AA64MMFR0_EL1
> 	bfi	x4, x5, #16, #3
> +	msr	tcr_el2, x4
> +
> +	ldr	x4, =VTCR_EL2_FLAGS
> +	bfi	x4, x5, #16, #3
> +	and	x5, x5, #0xf
> +	adr	x6, t0sz
> +	add	x6, x6, x5, lsl #2
> +	ldr	w5, [x6]
> +	orr	x4, x4, x5

You'll need to validate the T0SZ value, and possibly adjust it so that
it is compatible with the addressing capability of the kernel. That
probably requires a slight change of the hyp-init API.

> 	msr	vtcr_el2, x4
> [...]
> +t0sz:
> +	.word	VTCR_EL2_T0SZ(32), VTCR_EL2_T0SZ(36), VTCR_EL2_T0SZ(40)
> +	.word	VTCR_EL2_T0SZ(42), VTCR_EL2_T0SZ(44), VTCR_EL2_T0SZ(48)
> ENDPROC(__kvm_hyp_init)

Another element that doesn't appear in this patch is that we need a way
for the kernel to expose the maximum input address to userspace (and
validate that no one puts memory outside of that range). This should be
a separate patch, but it is conceptually tied to the same problem.

Thanks,

	M.
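Combining Marc's two structural points (return the mask rather than assign a header-hidden variable, and build it with GENMASK_ULL) might look roughly like the sketch below. This is an assumption about what a v6 could do, not code from the thread; whether GENMASK_ULL(48, vttbr_x - 1) reproduces the original expression bit-for-bit is something the respin would need to verify.

```c
/*
 * Sketch: compute and return the VTTBR base address mask, leaving the
 * assignment to the common code, with the mask built via GENMASK_ULL()
 * per Marc's suggestion. Assumes the -EINVAL variant of
 * t0sz_to_vttbr_x() shown earlier.
 */
static inline u64 kvm_vttbr_baddr_mask(void)
{
	int t0sz = VTCR_EL2_T0SZ(kvm_get_phys_addr_shift());
	int vttbr_x = t0sz_to_vttbr_x(t0sz);

	if (vttbr_x < 0)
		return 0;	/* caller treats 0 as "unsupported" */

	return GENMASK_ULL(48, vttbr_x - 1);
}
```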
>> +static inline int set_vttbr_baddr_mask(void)
>> +{
>> +	vttbr_baddr_mask = VTTBR_BADDR_MASK;
> Have you tried compiling this?
>
> Apart from the obvious missing definition of the variable, I'm not fond
> of functions with side-effects hidden in an include file. What is wrong
> with just returning the mask and letting the common code set it?
I like that change, will do in v6.

>> +	if (t0sz < 16 || t0sz > 34) {
>> +		kvm_err("Cannot support %d-bit address space\n", 64 - t0sz);
>> +		return 0;
> 0 is definitely a bad value for something that is an error
> case. Consider -EINVAL instead.
OK.

> Also, what if we're in a range that only deals with more levels of page
> tables than the kernel can deal with (remember we use the kernel page
> table accessors)? See the new ARM64_VA_BITS and ARM64_PGTABLE_LEVELS
> symbols that are now available, and use them to validate the range you
> have.
With the simple current tests I can look at them and see they are
correct, even if I can't construct a scenario to test that they would
fail. However, if I add more complicated checks I'd really like to test
them catching the failure cases. Can you describe a case where we can
boot a kernel and then still have the checks fail in kvm?

>> +static u64 vttbr_baddr_mask;
> Now every compilation unit that includes kvm_mmu.h has an instance of
> this variable. I doubt that it is the intended effect.
The change for the comment farther above, to just return the mask and
have the common code set it, should resolve this as well.

>> +	vttbr_baddr_mask = (((1LLU << (48 - vttbr_x)) - 1) << (vttbr_x - 1));
> I think this can now be written as GENMASK_ULL(48, (vttbr_x - 1)).
That does improve readability, I like it.

> You'll need to validate the T0SZ value, and possibly adjust it so that
> it is compatible with the addressing capability of the kernel. That
> probably requires a slight change of the hyp-init API.
In order to do that I really should test that path. Can you think of a
way to generate a t0sz value that is incompatible with the kernel, but
where the kernel still boots, so I can load kvm and test it?

> Another element that doesn't appear in this patch is that we need a way
> for the kernel to expose the maximum input address to userspace (and
> validate that no one puts memory outside of that range). This should be
> a separate patch, but it is conceptually tied to the same problem.
I do think that separate patch would be a nice addition, and probably
dependent on this one, but this patch is useful on its own. Especially
since the current VTTBR_BADDR_MASK is broken.
On Tue, Aug 26 2014 at 7:35:21 pm BST, Joel Schopp <joel.schopp@amd.com> wrote:
>> Also, what if we're in a range that only deals with more levels of page
>> tables than the kernel can deal with (remember we use the kernel page
>> table accessors)? See the new ARM64_VA_BITS and ARM64_PGTABLE_LEVELS
>> symbols that are now available, and use them to validate the range you
>> have.
> With the simple current tests I can look at them and see they are
> correct, even if I can't construct a scenario to test that they would
> fail. However, if I add more complicated checks I'd really like to test
> them catching the failure cases. Can you describe a case where we can
> boot a kernel and then still have the checks fail in kvm?

You're looking at T0SZ values that are outside of what a given
configuration of the kernel can handle (T0SZ == 16, for example, is
only valid with 3 levels of page tables, and the current implementation
only deals with 2).

This is a case where things may explode, as you allow input addresses
that we simply cannot support, and in this case you want to cap T0SZ to
something sensible that is compatible with what the kernel (and thus
KVM) can express in terms of translations.

Testing is another thing, and I don't expect you to be able to test
every possible combination. Nonetheless, I do expect some reasonable
effort in the code to avoid deadly situations.

> I do think that separate patch would be a nice addition, and probably
> dependent on this one, but this patch is useful on its own. Especially
> since the current VTTBR_BADDR_MASK is broken.

This patch is indeed useful on its own, but I want to see a complete
solution, not a set of point fixes.

Thanks,

	M.
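A minimal sketch of the capping Marc describes, assuming the arm64 VA_BITS macro as the bound (the helper and its name are hypothetical, not from the thread):

```c
/*
 * Hypothetical clamp: never configure a stage-2 input range wider than
 * the kernel's own VA configuration can express with its page-table
 * accessors. A larger T0SZ means a *smaller* input address space, so
 * clamping upward is the safe direction.
 */
static inline int kvm_clamp_t0sz(int t0sz)
{
	int min_t0sz = VTCR_EL2_T0SZ(VA_BITS);	/* e.g. 25 for a 39-bit VA kernel */

	return max(t0sz, min_t0sz);
}
```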
```diff
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 5c7aa3c..73f6ff6 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -166,6 +166,18 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
 
 void stage2_flush_vm(struct kvm *kvm);
 
+static inline int kvm_get_phys_addr_shift(void)
+{
+	return KVM_PHYS_SHIFT;
+}
+
+static inline int set_vttbr_baddr_mask(void)
+{
+	vttbr_baddr_mask = VTTBR_BADDR_MASK;
+	return 0;
+}
+
+
 #endif	/* !__ASSEMBLY__ */
 
 #endif /* __ARM_KVM_MMU_H__ */
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 3c82b37..f396eb7 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -37,6 +37,7 @@
 #include <asm/mman.h>
 #include <asm/tlbflush.h>
 #include <asm/cacheflush.h>
+#include <asm/cputype.h>
 #include <asm/virt.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_asm.h>
@@ -466,8 +467,14 @@ static void update_vttbr(struct kvm *kvm)
 	/* update vttbr to be used with the new vmid */
 	pgd_phys = virt_to_phys(kvm->arch.pgd);
 	vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK;
-	kvm->arch.vttbr = pgd_phys & VTTBR_BADDR_MASK;
-	kvm->arch.vttbr |= vmid;
+
+	/*
+	 * If the VTTBR isn't aligned there is something wrong with the system
+	 * or kernel.
+	 */
+	BUG_ON(pgd_phys & ~vttbr_baddr_mask);
+
+	kvm->arch.vttbr = pgd_phys | vmid;
 
 	spin_unlock(&kvm_vmid_lock);
 }
@@ -1052,6 +1059,12 @@ int kvm_arch_init(void *opaque)
 		}
 	}
 
+	err = set_vttbr_baddr_mask();
+	if (err) {
+		kvm_err("Cannot set vttbr_baddr_mask\n");
+		return -EINVAL;
+	}
+
 	cpu_notifier_register_begin();
 
 	err = init_hyp_mode();
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 3d69030..8dbef70 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -94,7 +94,6 @@
 /* TCR_EL2 Registers bits */
 #define TCR_EL2_TBI	(1 << 20)
 #define TCR_EL2_PS	(7 << 16)
-#define TCR_EL2_PS_40B	(2 << 16)
 #define TCR_EL2_TG0	(1 << 14)
 #define TCR_EL2_SH0	(3 << 12)
 #define TCR_EL2_ORGN0	(3 << 10)
@@ -103,8 +102,6 @@
 #define TCR_EL2_MASK	(TCR_EL2_TG0 | TCR_EL2_SH0 | \
 			 TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ)
 
-#define TCR_EL2_FLAGS	(TCR_EL2_PS_40B)
-
 /* VTCR_EL2 Registers bits */
 #define VTCR_EL2_PS_MASK	(7 << 16)
 #define VTCR_EL2_TG0_MASK	(1 << 14)
@@ -119,36 +116,28 @@
 #define VTCR_EL2_SL0_MASK	(3 << 6)
 #define VTCR_EL2_SL0_LVL1	(1 << 6)
 #define VTCR_EL2_T0SZ_MASK	0x3f
-#define VTCR_EL2_T0SZ_40B	24
+#define VTCR_EL2_T0SZ(bits)	(64 - (bits))
 
 #ifdef CONFIG_ARM64_64K_PAGES
 /*
  * Stage2 translation configuration:
- * 40bits output (PS = 2)
- * 40bits input  (T0SZ = 24)
  * 64kB pages (TG0 = 1)
  * 2 level page tables (SL = 1)
  */
 #define VTCR_EL2_FLAGS	(VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER | \
 			 VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
-			 VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B)
-#define VTTBR_X		(38 - VTCR_EL2_T0SZ_40B)
+			 VTCR_EL2_SL0_LVL1)
 #else
 /*
  * Stage2 translation configuration:
- * 40bits output (PS = 2)
- * 40bits input  (T0SZ = 24)
  * 4kB pages (TG0 = 0)
  * 3 level page tables (SL = 1)
  */
 #define VTCR_EL2_FLAGS	(VTCR_EL2_TG0_4K | VTCR_EL2_SH0_INNER | \
			 VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
-			 VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B)
-#define VTTBR_X		(37 - VTCR_EL2_T0SZ_40B)
+			 VTCR_EL2_SL0_LVL1)
 #endif
 
-#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
-#define VTTBR_BADDR_MASK  (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
 #define VTTBR_VMID_SHIFT  (48LLU)
 #define VTTBR_VMID_MASK	  (0xffLLU << VTTBR_VMID_SHIFT)
 
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 7d29847..b6ae83b 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -152,5 +152,83 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
 
 void stage2_flush_vm(struct kvm *kvm);
 
+/*
+ * ARMv8 64K architecture limitations:
+ * 16 <= T0SZ <= 21 is valid under 3 level of translation tables
+ * 18 <= T0SZ <= 34 is valid under 2 level of translation tables
+ * 31 <= T0SZ <= 39 is valid under 1 level of translation tables
+ *
+ * ARMv8 4K architecture limitations:
+ * 16 <= T0SZ <= 24 is valid under 4 level of translation tables
+ * 21 <= T0SZ <= 33 is valid under 3 level of translation tables
+ * 30 <= T0SZ <= 39 is valid under 2 level of translation tables
+ *
+ * For 4K pages we only support 3 or 4 level, giving T0SZ a range of 16 to 33.
+ * For 64K pages we only support 2 or 3 level, giving T0SZ a range of 16 to 34.
+ *
+ * See Table D4-23 and Table D4-25 in ARM DDI 0487A.b to figure out
+ * the origin of the hardcoded values, 38 and 37.
+ */
+
+#ifdef CONFIG_ARM64_64K_PAGES
+static inline int t0sz_to_vttbr_x(int t0sz)
+{
+	if (t0sz < 16 || t0sz > 34) {
+		kvm_err("Cannot support %d-bit address space\n", 64 - t0sz);
+		return 0;
+	}
+
+	return 38 - t0sz;
+}
+#else /* 4K pages */
+static inline int t0sz_to_vttbr_x(int t0sz)
+{
+	if (t0sz < 16 || t0sz > 33) {
+		kvm_err("Cannot support %d-bit address space\n", 64 - t0sz);
+		return 0;
+	}
+	return 37 - t0sz;
+}
+#endif
+static inline int kvm_get_phys_addr_shift(void)
+{
+	int pa_range = read_cpuid(ID_AA64MMFR0_EL1) & 0xf;
+
+	switch (pa_range) {
+	case 0: return 32;
+	case 1: return 36;
+	case 2: return 40;
+	case 3: return 42;
+	case 4: return 44;
+	case 5: return 48;
+	default:
+		BUG();
+		return 0;
+	}
+}
+
+static u64 vttbr_baddr_mask;
+
+/**
+ * set_vttbr_baddr_mask - set mask value for vttbr base address
+ *
+ * In ARMv8, vttbr_baddr_mask cannot be determined at compile time since the
+ * stage2 input address size depends on hardware capability. Thus, we first
+ * need to read ID_AA64MMFR0_EL1.PARange and then set vttbr_baddr_mask with
+ * consideration of both the granule size and the level of translation tables.
+ */
+static inline int set_vttbr_baddr_mask(void)
+{
+	int t0sz, vttbr_x;
+
+	t0sz = VTCR_EL2_T0SZ(kvm_get_phys_addr_shift());
+	vttbr_x = t0sz_to_vttbr_x(t0sz);
+	if (!vttbr_x)
+		return -EINVAL;
+	vttbr_baddr_mask = (((1LLU << (48 - vttbr_x)) - 1) << (vttbr_x - 1));
+
+	return 0;
+}
+
 #endif /* __ASSEMBLY__ */
 #endif /* __ARM64_KVM_MMU_H__ */
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index d968796..c0f7634 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -63,17 +63,21 @@ __do_hyp_init:
 	mrs	x4, tcr_el1
 	ldr	x5, =TCR_EL2_MASK
 	and	x4, x4, x5
-	ldr	x5, =TCR_EL2_FLAGS
-	orr	x4, x4, x5
-	msr	tcr_el2, x4
-
-	ldr	x4, =VTCR_EL2_FLAGS
 	/*
 	 * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in
-	 * VTCR_EL2.
+	 * TCR_EL2 and both PS bits and T0SZ bits in VTCR_EL2.
	 */
 	mrs	x5, ID_AA64MMFR0_EL1
 	bfi	x4, x5, #16, #3
+	msr	tcr_el2, x4
+
+	ldr	x4, =VTCR_EL2_FLAGS
+	bfi	x4, x5, #16, #3
+	and	x5, x5, #0xf
+	adr	x6, t0sz
+	add	x6, x6, x5, lsl #2
+	ldr	w5, [x6]
+	orr	x4, x4, x5
 	msr	vtcr_el2, x4
 
 	mrs	x4, mair_el1
@@ -109,6 +113,10 @@ target: /* We're now in the trampoline code, switch page tables */
 
 	/* Hello, World! */
 	eret
+
+t0sz:
+	.word	VTCR_EL2_T0SZ(32), VTCR_EL2_T0SZ(36), VTCR_EL2_T0SZ(40)
+	.word	VTCR_EL2_T0SZ(42), VTCR_EL2_T0SZ(44), VTCR_EL2_T0SZ(48)
 ENDPROC(__kvm_hyp_init)
 
 	.ltorg
```
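To make the runtime computation concrete, the small standalone program below re-derives the mask the patch would build on a CPU reporting ID_AA64MMFR0_EL1.PARange == 2 (40-bit physical addresses) with 4K pages. It is an illustration of the patch's formulas only, not part of the patch.

```c
#include <stdint.h>
#include <stdio.h>

/* Worked example of the patch's vttbr_baddr_mask arithmetic. */
int main(void)
{
	int pa_bits = 40;		/* kvm_get_phys_addr_shift(): PARange == 2 */
	int t0sz = 64 - pa_bits;	/* VTCR_EL2_T0SZ(40) == 24 */
	int vttbr_x = 37 - t0sz;	/* 4K pages, 3-level tables: 13 */
	uint64_t mask = ((1ULL << (48 - vttbr_x)) - 1) << (vttbr_x - 1);

	/* prints 0x7ffffffff000, i.e. bits [46:12] of the pgd address */
	printf("vttbr_baddr_mask = 0x%llx\n", (unsigned long long)mask);
	return 0;
}
```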