Message ID | 20230127112932.38045-10-steven.price@arm.com (mailing list archive)
---|---
State | New, archived
Series | arm64: Support for Arm CCA in KVM
On Fri, 27 Jan 2023 11:29:13 +0000 Steven Price <steven.price@arm.com> wrote: > The RMM owns the stage 2 page tables for a realm, and KVM must request > that the RMM creates/destroys entries as necessary. The physical pages > to store the page tables are delegated to the realm as required, and can > be undelegated when no longer used. > This is only an introduction to RTT handling. While this patch is mostly like RTT teardown, better add more introduction to this patch. Also maybe refine the tittle to reflect what this patch is actually doing. > Signed-off-by: Steven Price <steven.price@arm.com> > --- > arch/arm64/include/asm/kvm_rme.h | 19 +++++ > arch/arm64/kvm/mmu.c | 7 +- > arch/arm64/kvm/rme.c | 139 +++++++++++++++++++++++++++++++ > 3 files changed, 162 insertions(+), 3 deletions(-) > > diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h > index a6318af3ed11..eea5118dfa8a 100644 > --- a/arch/arm64/include/asm/kvm_rme.h > +++ b/arch/arm64/include/asm/kvm_rme.h > @@ -35,5 +35,24 @@ u32 kvm_realm_ipa_limit(void); > int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap); > int kvm_init_realm_vm(struct kvm *kvm); > void kvm_destroy_realm(struct kvm *kvm); > +void kvm_realm_destroy_rtts(struct realm *realm, u32 ia_bits, u32 start_level); > + > +#define RME_RTT_BLOCK_LEVEL 2 > +#define RME_RTT_MAX_LEVEL 3 > + > +#define RME_PAGE_SHIFT 12 > +#define RME_PAGE_SIZE BIT(RME_PAGE_SHIFT) > +/* See ARM64_HW_PGTABLE_LEVEL_SHIFT() */ > +#define RME_RTT_LEVEL_SHIFT(l) \ > + ((RME_PAGE_SHIFT - 3) * (4 - (l)) + 3) > +#define RME_L2_BLOCK_SIZE BIT(RME_RTT_LEVEL_SHIFT(2)) > + > +static inline unsigned long rme_rtt_level_mapsize(int level) > +{ > + if (WARN_ON(level > RME_RTT_MAX_LEVEL)) > + return RME_PAGE_SIZE; > + > + return (1UL << RME_RTT_LEVEL_SHIFT(level)); > +} > > #endif > diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c > index 22c00274884a..f29558c5dcbc 100644 > --- a/arch/arm64/kvm/mmu.c > +++ b/arch/arm64/kvm/mmu.c > @@ -834,16 +834,17 @@ void stage2_unmap_vm(struct kvm *kvm) > void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) > { > struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu); > - struct kvm_pgtable *pgt = NULL; > + struct kvm_pgtable *pgt; > > write_lock(&kvm->mmu_lock); > + pgt = mmu->pgt; > if (kvm_is_realm(kvm) && > kvm_realm_state(kvm) != REALM_STATE_DYING) { > - /* TODO: teardown rtts */ > write_unlock(&kvm->mmu_lock); > + kvm_realm_destroy_rtts(&kvm->arch.realm, pgt->ia_bits, > + pgt->start_level); > return; > } > - pgt = mmu->pgt; > if (pgt) { > mmu->pgd_phys = 0; > mmu->pgt = NULL; > diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c > index 0c9d70e4d9e6..f7b0e5a779f8 100644 > --- a/arch/arm64/kvm/rme.c > +++ b/arch/arm64/kvm/rme.c > @@ -73,6 +73,28 @@ static int rmi_check_version(void) > return 0; > } > > +static void realm_destroy_undelegate_range(struct realm *realm, > + unsigned long ipa, > + unsigned long addr, > + ssize_t size) > +{ > + unsigned long rd = virt_to_phys(realm->rd); > + int ret; > + > + while (size > 0) { > + ret = rmi_data_destroy(rd, ipa); > + WARN_ON(ret); > + ret = rmi_granule_undelegate(addr); > + As the return value is not documented, what will happen if a page undelegate failed? Leaked? Some explanation is required here. 
> + if (ret) > + get_page(phys_to_page(addr)); > + > + addr += PAGE_SIZE; > + ipa += PAGE_SIZE; > + size -= PAGE_SIZE; > + } > +} > + > static unsigned long create_realm_feat_reg0(struct kvm *kvm) > { > unsigned long ia_bits = VTCR_EL2_IPA(kvm->arch.vtcr); > @@ -170,6 +192,123 @@ static int realm_create_rd(struct kvm *kvm) > return r; > } > > +static int realm_rtt_destroy(struct realm *realm, unsigned long addr, > + int level, phys_addr_t rtt_granule) > +{ > + addr = ALIGN_DOWN(addr, rme_rtt_level_mapsize(level - 1)); > + return rmi_rtt_destroy(rtt_granule, virt_to_phys(realm->rd), addr, > + level); > +} > + > +static int realm_destroy_free_rtt(struct realm *realm, unsigned long addr, > + int level, phys_addr_t rtt_granule) > +{ > + if (realm_rtt_destroy(realm, addr, level, rtt_granule)) > + return -ENXIO; > + if (!WARN_ON(rmi_granule_undelegate(rtt_granule))) > + put_page(phys_to_page(rtt_granule)); > + > + return 0; > +} > + > +static int realm_rtt_create(struct realm *realm, > + unsigned long addr, > + int level, > + phys_addr_t phys) > +{ > + addr = ALIGN_DOWN(addr, rme_rtt_level_mapsize(level - 1)); > + return rmi_rtt_create(phys, virt_to_phys(realm->rd), addr, level); > +} > + > +static int realm_tear_down_rtt_range(struct realm *realm, int level, > + unsigned long start, unsigned long end) > +{ > + phys_addr_t rd = virt_to_phys(realm->rd); > + ssize_t map_size = rme_rtt_level_mapsize(level); > + unsigned long addr, next_addr; > + bool failed = false; > + > + for (addr = start; addr < end; addr = next_addr) { > + phys_addr_t rtt_addr, tmp_rtt; > + struct rtt_entry rtt; > + unsigned long end_addr; > + > + next_addr = ALIGN(addr + 1, map_size); > + > + end_addr = min(next_addr, end); > + > + if (rmi_rtt_read_entry(rd, ALIGN_DOWN(addr, map_size), > + level, &rtt)) { > + failed = true; > + continue; > + } > + > + rtt_addr = rmi_rtt_get_phys(&rtt); > + WARN_ON(level != rtt.walk_level); > + > + switch (rtt.state) { > + case RMI_UNASSIGNED: > + case RMI_DESTROYED: > + break; > + case RMI_TABLE: > + if (realm_tear_down_rtt_range(realm, level + 1, > + addr, end_addr)) { > + failed = true; > + break; > + } > + if (IS_ALIGNED(addr, map_size) && > + next_addr <= end && > + realm_destroy_free_rtt(realm, addr, level + 1, > + rtt_addr)) > + failed = true; > + break; > + case RMI_ASSIGNED: > + WARN_ON(!rtt_addr); > + /* > + * If there is a block mapping, break it now, using the > + * spare_page. We are sure to have a valid delegated > + * page at spare_page before we enter here, otherwise > + * WARN once, which will be followed by further > + * warnings. > + */ > + tmp_rtt = realm->spare_page; > + if (level == 2 && > + !WARN_ON_ONCE(tmp_rtt == PHYS_ADDR_MAX) && > + realm_rtt_create(realm, addr, > + RME_RTT_MAX_LEVEL, tmp_rtt)) { > + WARN_ON(1); > + failed = true; > + break; > + } > + realm_destroy_undelegate_range(realm, addr, > + rtt_addr, map_size); > + /* > + * Collapse the last level table and make the spare page > + * reusable again. > + */ > + if (level == 2 && > + realm_rtt_destroy(realm, addr, RME_RTT_MAX_LEVEL, > + tmp_rtt)) > + failed = true; > + break; > + case RMI_VALID_NS: > + WARN_ON(rmi_rtt_unmap_unprotected(rd, addr, level)); > + break; > + default: > + WARN_ON(1); > + failed = true; > + break; > + } > + } > + > + return failed ? 
-EINVAL : 0; > +} > + > +void kvm_realm_destroy_rtts(struct realm *realm, u32 ia_bits, u32 start_level) > +{ > + realm_tear_down_rtt_range(realm, start_level, 0, (1UL << ia_bits)); > +} > + > /* Protects access to rme_vmid_bitmap */ > static DEFINE_SPINLOCK(rme_vmid_lock); > static unsigned long *rme_vmid_bitmap;
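[Editorial aside: for anyone checking the level arithmetic quoted above, the map sizes implied by RME_RTT_LEVEL_SHIFT() for the 4K RMM granule can be verified with a tiny standalone program. This is a userspace sketch of the same formula, not kernel code:

#include <assert.h>
#include <stdio.h>

#define RME_PAGE_SHIFT	12
/* Same formula as the patch: (RME_PAGE_SHIFT - 3) * (4 - l) + 3 */
#define RME_RTT_LEVEL_SHIFT(l)	((RME_PAGE_SHIFT - 3) * (4 - (l)) + 3)

int main(void)
{
	/* Level 3 entries map 4KiB, level 2 maps 2MiB, level 1 maps 1GiB. */
	assert((1UL << RME_RTT_LEVEL_SHIFT(3)) == 4UL << 10);
	assert((1UL << RME_RTT_LEVEL_SHIFT(2)) == 2UL << 20);
	assert((1UL << RME_RTT_LEVEL_SHIFT(1)) == 1UL << 30);

	printf("L1=%lu L2=%lu L3=%lu bytes\n",
	       1UL << RME_RTT_LEVEL_SHIFT(1),
	       1UL << RME_RTT_LEVEL_SHIFT(2),
	       1UL << RME_RTT_LEVEL_SHIFT(3));
	return 0;
}

Level 2 mapping 2MiB is why RME_RTT_BLOCK_LEVEL is 2 and why the teardown path only needs the spare page when it hits a block entry at that level.]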
On 13/02/2023 17:44, Zhi Wang wrote: > On Fri, 27 Jan 2023 11:29:13 +0000 > Steven Price <steven.price@arm.com> wrote: > >> The RMM owns the stage 2 page tables for a realm, and KVM must request >> that the RMM creates/destroys entries as necessary. The physical pages >> to store the page tables are delegated to the realm as required, and can >> be undelegated when no longer used. >> > > This is only an introduction to RTT handling. While this patch is mostly like > RTT teardown, better add more introduction to this patch. Also maybe refine > the tittle to reflect what this patch is actually doing. You've a definite point that this patch is mostly about RTT teardown. Technically it also adds the RTT creation path (realm_rtt_create) - hence the generic patch title. But I'll definitely expand the commit message to mention the complexity of tear down which is the bulk of the patch. >> Signed-off-by: Steven Price <steven.price@arm.com> >> --- >> arch/arm64/include/asm/kvm_rme.h | 19 +++++ >> arch/arm64/kvm/mmu.c | 7 +- >> arch/arm64/kvm/rme.c | 139 +++++++++++++++++++++++++++++++ >> 3 files changed, 162 insertions(+), 3 deletions(-) >> >> diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h >> index a6318af3ed11..eea5118dfa8a 100644 >> --- a/arch/arm64/include/asm/kvm_rme.h >> +++ b/arch/arm64/include/asm/kvm_rme.h >> @@ -35,5 +35,24 @@ u32 kvm_realm_ipa_limit(void); >> int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap); >> int kvm_init_realm_vm(struct kvm *kvm); >> void kvm_destroy_realm(struct kvm *kvm); >> +void kvm_realm_destroy_rtts(struct realm *realm, u32 ia_bits, u32 start_level); >> + >> +#define RME_RTT_BLOCK_LEVEL 2 >> +#define RME_RTT_MAX_LEVEL 3 >> + >> +#define RME_PAGE_SHIFT 12 >> +#define RME_PAGE_SIZE BIT(RME_PAGE_SHIFT) >> +/* See ARM64_HW_PGTABLE_LEVEL_SHIFT() */ >> +#define RME_RTT_LEVEL_SHIFT(l) \ >> + ((RME_PAGE_SHIFT - 3) * (4 - (l)) + 3) >> +#define RME_L2_BLOCK_SIZE BIT(RME_RTT_LEVEL_SHIFT(2)) >> + >> +static inline unsigned long rme_rtt_level_mapsize(int level) >> +{ >> + if (WARN_ON(level > RME_RTT_MAX_LEVEL)) >> + return RME_PAGE_SIZE; >> + >> + return (1UL << RME_RTT_LEVEL_SHIFT(level)); >> +} >> >> #endif >> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c >> index 22c00274884a..f29558c5dcbc 100644 >> --- a/arch/arm64/kvm/mmu.c >> +++ b/arch/arm64/kvm/mmu.c >> @@ -834,16 +834,17 @@ void stage2_unmap_vm(struct kvm *kvm) >> void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) >> { >> struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu); >> - struct kvm_pgtable *pgt = NULL; >> + struct kvm_pgtable *pgt; >> >> write_lock(&kvm->mmu_lock); >> + pgt = mmu->pgt; >> if (kvm_is_realm(kvm) && >> kvm_realm_state(kvm) != REALM_STATE_DYING) { >> - /* TODO: teardown rtts */ >> write_unlock(&kvm->mmu_lock); >> + kvm_realm_destroy_rtts(&kvm->arch.realm, pgt->ia_bits, >> + pgt->start_level); >> return; >> } >> - pgt = mmu->pgt; >> if (pgt) { >> mmu->pgd_phys = 0; >> mmu->pgt = NULL; >> diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c >> index 0c9d70e4d9e6..f7b0e5a779f8 100644 >> --- a/arch/arm64/kvm/rme.c >> +++ b/arch/arm64/kvm/rme.c >> @@ -73,6 +73,28 @@ static int rmi_check_version(void) >> return 0; >> } >> >> +static void realm_destroy_undelegate_range(struct realm *realm, >> + unsigned long ipa, >> + unsigned long addr, >> + ssize_t size) >> +{ >> + unsigned long rd = virt_to_phys(realm->rd); >> + int ret; >> + >> + while (size > 0) { >> + ret = rmi_data_destroy(rd, ipa); >> + WARN_ON(ret); >> + ret = 
rmi_granule_undelegate(addr); >> + > As the return value is not documented, what will happen if a page undelegate > failed? Leaked? Some explanation is required here. Yes - it's leaked. I'll add a comment to explain the get_page() call. Thanks, Steve >> + if (ret) >> + get_page(phys_to_page(addr)); >> + >> + addr += PAGE_SIZE; >> + ipa += PAGE_SIZE; >> + size -= PAGE_SIZE; >> + } >> +} >> + >> static unsigned long create_realm_feat_reg0(struct kvm *kvm) >> { >> unsigned long ia_bits = VTCR_EL2_IPA(kvm->arch.vtcr); >> @@ -170,6 +192,123 @@ static int realm_create_rd(struct kvm *kvm) >> return r; >> } >> >> +static int realm_rtt_destroy(struct realm *realm, unsigned long addr, >> + int level, phys_addr_t rtt_granule) >> +{ >> + addr = ALIGN_DOWN(addr, rme_rtt_level_mapsize(level - 1)); >> + return rmi_rtt_destroy(rtt_granule, virt_to_phys(realm->rd), addr, >> + level); >> +} >> + >> +static int realm_destroy_free_rtt(struct realm *realm, unsigned long addr, >> + int level, phys_addr_t rtt_granule) >> +{ >> + if (realm_rtt_destroy(realm, addr, level, rtt_granule)) >> + return -ENXIO; >> + if (!WARN_ON(rmi_granule_undelegate(rtt_granule))) >> + put_page(phys_to_page(rtt_granule)); >> + >> + return 0; >> +} >> + >> +static int realm_rtt_create(struct realm *realm, >> + unsigned long addr, >> + int level, >> + phys_addr_t phys) >> +{ >> + addr = ALIGN_DOWN(addr, rme_rtt_level_mapsize(level - 1)); >> + return rmi_rtt_create(phys, virt_to_phys(realm->rd), addr, level); >> +} >> + >> +static int realm_tear_down_rtt_range(struct realm *realm, int level, >> + unsigned long start, unsigned long end) >> +{ >> + phys_addr_t rd = virt_to_phys(realm->rd); >> + ssize_t map_size = rme_rtt_level_mapsize(level); >> + unsigned long addr, next_addr; >> + bool failed = false; >> + >> + for (addr = start; addr < end; addr = next_addr) { >> + phys_addr_t rtt_addr, tmp_rtt; >> + struct rtt_entry rtt; >> + unsigned long end_addr; >> + >> + next_addr = ALIGN(addr + 1, map_size); >> + >> + end_addr = min(next_addr, end); >> + >> + if (rmi_rtt_read_entry(rd, ALIGN_DOWN(addr, map_size), >> + level, &rtt)) { >> + failed = true; >> + continue; >> + } >> + >> + rtt_addr = rmi_rtt_get_phys(&rtt); >> + WARN_ON(level != rtt.walk_level); >> + >> + switch (rtt.state) { >> + case RMI_UNASSIGNED: >> + case RMI_DESTROYED: >> + break; >> + case RMI_TABLE: >> + if (realm_tear_down_rtt_range(realm, level + 1, >> + addr, end_addr)) { >> + failed = true; >> + break; >> + } >> + if (IS_ALIGNED(addr, map_size) && >> + next_addr <= end && >> + realm_destroy_free_rtt(realm, addr, level + 1, >> + rtt_addr)) >> + failed = true; >> + break; >> + case RMI_ASSIGNED: >> + WARN_ON(!rtt_addr); >> + /* >> + * If there is a block mapping, break it now, using the >> + * spare_page. We are sure to have a valid delegated >> + * page at spare_page before we enter here, otherwise >> + * WARN once, which will be followed by further >> + * warnings. >> + */ >> + tmp_rtt = realm->spare_page; >> + if (level == 2 && >> + !WARN_ON_ONCE(tmp_rtt == PHYS_ADDR_MAX) && >> + realm_rtt_create(realm, addr, >> + RME_RTT_MAX_LEVEL, tmp_rtt)) { >> + WARN_ON(1); >> + failed = true; >> + break; >> + } >> + realm_destroy_undelegate_range(realm, addr, >> + rtt_addr, map_size); >> + /* >> + * Collapse the last level table and make the spare page >> + * reusable again. 
>> + */ >> + if (level == 2 && >> + realm_rtt_destroy(realm, addr, RME_RTT_MAX_LEVEL, >> + tmp_rtt)) >> + failed = true; >> + break; >> + case RMI_VALID_NS: >> + WARN_ON(rmi_rtt_unmap_unprotected(rd, addr, level)); >> + break; >> + default: >> + WARN_ON(1); >> + failed = true; >> + break; >> + } >> + } >> + >> + return failed ? -EINVAL : 0; >> +} >> + >> +void kvm_realm_destroy_rtts(struct realm *realm, u32 ia_bits, u32 start_level) >> +{ >> + realm_tear_down_rtt_range(realm, start_level, 0, (1UL << ia_bits)); >> +} >> + >> /* Protects access to rme_vmid_bitmap */ >> static DEFINE_SPINLOCK(rme_vmid_lock); >> static unsigned long *rme_vmid_bitmap; >
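[Editorial aside: to illustrate the "leaked" behaviour Steve confirms above, here is a self-contained userspace model. struct page, get_page() and rmi_granule_undelegate_stub() are stand-ins invented for the sketch; the point is only that the extra reference taken on an undelegate failure is never dropped, so the page can never return to the host's free pool while the RMM still considers it delegated:

#include <stdio.h>

struct page { int refcount; };

static void get_page(struct page *p)
{
	p->refcount++;
}

/* Pretend the RMM refuses to undelegate the second granule. */
static int rmi_granule_undelegate_stub(unsigned long addr)
{
	return addr == 0x1000 ? -1 : 0;
}

int main(void)
{
	struct page pages[3] = { {1}, {1}, {1} };
	unsigned long i;

	for (i = 0; i < 3; i++) {
		unsigned long addr = i * 0x1000;

		if (rmi_granule_undelegate_stub(addr)) {
			/* Leak: this reference is intentionally never put. */
			get_page(&pages[i]);
			printf("granule %#lx leaked, refcount now %d\n",
			       addr, pages[i].refcount);
		}
	}
	return 0;
}
]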
On Fri, 3 Mar 2023 14:04:56 +0000 Steven Price <steven.price@arm.com> wrote: > On 13/02/2023 17:44, Zhi Wang wrote: > > On Fri, 27 Jan 2023 11:29:13 +0000 > > Steven Price <steven.price@arm.com> wrote: > > > >> The RMM owns the stage 2 page tables for a realm, and KVM must request > >> that the RMM creates/destroys entries as necessary. The physical pages > >> to store the page tables are delegated to the realm as required, and can > >> be undelegated when no longer used. > >> > > > > This is only an introduction to RTT handling. While this patch is mostly like > > RTT teardown, better add more introduction to this patch. Also maybe refine > > the tittle to reflect what this patch is actually doing. > > You've a definite point that this patch is mostly about RTT teardown. > Technically it also adds the RTT creation path (realm_rtt_create) - > hence the generic patch title. > But realm_rtt_create() seem only used in realm_tear_down_rtt_range(). That makes me wonder where is the real RTT creation path. > But I'll definitely expand the commit message to mention the complexity > of tear down which is the bulk of the patch. It is also a good place to explain more about the RTT. > > >> Signed-off-by: Steven Price <steven.price@arm.com> > >> --- > >> arch/arm64/include/asm/kvm_rme.h | 19 +++++ > >> arch/arm64/kvm/mmu.c | 7 +- > >> arch/arm64/kvm/rme.c | 139 +++++++++++++++++++++++++++++++ > >> 3 files changed, 162 insertions(+), 3 deletions(-) > >> > >> diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h > >> index a6318af3ed11..eea5118dfa8a 100644 > >> --- a/arch/arm64/include/asm/kvm_rme.h > >> +++ b/arch/arm64/include/asm/kvm_rme.h > >> @@ -35,5 +35,24 @@ u32 kvm_realm_ipa_limit(void); > >> int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap); > >> int kvm_init_realm_vm(struct kvm *kvm); > >> void kvm_destroy_realm(struct kvm *kvm); > >> +void kvm_realm_destroy_rtts(struct realm *realm, u32 ia_bits, u32 start_level); > >> + > >> +#define RME_RTT_BLOCK_LEVEL 2 > >> +#define RME_RTT_MAX_LEVEL 3 > >> + > >> +#define RME_PAGE_SHIFT 12 > >> +#define RME_PAGE_SIZE BIT(RME_PAGE_SHIFT) > >> +/* See ARM64_HW_PGTABLE_LEVEL_SHIFT() */ > >> +#define RME_RTT_LEVEL_SHIFT(l) \ > >> + ((RME_PAGE_SHIFT - 3) * (4 - (l)) + 3) > >> +#define RME_L2_BLOCK_SIZE BIT(RME_RTT_LEVEL_SHIFT(2)) > >> + > >> +static inline unsigned long rme_rtt_level_mapsize(int level) > >> +{ > >> + if (WARN_ON(level > RME_RTT_MAX_LEVEL)) > >> + return RME_PAGE_SIZE; > >> + > >> + return (1UL << RME_RTT_LEVEL_SHIFT(level)); > >> +} > >> > >> #endif > >> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c > >> index 22c00274884a..f29558c5dcbc 100644 > >> --- a/arch/arm64/kvm/mmu.c > >> +++ b/arch/arm64/kvm/mmu.c > >> @@ -834,16 +834,17 @@ void stage2_unmap_vm(struct kvm *kvm) > >> void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) > >> { > >> struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu); > >> - struct kvm_pgtable *pgt = NULL; > >> + struct kvm_pgtable *pgt; > >> > >> write_lock(&kvm->mmu_lock); > >> + pgt = mmu->pgt; > >> if (kvm_is_realm(kvm) && > >> kvm_realm_state(kvm) != REALM_STATE_DYING) { > >> - /* TODO: teardown rtts */ > >> write_unlock(&kvm->mmu_lock); > >> + kvm_realm_destroy_rtts(&kvm->arch.realm, pgt->ia_bits, > >> + pgt->start_level); > >> return; > >> } > >> - pgt = mmu->pgt; > >> if (pgt) { > >> mmu->pgd_phys = 0; > >> mmu->pgt = NULL; > >> diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c > >> index 0c9d70e4d9e6..f7b0e5a779f8 100644 > >> --- a/arch/arm64/kvm/rme.c > >> +++ 
b/arch/arm64/kvm/rme.c > >> @@ -73,6 +73,28 @@ static int rmi_check_version(void) > >> return 0; > >> } > >> > >> +static void realm_destroy_undelegate_range(struct realm *realm, > >> + unsigned long ipa, > >> + unsigned long addr, > >> + ssize_t size) > >> +{ > >> + unsigned long rd = virt_to_phys(realm->rd); > >> + int ret; > >> + > >> + while (size > 0) { > >> + ret = rmi_data_destroy(rd, ipa); > >> + WARN_ON(ret); > >> + ret = rmi_granule_undelegate(addr); > >> + > > As the return value is not documented, what will happen if a page undelegate > > failed? Leaked? Some explanation is required here. > > Yes - it's leaked. I'll add a comment to explain the get_page() call. > > Thanks, > > Steve > > >> + if (ret) > >> + get_page(phys_to_page(addr)); > >> + > >> + addr += PAGE_SIZE; > >> + ipa += PAGE_SIZE; > >> + size -= PAGE_SIZE; > >> + } > >> +} > >> + > >> static unsigned long create_realm_feat_reg0(struct kvm *kvm) > >> { > >> unsigned long ia_bits = VTCR_EL2_IPA(kvm->arch.vtcr); > >> @@ -170,6 +192,123 @@ static int realm_create_rd(struct kvm *kvm) > >> return r; > >> } > >> > >> +static int realm_rtt_destroy(struct realm *realm, unsigned long addr, > >> + int level, phys_addr_t rtt_granule) > >> +{ > >> + addr = ALIGN_DOWN(addr, rme_rtt_level_mapsize(level - 1)); > >> + return rmi_rtt_destroy(rtt_granule, virt_to_phys(realm->rd), addr, > >> + level); > >> +} > >> + > >> +static int realm_destroy_free_rtt(struct realm *realm, unsigned long addr, > >> + int level, phys_addr_t rtt_granule) > >> +{ > >> + if (realm_rtt_destroy(realm, addr, level, rtt_granule)) > >> + return -ENXIO; > >> + if (!WARN_ON(rmi_granule_undelegate(rtt_granule))) > >> + put_page(phys_to_page(rtt_granule)); > >> + > >> + return 0; > >> +} > >> + > >> +static int realm_rtt_create(struct realm *realm, > >> + unsigned long addr, > >> + int level, > >> + phys_addr_t phys) > >> +{ > >> + addr = ALIGN_DOWN(addr, rme_rtt_level_mapsize(level - 1)); > >> + return rmi_rtt_create(phys, virt_to_phys(realm->rd), addr, level); > >> +} > >> + > >> +static int realm_tear_down_rtt_range(struct realm *realm, int level, > >> + unsigned long start, unsigned long end) > >> +{ > >> + phys_addr_t rd = virt_to_phys(realm->rd); > >> + ssize_t map_size = rme_rtt_level_mapsize(level); > >> + unsigned long addr, next_addr; > >> + bool failed = false; > >> + > >> + for (addr = start; addr < end; addr = next_addr) { > >> + phys_addr_t rtt_addr, tmp_rtt; > >> + struct rtt_entry rtt; > >> + unsigned long end_addr; > >> + > >> + next_addr = ALIGN(addr + 1, map_size); > >> + > >> + end_addr = min(next_addr, end); > >> + > >> + if (rmi_rtt_read_entry(rd, ALIGN_DOWN(addr, map_size), > >> + level, &rtt)) { > >> + failed = true; > >> + continue; > >> + } > >> + > >> + rtt_addr = rmi_rtt_get_phys(&rtt); > >> + WARN_ON(level != rtt.walk_level); > >> + > >> + switch (rtt.state) { > >> + case RMI_UNASSIGNED: > >> + case RMI_DESTROYED: > >> + break; > >> + case RMI_TABLE: > >> + if (realm_tear_down_rtt_range(realm, level + 1, > >> + addr, end_addr)) { > >> + failed = true; > >> + break; > >> + } > >> + if (IS_ALIGNED(addr, map_size) && > >> + next_addr <= end && > >> + realm_destroy_free_rtt(realm, addr, level + 1, > >> + rtt_addr)) > >> + failed = true; > >> + break; > >> + case RMI_ASSIGNED: > >> + WARN_ON(!rtt_addr); > >> + /* > >> + * If there is a block mapping, break it now, using the > >> + * spare_page. 
We are sure to have a valid delegated > >> + * page at spare_page before we enter here, otherwise > >> + * WARN once, which will be followed by further > >> + * warnings. > >> + */ > >> + tmp_rtt = realm->spare_page; > >> + if (level == 2 && > >> + !WARN_ON_ONCE(tmp_rtt == PHYS_ADDR_MAX) && > >> + realm_rtt_create(realm, addr, > >> + RME_RTT_MAX_LEVEL, tmp_rtt)) { > >> + WARN_ON(1); > >> + failed = true; > >> + break; > >> + } > >> + realm_destroy_undelegate_range(realm, addr, > >> + rtt_addr, map_size); > >> + /* > >> + * Collapse the last level table and make the spare page > >> + * reusable again. > >> + */ > >> + if (level == 2 && > >> + realm_rtt_destroy(realm, addr, RME_RTT_MAX_LEVEL, > >> + tmp_rtt)) > >> + failed = true; > >> + break; > >> + case RMI_VALID_NS: > >> + WARN_ON(rmi_rtt_unmap_unprotected(rd, addr, level)); > >> + break; > >> + default: > >> + WARN_ON(1); > >> + failed = true; > >> + break; > >> + } > >> + } > >> + > >> + return failed ? -EINVAL : 0; > >> +} > >> + > >> +void kvm_realm_destroy_rtts(struct realm *realm, u32 ia_bits, u32 start_level) > >> +{ > >> + realm_tear_down_rtt_range(realm, start_level, 0, (1UL << ia_bits)); > >> +} > >> + > >> /* Protects access to rme_vmid_bitmap */ > >> static DEFINE_SPINLOCK(rme_vmid_lock); > >> static unsigned long *rme_vmid_bitmap; > > >
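[Editorial aside: since the block-mapping case is the subtle part of the teardown quoted above, here is a print-only userspace sketch of the RMI call order the RMI_ASSIGNED (level 2) branch performs. The rmi_* functions are stand-ins for the real wrappers in rme.c, and GRANULES_PER_BLOCK is shrunk to 4 so the output stays readable (a real 2MiB block holds 512 granules):

#include <stdio.h>

#define GRANULE			0x1000UL
#define GRANULES_PER_BLOCK	4	/* 512 for a real 2MiB block */

/* Print-only stand-ins for the RMI wrappers in rme.c. */
static void rmi_rtt_create(unsigned long ipa, int level)
{
	printf("RTT_CREATE         ipa=%#lx level=%d (break block using spare page)\n",
	       ipa, level);
}

static void rmi_data_destroy(unsigned long ipa)
{
	printf("DATA_DESTROY       ipa=%#lx\n", ipa);
}

static void rmi_granule_undelegate(unsigned long pa)
{
	printf("GRANULE_UNDELEGATE pa=%#lx\n", pa);
}

static void rmi_rtt_destroy(unsigned long ipa, int level)
{
	printf("RTT_DESTROY        ipa=%#lx level=%d (spare page reusable again)\n",
	       ipa, level);
}

int main(void)
{
	unsigned long ipa = 0x40000000UL;	/* IPA covered by the block */
	unsigned long pa  = 0x80000000UL;	/* PA backing the block     */
	unsigned long i;

	rmi_rtt_create(ipa, 3);			/* 1: unfold the block into a level-3 table */
	for (i = 0; i < GRANULES_PER_BLOCK; i++) {
		rmi_data_destroy(ipa + i * GRANULE);	/* 2: destroy + undelegate each granule */
		rmi_granule_undelegate(pa + i * GRANULE);
	}
	rmi_rtt_destroy(ipa, 3);		/* 3: collapse the level-3 table */
	return 0;
}

As Zhi notes above, this teardown path is the only caller of realm_rtt_create() in the patch as posted, which is exactly what prompts the question about where the "real" RTT creation path lives.]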
On 27-01-2023 04:59 pm, Steven Price wrote: > The RMM owns the stage 2 page tables for a realm, and KVM must request > that the RMM creates/destroys entries as necessary. The physical pages > to store the page tables are delegated to the realm as required, and can > be undelegated when no longer used. > > Signed-off-by: Steven Price <steven.price@arm.com> > --- > arch/arm64/include/asm/kvm_rme.h | 19 +++++ > arch/arm64/kvm/mmu.c | 7 +- > arch/arm64/kvm/rme.c | 139 +++++++++++++++++++++++++++++++ > 3 files changed, 162 insertions(+), 3 deletions(-) > > diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h > index a6318af3ed11..eea5118dfa8a 100644 > --- a/arch/arm64/include/asm/kvm_rme.h > +++ b/arch/arm64/include/asm/kvm_rme.h > @@ -35,5 +35,24 @@ u32 kvm_realm_ipa_limit(void); > int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap); > int kvm_init_realm_vm(struct kvm *kvm); > void kvm_destroy_realm(struct kvm *kvm); > +void kvm_realm_destroy_rtts(struct realm *realm, u32 ia_bits, u32 start_level); > + > +#define RME_RTT_BLOCK_LEVEL 2 > +#define RME_RTT_MAX_LEVEL 3 > + > +#define RME_PAGE_SHIFT 12 > +#define RME_PAGE_SIZE BIT(RME_PAGE_SHIFT) Can we use PAGE_SIZE and PAGE_SHIFT instead of redefining? May be we can use them to define RME_PAGE_SIZE and RME_PAGE_SHIFT. > +/* See ARM64_HW_PGTABLE_LEVEL_SHIFT() */ > +#define RME_RTT_LEVEL_SHIFT(l) \ > + ((RME_PAGE_SHIFT - 3) * (4 - (l)) + 3) Instead of defining again, can we define to ARM64_HW_PGTABLE_LEVEL_SHIFT? > +#define RME_L2_BLOCK_SIZE BIT(RME_RTT_LEVEL_SHIFT(2)) > + > +static inline unsigned long rme_rtt_level_mapsize(int level) > +{ > + if (WARN_ON(level > RME_RTT_MAX_LEVEL)) > + return RME_PAGE_SIZE; > + > + return (1UL << RME_RTT_LEVEL_SHIFT(level)); > +} > > #endif > diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c > index 22c00274884a..f29558c5dcbc 100644 > --- a/arch/arm64/kvm/mmu.c > +++ b/arch/arm64/kvm/mmu.c > @@ -834,16 +834,17 @@ void stage2_unmap_vm(struct kvm *kvm) > void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) > { > struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu); > - struct kvm_pgtable *pgt = NULL; > + struct kvm_pgtable *pgt; > > write_lock(&kvm->mmu_lock); > + pgt = mmu->pgt; > if (kvm_is_realm(kvm) && > kvm_realm_state(kvm) != REALM_STATE_DYING) { > - /* TODO: teardown rtts */ > write_unlock(&kvm->mmu_lock); > + kvm_realm_destroy_rtts(&kvm->arch.realm, pgt->ia_bits, > + pgt->start_level); > return; > } > - pgt = mmu->pgt; > if (pgt) { > mmu->pgd_phys = 0; > mmu->pgt = NULL; > diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c > index 0c9d70e4d9e6..f7b0e5a779f8 100644 > --- a/arch/arm64/kvm/rme.c > +++ b/arch/arm64/kvm/rme.c > @@ -73,6 +73,28 @@ static int rmi_check_version(void) > return 0; > } > > +static void realm_destroy_undelegate_range(struct realm *realm, > + unsigned long ipa, > + unsigned long addr, > + ssize_t size) > +{ > + unsigned long rd = virt_to_phys(realm->rd); > + int ret; > + > + while (size > 0) { > + ret = rmi_data_destroy(rd, ipa); > + WARN_ON(ret); > + ret = rmi_granule_undelegate(addr); > + > + if (ret) > + get_page(phys_to_page(addr)); > + > + addr += PAGE_SIZE; > + ipa += PAGE_SIZE; > + size -= PAGE_SIZE; > + } > +} > + > static unsigned long create_realm_feat_reg0(struct kvm *kvm) > { > unsigned long ia_bits = VTCR_EL2_IPA(kvm->arch.vtcr); > @@ -170,6 +192,123 @@ static int realm_create_rd(struct kvm *kvm) > return r; > } > > +static int realm_rtt_destroy(struct realm *realm, unsigned long addr, > + int level, phys_addr_t rtt_granule) > +{ > 
+ addr = ALIGN_DOWN(addr, rme_rtt_level_mapsize(level - 1)); > + return rmi_rtt_destroy(rtt_granule, virt_to_phys(realm->rd), addr, > + level); > +} > + > +static int realm_destroy_free_rtt(struct realm *realm, unsigned long addr, > + int level, phys_addr_t rtt_granule) > +{ > + if (realm_rtt_destroy(realm, addr, level, rtt_granule)) > + return -ENXIO; > + if (!WARN_ON(rmi_granule_undelegate(rtt_granule))) > + put_page(phys_to_page(rtt_granule)); > + > + return 0; > +} > + > +static int realm_rtt_create(struct realm *realm, > + unsigned long addr, > + int level, > + phys_addr_t phys) > +{ > + addr = ALIGN_DOWN(addr, rme_rtt_level_mapsize(level - 1)); > + return rmi_rtt_create(phys, virt_to_phys(realm->rd), addr, level); > +} > + > +static int realm_tear_down_rtt_range(struct realm *realm, int level, > + unsigned long start, unsigned long end) > +{ > + phys_addr_t rd = virt_to_phys(realm->rd); > + ssize_t map_size = rme_rtt_level_mapsize(level); > + unsigned long addr, next_addr; > + bool failed = false; > + > + for (addr = start; addr < end; addr = next_addr) { > + phys_addr_t rtt_addr, tmp_rtt; > + struct rtt_entry rtt; > + unsigned long end_addr; > + > + next_addr = ALIGN(addr + 1, map_size); > + > + end_addr = min(next_addr, end); > + > + if (rmi_rtt_read_entry(rd, ALIGN_DOWN(addr, map_size), > + level, &rtt)) { > + failed = true; > + continue; > + } > + > + rtt_addr = rmi_rtt_get_phys(&rtt); > + WARN_ON(level != rtt.walk_level); > + > + switch (rtt.state) { > + case RMI_UNASSIGNED: > + case RMI_DESTROYED: > + break; > + case RMI_TABLE: > + if (realm_tear_down_rtt_range(realm, level + 1, > + addr, end_addr)) { > + failed = true; > + break; > + } > + if (IS_ALIGNED(addr, map_size) && > + next_addr <= end && > + realm_destroy_free_rtt(realm, addr, level + 1, > + rtt_addr)) > + failed = true; > + break; > + case RMI_ASSIGNED: > + WARN_ON(!rtt_addr); > + /* > + * If there is a block mapping, break it now, using the > + * spare_page. We are sure to have a valid delegated > + * page at spare_page before we enter here, otherwise > + * WARN once, which will be followed by further > + * warnings. > + */ > + tmp_rtt = realm->spare_page; > + if (level == 2 && > + !WARN_ON_ONCE(tmp_rtt == PHYS_ADDR_MAX) && > + realm_rtt_create(realm, addr, > + RME_RTT_MAX_LEVEL, tmp_rtt)) { > + WARN_ON(1); > + failed = true; > + break; > + } > + realm_destroy_undelegate_range(realm, addr, > + rtt_addr, map_size); > + /* > + * Collapse the last level table and make the spare page > + * reusable again. > + */ > + if (level == 2 && > + realm_rtt_destroy(realm, addr, RME_RTT_MAX_LEVEL, > + tmp_rtt)) > + failed = true; > + break; > + case RMI_VALID_NS: > + WARN_ON(rmi_rtt_unmap_unprotected(rd, addr, level)); > + break; > + default: > + WARN_ON(1); > + failed = true; > + break; > + } > + } > + > + return failed ? -EINVAL : 0; > +} > + > +void kvm_realm_destroy_rtts(struct realm *realm, u32 ia_bits, u32 start_level) > +{ > + realm_tear_down_rtt_range(realm, start_level, 0, (1UL << ia_bits)); > +} > + > /* Protects access to rme_vmid_bitmap */ > static DEFINE_SPINLOCK(rme_vmid_lock); > static unsigned long *rme_vmid_bitmap; Thanks, Ganapat
On 18/03/2024 11:01, Ganapatrao Kulkarni wrote: > > On 27-01-2023 04:59 pm, Steven Price wrote: >> The RMM owns the stage 2 page tables for a realm, and KVM must request >> that the RMM creates/destroys entries as necessary. The physical pages >> to store the page tables are delegated to the realm as required, and can >> be undelegated when no longer used. >> >> Signed-off-by: Steven Price <steven.price@arm.com> >> --- >> arch/arm64/include/asm/kvm_rme.h | 19 +++++ >> arch/arm64/kvm/mmu.c | 7 +- >> arch/arm64/kvm/rme.c | 139 +++++++++++++++++++++++++++++++ >> 3 files changed, 162 insertions(+), 3 deletions(-) >> >> diff --git a/arch/arm64/include/asm/kvm_rme.h >> b/arch/arm64/include/asm/kvm_rme.h >> index a6318af3ed11..eea5118dfa8a 100644 >> --- a/arch/arm64/include/asm/kvm_rme.h >> +++ b/arch/arm64/include/asm/kvm_rme.h >> @@ -35,5 +35,24 @@ u32 kvm_realm_ipa_limit(void); >> int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap); >> int kvm_init_realm_vm(struct kvm *kvm); >> void kvm_destroy_realm(struct kvm *kvm); >> +void kvm_realm_destroy_rtts(struct realm *realm, u32 ia_bits, u32 >> start_level); >> + >> +#define RME_RTT_BLOCK_LEVEL 2 >> +#define RME_RTT_MAX_LEVEL 3 >> + >> +#define RME_PAGE_SHIFT 12 >> +#define RME_PAGE_SIZE BIT(RME_PAGE_SHIFT) > > Can we use PAGE_SIZE and PAGE_SHIFT instead of redefining? > May be we can use them to define RME_PAGE_SIZE and RME_PAGE_SHIFT. At the moment the code only supports the host page size matching the RMM's. But I want to leave open the possibility for the host size being larger than the RMM's. In this case PAGE_SHIFT/PAGE_SIZE will not equal RME_PAGE_SIZE and RME_PAGE_SHIFT. The host will have to create multiple RMM RTTs for each host page. >> +/* See ARM64_HW_PGTABLE_LEVEL_SHIFT() */ >> +#define RME_RTT_LEVEL_SHIFT(l) \ >> + ((RME_PAGE_SHIFT - 3) * (4 - (l)) + 3) > > Instead of defining again, can we define to > ARM64_HW_PGTABLE_LEVEL_SHIFT? Same as above - ARM64_HW_PGTABLE_LEVEL_SHIFT uses PAGE_SHIFT, but we want the same calculation using RME_PAGE_SHIFT which might be different. Thanks, Steve
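[Editorial aside: to put numbers on Steve's point, if the host ever runs with a page size larger than the RMM's fixed 4K granule, one host page spans several RMM granules. The 64K figure below is only an example configuration for the sketch; the patch as posted requires the two sizes to match:

#include <assert.h>
#include <stdio.h>

#define RME_PAGE_SHIFT	12	/* RMM granule: 4K            */
#define HOST_PAGE_SHIFT	16	/* example host page size: 64K */

int main(void)
{
	unsigned long granules = 1UL << (HOST_PAGE_SHIFT - RME_PAGE_SHIFT);

	assert(granules == 16);
	printf("%lu RMM granules per 64K host page\n", granules);
	return 0;
}
]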
diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h index a6318af3ed11..eea5118dfa8a 100644 --- a/arch/arm64/include/asm/kvm_rme.h +++ b/arch/arm64/include/asm/kvm_rme.h @@ -35,5 +35,24 @@ u32 kvm_realm_ipa_limit(void); int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap); int kvm_init_realm_vm(struct kvm *kvm); void kvm_destroy_realm(struct kvm *kvm); +void kvm_realm_destroy_rtts(struct realm *realm, u32 ia_bits, u32 start_level); + +#define RME_RTT_BLOCK_LEVEL 2 +#define RME_RTT_MAX_LEVEL 3 + +#define RME_PAGE_SHIFT 12 +#define RME_PAGE_SIZE BIT(RME_PAGE_SHIFT) +/* See ARM64_HW_PGTABLE_LEVEL_SHIFT() */ +#define RME_RTT_LEVEL_SHIFT(l) \ + ((RME_PAGE_SHIFT - 3) * (4 - (l)) + 3) +#define RME_L2_BLOCK_SIZE BIT(RME_RTT_LEVEL_SHIFT(2)) + +static inline unsigned long rme_rtt_level_mapsize(int level) +{ + if (WARN_ON(level > RME_RTT_MAX_LEVEL)) + return RME_PAGE_SIZE; + + return (1UL << RME_RTT_LEVEL_SHIFT(level)); +} #endif diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 22c00274884a..f29558c5dcbc 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -834,16 +834,17 @@ void stage2_unmap_vm(struct kvm *kvm) void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) { struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu); - struct kvm_pgtable *pgt = NULL; + struct kvm_pgtable *pgt; write_lock(&kvm->mmu_lock); + pgt = mmu->pgt; if (kvm_is_realm(kvm) && kvm_realm_state(kvm) != REALM_STATE_DYING) { - /* TODO: teardown rtts */ write_unlock(&kvm->mmu_lock); + kvm_realm_destroy_rtts(&kvm->arch.realm, pgt->ia_bits, + pgt->start_level); return; } - pgt = mmu->pgt; if (pgt) { mmu->pgd_phys = 0; mmu->pgt = NULL; diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c index 0c9d70e4d9e6..f7b0e5a779f8 100644 --- a/arch/arm64/kvm/rme.c +++ b/arch/arm64/kvm/rme.c @@ -73,6 +73,28 @@ static int rmi_check_version(void) return 0; } +static void realm_destroy_undelegate_range(struct realm *realm, + unsigned long ipa, + unsigned long addr, + ssize_t size) +{ + unsigned long rd = virt_to_phys(realm->rd); + int ret; + + while (size > 0) { + ret = rmi_data_destroy(rd, ipa); + WARN_ON(ret); + ret = rmi_granule_undelegate(addr); + + if (ret) + get_page(phys_to_page(addr)); + + addr += PAGE_SIZE; + ipa += PAGE_SIZE; + size -= PAGE_SIZE; + } +} + static unsigned long create_realm_feat_reg0(struct kvm *kvm) { unsigned long ia_bits = VTCR_EL2_IPA(kvm->arch.vtcr); @@ -170,6 +192,123 @@ static int realm_create_rd(struct kvm *kvm) return r; } +static int realm_rtt_destroy(struct realm *realm, unsigned long addr, + int level, phys_addr_t rtt_granule) +{ + addr = ALIGN_DOWN(addr, rme_rtt_level_mapsize(level - 1)); + return rmi_rtt_destroy(rtt_granule, virt_to_phys(realm->rd), addr, + level); +} + +static int realm_destroy_free_rtt(struct realm *realm, unsigned long addr, + int level, phys_addr_t rtt_granule) +{ + if (realm_rtt_destroy(realm, addr, level, rtt_granule)) + return -ENXIO; + if (!WARN_ON(rmi_granule_undelegate(rtt_granule))) + put_page(phys_to_page(rtt_granule)); + + return 0; +} + +static int realm_rtt_create(struct realm *realm, + unsigned long addr, + int level, + phys_addr_t phys) +{ + addr = ALIGN_DOWN(addr, rme_rtt_level_mapsize(level - 1)); + return rmi_rtt_create(phys, virt_to_phys(realm->rd), addr, level); +} + +static int realm_tear_down_rtt_range(struct realm *realm, int level, + unsigned long start, unsigned long end) +{ + phys_addr_t rd = virt_to_phys(realm->rd); + ssize_t map_size = rme_rtt_level_mapsize(level); + unsigned long addr, next_addr; + bool 
failed = false; + + for (addr = start; addr < end; addr = next_addr) { + phys_addr_t rtt_addr, tmp_rtt; + struct rtt_entry rtt; + unsigned long end_addr; + + next_addr = ALIGN(addr + 1, map_size); + + end_addr = min(next_addr, end); + + if (rmi_rtt_read_entry(rd, ALIGN_DOWN(addr, map_size), + level, &rtt)) { + failed = true; + continue; + } + + rtt_addr = rmi_rtt_get_phys(&rtt); + WARN_ON(level != rtt.walk_level); + + switch (rtt.state) { + case RMI_UNASSIGNED: + case RMI_DESTROYED: + break; + case RMI_TABLE: + if (realm_tear_down_rtt_range(realm, level + 1, + addr, end_addr)) { + failed = true; + break; + } + if (IS_ALIGNED(addr, map_size) && + next_addr <= end && + realm_destroy_free_rtt(realm, addr, level + 1, + rtt_addr)) + failed = true; + break; + case RMI_ASSIGNED: + WARN_ON(!rtt_addr); + /* + * If there is a block mapping, break it now, using the + * spare_page. We are sure to have a valid delegated + * page at spare_page before we enter here, otherwise + * WARN once, which will be followed by further + * warnings. + */ + tmp_rtt = realm->spare_page; + if (level == 2 && + !WARN_ON_ONCE(tmp_rtt == PHYS_ADDR_MAX) && + realm_rtt_create(realm, addr, + RME_RTT_MAX_LEVEL, tmp_rtt)) { + WARN_ON(1); + failed = true; + break; + } + realm_destroy_undelegate_range(realm, addr, + rtt_addr, map_size); + /* + * Collapse the last level table and make the spare page + * reusable again. + */ + if (level == 2 && + realm_rtt_destroy(realm, addr, RME_RTT_MAX_LEVEL, + tmp_rtt)) + failed = true; + break; + case RMI_VALID_NS: + WARN_ON(rmi_rtt_unmap_unprotected(rd, addr, level)); + break; + default: + WARN_ON(1); + failed = true; + break; + } + } + + return failed ? -EINVAL : 0; +} + +void kvm_realm_destroy_rtts(struct realm *realm, u32 ia_bits, u32 start_level) +{ + realm_tear_down_rtt_range(realm, start_level, 0, (1UL << ia_bits)); +} + /* Protects access to rme_vmid_bitmap */ static DEFINE_SPINLOCK(rme_vmid_lock); static unsigned long *rme_vmid_bitmap;
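[Editorial aside: the address stepping in realm_tear_down_rtt_range() can be exercised in isolation. ALIGN/ALIGN_DOWN/IS_ALIGNED below are local reimplementations of the kernel helpers (valid for power-of-two sizes) and the range values are arbitrary examples; the output shows that a subordinate table is only freed once the walk covers a whole, aligned entry:

#include <stdio.h>

#define ALIGN(x, a)		(((x) + (a) - 1) & ~((a) - 1))
#define ALIGN_DOWN(x, a)	((x) & ~((a) - 1))
#define IS_ALIGNED(x, a)	(((x) & ((a) - 1)) == 0)

int main(void)
{
	unsigned long map_size = 0x200000;	/* level 2: 2MiB per entry */
	unsigned long start = 0x1f0000, end = 0x500000;
	unsigned long addr, next_addr;

	for (addr = start; addr < end; addr = next_addr) {
		unsigned long end_addr;

		next_addr = ALIGN(addr + 1, map_size);
		end_addr = next_addr < end ? next_addr : end;

		printf("entry %#lx: recurse over [%#lx, %#lx), free table: %s\n",
		       ALIGN_DOWN(addr, map_size), addr, end_addr,
		       (IS_ALIGNED(addr, map_size) && next_addr <= end) ?
		       "yes" : "no");
	}
	return 0;
}
]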
The RMM owns the stage 2 page tables for a realm, and KVM must request that the RMM creates/destroys entries as necessary. The physical pages to store the page tables are delegated to the realm as required, and can be undelegated when no longer used. Signed-off-by: Steven Price <steven.price@arm.com> --- arch/arm64/include/asm/kvm_rme.h | 19 +++++ arch/arm64/kvm/mmu.c | 7 +- arch/arm64/kvm/rme.c | 139 +++++++++++++++++++++++++++++++ 3 files changed, 162 insertions(+), 3 deletions(-)
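[Editorial aside: a print-only sketch of the lifecycle the commit message describes for a page backing an RTT: delegated before the RMM will use it, undelegated (and only then reusable by the host) after the RTT is destroyed. The delegate call on the creation side is implied by the commit message rather than shown in this patch, and every rmi_* function here is a stand-in with a simplified signature:

#include <stdio.h>

static void rmi_granule_delegate(unsigned long pa)
{
	printf("GRANULE_DELEGATE   pa=%#lx\n", pa);
}

static void rmi_rtt_create(unsigned long pa, unsigned long ipa, int level)
{
	printf("RTT_CREATE         pa=%#lx ipa=%#lx level=%d\n", pa, ipa, level);
}

static void rmi_rtt_destroy(unsigned long pa, unsigned long ipa, int level)
{
	printf("RTT_DESTROY        pa=%#lx ipa=%#lx level=%d\n", pa, ipa, level);
}

static void rmi_granule_undelegate(unsigned long pa)
{
	printf("GRANULE_UNDELEGATE pa=%#lx\n", pa);
}

int main(void)
{
	unsigned long rtt_page = 0x90000000UL;	/* page that will hold the RTT */
	unsigned long ipa = 0x40000000UL;

	/* Creation: the page must be delegated before the RMM will use it. */
	rmi_granule_delegate(rtt_page);
	rmi_rtt_create(rtt_page, ipa, 3);

	/* Teardown (mirrors realm_destroy_free_rtt() above): destroy first,
	 * undelegate second; only then may the host reuse the page. */
	rmi_rtt_destroy(rtt_page, ipa, 3);
	rmi_granule_undelegate(rtt_page);
	return 0;
}
]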