Message ID | 20230127112932.38045-7-steven.price@arm.com (mailing list archive)
---|---
State | New, archived
Series | arm64: Support for Arm CCA in KVM
On Fri, Jan 27, 2023 at 11:29:10AM +0000, Steven Price wrote:
> +static int kvm_rme_config_realm(struct kvm *kvm, struct kvm_enable_cap *cap)
> +{
> +        struct kvm_cap_arm_rme_config_item cfg;
> +        struct realm *realm = &kvm->arch.realm;
> +        int r = 0;
> +
> +        if (kvm_realm_state(kvm) != REALM_STATE_NONE)
> +                return -EBUSY;

This should also check kvm_is_realm() (otherwise we dereference a NULL
realm).

I was wondering about fuzzing the API to find more of this kind of issue,
but don't know anything about it. Is there a recommended way to fuzz KVM?

Thanks,
Jean
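For illustration, a minimal sketch of the guard Jean is suggesting, on top of
the kvm_rme_config_realm() quoted above (kvm_is_realm() and
kvm_init_realm_vm() are helpers from this series; returning -EINVAL for
non-realm VMs is an assumption, not part of the posted patch). Without such a
check, a non-realm VM reaches this path with kvm->arch.realm.params still
NULL, and the KVM_CAP_ARM_RME_CFG_* handlers dereference it:

        static int kvm_rme_config_realm(struct kvm *kvm, struct kvm_enable_cap *cap)
        {
                struct kvm_cap_arm_rme_config_item cfg;
                struct realm *realm = &kvm->arch.realm;
                int r = 0;

                /* realm->params is only allocated by kvm_init_realm_vm(),
                 * which runs only for realm VMs */
                if (!kvm_is_realm(kvm))
                        return -EINVAL;

                if (kvm_realm_state(kvm) != REALM_STATE_NONE)
                        return -EBUSY;

                /* remainder of the function unchanged */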
On 07/02/2023 12:25, Jean-Philippe Brucker wrote:
> On Fri, Jan 27, 2023 at 11:29:10AM +0000, Steven Price wrote:
>> +static int kvm_rme_config_realm(struct kvm *kvm, struct kvm_enable_cap *cap)
>> +{
>> +        struct kvm_cap_arm_rme_config_item cfg;
>> +        struct realm *realm = &kvm->arch.realm;
>> +        int r = 0;
>> +
>> +        if (kvm_realm_state(kvm) != REALM_STATE_NONE)
>> +                return -EBUSY;
>
> This should also check kvm_is_realm() (otherwise we dereference a NULL
> realm).

Correct, I think this should be done way up in the stack at
kvm_vm_ioctl_enable_cap() for KVM_CAP_ARM_RME.

> I was wondering about fuzzing the API to find more of this kind of issue,
> but don't know anything about it. Is there a recommended way to fuzz KVM?

Not sure either. kselftests is one possible way to drive these tests, at
least for unit-testing the new ABIs. This is something we plan to add.

Thanks for catching this.

Suzuki

> Thanks,
> Jean
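A sketch of the placement Suzuki describes, on top of the
kvm_vm_ioctl_enable_cap() hunk posted in this patch; only the kvm_is_realm()
check is new, and the choice of -EINVAL is an assumption:

                case KVM_CAP_ARM_RME:
                        if (!static_branch_unlikely(&kvm_rme_is_available))
                                return -EINVAL;
                        /* Reject the cap up front for non-realm VMs so the
                         * RME paths below never run with kvm->arch.realm
                         * uninitialised */
                        if (!kvm_is_realm(kvm))
                                return -EINVAL;
                        mutex_lock(&kvm->lock);
                        r = kvm_realm_enable_cap(kvm, cap);
                        mutex_unlock(&kvm->lock);
                        break;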
On Fri, 27 Jan 2023 11:29:10 +0000
Steven Price <steven.price@arm.com> wrote:

> Add the KVM_CAP_ARM_RME_CREATE_FD ioctl to create a realm. This involves
> delegating pages to the RMM to hold the Realm Descriptor (RD) and for
> the base level of the Realm Translation Tables (RTT). A VMID also needs
> to be picked; since the RMM has a separate VMID address space, a
> dedicated allocator is added for this purpose.
>
> KVM_CAP_ARM_RME_CONFIG_REALM is provided to allow configuring the realm
> before it is created.
>
> Signed-off-by: Steven Price <steven.price@arm.com>
> ---
>  arch/arm64/include/asm/kvm_rme.h |  14 ++
>  arch/arm64/kvm/arm.c             |  19 ++
>  arch/arm64/kvm/mmu.c             |   6 +
>  arch/arm64/kvm/reset.c           |  33 +++
>  arch/arm64/kvm/rme.c             | 357 +++++++++++++++++++++++++++++++
>  5 files changed, 429 insertions(+)
>
> diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h
> index c26bc2c6770d..055a22accc08 100644
> --- a/arch/arm64/include/asm/kvm_rme.h
> +++ b/arch/arm64/include/asm/kvm_rme.h
> @@ -6,6 +6,8 @@
>  #ifndef __ASM_KVM_RME_H
>  #define __ASM_KVM_RME_H
>
> +#include <uapi/linux/kvm.h>
> +
>  enum realm_state {
>          REALM_STATE_NONE,
>          REALM_STATE_NEW,
> @@ -15,8 +17,20 @@ enum realm_state {
>
>  struct realm {
>          enum realm_state state;
> +
> +        void *rd;
> +        struct realm_params *params;
> +
> +        unsigned long num_aux;
> +        unsigned int vmid;
> +        unsigned int ia_bits;
>  };
>

Maybe more comments for this structure?

>  int kvm_init_rme(void);
> +u32 kvm_realm_ipa_limit(void);
> +
> +int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap);
> +int kvm_init_realm_vm(struct kvm *kvm);
> +void kvm_destroy_realm(struct kvm *kvm);
>
>  #endif
> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> index d97b39d042ab..50f54a63732a 100644
> --- a/arch/arm64/kvm/arm.c
> +++ b/arch/arm64/kvm/arm.c
> @@ -103,6 +103,13 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
>                  r = 0;
>                  set_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags);
>                  break;
> +        case KVM_CAP_ARM_RME:
> +                if (!static_branch_unlikely(&kvm_rme_is_available))
> +                        return -EINVAL;
> +                mutex_lock(&kvm->lock);
> +                r = kvm_realm_enable_cap(kvm, cap);
> +                mutex_unlock(&kvm->lock);
> +                break;
>          default:
>                  r = -EINVAL;
>                  break;
> @@ -172,6 +179,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>           */
>          kvm->arch.dfr0_pmuver.imp = kvm_arm_pmu_get_pmuver_limit();
>
> +        /* Initialise the realm bits after the generic bits are enabled */
> +        if (kvm_is_realm(kvm)) {
> +                ret = kvm_init_realm_vm(kvm);
> +                if (ret)
> +                        goto err_free_cpumask;
> +        }
> +
>          return 0;
>
>  err_free_cpumask:
> @@ -204,6 +218,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
>          kvm_destroy_vcpus(kvm);
>
>          kvm_unshare_hyp(kvm, kvm + 1);
> +
> +        kvm_destroy_realm(kvm);
>  }
>
>  int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
> @@ -300,6 +316,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>          case KVM_CAP_ARM_PTRAUTH_GENERIC:
>                  r = system_has_full_ptr_auth();
>                  break;
> +        case KVM_CAP_ARM_RME:
> +                r = static_key_enabled(&kvm_rme_is_available);
> +                break;
>          default:
>                  r = 0;
>          }
> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> index 31d7fa4c7c14..d0f707767d05 100644
> --- a/arch/arm64/kvm/mmu.c
> +++ b/arch/arm64/kvm/mmu.c
> @@ -840,6 +840,12 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
>          struct kvm_pgtable *pgt = NULL;
>
>          write_lock(&kvm->mmu_lock);
> +        if (kvm_is_realm(kvm) &&
> +            kvm_realm_state(kvm) != REALM_STATE_DYING) {
> +                /* TODO: teardown rtts */
> +                write_unlock(&kvm->mmu_lock);
> +                return;
> +        }
>          pgt = mmu->pgt;
>          if (pgt) {
>                  mmu->pgd_phys = 0;
> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
> index e0267f672b8a..c165df174737 100644
> --- a/arch/arm64/kvm/reset.c
> +++ b/arch/arm64/kvm/reset.c
> @@ -395,3 +395,36 @@ int kvm_set_ipa_limit(void)
>
>          return 0;
>  }
> +
> +int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
> +{
> +        u64 mmfr0, mmfr1;
> +        u32 phys_shift;
> +        u32 ipa_limit = kvm_ipa_limit;
> +
> +        if (kvm_is_realm(kvm))
> +                ipa_limit = kvm_realm_ipa_limit();
> +
> +        if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
> +                return -EINVAL;
> +
> +        phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type);
> +        if (phys_shift) {
> +                if (phys_shift > ipa_limit ||
> +                    phys_shift < ARM64_MIN_PARANGE_BITS)
> +                        return -EINVAL;
> +        } else {
> +                phys_shift = KVM_PHYS_SHIFT;
> +                if (phys_shift > ipa_limit) {
> +                        pr_warn_once("%s using unsupported default IPA limit, upgrade your VMM\n",
> +                                     current->comm);
> +                        return -EINVAL;
> +                }
> +        }
> +
> +        mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
> +        mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
> +        kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);
> +
> +        return 0;
> +}
> diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c
> index f6b587bc116e..9f8c5a91b8fc 100644
> --- a/arch/arm64/kvm/rme.c
> +++ b/arch/arm64/kvm/rme.c
> @@ -5,9 +5,49 @@
>
>  #include <linux/kvm_host.h>
>
> +#include <asm/kvm_emulate.h>
> +#include <asm/kvm_mmu.h>
>  #include <asm/rmi_cmds.h>
>  #include <asm/virt.h>
>
> +/************ FIXME: Copied from kvm/hyp/pgtable.c **********/
> +#include <asm/kvm_pgtable.h>
> +
> +struct kvm_pgtable_walk_data {
> +        struct kvm_pgtable *pgt;
> +        struct kvm_pgtable_walker *walker;
> +
> +        u64 addr;
> +        u64 end;
> +};
> +
> +static u32 __kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr)
> +{
> +        u64 shift = kvm_granule_shift(pgt->start_level - 1); /* May underflow */
> +        u64 mask = BIT(pgt->ia_bits) - 1;
> +
> +        return (addr & mask) >> shift;
> +}
> +
> +static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
> +{
> +        struct kvm_pgtable pgt = {
> +                .ia_bits = ia_bits,
> +                .start_level = start_level,
> +        };
> +
> +        return __kvm_pgd_page_idx(&pgt, -1ULL) + 1;
> +}
> +
> +/******************/
> +
> +static unsigned long rmm_feat_reg0;
> +
> +static bool rme_supports(unsigned long feature)
> +{
> +        return !!u64_get_bits(rmm_feat_reg0, feature);
> +}
> +
>  static int rmi_check_version(void)
>  {
>          struct arm_smccc_res res;
> @@ -33,8 +73,319 @@ static int rmi_check_version(void)
>          return 0;
>  }
>
> +static unsigned long create_realm_feat_reg0(struct kvm *kvm)
> +{
> +        unsigned long ia_bits = VTCR_EL2_IPA(kvm->arch.vtcr);
> +        u64 feat_reg0 = 0;
> +
> +        int num_bps = u64_get_bits(rmm_feat_reg0,
> +                                   RMI_FEATURE_REGISTER_0_NUM_BPS);
> +        int num_wps = u64_get_bits(rmm_feat_reg0,
> +                                   RMI_FEATURE_REGISTER_0_NUM_WPS);
> +
> +        feat_reg0 |= u64_encode_bits(ia_bits, RMI_FEATURE_REGISTER_0_S2SZ);
> +        feat_reg0 |= u64_encode_bits(num_bps, RMI_FEATURE_REGISTER_0_NUM_BPS);
> +        feat_reg0 |= u64_encode_bits(num_wps, RMI_FEATURE_REGISTER_0_NUM_WPS);
> +
> +        return feat_reg0;
> +}
> +
> +u32 kvm_realm_ipa_limit(void)
> +{
> +        return u64_get_bits(rmm_feat_reg0, RMI_FEATURE_REGISTER_0_S2SZ);
> +}
> +
> +static u32 get_start_level(struct kvm *kvm)
> +{
> +        long sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, kvm->arch.vtcr);
> +
> +        return VTCR_EL2_TGRAN_SL0_BASE - sl0;
> +}
> +
> +static int realm_create_rd(struct kvm *kvm)
> +{
> +        struct realm *realm = &kvm->arch.realm;
> +        struct realm_params *params = realm->params;
> +        void *rd = NULL;
> +        phys_addr_t rd_phys, params_phys;
> +        struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
> +        unsigned int pgd_sz;
> +        int i, r;
> +
> +        if (WARN_ON(realm->rd) || WARN_ON(!realm->params))
> +                return -EEXIST;
> +
> +        rd = (void *)__get_free_page(GFP_KERNEL);
> +        if (!rd)
> +                return -ENOMEM;
> +
> +        rd_phys = virt_to_phys(rd);
> +        if (rmi_granule_delegate(rd_phys)) {
> +                r = -ENXIO;
> +                goto out;
> +        }
> +
> +        pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level);
> +        for (i = 0; i < pgd_sz; i++) {
> +                phys_addr_t pgd_phys = kvm->arch.mmu.pgd_phys + i * PAGE_SIZE;
> +
> +                if (rmi_granule_delegate(pgd_phys)) {
> +                        r = -ENXIO;
> +                        goto out_undelegate_tables;
> +                }
> +        }
> +
> +        params->rtt_level_start = get_start_level(kvm);
> +        params->rtt_num_start = pgd_sz;
> +        params->rtt_base = kvm->arch.mmu.pgd_phys;
> +        params->vmid = realm->vmid;
> +
> +        params_phys = virt_to_phys(params);
> +
> +        if (rmi_realm_create(rd_phys, params_phys)) {
> +                r = -ENXIO;
> +                goto out_undelegate_tables;
> +        }
> +
> +        realm->rd = rd;
> +        realm->ia_bits = VTCR_EL2_IPA(kvm->arch.vtcr);
> +
> +        if (WARN_ON(rmi_rec_aux_count(rd_phys, &realm->num_aux))) {
> +                WARN_ON(rmi_realm_destroy(rd_phys));
> +                goto out_undelegate_tables;
> +        }
> +
> +        return 0;
> +
> +out_undelegate_tables:
> +        while (--i >= 0) {
> +                phys_addr_t pgd_phys = kvm->arch.mmu.pgd_phys + i * PAGE_SIZE;
> +
> +                WARN_ON(rmi_granule_undelegate(pgd_phys));
> +        }
> +        WARN_ON(rmi_granule_undelegate(rd_phys));
> +out:
> +        free_page((unsigned long)rd);
> +        return r;
> +}
> +

Just curious. Wouldn't it be better to use IDR as this is ID allocation? There
were some efforts to change the use of bitmap allocation to IDR before.

> +/* Protects access to rme_vmid_bitmap */
> +static DEFINE_SPINLOCK(rme_vmid_lock);
> +static unsigned long *rme_vmid_bitmap;
> +
> +static int rme_vmid_init(void)
> +{
> +        unsigned int vmid_count = 1 << kvm_get_vmid_bits();
> +
> +        rme_vmid_bitmap = bitmap_zalloc(vmid_count, GFP_KERNEL);
> +        if (!rme_vmid_bitmap) {
> +                kvm_err("%s: Couldn't allocate rme vmid bitmap\n", __func__);
> +                return -ENOMEM;
> +        }
> +
> +        return 0;
> +}
> +
> +static int rme_vmid_reserve(void)
> +{
> +        int ret;
> +        unsigned int vmid_count = 1 << kvm_get_vmid_bits();
> +
> +        spin_lock(&rme_vmid_lock);
> +        ret = bitmap_find_free_region(rme_vmid_bitmap, vmid_count, 0);
> +        spin_unlock(&rme_vmid_lock);
> +
> +        return ret;
> +}
> +
> +static void rme_vmid_release(unsigned int vmid)
> +{
> +        spin_lock(&rme_vmid_lock);
> +        bitmap_release_region(rme_vmid_bitmap, vmid, 0);
> +        spin_unlock(&rme_vmid_lock);
> +}
> +
> +static int kvm_create_realm(struct kvm *kvm)
> +{
> +        struct realm *realm = &kvm->arch.realm;
> +        int ret;
> +
> +        if (!kvm_is_realm(kvm) || kvm_realm_state(kvm) != REALM_STATE_NONE)
> +                return -EEXIST;
> +
> +        ret = rme_vmid_reserve();
> +        if (ret < 0)
> +                return ret;
> +        realm->vmid = ret;
> +
> +        ret = realm_create_rd(kvm);
> +        if (ret) {
> +                rme_vmid_release(realm->vmid);
> +                return ret;
> +        }
> +
> +        WRITE_ONCE(realm->state, REALM_STATE_NEW);
> +
> +        /* The realm is up, free the parameters. */
> +        free_page((unsigned long)realm->params);
> +        realm->params = NULL;
> +
> +        return 0;
> +}
> +
> +static int config_realm_hash_algo(struct realm *realm,
> +                                  struct kvm_cap_arm_rme_config_item *cfg)
> +{
> +        switch (cfg->hash_algo) {
> +        case KVM_CAP_ARM_RME_MEASUREMENT_ALGO_SHA256:
> +                if (!rme_supports(RMI_FEATURE_REGISTER_0_HASH_SHA_256))
> +                        return -EINVAL;
> +                break;
> +        case KVM_CAP_ARM_RME_MEASUREMENT_ALGO_SHA512:
> +                if (!rme_supports(RMI_FEATURE_REGISTER_0_HASH_SHA_512))
> +                        return -EINVAL;
> +                break;
> +        default:
> +                return -EINVAL;
> +        }
> +        realm->params->measurement_algo = cfg->hash_algo;
> +        return 0;
> +}
> +
> +static int config_realm_sve(struct realm *realm,
> +                            struct kvm_cap_arm_rme_config_item *cfg)
> +{
> +        u64 features_0 = realm->params->features_0;
> +        int max_sve_vq = u64_get_bits(rmm_feat_reg0,
> +                                      RMI_FEATURE_REGISTER_0_SVE_VL);
> +
> +        if (!rme_supports(RMI_FEATURE_REGISTER_0_SVE_EN))
> +                return -EINVAL;
> +
> +        if (cfg->sve_vq > max_sve_vq)
> +                return -EINVAL;
> +
> +        features_0 &= ~(RMI_FEATURE_REGISTER_0_SVE_EN |
> +                        RMI_FEATURE_REGISTER_0_SVE_VL);
> +        features_0 |= u64_encode_bits(1, RMI_FEATURE_REGISTER_0_SVE_EN);
> +        features_0 |= u64_encode_bits(cfg->sve_vq,
> +                                      RMI_FEATURE_REGISTER_0_SVE_VL);
> +
> +        realm->params->features_0 = features_0;
> +        return 0;
> +}
> +
> +static int kvm_rme_config_realm(struct kvm *kvm, struct kvm_enable_cap *cap)
> +{
> +        struct kvm_cap_arm_rme_config_item cfg;
> +        struct realm *realm = &kvm->arch.realm;
> +        int r = 0;
> +
> +        if (kvm_realm_state(kvm) != REALM_STATE_NONE)
> +                return -EBUSY;
> +
> +        if (copy_from_user(&cfg, (void __user *)cap->args[1], sizeof(cfg)))
> +                return -EFAULT;
> +
> +        switch (cfg.cfg) {
> +        case KVM_CAP_ARM_RME_CFG_RPV:
> +                memcpy(&realm->params->rpv, &cfg.rpv, sizeof(cfg.rpv));
> +                break;
> +        case KVM_CAP_ARM_RME_CFG_HASH_ALGO:
> +                r = config_realm_hash_algo(realm, &cfg);
> +                break;
> +        case KVM_CAP_ARM_RME_CFG_SVE:
> +                r = config_realm_sve(realm, &cfg);
> +                break;
> +        default:
> +                r = -EINVAL;
> +        }
> +
> +        return r;
> +}
> +
> +int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
> +{
> +        int r = 0;
> +
> +        switch (cap->args[0]) {
> +        case KVM_CAP_ARM_RME_CONFIG_REALM:
> +                r = kvm_rme_config_realm(kvm, cap);
> +                break;
> +        case KVM_CAP_ARM_RME_CREATE_RD:
> +                if (kvm->created_vcpus) {
> +                        r = -EBUSY;
> +                        break;
> +                }
> +
> +                r = kvm_create_realm(kvm);
> +                break;
> +        default:
> +                r = -EINVAL;
> +                break;
> +        }
> +
> +        return r;
> +}
> +
> +void kvm_destroy_realm(struct kvm *kvm)
> +{
> +        struct realm *realm = &kvm->arch.realm;
> +        struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
> +        unsigned int pgd_sz;
> +        int i;
> +
> +        if (realm->params) {
> +                free_page((unsigned long)realm->params);
> +                realm->params = NULL;
> +        }
> +
> +        if (kvm_realm_state(kvm) == REALM_STATE_NONE)
> +                return;
> +
> +        WRITE_ONCE(realm->state, REALM_STATE_DYING);
> +
> +        rme_vmid_release(realm->vmid);
> +
> +        if (realm->rd) {
> +                phys_addr_t rd_phys = virt_to_phys(realm->rd);
> +
> +                if (WARN_ON(rmi_realm_destroy(rd_phys)))
> +                        return;
> +                if (WARN_ON(rmi_granule_undelegate(rd_phys)))
> +                        return;
> +                free_page((unsigned long)realm->rd);
> +                realm->rd = NULL;
> +        }
> +
> +        pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level);
> +        for (i = 0; i < pgd_sz; i++) {
> +                phys_addr_t pgd_phys = kvm->arch.mmu.pgd_phys + i * PAGE_SIZE;
> +
> +                if (WARN_ON(rmi_granule_undelegate(pgd_phys)))
> +                        return;
> +        }
> +
> +        kvm_free_stage2_pgd(&kvm->arch.mmu);
> +}
> +
> +int kvm_init_realm_vm(struct kvm *kvm)
> +{
> +        struct realm_params *params;
> +
> +        params = (struct realm_params *)get_zeroed_page(GFP_KERNEL);
> +        if (!params)
> +                return -ENOMEM;
> +
> +        params->features_0 = create_realm_feat_reg0(kvm);
> +        kvm->arch.realm.params = params;
> +        return 0;
> +}
> +
>  int kvm_init_rme(void)
>  {
> +        int ret;
> +
>          if (PAGE_SIZE != SZ_4K)
>                  /* Only 4k page size on the host is supported */
>                  return 0;
> @@ -43,6 +394,12 @@ int kvm_init_rme(void)
>                  /* Continue without realm support */
>                  return 0;
>
> +        ret = rme_vmid_init();
> +        if (ret)
> +                return ret;
> +
> +        WARN_ON(rmi_features(0, &rmm_feat_reg0));
> +
>          /* Future patch will enable static branch kvm_rme_is_available */
>
>          return 0;
On 13/02/2023 16:10, Zhi Wang wrote:
> On Fri, 27 Jan 2023 11:29:10 +0000
> Steven Price <steven.price@arm.com> wrote:
>
>> Add the KVM_CAP_ARM_RME_CREATE_FD ioctl to create a realm. This involves
>> delegating pages to the RMM to hold the Realm Descriptor (RD) and for
>> the base level of the Realm Translation Tables (RTT). A VMID also needs
>> to be picked; since the RMM has a separate VMID address space, a
>> dedicated allocator is added for this purpose.

<snip>

>>  struct realm {
>>          enum realm_state state;
>> +
>> +        void *rd;
>> +        struct realm_params *params;
>> +
>> +        unsigned long num_aux;
>> +        unsigned int vmid;
>> +        unsigned int ia_bits;
>>  };
>
> Maybe more comments for this structure?

Agreed, this series is a bit light on comments. I'll try to improve for v2.

<snip>

> Just curious. Wouldn't it be better to use IDR as this is ID allocation? There
> were some efforts to change the use of bitmap allocation to IDR before.

I'm not sure it makes much difference really. This matches KVM's
vmid_map, but here things are much simpler as there's no support for
the likes of VMID rollover (the number of Realm VMs is just capped at
the number of VMIDs).

IDR provides a lot of functionality we don't need, but equally I don't
think performance or memory usage are really a concern here.

Steve
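For a sense of what the extra documentation could look like, a sketch of the
structure with kerneldoc comments; the field descriptions below are inferred
from how this patch uses each member, not taken from a later version of the
series:

        /**
         * struct realm - additional per-VM state for a Realm guest
         * @state:   lifecycle state (REALM_STATE_NONE/NEW/ACTIVE/DYING)
         * @rd:      kernel address of the page delegated to the RMM as
         *           the Realm Descriptor (RD)
         * @params:  parameters passed to rmi_realm_create(), freed once
         *           the realm has been created
         * @num_aux: auxiliary granule count reported by rmi_rec_aux_count()
         * @vmid:    VMID reserved from the RMM's separate VMID space
         * @ia_bits: number of valid input-address (IPA) bits for stage 2
         */
        struct realm {
                enum realm_state state;

                void *rd;
                struct realm_params *params;

                unsigned long num_aux;
                unsigned int vmid;
                unsigned int ia_bits;
        };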
On Wed, 1 Mar 2023 11:55:17 +0000
Steven Price <steven.price@arm.com> wrote:

> On 13/02/2023 16:10, Zhi Wang wrote:

<snip>

> > Just curious. Wouldn't it be better to use IDR as this is ID allocation? There
> > were some efforts to change the use of bitmap allocation to IDR before.
>
> I'm not sure it makes much difference really. This matches KVM's
> vmid_map, but here things are much simpler as there's no support for
> the likes of VMID rollover (the number of Realm VMs is just capped at
> the number of VMIDs).
>
> IDR provides a lot of functionality we don't need, but equally I don't
> think performance or memory usage are really a concern here.

Agree. I am not opposed to the current approach. I gave this comment
because I vaguely remember there were some patch series to convert
bitmap allocation to IDR in the kernel before. So I think it would be
better to raise it and get a conclusion. It would save some effort for
the people who might jump in the review later.
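For comparison, a sketch of the allocator built on the IDA (the simple-ID
wrapper from <linux/idr.h> usually suggested instead of raw IDR for plain ID
allocation); this is purely illustrative and not part of the series. The IDA
does its own internal locking, so rme_vmid_lock and rme_vmid_init() would go
away, at the cost of no longer mirroring KVM's existing vmid_map style:

        #include <linux/idr.h>

        static DEFINE_IDA(rme_vmid_ida);

        static int rme_vmid_reserve(void)
        {
                /* Returns a free ID in [0, vmid_count - 1], or -ENOSPC
                 * once all VMIDs are in use */
                return ida_alloc_max(&rme_vmid_ida,
                                     (1 << kvm_get_vmid_bits()) - 1,
                                     GFP_KERNEL);
        }

        static void rme_vmid_release(unsigned int vmid)
        {
                ida_free(&rme_vmid_ida, vmid);
        }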
On Fri, 27 Jan 2023 11:29:10 +0000
Steven Price <steven.price@arm.com> wrote:

> Add the KVM_CAP_ARM_RME_CREATE_FD ioctl to create a realm. This involves
> delegating pages to the RMM to hold the Realm Descriptor (RD) and for
> the base level of the Realm Translation Tables (RTT). A VMID also needs
> to be picked; since the RMM has a separate VMID address space, a
> dedicated allocator is added for this purpose.

<snip>

> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
> index e0267f672b8a..c165df174737 100644
> --- a/arch/arm64/kvm/reset.c
> +++ b/arch/arm64/kvm/reset.c
> @@ -395,3 +395,36 @@ int kvm_set_ipa_limit(void)
>
>          return 0;
>  }
> +

The function below doesn't have a user in this patch. Also, it looks
like a partial copy of kvm_init_stage2_mmu() in arch/arm64/kvm/mmu.c.

> +int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
> +{
> +        u64 mmfr0, mmfr1;
> +        u32 phys_shift;
> +        u32 ipa_limit = kvm_ipa_limit;
> +
> +        if (kvm_is_realm(kvm))
> +                ipa_limit = kvm_realm_ipa_limit();
> +
> +        if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
> +                return -EINVAL;
> +
> +        phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type);
> +        if (phys_shift) {
> +                if (phys_shift > ipa_limit ||
> +                    phys_shift < ARM64_MIN_PARANGE_BITS)
> +                        return -EINVAL;
> +        } else {
> +                phys_shift = KVM_PHYS_SHIFT;
> +                if (phys_shift > ipa_limit) {
> +                        pr_warn_once("%s using unsupported default IPA limit, upgrade your VMM\n",
> +                                     current->comm);
> +                        return -EINVAL;
> +                }
> +        }
> +
> +        mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
> +        mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
> +        kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);
> +
> +        return 0;
> +}

<snip>
On 06/03/2023 19:10, Zhi Wang wrote:
> On Fri, 27 Jan 2023 11:29:10 +0000
> Steven Price <steven.price@arm.com> wrote:

<snip>

> The function below doesn't have a user in this patch. Also, it looks
> like a partial copy of kvm_init_stage2_mmu() in arch/arm64/kvm/mmu.c.

Good spot ;) Yes I discovered this, it should have been removed - it's
no longer used. I think this was an error when I was rebasing:
kvm_arm_setup_stage2() was removed in 315775ff7c6d ("KVM: arm64:
Consolidate stage-2 initialisation into a single function").

Steve
On 27-01-2023 04:59 pm, Steven Price wrote: > Add the KVM_CAP_ARM_RME_CREATE_FD ioctl to create a realm. This involves > delegating pages to the RMM to hold the Realm Descriptor (RD) and for > the base level of the Realm Translation Tables (RTT). A VMID also need > to be picked, since the RMM has a separate VMID address space a > dedicated allocator is added for this purpose. > > KVM_CAP_ARM_RME_CONFIG_REALM is provided to allow configuring the realm > before it is created. > > Signed-off-by: Steven Price <steven.price@arm.com> > --- > arch/arm64/include/asm/kvm_rme.h | 14 ++ > arch/arm64/kvm/arm.c | 19 ++ > arch/arm64/kvm/mmu.c | 6 + > arch/arm64/kvm/reset.c | 33 +++ > arch/arm64/kvm/rme.c | 357 +++++++++++++++++++++++++++++++ > 5 files changed, 429 insertions(+) > > diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h > index c26bc2c6770d..055a22accc08 100644 > --- a/arch/arm64/include/asm/kvm_rme.h > +++ b/arch/arm64/include/asm/kvm_rme.h > @@ -6,6 +6,8 @@ > #ifndef __ASM_KVM_RME_H > #define __ASM_KVM_RME_H > > +#include <uapi/linux/kvm.h> > + > enum realm_state { > REALM_STATE_NONE, > REALM_STATE_NEW, > @@ -15,8 +17,20 @@ enum realm_state { > > struct realm { > enum realm_state state; > + > + void *rd; > + struct realm_params *params; > + > + unsigned long num_aux; > + unsigned int vmid; > + unsigned int ia_bits; > }; > > int kvm_init_rme(void); > +u32 kvm_realm_ipa_limit(void); > + > +int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap); > +int kvm_init_realm_vm(struct kvm *kvm); > +void kvm_destroy_realm(struct kvm *kvm); > > #endif > diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c > index d97b39d042ab..50f54a63732a 100644 > --- a/arch/arm64/kvm/arm.c > +++ b/arch/arm64/kvm/arm.c > @@ -103,6 +103,13 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, > r = 0; > set_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags); > break; > + case KVM_CAP_ARM_RME: > + if (!static_branch_unlikely(&kvm_rme_is_available)) > + return -EINVAL; > + mutex_lock(&kvm->lock); > + r = kvm_realm_enable_cap(kvm, cap); > + mutex_unlock(&kvm->lock); > + break; > default: > r = -EINVAL; > break; > @@ -172,6 +179,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) > */ > kvm->arch.dfr0_pmuver.imp = kvm_arm_pmu_get_pmuver_limit(); > > + /* Initialise the realm bits after the generic bits are enabled */ > + if (kvm_is_realm(kvm)) { > + ret = kvm_init_realm_vm(kvm); > + if (ret) > + goto err_free_cpumask; > + } > + > return 0; > > err_free_cpumask: > @@ -204,6 +218,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) > kvm_destroy_vcpus(kvm); > > kvm_unshare_hyp(kvm, kvm + 1); > + > + kvm_destroy_realm(kvm); > } > > int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) > @@ -300,6 +316,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) > case KVM_CAP_ARM_PTRAUTH_GENERIC: > r = system_has_full_ptr_auth(); > break; > + case KVM_CAP_ARM_RME: > + r = static_key_enabled(&kvm_rme_is_available); > + break; > default: > r = 0; > } > diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c > index 31d7fa4c7c14..d0f707767d05 100644 > --- a/arch/arm64/kvm/mmu.c > +++ b/arch/arm64/kvm/mmu.c > @@ -840,6 +840,12 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) > struct kvm_pgtable *pgt = NULL; > > write_lock(&kvm->mmu_lock); > + if (kvm_is_realm(kvm) && > + kvm_realm_state(kvm) != REALM_STATE_DYING) { > + /* TODO: teardown rtts */ > + write_unlock(&kvm->mmu_lock); > + return; > + } > pgt = mmu->pgt; > if (pgt) { > mmu->pgd_phys = 0; > diff 
--git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c > index e0267f672b8a..c165df174737 100644 > --- a/arch/arm64/kvm/reset.c > +++ b/arch/arm64/kvm/reset.c > @@ -395,3 +395,36 @@ int kvm_set_ipa_limit(void) > > return 0; > } > + > +int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type) > +{ > + u64 mmfr0, mmfr1; > + u32 phys_shift; > + u32 ipa_limit = kvm_ipa_limit; > + > + if (kvm_is_realm(kvm)) > + ipa_limit = kvm_realm_ipa_limit(); > + > + if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK) > + return -EINVAL; > + > + phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type); > + if (phys_shift) { > + if (phys_shift > ipa_limit || > + phys_shift < ARM64_MIN_PARANGE_BITS) > + return -EINVAL; > + } else { > + phys_shift = KVM_PHYS_SHIFT; > + if (phys_shift > ipa_limit) { > + pr_warn_once("%s using unsupported default IPA limit, upgrade your VMM\n", > + current->comm); > + return -EINVAL; > + } > + } > + > + mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); > + mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); > + kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift); > + > + return 0; > +} > diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c > index f6b587bc116e..9f8c5a91b8fc 100644 > --- a/arch/arm64/kvm/rme.c > +++ b/arch/arm64/kvm/rme.c > @@ -5,9 +5,49 @@ > > #include <linux/kvm_host.h> > > +#include <asm/kvm_emulate.h> > +#include <asm/kvm_mmu.h> > #include <asm/rmi_cmds.h> > #include <asm/virt.h> > > +/************ FIXME: Copied from kvm/hyp/pgtable.c **********/ > +#include <asm/kvm_pgtable.h> > + > +struct kvm_pgtable_walk_data { > + struct kvm_pgtable *pgt; > + struct kvm_pgtable_walker *walker; > + > + u64 addr; > + u64 end; > +}; > + > +static u32 __kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr) > +{ > + u64 shift = kvm_granule_shift(pgt->start_level - 1); /* May underflow */ > + u64 mask = BIT(pgt->ia_bits) - 1; > + > + return (addr & mask) >> shift; > +} > + > +static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level) > +{ > + struct kvm_pgtable pgt = { > + .ia_bits = ia_bits, > + .start_level = start_level, > + }; > + > + return __kvm_pgd_page_idx(&pgt, -1ULL) + 1; > +} > + > +/******************/ > + > +static unsigned long rmm_feat_reg0; > + > +static bool rme_supports(unsigned long feature) > +{ > + return !!u64_get_bits(rmm_feat_reg0, feature); > +} > + > static int rmi_check_version(void) > { > struct arm_smccc_res res; > @@ -33,8 +73,319 @@ static int rmi_check_version(void) > return 0; > } > > +static unsigned long create_realm_feat_reg0(struct kvm *kvm) > +{ > + unsigned long ia_bits = VTCR_EL2_IPA(kvm->arch.vtcr); > + u64 feat_reg0 = 0; > + > + int num_bps = u64_get_bits(rmm_feat_reg0, > + RMI_FEATURE_REGISTER_0_NUM_BPS); > + int num_wps = u64_get_bits(rmm_feat_reg0, > + RMI_FEATURE_REGISTER_0_NUM_WPS); > + > + feat_reg0 |= u64_encode_bits(ia_bits, RMI_FEATURE_REGISTER_0_S2SZ); > + feat_reg0 |= u64_encode_bits(num_bps, RMI_FEATURE_REGISTER_0_NUM_BPS); > + feat_reg0 |= u64_encode_bits(num_wps, RMI_FEATURE_REGISTER_0_NUM_WPS); > + > + return feat_reg0; > +} > + > +u32 kvm_realm_ipa_limit(void) > +{ > + return u64_get_bits(rmm_feat_reg0, RMI_FEATURE_REGISTER_0_S2SZ); > +} > + > +static u32 get_start_level(struct kvm *kvm) > +{ > + long sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, kvm->arch.vtcr); > + > + return VTCR_EL2_TGRAN_SL0_BASE - sl0; > +} > + > +static int realm_create_rd(struct kvm *kvm) > +{ > + struct realm *realm = &kvm->arch.realm; > + struct realm_params *params = realm->params; > + void *rd = NULL; > + phys_addr_t rd_phys, params_phys; > + struct kvm_pgtable 
*pgt = kvm->arch.mmu.pgt; > + unsigned int pgd_sz; > + int i, r; > + > + if (WARN_ON(realm->rd) || WARN_ON(!realm->params)) > + return -EEXIST; > + > + rd = (void *)__get_free_page(GFP_KERNEL); > + if (!rd) > + return -ENOMEM; > + > + rd_phys = virt_to_phys(rd); > + if (rmi_granule_delegate(rd_phys)) { > + r = -ENXIO; > + goto out; > + } > + > + pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level); > + for (i = 0; i < pgd_sz; i++) { > + phys_addr_t pgd_phys = kvm->arch.mmu.pgd_phys + i * PAGE_SIZE; > + > + if (rmi_granule_delegate(pgd_phys)) { > + r = -ENXIO; > + goto out_undelegate_tables; > + } > + } > + > + params->rtt_level_start = get_start_level(kvm); > + params->rtt_num_start = pgd_sz; > + params->rtt_base = kvm->arch.mmu.pgd_phys; > + params->vmid = realm->vmid; > + > + params_phys = virt_to_phys(params); > + > + if (rmi_realm_create(rd_phys, params_phys)) { > + r = -ENXIO; > + goto out_undelegate_tables; > + } > + > + realm->rd = rd; > + realm->ia_bits = VTCR_EL2_IPA(kvm->arch.vtcr); > + > + if (WARN_ON(rmi_rec_aux_count(rd_phys, &realm->num_aux))) { > + WARN_ON(rmi_realm_destroy(rd_phys)); > + goto out_undelegate_tables; > + } > + > + return 0; > + > +out_undelegate_tables: > + while (--i >= 0) { > + phys_addr_t pgd_phys = kvm->arch.mmu.pgd_phys + i * PAGE_SIZE; > + > + WARN_ON(rmi_granule_undelegate(pgd_phys)); > + } > + WARN_ON(rmi_granule_undelegate(rd_phys)); > +out: > + free_page((unsigned long)rd); > + return r; > +} > + > +/* Protects access to rme_vmid_bitmap */ > +static DEFINE_SPINLOCK(rme_vmid_lock); > +static unsigned long *rme_vmid_bitmap; > + > +static int rme_vmid_init(void) > +{ > + unsigned int vmid_count = 1 << kvm_get_vmid_bits(); > + > + rme_vmid_bitmap = bitmap_zalloc(vmid_count, GFP_KERNEL); > + if (!rme_vmid_bitmap) { > + kvm_err("%s: Couldn't allocate rme vmid bitmap\n", __func__); > + return -ENOMEM; > + } > + > + return 0; > +} > + > +static int rme_vmid_reserve(void) > +{ > + int ret; > + unsigned int vmid_count = 1 << kvm_get_vmid_bits(); > + > + spin_lock(&rme_vmid_lock); > + ret = bitmap_find_free_region(rme_vmid_bitmap, vmid_count, 0); > + spin_unlock(&rme_vmid_lock); > + > + return ret; > +} > + > +static void rme_vmid_release(unsigned int vmid) > +{ > + spin_lock(&rme_vmid_lock); > + bitmap_release_region(rme_vmid_bitmap, vmid, 0); > + spin_unlock(&rme_vmid_lock); > +} > + > +static int kvm_create_realm(struct kvm *kvm) > +{ > + struct realm *realm = &kvm->arch.realm; > + int ret; > + > + if (!kvm_is_realm(kvm) || kvm_realm_state(kvm) != REALM_STATE_NONE) > + return -EEXIST; > + > + ret = rme_vmid_reserve(); > + if (ret < 0) > + return ret; > + realm->vmid = ret; > + > + ret = realm_create_rd(kvm); > + if (ret) { > + rme_vmid_release(realm->vmid); > + return ret; > + } > + > + WRITE_ONCE(realm->state, REALM_STATE_NEW); > + > + /* The realm is up, free the parameters. 
*/ > + free_page((unsigned long)realm->params); > + realm->params = NULL; > + > + return 0; > +} > + > +static int config_realm_hash_algo(struct realm *realm, > + struct kvm_cap_arm_rme_config_item *cfg) > +{ > + switch (cfg->hash_algo) { > + case KVM_CAP_ARM_RME_MEASUREMENT_ALGO_SHA256: > + if (!rme_supports(RMI_FEATURE_REGISTER_0_HASH_SHA_256)) > + return -EINVAL; > + break; > + case KVM_CAP_ARM_RME_MEASUREMENT_ALGO_SHA512: > + if (!rme_supports(RMI_FEATURE_REGISTER_0_HASH_SHA_512)) > + return -EINVAL; > + break; > + default: > + return -EINVAL; > + } > + realm->params->measurement_algo = cfg->hash_algo; > + return 0; > +} > + > +static int config_realm_sve(struct realm *realm, > + struct kvm_cap_arm_rme_config_item *cfg) > +{ > + u64 features_0 = realm->params->features_0; > + int max_sve_vq = u64_get_bits(rmm_feat_reg0, > + RMI_FEATURE_REGISTER_0_SVE_VL); > + > + if (!rme_supports(RMI_FEATURE_REGISTER_0_SVE_EN)) > + return -EINVAL; > + > + if (cfg->sve_vq > max_sve_vq) > + return -EINVAL; > + > + features_0 &= ~(RMI_FEATURE_REGISTER_0_SVE_EN | > + RMI_FEATURE_REGISTER_0_SVE_VL); > + features_0 |= u64_encode_bits(1, RMI_FEATURE_REGISTER_0_SVE_EN); > + features_0 |= u64_encode_bits(cfg->sve_vq, > + RMI_FEATURE_REGISTER_0_SVE_VL); > + > + realm->params->features_0 = features_0; > + return 0; > +} > + > +static int kvm_rme_config_realm(struct kvm *kvm, struct kvm_enable_cap *cap) > +{ > + struct kvm_cap_arm_rme_config_item cfg; > + struct realm *realm = &kvm->arch.realm; > + int r = 0; > + > + if (kvm_realm_state(kvm) != REALM_STATE_NONE) > + return -EBUSY; > + > + if (copy_from_user(&cfg, (void __user *)cap->args[1], sizeof(cfg))) > + return -EFAULT; > + > + switch (cfg.cfg) { > + case KVM_CAP_ARM_RME_CFG_RPV: > + memcpy(&realm->params->rpv, &cfg.rpv, sizeof(cfg.rpv)); > + break; > + case KVM_CAP_ARM_RME_CFG_HASH_ALGO: > + r = config_realm_hash_algo(realm, &cfg); > + break; > + case KVM_CAP_ARM_RME_CFG_SVE: > + r = config_realm_sve(realm, &cfg); > + break; > + default: > + r = -EINVAL; > + } > + > + return r; > +} > + > +int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) > +{ > + int r = 0; > + > + switch (cap->args[0]) { > + case KVM_CAP_ARM_RME_CONFIG_REALM: > + r = kvm_rme_config_realm(kvm, cap); > + break; > + case KVM_CAP_ARM_RME_CREATE_RD: > + if (kvm->created_vcpus) { > + r = -EBUSY; > + break; > + } > + > + r = kvm_create_realm(kvm); > + break; > + default: > + r = -EINVAL; > + break; > + } > + > + return r; > +} > + > +void kvm_destroy_realm(struct kvm *kvm) > +{ > + struct realm *realm = &kvm->arch.realm; > + struct kvm_pgtable *pgt = kvm->arch.mmu.pgt; > + unsigned int pgd_sz; > + int i; > + > + if (realm->params) { > + free_page((unsigned long)realm->params); > + realm->params = NULL; > + } > + > + if (kvm_realm_state(kvm) == REALM_STATE_NONE) > + return; > + > + WRITE_ONCE(realm->state, REALM_STATE_DYING); > + > + rme_vmid_release(realm->vmid); > + > + if (realm->rd) { > + phys_addr_t rd_phys = virt_to_phys(realm->rd); > + > + if (WARN_ON(rmi_realm_destroy(rd_phys))) > + return; > + if (WARN_ON(rmi_granule_undelegate(rd_phys))) > + return; > + free_page((unsigned long)realm->rd); > + realm->rd = NULL; > + } > + > + pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level); > + for (i = 0; i < pgd_sz; i++) { > + phys_addr_t pgd_phys = kvm->arch.mmu.pgd_phys + i * PAGE_SIZE; > + > + if (WARN_ON(rmi_granule_undelegate(pgd_phys))) > + return; > + } > + > + kvm_free_stage2_pgd(&kvm->arch.mmu); > +} > + > +int kvm_init_realm_vm(struct kvm *kvm) > +{ > + struct 
realm_params *params; > + > + params = (struct realm_params *)get_zeroed_page(GFP_KERNEL); > + if (!params) > + return -ENOMEM; > + > + params->features_0 = create_realm_feat_reg0(kvm); > + kvm->arch.realm.params = params; > + return 0; > +} > + > int kvm_init_rme(void) > { > + int ret; > + > if (PAGE_SIZE != SZ_4K) > /* Only 4k page size on the host is supported */ > return 0; > @@ -43,6 +394,12 @@ int kvm_init_rme(void) > /* Continue without realm support */ > return 0; > + ret = rme_vmid_init(); > + if (ret) > + return ret; > + > + WARN_ON(rmi_features(0, &rmm_feat_reg0));

Why WARN_ON? Would it be better to print an err/info message and keep "kvm_rme_is_available" disabled? IMO, we should print a message when RME is enabled; otherwise it should return silently.

> + > /* Future patch will enable static branch kvm_rme_is_available */ > > return 0;

Thanks, Ganapat
Thanks for taking a look at this.

On 18/03/2024 07:40, Ganapatrao Kulkarni wrote:
> On 27-01-2023 04:59 pm, Steven Price wrote:
[...]
>> int kvm_init_rme(void) >> { >> + int ret; >> + >> if (PAGE_SIZE != SZ_4K) >> /* Only 4k page size on the host is supported */ >> return 0; >> @@ -43,6 +394,12 @@ int kvm_init_rme(void) >> /* Continue without realm support */ >> return 0; >> + ret = rme_vmid_init(); >> + if (ret) >> + return ret; >> + >> + WARN_ON(rmi_features(0, &rmm_feat_reg0));
>
> Why WARN_ON? Would it be better to print an err/info message and keep
> "kvm_rme_is_available" disabled?

Good point. RMI_FEATURES "does not have any failure conditions", so this is very much a "should never happen" situation. If the call fails gracefully, rmm_feat_reg0 would remain 0, which would in practice stop realms from being created, but this is clearly non-ideal.

I'll fix this up in the next version by making the rmi_features() call before rme_vmid_init(); that way we can just return early without setting kvm_rme_is_available in this situation. I'll keep the WARN_ON because something has gone very wrong if this call fails.

> IMO, we should print a message when RME is enabled; otherwise it should
> return silently.

The rmi_check_version() call already outputs a "RMI ABI version %d.%d" message - I don't want to be too noisy here. Other than the 'cannot happen' situations, if you see the "RMI ABI" message then kvm_rme_is_available will be set. And those 'cannot happen' routes will print their own error messages (and point to a seriously broken system). And obviously, in the case of SMC_RMI_VERSION not being supported, we silently return, as this is taken to mean there isn't an RMM.

Thanks, Steve
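For illustration, the reordering Steve describes might look roughly like the sketch below. It is reconstructed only from the snippets quoted in this thread, not from the follow-up posting; in particular, the rmi_check_version() early return is an assumption about code not shown here.

int kvm_init_rme(void)
{
	int ret;

	if (PAGE_SIZE != SZ_4K)
		/* Only 4k page size on the host is supported */
		return 0;

	if (rmi_check_version())
		/* Continue without realm support */
		return 0;

	/*
	 * Query the features first: if this "cannot happen" call fails,
	 * bail out before doing anything else, leaving the static branch
	 * kvm_rme_is_available unset.
	 */
	if (WARN_ON(rmi_features(0, &rmm_feat_reg0)))
		return 0;

	ret = rme_vmid_init();
	if (ret)
		return ret;

	/* Future patch will enable static branch kvm_rme_is_available */

	return 0;
}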
diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h
index c26bc2c6770d..055a22accc08 100644
--- a/arch/arm64/include/asm/kvm_rme.h
+++ b/arch/arm64/include/asm/kvm_rme.h
@@ -6,6 +6,8 @@
 #ifndef __ASM_KVM_RME_H
 #define __ASM_KVM_RME_H
 
+#include <uapi/linux/kvm.h>
+
 enum realm_state {
 	REALM_STATE_NONE,
 	REALM_STATE_NEW,
@@ -15,8 +17,20 @@ enum realm_state {
 
 struct realm {
 	enum realm_state state;
+
+	void *rd;
+	struct realm_params *params;
+
+	unsigned long num_aux;
+	unsigned int vmid;
+	unsigned int ia_bits;
 };
 
 int kvm_init_rme(void);
+u32 kvm_realm_ipa_limit(void);
+
+int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap);
+int kvm_init_realm_vm(struct kvm *kvm);
+void kvm_destroy_realm(struct kvm *kvm);
 
 #endif
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index d97b39d042ab..50f54a63732a 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -103,6 +103,13 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 		r = 0;
 		set_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags);
 		break;
+	case KVM_CAP_ARM_RME:
+		if (!static_branch_unlikely(&kvm_rme_is_available))
+			return -EINVAL;
+		mutex_lock(&kvm->lock);
+		r = kvm_realm_enable_cap(kvm, cap);
+		mutex_unlock(&kvm->lock);
+		break;
 	default:
 		r = -EINVAL;
 		break;
@@ -172,6 +179,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	 */
 	kvm->arch.dfr0_pmuver.imp = kvm_arm_pmu_get_pmuver_limit();
 
+	/* Initialise the realm bits after the generic bits are enabled */
+	if (kvm_is_realm(kvm)) {
+		ret = kvm_init_realm_vm(kvm);
+		if (ret)
+			goto err_free_cpumask;
+	}
+
 	return 0;
 
 err_free_cpumask:
@@ -204,6 +218,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	kvm_destroy_vcpus(kvm);
 
 	kvm_unshare_hyp(kvm, kvm + 1);
+
+	kvm_destroy_realm(kvm);
 }
 
 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
@@ -300,6 +316,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_ARM_PTRAUTH_GENERIC:
 		r = system_has_full_ptr_auth();
 		break;
+	case KVM_CAP_ARM_RME:
+		r = static_key_enabled(&kvm_rme_is_available);
+		break;
 	default:
 		r = 0;
 	}
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 31d7fa4c7c14..d0f707767d05 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -840,6 +840,12 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
 	struct kvm_pgtable *pgt = NULL;
 
 	write_lock(&kvm->mmu_lock);
+	if (kvm_is_realm(kvm) &&
+	    kvm_realm_state(kvm) != REALM_STATE_DYING) {
+		/* TODO: teardown rtts */
+		write_unlock(&kvm->mmu_lock);
+		return;
+	}
 	pgt = mmu->pgt;
 	if (pgt) {
 		mmu->pgd_phys = 0;
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index e0267f672b8a..c165df174737 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -395,3 +395,36 @@ int kvm_set_ipa_limit(void)
 
 	return 0;
 }
+
+int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
+{
+	u64 mmfr0, mmfr1;
+	u32 phys_shift;
+	u32 ipa_limit = kvm_ipa_limit;
+
+	if (kvm_is_realm(kvm))
+		ipa_limit = kvm_realm_ipa_limit();
+
+	if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
+		return -EINVAL;
+
+	phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type);
+	if (phys_shift) {
+		if (phys_shift > ipa_limit ||
+		    phys_shift < ARM64_MIN_PARANGE_BITS)
+			return -EINVAL;
+	} else {
+		phys_shift = KVM_PHYS_SHIFT;
+		if (phys_shift > ipa_limit) {
+			pr_warn_once("%s using unsupported default IPA limit, upgrade your VMM\n",
+				     current->comm);
+			return -EINVAL;
+		}
+	}
+
+	mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+	mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
+	kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);
+
+	return 0;
+}
diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c
index f6b587bc116e..9f8c5a91b8fc 100644
--- a/arch/arm64/kvm/rme.c
+++ b/arch/arm64/kvm/rme.c
@@ -5,9 +5,49 @@
 
 #include <linux/kvm_host.h>
 
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_mmu.h>
 #include <asm/rmi_cmds.h>
 #include <asm/virt.h>
 
+/************ FIXME: Copied from kvm/hyp/pgtable.c **********/
+#include <asm/kvm_pgtable.h>
+
+struct kvm_pgtable_walk_data {
+	struct kvm_pgtable *pgt;
+	struct kvm_pgtable_walker *walker;
+
+	u64 addr;
+	u64 end;
+};
+
+static u32 __kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr)
+{
+	u64 shift = kvm_granule_shift(pgt->start_level - 1); /* May underflow */
+	u64 mask = BIT(pgt->ia_bits) - 1;
+
+	return (addr & mask) >> shift;
+}
+
+static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
+{
+	struct kvm_pgtable pgt = {
+		.ia_bits = ia_bits,
+		.start_level = start_level,
+	};
+
+	return __kvm_pgd_page_idx(&pgt, -1ULL) + 1;
+}
+
+/******************/
+
+static unsigned long rmm_feat_reg0;
+
+static bool rme_supports(unsigned long feature)
+{
+	return !!u64_get_bits(rmm_feat_reg0, feature);
+}
+
 static int rmi_check_version(void)
 {
 	struct arm_smccc_res res;
@@ -33,8 +73,319 @@ static int rmi_check_version(void)
 	return 0;
 }
 
+static unsigned long create_realm_feat_reg0(struct kvm *kvm)
+{
+	unsigned long ia_bits = VTCR_EL2_IPA(kvm->arch.vtcr);
+	u64 feat_reg0 = 0;
+
+	int num_bps = u64_get_bits(rmm_feat_reg0,
+				   RMI_FEATURE_REGISTER_0_NUM_BPS);
+	int num_wps = u64_get_bits(rmm_feat_reg0,
+				   RMI_FEATURE_REGISTER_0_NUM_WPS);
+
+	feat_reg0 |= u64_encode_bits(ia_bits, RMI_FEATURE_REGISTER_0_S2SZ);
+	feat_reg0 |= u64_encode_bits(num_bps, RMI_FEATURE_REGISTER_0_NUM_BPS);
+	feat_reg0 |= u64_encode_bits(num_wps, RMI_FEATURE_REGISTER_0_NUM_WPS);
+
+	return feat_reg0;
+}
+
+u32 kvm_realm_ipa_limit(void)
+{
+	return u64_get_bits(rmm_feat_reg0, RMI_FEATURE_REGISTER_0_S2SZ);
+}
+
+static u32 get_start_level(struct kvm *kvm)
+{
+	long sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, kvm->arch.vtcr);
+
+	return VTCR_EL2_TGRAN_SL0_BASE - sl0;
+}
+
+static int realm_create_rd(struct kvm *kvm)
+{
+	struct realm *realm = &kvm->arch.realm;
+	struct realm_params *params = realm->params;
+	void *rd = NULL;
+	phys_addr_t rd_phys, params_phys;
+	struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
+	unsigned int pgd_sz;
+	int i, r;
+
+	if (WARN_ON(realm->rd) || WARN_ON(!realm->params))
+		return -EEXIST;
+
+	rd = (void *)__get_free_page(GFP_KERNEL);
+	if (!rd)
+		return -ENOMEM;
+
+	rd_phys = virt_to_phys(rd);
+	if (rmi_granule_delegate(rd_phys)) {
+		r = -ENXIO;
+		goto out;
+	}
+
+	pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level);
+	for (i = 0; i < pgd_sz; i++) {
+		phys_addr_t pgd_phys = kvm->arch.mmu.pgd_phys + i * PAGE_SIZE;
+
+		if (rmi_granule_delegate(pgd_phys)) {
+			r = -ENXIO;
+			goto out_undelegate_tables;
+		}
+	}
+
+	params->rtt_level_start = get_start_level(kvm);
+	params->rtt_num_start = pgd_sz;
+	params->rtt_base = kvm->arch.mmu.pgd_phys;
+	params->vmid = realm->vmid;
+
+	params_phys = virt_to_phys(params);
+
+	if (rmi_realm_create(rd_phys, params_phys)) {
+		r = -ENXIO;
+		goto out_undelegate_tables;
+	}
+
+	realm->rd = rd;
+	realm->ia_bits = VTCR_EL2_IPA(kvm->arch.vtcr);
+
+	if (WARN_ON(rmi_rec_aux_count(rd_phys, &realm->num_aux))) {
+		WARN_ON(rmi_realm_destroy(rd_phys));
+		goto out_undelegate_tables;
+	}
+
+	return 0;
+
+out_undelegate_tables:
+	while (--i >= 0) {
+		phys_addr_t pgd_phys = kvm->arch.mmu.pgd_phys + i * PAGE_SIZE;
+
+		WARN_ON(rmi_granule_undelegate(pgd_phys));
+	}
+	WARN_ON(rmi_granule_undelegate(rd_phys));
+out:
+	free_page((unsigned long)rd);
+	return r;
+}
+
+/* Protects access to rme_vmid_bitmap */
+static DEFINE_SPINLOCK(rme_vmid_lock);
+static unsigned long *rme_vmid_bitmap;
+
+static int rme_vmid_init(void)
+{
+	unsigned int vmid_count = 1 << kvm_get_vmid_bits();
+
+	rme_vmid_bitmap = bitmap_zalloc(vmid_count, GFP_KERNEL);
+	if (!rme_vmid_bitmap) {
+		kvm_err("%s: Couldn't allocate rme vmid bitmap\n", __func__);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int rme_vmid_reserve(void)
+{
+	int ret;
+	unsigned int vmid_count = 1 << kvm_get_vmid_bits();
+
+	spin_lock(&rme_vmid_lock);
+	ret = bitmap_find_free_region(rme_vmid_bitmap, vmid_count, 0);
+	spin_unlock(&rme_vmid_lock);
+
+	return ret;
+}
+
+static void rme_vmid_release(unsigned int vmid)
+{
+	spin_lock(&rme_vmid_lock);
+	bitmap_release_region(rme_vmid_bitmap, vmid, 0);
+	spin_unlock(&rme_vmid_lock);
+}
+
+static int kvm_create_realm(struct kvm *kvm)
+{
+	struct realm *realm = &kvm->arch.realm;
+	int ret;
+
+	if (!kvm_is_realm(kvm) || kvm_realm_state(kvm) != REALM_STATE_NONE)
+		return -EEXIST;
+
+	ret = rme_vmid_reserve();
+	if (ret < 0)
+		return ret;
+	realm->vmid = ret;
+
+	ret = realm_create_rd(kvm);
+	if (ret) {
+		rme_vmid_release(realm->vmid);
+		return ret;
+	}
+
+	WRITE_ONCE(realm->state, REALM_STATE_NEW);
+
+	/* The realm is up, free the parameters. */
+	free_page((unsigned long)realm->params);
+	realm->params = NULL;
+
+	return 0;
+}
+
+static int config_realm_hash_algo(struct realm *realm,
+				  struct kvm_cap_arm_rme_config_item *cfg)
+{
+	switch (cfg->hash_algo) {
+	case KVM_CAP_ARM_RME_MEASUREMENT_ALGO_SHA256:
+		if (!rme_supports(RMI_FEATURE_REGISTER_0_HASH_SHA_256))
+			return -EINVAL;
+		break;
+	case KVM_CAP_ARM_RME_MEASUREMENT_ALGO_SHA512:
+		if (!rme_supports(RMI_FEATURE_REGISTER_0_HASH_SHA_512))
+			return -EINVAL;
+		break;
+	default:
+		return -EINVAL;
+	}
+	realm->params->measurement_algo = cfg->hash_algo;
+	return 0;
+}
+
+static int config_realm_sve(struct realm *realm,
+			    struct kvm_cap_arm_rme_config_item *cfg)
+{
+	u64 features_0 = realm->params->features_0;
+	int max_sve_vq = u64_get_bits(rmm_feat_reg0,
+				      RMI_FEATURE_REGISTER_0_SVE_VL);
+
+	if (!rme_supports(RMI_FEATURE_REGISTER_0_SVE_EN))
+		return -EINVAL;
+
+	if (cfg->sve_vq > max_sve_vq)
+		return -EINVAL;
+
+	features_0 &= ~(RMI_FEATURE_REGISTER_0_SVE_EN |
+			RMI_FEATURE_REGISTER_0_SVE_VL);
+	features_0 |= u64_encode_bits(1, RMI_FEATURE_REGISTER_0_SVE_EN);
+	features_0 |= u64_encode_bits(cfg->sve_vq,
+				      RMI_FEATURE_REGISTER_0_SVE_VL);
+
+	realm->params->features_0 = features_0;
+	return 0;
+}
+
+static int kvm_rme_config_realm(struct kvm *kvm, struct kvm_enable_cap *cap)
+{
+	struct kvm_cap_arm_rme_config_item cfg;
+	struct realm *realm = &kvm->arch.realm;
+	int r = 0;
+
+	if (kvm_realm_state(kvm) != REALM_STATE_NONE)
+		return -EBUSY;
+
+	if (copy_from_user(&cfg, (void __user *)cap->args[1], sizeof(cfg)))
+		return -EFAULT;
+
+	switch (cfg.cfg) {
+	case KVM_CAP_ARM_RME_CFG_RPV:
+		memcpy(&realm->params->rpv, &cfg.rpv, sizeof(cfg.rpv));
+		break;
+	case KVM_CAP_ARM_RME_CFG_HASH_ALGO:
+		r = config_realm_hash_algo(realm, &cfg);
+		break;
+	case KVM_CAP_ARM_RME_CFG_SVE:
+		r = config_realm_sve(realm, &cfg);
+		break;
+	default:
+		r = -EINVAL;
+	}
+
+	return r;
+}
+
+int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
+{
+	int r = 0;
+
+	switch (cap->args[0]) {
+	case KVM_CAP_ARM_RME_CONFIG_REALM:
+		r = kvm_rme_config_realm(kvm, cap);
+		break;
+	case KVM_CAP_ARM_RME_CREATE_RD:
+		if (kvm->created_vcpus) {
+			r = -EBUSY;
+			break;
+		}
+
+		r = kvm_create_realm(kvm);
+		break;
+	default:
+		r = -EINVAL;
+		break;
+	}
+
+	return r;
+}
+
+void kvm_destroy_realm(struct kvm *kvm)
+{
+	struct realm *realm = &kvm->arch.realm;
+	struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
+	unsigned int pgd_sz;
+	int i;
+
+	if (realm->params) {
+		free_page((unsigned long)realm->params);
+		realm->params = NULL;
+	}
+
+	if (kvm_realm_state(kvm) == REALM_STATE_NONE)
+		return;
+
+	WRITE_ONCE(realm->state, REALM_STATE_DYING);
+
+	rme_vmid_release(realm->vmid);
+
+	if (realm->rd) {
+		phys_addr_t rd_phys = virt_to_phys(realm->rd);
+
+		if (WARN_ON(rmi_realm_destroy(rd_phys)))
+			return;
+		if (WARN_ON(rmi_granule_undelegate(rd_phys)))
+			return;
+		free_page((unsigned long)realm->rd);
+		realm->rd = NULL;
+	}
+
+	pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level);
+	for (i = 0; i < pgd_sz; i++) {
+		phys_addr_t pgd_phys = kvm->arch.mmu.pgd_phys + i * PAGE_SIZE;
+
+		if (WARN_ON(rmi_granule_undelegate(pgd_phys)))
+			return;
+	}
+
+	kvm_free_stage2_pgd(&kvm->arch.mmu);
+}
+
+int kvm_init_realm_vm(struct kvm *kvm)
+{
+	struct realm_params *params;
+
+	params = (struct realm_params *)get_zeroed_page(GFP_KERNEL);
+	if (!params)
+		return -ENOMEM;
+
+	params->features_0 = create_realm_feat_reg0(kvm);
+	kvm->arch.realm.params = params;
+	return 0;
+}
+
 int kvm_init_rme(void)
 {
+	int ret;
+
 	if (PAGE_SIZE != SZ_4K)
 		/* Only 4k page size on the host is supported */
 		return 0;
@@ -43,6 +394,12 @@ int kvm_init_rme(void)
 	/* Continue without realm support */
 	return 0;
 
+	ret = rme_vmid_init();
+	if (ret)
+		return ret;
+
+	WARN_ON(rmi_features(0, &rmm_feat_reg0));
+
 	/* Future patch will enable static branch kvm_rme_is_available */
 
 	return 0;
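A worked example may help with the kvm_pgd_pages() helper above (illustrative numbers only, assuming 4K granules, for which kvm_granule_shift(0) evaluates to 39). For a realm with ia_bits = 40 and start_level = 1:

  shift = kvm_granule_shift(1 - 1) = 39
  mask  = BIT(40) - 1
  __kvm_pgd_page_idx(&pgt, -1ULL) = ((BIT(40) - 1) & mask) >> 39 = 1
  kvm_pgd_pages(40, 1) = 1 + 1 = 2

So realm_create_rd() delegates two PGD pages and sets rtt_num_start = 2, i.e. two concatenated level-1 starting RTTs for the 40-bit IPA space.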
Add the KVM_CAP_ARM_RME_CREATE_RD ioctl to create a realm. This involves delegating pages to the RMM to hold the Realm Descriptor (RD) and the base level of the Realm Translation Tables (RTT). A VMID also needs to be picked; since the RMM has a separate VMID address space, a dedicated allocator is added for this purpose.

KVM_CAP_ARM_RME_CONFIG_REALM is provided to allow configuring the realm before it is created.

Signed-off-by: Steven Price <steven.price@arm.com>
---
 arch/arm64/include/asm/kvm_rme.h |  14 ++
 arch/arm64/kvm/arm.c             |  19 ++
 arch/arm64/kvm/mmu.c             |   6 +
 arch/arm64/kvm/reset.c           |  33 +++
 arch/arm64/kvm/rme.c             | 357 +++++++++++++++++++++++++++++++
 5 files changed, 429 insertions(+)
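To make the new interface concrete, a VMM might drive it roughly as follows. This is an illustrative sketch only: it assumes the constants and the struct kvm_cap_arm_rme_config_item layout from this series are exported via the userspace headers, that vm_fd refers to a VM already created as a realm (the mechanism for marking a VM as a realm is outside this patch), and error handling is minimal.

#include <err.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Configure and then create the realm; must run before any vCPUs exist. */
static void create_realm(int vm_fd)
{
	struct kvm_cap_arm_rme_config_item cfg = {
		.cfg = KVM_CAP_ARM_RME_CFG_HASH_ALGO,
		.hash_algo = KVM_CAP_ARM_RME_MEASUREMENT_ALGO_SHA256,
	};
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_ARM_RME,
		.args = { KVM_CAP_ARM_RME_CONFIG_REALM,
			  (__u64)(unsigned long)&cfg },
	};

	/*
	 * KVM_CAP_ARM_RME_CONFIG_REALM can be issued once per configuration
	 * item while the realm is still in REALM_STATE_NONE.
	 */
	if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap))
		err(1, "KVM_CAP_ARM_RME_CONFIG_REALM");

	/* Create the Realm Descriptor; fails with EBUSY once vCPUs exist. */
	cap.args[0] = KVM_CAP_ARM_RME_CREATE_RD;
	cap.args[1] = 0;
	if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap))
		err(1, "KVM_CAP_ARM_RME_CREATE_RD");
}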