Message ID | 20240821153844.60084-19-steven.price@arm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | arm64: Support for Arm CCA in KVM | expand |
Steven Price <steven.price@arm.com> writes: > Entering a realm is done using a SMC call to the RMM. On exit the > exit-codes need to be handled slightly differently to the normal KVM > path so define our own functions for realm enter/exit and hook them > in if the guest is a realm guest. > > Signed-off-by: Steven Price <steven.price@arm.com> > --- > Changes since v2: > * realm_set_ipa_state() now provides an output parameter for the > top_iap that was changed. Use this to signal the VMM with the correct > range that has been transitioned. > * Adapt to previous patch changes. > --- > arch/arm64/include/asm/kvm_rme.h | 3 + > arch/arm64/kvm/Makefile | 2 +- > arch/arm64/kvm/arm.c | 19 +++- > arch/arm64/kvm/rme-exit.c | 181 +++++++++++++++++++++++++++++++ > arch/arm64/kvm/rme.c | 11 ++ > 5 files changed, 210 insertions(+), 6 deletions(-) > create mode 100644 arch/arm64/kvm/rme-exit.c > > diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h > index c064bfb080ad..0e44b20cfa48 100644 > --- a/arch/arm64/include/asm/kvm_rme.h > +++ b/arch/arm64/include/asm/kvm_rme.h > @@ -96,6 +96,9 @@ void kvm_realm_destroy_rtts(struct kvm *kvm, u32 ia_bits); > int kvm_create_rec(struct kvm_vcpu *vcpu); > void kvm_destroy_rec(struct kvm_vcpu *vcpu); > > +int kvm_rec_enter(struct kvm_vcpu *vcpu); > +int handle_rme_exit(struct kvm_vcpu *vcpu, int rec_run_status); > + > void kvm_realm_unmap_range(struct kvm *kvm, > unsigned long ipa, > u64 size, > diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile > index 5e79e5eee88d..9f893e86cac9 100644 > --- a/arch/arm64/kvm/Makefile > +++ b/arch/arm64/kvm/Makefile > @@ -21,7 +21,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \ > vgic/vgic-mmio.o vgic/vgic-mmio-v2.o \ > vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \ > vgic/vgic-its.o vgic/vgic-debug.o \ > - rme.o > + rme.o rme-exit.o > > kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o > kvm-$(CONFIG_ARM64_PTR_AUTH) += pauth.o > diff --git 
a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c > index 568e9e6e5a4e..e8dabb996705 100644 > --- a/arch/arm64/kvm/arm.c > +++ b/arch/arm64/kvm/arm.c > @@ -1282,7 +1282,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) > trace_kvm_entry(*vcpu_pc(vcpu)); > guest_timing_enter_irqoff(); > > - ret = kvm_arm_vcpu_enter_exit(vcpu); > + if (vcpu_is_rec(vcpu)) > + ret = kvm_rec_enter(vcpu); > + else > + ret = kvm_arm_vcpu_enter_exit(vcpu); > > vcpu->mode = OUTSIDE_GUEST_MODE; > vcpu->stat.exits++; > @@ -1336,10 +1339,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) > > local_irq_enable(); > > - trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); > - > /* Exit types that need handling before we can be preempted */ > - handle_exit_early(vcpu, ret); > + if (!vcpu_is_rec(vcpu)) { > + trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), > + *vcpu_pc(vcpu)); > + > + handle_exit_early(vcpu, ret); > + } > > preempt_enable(); > > @@ -1362,7 +1368,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) > ret = ARM_EXCEPTION_IL; > } > > - ret = handle_exit(vcpu, ret); > + if (vcpu_is_rec(vcpu)) > + ret = handle_rme_exit(vcpu, ret); > + else > + ret = handle_exit(vcpu, ret); > } > like kvm_rec_enter, should we name this as handle_rec_exit()? 
arch/arm64/include/asm/kvm_rme.h | 2 +- arch/arm64/kvm/arm.c | 2 +- arch/arm64/kvm/rme-exit.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h index a72e06cf4ea6..cd42c19ca21d 100644 --- a/arch/arm64/include/asm/kvm_rme.h +++ b/arch/arm64/include/asm/kvm_rme.h @@ -102,7 +102,7 @@ int kvm_create_rec(struct kvm_vcpu *vcpu); void kvm_destroy_rec(struct kvm_vcpu *vcpu); int kvm_rec_enter(struct kvm_vcpu *vcpu); -int handle_rme_exit(struct kvm_vcpu *vcpu, int rec_run_status); +int handle_rec_exit(struct kvm_vcpu *vcpu, int rec_run_status); void kvm_realm_unmap_range(struct kvm *kvm, unsigned long ipa, diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 05d9062470c2..1e34541d88db 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1391,7 +1391,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) } if (vcpu_is_rec(vcpu)) - ret = handle_rme_exit(vcpu, ret); + ret = handle_rec_exit(vcpu, ret); else ret = handle_exit(vcpu, ret); } diff --git a/arch/arm64/kvm/rme-exit.c b/arch/arm64/kvm/rme-exit.c index 0940575b0a14..f888cfe72dfa 100644 --- a/arch/arm64/kvm/rme-exit.c +++ b/arch/arm64/kvm/rme-exit.c @@ -156,7 +156,7 @@ static void update_arch_timer_irq_lines(struct kvm_vcpu *vcpu) * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on * proper exit to userspace. */ -int handle_rme_exit(struct kvm_vcpu *vcpu, int rec_run_ret) +int handle_rec_exit(struct kvm_vcpu *vcpu, int rec_run_ret) { struct realm_rec *rec = &vcpu->arch.rec; u8 esr_ec = ESR_ELx_EC(rec->run->exit.esr); --
Steven Price <steven.price@arm.com> writes: > + /* Exit to VMM to complete the change */ > + kvm_prepare_memory_fault_exit(vcpu, base, top_ipa - base, false, false, > + ripas == 1); > + s/1/RMI_RAM ? May be we can make it an enum like rsi_ripas modified arch/arm64/include/asm/rmi_smc.h @@ -62,9 +62,11 @@ #define RMI_ERROR_REC 3 #define RMI_ERROR_RTT 4 -#define RMI_EMPTY 0 -#define RMI_RAM 1 -#define RMI_DESTROYED 2 +enum rmi_ripas { + RMI_EMPTY, + RMI_RAM, + RMI_DESTROYED, +}; #define RMI_NO_MEASURE_CONTENT 0 #define RMI_MEASURE_CONTENT 1 modified arch/arm64/kvm/rme-exit.c @@ -112,7 +112,7 @@ static int rec_exit_ripas_change(struct kvm_vcpu *vcpu) /* Exit to VMM to complete the change */ kvm_prepare_memory_fault_exit(vcpu, base, top_ipa - base, false, false, - ripas == 1); + ripas == RMI_RAM); return 0; }
Steven Price <steven.price@arm.com> writes: .... > +static int rec_exit_ripas_change(struct kvm_vcpu *vcpu) > +{ > + struct kvm *kvm = vcpu->kvm; > + struct realm *realm = &kvm->arch.realm; > + struct realm_rec *rec = &vcpu->arch.rec; > + unsigned long base = rec->run->exit.ripas_base; > + unsigned long top = rec->run->exit.ripas_top; > + unsigned long ripas = rec->run->exit.ripas_value & 1; > + unsigned long top_ipa; > + int ret = -EINVAL; > + > + if (realm_is_addr_protected(realm, base) && > + realm_is_addr_protected(realm, top - 1)) { > + kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_page_cache, > + kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu)); > + write_lock(&kvm->mmu_lock); > + ret = realm_set_ipa_state(vcpu, base, top, ripas, &top_ipa); > + write_unlock(&kvm->mmu_lock); > + } > + > + WARN(ret && ret != -ENOMEM, > + "Unable to satisfy SET_IPAS for %#lx - %#lx, ripas: %#lx\n", > + base, top, ripas); > + > + /* Exit to VMM to complete the change */ > + kvm_prepare_memory_fault_exit(vcpu, base, top_ipa - base, false, false, > + ripas == 1); > + > + return 0; > +} > + arch/arm64/kvm/rme-exit.c:100:6: warning: variable 'top_ipa' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized] if (realm_is_addr_protected(realm, base) && ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arch/arm64/kvm/rme-exit.c:114:44: note: uninitialized use occurs here kvm_prepare_memory_fault_exit(vcpu, base, top_ipa - base, false, false, ^~~~~~~ -aneesh
Hi Steven, kernel test robot noticed the following build warnings: [auto build test WARNING on kvmarm/next] [also build test WARNING on kvm/queue arm64/for-next/core linus/master v6.11-rc4 next-20240822] [cannot apply to kvm/linux-next] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Steven-Price/KVM-Prepare-for-handling-only-shared-mappings-in-mmu_notifier-events/20240821-235327 base: https://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git next patch link: https://lore.kernel.org/r/20240821153844.60084-19-steven.price%40arm.com patch subject: [PATCH v4 18/43] arm64: RME: Handle realm enter/exit config: arm64-allmodconfig (https://download.01.org/0day-ci/archive/20240822/202408222119.6YF3UR6O-lkp@intel.com/config) compiler: clang version 20.0.0git (https://github.com/llvm/llvm-project 26670e7fa4f032a019d23d56c6a02926e854e8af) reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240822/202408222119.6YF3UR6O-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. 
not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202408222119.6YF3UR6O-lkp@intel.com/ All warnings (new ones prefixed by >>): In file included from arch/arm64/kvm/rme-exit.c:6: In file included from include/linux/kvm_host.h:16: In file included from include/linux/mm.h:2228: include/linux/vmstat.h:500:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion] 500 | return vmstat_text[NR_VM_ZONE_STAT_ITEMS + | ~~~~~~~~~~~~~~~~~~~~~ ^ 501 | item]; | ~~~~ include/linux/vmstat.h:507:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion] 507 | return vmstat_text[NR_VM_ZONE_STAT_ITEMS + | ~~~~~~~~~~~~~~~~~~~~~ ^ 508 | NR_VM_NUMA_EVENT_ITEMS + | ~~~~~~~~~~~~~~~~~~~~~~ include/linux/vmstat.h:514:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion] 514 | return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_" | ~~~~~~~~~~~ ^ ~~~ include/linux/vmstat.h:519:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion] 519 | return vmstat_text[NR_VM_ZONE_STAT_ITEMS + | ~~~~~~~~~~~~~~~~~~~~~ ^ 520 | NR_VM_NUMA_EVENT_ITEMS + | ~~~~~~~~~~~~~~~~~~~~~~ include/linux/vmstat.h:528:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion] 528 | return vmstat_text[NR_VM_ZONE_STAT_ITEMS + | ~~~~~~~~~~~~~~~~~~~~~ ^ 529 | NR_VM_NUMA_EVENT_ITEMS + | ~~~~~~~~~~~~~~~~~~~~~~ arch/arm64/kvm/rme-exit.c:61:23: warning: initializer overrides prior initialization of this subobject [-Winitializer-overrides] 61 | [ESR_ELx_EC_SYS64] = rec_exit_sys_reg, | ^~~~~~~~~~~~~~~~ 
arch/arm64/kvm/rme-exit.c:60:27: note: previous initialization is here 60 | [0 ... ESR_ELx_EC_MAX] = rec_exit_reason_notimpl, | ^~~~~~~~~~~~~~~~~~~~~~~ arch/arm64/kvm/rme-exit.c:62:26: warning: initializer overrides prior initialization of this subobject [-Winitializer-overrides] 62 | [ESR_ELx_EC_DABT_LOW] = rec_exit_sync_dabt, | ^~~~~~~~~~~~~~~~~~ arch/arm64/kvm/rme-exit.c:60:27: note: previous initialization is here 60 | [0 ... ESR_ELx_EC_MAX] = rec_exit_reason_notimpl, | ^~~~~~~~~~~~~~~~~~~~~~~ arch/arm64/kvm/rme-exit.c:63:26: warning: initializer overrides prior initialization of this subobject [-Winitializer-overrides] 63 | [ESR_ELx_EC_IABT_LOW] = rec_exit_sync_iabt | ^~~~~~~~~~~~~~~~~~ arch/arm64/kvm/rme-exit.c:60:27: note: previous initialization is here 60 | [0 ... ESR_ELx_EC_MAX] = rec_exit_reason_notimpl, | ^~~~~~~~~~~~~~~~~~~~~~~ >> arch/arm64/kvm/rme-exit.c:94:6: warning: variable 'top_ipa' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized] 94 | if (realm_is_addr_protected(realm, base) && | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 95 | realm_is_addr_protected(realm, top - 1)) { | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arch/arm64/kvm/rme-exit.c:108:44: note: uninitialized use occurs here 108 | kvm_prepare_memory_fault_exit(vcpu, base, top_ipa - base, false, false, | ^~~~~~~ arch/arm64/kvm/rme-exit.c:94:2: note: remove the 'if' if its condition is always true 94 | if (realm_is_addr_protected(realm, base) && | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 95 | realm_is_addr_protected(realm, top - 1)) { | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ >> arch/arm64/kvm/rme-exit.c:94:6: warning: variable 'top_ipa' is used uninitialized whenever '&&' condition is false [-Wsometimes-uninitialized] 94 | if (realm_is_addr_protected(realm, base) && | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arch/arm64/kvm/rme-exit.c:108:44: note: uninitialized use occurs here 108 | kvm_prepare_memory_fault_exit(vcpu, base, top_ipa - base, false, false, | 
^~~~~~~ arch/arm64/kvm/rme-exit.c:94:6: note: remove the '&&' if its condition is always true 94 | if (realm_is_addr_protected(realm, base) && | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arch/arm64/kvm/rme-exit.c:91:23: note: initialize the variable 'top_ipa' to silence this warning 91 | unsigned long top_ipa; | ^ | = 0 10 warnings generated. vim +94 arch/arm64/kvm/rme-exit.c 58 59 static exit_handler_fn rec_exit_handlers[] = { > 60 [0 ... ESR_ELx_EC_MAX] = rec_exit_reason_notimpl, 61 [ESR_ELx_EC_SYS64] = rec_exit_sys_reg, 62 [ESR_ELx_EC_DABT_LOW] = rec_exit_sync_dabt, 63 [ESR_ELx_EC_IABT_LOW] = rec_exit_sync_iabt 64 }; 65 66 static int rec_exit_psci(struct kvm_vcpu *vcpu) 67 { 68 struct realm_rec *rec = &vcpu->arch.rec; 69 int i; 70 int ret; 71 72 for (i = 0; i < REC_RUN_GPRS; i++) 73 vcpu_set_reg(vcpu, i, rec->run->exit.gprs[i]); 74 75 ret = kvm_smccc_call_handler(vcpu); 76 77 for (i = 0; i < REC_RUN_GPRS; i++) 78 rec->run->enter.gprs[i] = vcpu_get_reg(vcpu, i); 79 80 return ret; 81 } 82 83 static int rec_exit_ripas_change(struct kvm_vcpu *vcpu) 84 { 85 struct kvm *kvm = vcpu->kvm; 86 struct realm *realm = &kvm->arch.realm; 87 struct realm_rec *rec = &vcpu->arch.rec; 88 unsigned long base = rec->run->exit.ripas_base; 89 unsigned long top = rec->run->exit.ripas_top; 90 unsigned long ripas = rec->run->exit.ripas_value & 1; 91 unsigned long top_ipa; 92 int ret = -EINVAL; 93 > 94 if (realm_is_addr_protected(realm, base) && 95 realm_is_addr_protected(realm, top - 1)) { 96 kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_page_cache, 97 kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu)); 98 write_lock(&kvm->mmu_lock); 99 ret = realm_set_ipa_state(vcpu, base, top, ripas, &top_ipa); 100 write_unlock(&kvm->mmu_lock); 101 } 102 103 WARN(ret && ret != -ENOMEM, 104 "Unable to satisfy SET_IPAS for %#lx - %#lx, ripas: %#lx\n", 105 base, top, ripas); 106 107 /* Exit to VMM to complete the change */ > 108 kvm_prepare_memory_fault_exit(vcpu, base, top_ipa - base, false, false, 109 ripas == 1); 
110 111 return 0; 112 } 113
On 22/08/2024 04:53, Aneesh Kumar K.V wrote: > Steven Price <steven.price@arm.com> writes: > >> Entering a realm is done using a SMC call to the RMM. On exit the >> exit-codes need to be handled slightly differently to the normal KVM >> path so define our own functions for realm enter/exit and hook them >> in if the guest is a realm guest. >> >> Signed-off-by: Steven Price <steven.price@arm.com> >> --- >> Changes since v2: >> * realm_set_ipa_state() now provides an output parameter for the >> top_iap that was changed. Use this to signal the VMM with the correct >> range that has been transitioned. >> * Adapt to previous patch changes. >> --- >> arch/arm64/include/asm/kvm_rme.h | 3 + >> arch/arm64/kvm/Makefile | 2 +- >> arch/arm64/kvm/arm.c | 19 +++- >> arch/arm64/kvm/rme-exit.c | 181 +++++++++++++++++++++++++++++++ >> arch/arm64/kvm/rme.c | 11 ++ >> 5 files changed, 210 insertions(+), 6 deletions(-) >> create mode 100644 arch/arm64/kvm/rme-exit.c >> >> diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h >> index c064bfb080ad..0e44b20cfa48 100644 >> --- a/arch/arm64/include/asm/kvm_rme.h >> +++ b/arch/arm64/include/asm/kvm_rme.h >> @@ -96,6 +96,9 @@ void kvm_realm_destroy_rtts(struct kvm *kvm, u32 ia_bits); >> int kvm_create_rec(struct kvm_vcpu *vcpu); >> void kvm_destroy_rec(struct kvm_vcpu *vcpu); >> >> +int kvm_rec_enter(struct kvm_vcpu *vcpu); >> +int handle_rme_exit(struct kvm_vcpu *vcpu, int rec_run_status); >> + >> void kvm_realm_unmap_range(struct kvm *kvm, >> unsigned long ipa, >> u64 size, >> diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile >> index 5e79e5eee88d..9f893e86cac9 100644 >> --- a/arch/arm64/kvm/Makefile >> +++ b/arch/arm64/kvm/Makefile >> @@ -21,7 +21,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \ >> vgic/vgic-mmio.o vgic/vgic-mmio-v2.o \ >> vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \ >> vgic/vgic-its.o vgic/vgic-debug.o \ >> - rme.o >> + rme.o rme-exit.o >> >> 
kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o >> kvm-$(CONFIG_ARM64_PTR_AUTH) += pauth.o >> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c >> index 568e9e6e5a4e..e8dabb996705 100644 >> --- a/arch/arm64/kvm/arm.c >> +++ b/arch/arm64/kvm/arm.c >> @@ -1282,7 +1282,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) >> trace_kvm_entry(*vcpu_pc(vcpu)); >> guest_timing_enter_irqoff(); >> >> - ret = kvm_arm_vcpu_enter_exit(vcpu); >> + if (vcpu_is_rec(vcpu)) >> + ret = kvm_rec_enter(vcpu); >> + else >> + ret = kvm_arm_vcpu_enter_exit(vcpu); >> >> vcpu->mode = OUTSIDE_GUEST_MODE; >> vcpu->stat.exits++; >> @@ -1336,10 +1339,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) >> >> local_irq_enable(); >> >> - trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); >> - >> /* Exit types that need handling before we can be preempted */ >> - handle_exit_early(vcpu, ret); >> + if (!vcpu_is_rec(vcpu)) { >> + trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), >> + *vcpu_pc(vcpu)); >> + >> + handle_exit_early(vcpu, ret); >> + } >> >> preempt_enable(); >> >> @@ -1362,7 +1368,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) >> ret = ARM_EXCEPTION_IL; >> } >> >> - ret = handle_exit(vcpu, ret); >> + if (vcpu_is_rec(vcpu)) >> + ret = handle_rme_exit(vcpu, ret); >> + else >> + ret = handle_exit(vcpu, ret); >> } >> > > like kvm_rec_enter, should we name this as handle_rec_exit()? Fair enough, will rename. 
Steve > > arch/arm64/include/asm/kvm_rme.h | 2 +- > arch/arm64/kvm/arm.c | 2 +- > arch/arm64/kvm/rme-exit.c | 2 +- > 3 files changed, 3 insertions(+), 3 deletions(-) > > diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h > index a72e06cf4ea6..cd42c19ca21d 100644 > --- a/arch/arm64/include/asm/kvm_rme.h > +++ b/arch/arm64/include/asm/kvm_rme.h > @@ -102,7 +102,7 @@ int kvm_create_rec(struct kvm_vcpu *vcpu); > void kvm_destroy_rec(struct kvm_vcpu *vcpu); > > int kvm_rec_enter(struct kvm_vcpu *vcpu); > -int handle_rme_exit(struct kvm_vcpu *vcpu, int rec_run_status); > +int handle_rec_exit(struct kvm_vcpu *vcpu, int rec_run_status); > > void kvm_realm_unmap_range(struct kvm *kvm, > unsigned long ipa, > diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c > index 05d9062470c2..1e34541d88db 100644 > --- a/arch/arm64/kvm/arm.c > +++ b/arch/arm64/kvm/arm.c > @@ -1391,7 +1391,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) > } > > if (vcpu_is_rec(vcpu)) > - ret = handle_rme_exit(vcpu, ret); > + ret = handle_rec_exit(vcpu, ret); > else > ret = handle_exit(vcpu, ret); > } > diff --git a/arch/arm64/kvm/rme-exit.c b/arch/arm64/kvm/rme-exit.c > index 0940575b0a14..f888cfe72dfa 100644 > --- a/arch/arm64/kvm/rme-exit.c > +++ b/arch/arm64/kvm/rme-exit.c > @@ -156,7 +156,7 @@ static void update_arch_timer_irq_lines(struct kvm_vcpu *vcpu) > * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on > * proper exit to userspace. > */ > -int handle_rme_exit(struct kvm_vcpu *vcpu, int rec_run_ret) > +int handle_rec_exit(struct kvm_vcpu *vcpu, int rec_run_ret) > { > struct realm_rec *rec = &vcpu->arch.rec; > u8 esr_ec = ESR_ELx_EC(rec->run->exit.esr);
On 22/08/2024 04:58, Aneesh Kumar K.V wrote: > Steven Price <steven.price@arm.com> writes: > >> + /* Exit to VMM to complete the change */ >> + kvm_prepare_memory_fault_exit(vcpu, base, top_ipa - base, false, false, >> + ripas == 1); >> + > > > s/1/RMI_RAM ? Ah, good spot - I must have missed that when I added the definitions. > May be we can make it an enum like rsi_ripas I guess that makes sense - I tend to avoid enums where the value is controlled by something external, but here it makes some sense. Thanks, Steve > modified arch/arm64/include/asm/rmi_smc.h > @@ -62,9 +62,11 @@ > #define RMI_ERROR_REC 3 > #define RMI_ERROR_RTT 4 > > -#define RMI_EMPTY 0 > -#define RMI_RAM 1 > -#define RMI_DESTROYED 2 > +enum rmi_ripas { > + RMI_EMPTY, > + RMI_RAM, > + RMI_DESTROYED, > +}; > > #define RMI_NO_MEASURE_CONTENT 0 > #define RMI_MEASURE_CONTENT 1 > modified arch/arm64/kvm/rme-exit.c > @@ -112,7 +112,7 @@ static int rec_exit_ripas_change(struct kvm_vcpu *vcpu) > > /* Exit to VMM to complete the change */ > kvm_prepare_memory_fault_exit(vcpu, base, top_ipa - base, false, false, > - ripas == 1); > + ripas == RMI_RAM); > > return 0; > }
On 22/08/2024 05:04, Aneesh Kumar K.V wrote: > Steven Price <steven.price@arm.com> writes: > > .... > >> +static int rec_exit_ripas_change(struct kvm_vcpu *vcpu) >> +{ >> + struct kvm *kvm = vcpu->kvm; >> + struct realm *realm = &kvm->arch.realm; >> + struct realm_rec *rec = &vcpu->arch.rec; >> + unsigned long base = rec->run->exit.ripas_base; >> + unsigned long top = rec->run->exit.ripas_top; >> + unsigned long ripas = rec->run->exit.ripas_value & 1; >> + unsigned long top_ipa; >> + int ret = -EINVAL; >> + >> + if (realm_is_addr_protected(realm, base) && >> + realm_is_addr_protected(realm, top - 1)) { >> + kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_page_cache, >> + kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu)); >> + write_lock(&kvm->mmu_lock); >> + ret = realm_set_ipa_state(vcpu, base, top, ripas, &top_ipa); >> + write_unlock(&kvm->mmu_lock); >> + } >> + >> + WARN(ret && ret != -ENOMEM, >> + "Unable to satisfy SET_IPAS for %#lx - %#lx, ripas: %#lx\n", >> + base, top, ripas); >> + >> + /* Exit to VMM to complete the change */ >> + kvm_prepare_memory_fault_exit(vcpu, base, top_ipa - base, false, false, >> + ripas == 1); >> + >> + return 0; >> +} >> + > > arch/arm64/kvm/rme-exit.c:100:6: warning: variable 'top_ipa' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized] > if (realm_is_addr_protected(realm, base) && > ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ > arch/arm64/kvm/rme-exit.c:114:44: note: uninitialized use occurs here > kvm_prepare_memory_fault_exit(vcpu, base, top_ipa - base, false, false, > ^~~~~~~ I'm not sure why I didn't notice this before, if the condition is false then we're hitting the WARN (i.e. this shouldn't happen), but I should fix that up to handle the situation better. Steve
diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h index c064bfb080ad..0e44b20cfa48 100644 --- a/arch/arm64/include/asm/kvm_rme.h +++ b/arch/arm64/include/asm/kvm_rme.h @@ -96,6 +96,9 @@ void kvm_realm_destroy_rtts(struct kvm *kvm, u32 ia_bits); int kvm_create_rec(struct kvm_vcpu *vcpu); void kvm_destroy_rec(struct kvm_vcpu *vcpu); +int kvm_rec_enter(struct kvm_vcpu *vcpu); +int handle_rme_exit(struct kvm_vcpu *vcpu, int rec_run_status); + void kvm_realm_unmap_range(struct kvm *kvm, unsigned long ipa, u64 size, diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 5e79e5eee88d..9f893e86cac9 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -21,7 +21,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \ vgic/vgic-mmio.o vgic/vgic-mmio-v2.o \ vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \ vgic/vgic-its.o vgic/vgic-debug.o \ - rme.o + rme.o rme-exit.o kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o kvm-$(CONFIG_ARM64_PTR_AUTH) += pauth.o diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 568e9e6e5a4e..e8dabb996705 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1282,7 +1282,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) trace_kvm_entry(*vcpu_pc(vcpu)); guest_timing_enter_irqoff(); - ret = kvm_arm_vcpu_enter_exit(vcpu); + if (vcpu_is_rec(vcpu)) + ret = kvm_rec_enter(vcpu); + else + ret = kvm_arm_vcpu_enter_exit(vcpu); vcpu->mode = OUTSIDE_GUEST_MODE; vcpu->stat.exits++; @@ -1336,10 +1339,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) local_irq_enable(); - trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); - /* Exit types that need handling before we can be preempted */ - handle_exit_early(vcpu, ret); + if (!vcpu_is_rec(vcpu)) { + trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), + *vcpu_pc(vcpu)); + + handle_exit_early(vcpu, ret); + } preempt_enable(); @@ -1362,7 +1368,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) ret = 
ARM_EXCEPTION_IL; } - ret = handle_exit(vcpu, ret); + if (vcpu_is_rec(vcpu)) + ret = handle_rme_exit(vcpu, ret); + else + ret = handle_exit(vcpu, ret); } /* Tell userspace about in-kernel device output levels */ diff --git a/arch/arm64/kvm/rme-exit.c b/arch/arm64/kvm/rme-exit.c new file mode 100644 index 000000000000..f4e45d475798 --- /dev/null +++ b/arch/arm64/kvm/rme-exit.c @@ -0,0 +1,181 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023 ARM Ltd. + */ + +#include <linux/kvm_host.h> +#include <kvm/arm_hypercalls.h> +#include <kvm/arm_psci.h> + +#include <asm/rmi_smc.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_rme.h> +#include <asm/kvm_mmu.h> + +typedef int (*exit_handler_fn)(struct kvm_vcpu *vcpu); + +static int rec_exit_reason_notimpl(struct kvm_vcpu *vcpu) +{ + struct realm_rec *rec = &vcpu->arch.rec; + + pr_err("[vcpu %d] Unhandled exit reason from realm (ESR: %#llx)\n", + vcpu->vcpu_id, rec->run->exit.esr); + return -ENXIO; +} + +static int rec_exit_sync_dabt(struct kvm_vcpu *vcpu) +{ + return kvm_handle_guest_abort(vcpu); +} + +static int rec_exit_sync_iabt(struct kvm_vcpu *vcpu) +{ + struct realm_rec *rec = &vcpu->arch.rec; + + pr_err("[vcpu %d] Unhandled instruction abort (ESR: %#llx).\n", + vcpu->vcpu_id, rec->run->exit.esr); + return -ENXIO; +} + +static int rec_exit_sys_reg(struct kvm_vcpu *vcpu) +{ + struct realm_rec *rec = &vcpu->arch.rec; + unsigned long esr = kvm_vcpu_get_esr(vcpu); + int rt = kvm_vcpu_sys_get_rt(vcpu); + bool is_write = !(esr & 1); + int ret; + + if (is_write) + vcpu_set_reg(vcpu, rt, rec->run->exit.gprs[0]); + + ret = kvm_handle_sys_reg(vcpu); + + if (ret >= 0 && !is_write) + rec->run->enter.gprs[0] = vcpu_get_reg(vcpu, rt); + + return ret; +} + +static exit_handler_fn rec_exit_handlers[] = { + [0 ... 
ESR_ELx_EC_MAX] = rec_exit_reason_notimpl, + [ESR_ELx_EC_SYS64] = rec_exit_sys_reg, + [ESR_ELx_EC_DABT_LOW] = rec_exit_sync_dabt, + [ESR_ELx_EC_IABT_LOW] = rec_exit_sync_iabt +}; + +static int rec_exit_psci(struct kvm_vcpu *vcpu) +{ + struct realm_rec *rec = &vcpu->arch.rec; + int i; + int ret; + + for (i = 0; i < REC_RUN_GPRS; i++) + vcpu_set_reg(vcpu, i, rec->run->exit.gprs[i]); + + ret = kvm_smccc_call_handler(vcpu); + + for (i = 0; i < REC_RUN_GPRS; i++) + rec->run->enter.gprs[i] = vcpu_get_reg(vcpu, i); + + return ret; +} + +static int rec_exit_ripas_change(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + struct realm *realm = &kvm->arch.realm; + struct realm_rec *rec = &vcpu->arch.rec; + unsigned long base = rec->run->exit.ripas_base; + unsigned long top = rec->run->exit.ripas_top; + unsigned long ripas = rec->run->exit.ripas_value & 1; + unsigned long top_ipa; + int ret = -EINVAL; + + if (realm_is_addr_protected(realm, base) && + realm_is_addr_protected(realm, top - 1)) { + kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_page_cache, + kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu)); + write_lock(&kvm->mmu_lock); + ret = realm_set_ipa_state(vcpu, base, top, ripas, &top_ipa); + write_unlock(&kvm->mmu_lock); + } + + WARN(ret && ret != -ENOMEM, + "Unable to satisfy SET_IPAS for %#lx - %#lx, ripas: %#lx\n", + base, top, ripas); + + /* Exit to VMM to complete the change */ + kvm_prepare_memory_fault_exit(vcpu, base, top_ipa - base, false, false, + ripas == 1); + + return 0; +} + +static void update_arch_timer_irq_lines(struct kvm_vcpu *vcpu) +{ + struct realm_rec *rec = &vcpu->arch.rec; + + __vcpu_sys_reg(vcpu, CNTV_CTL_EL0) = rec->run->exit.cntv_ctl; + __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0) = rec->run->exit.cntv_cval; + __vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = rec->run->exit.cntp_ctl; + __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = rec->run->exit.cntp_cval; + + kvm_realm_timers_update(vcpu); +} + +/* + * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on + 
* proper exit to userspace. + */ +int handle_rme_exit(struct kvm_vcpu *vcpu, int rec_run_ret) +{ + struct realm_rec *rec = &vcpu->arch.rec; + u8 esr_ec = ESR_ELx_EC(rec->run->exit.esr); + unsigned long status, index; + + status = RMI_RETURN_STATUS(rec_run_ret); + index = RMI_RETURN_INDEX(rec_run_ret); + + /* + * If a PSCI_SYSTEM_OFF request raced with a vcpu executing, we might + * see the following status code and index indicating an attempt to run + * a REC when the RD state is SYSTEM_OFF. In this case, we just need to + * return to user space which can deal with the system event or will try + * to run the KVM VCPU again, at which point we will no longer attempt + * to enter the Realm because we will have a sleep request pending on + * the VCPU as a result of KVM's PSCI handling. + */ + if (status == RMI_ERROR_REALM && index == 1) { + vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; + return 0; + } + + if (rec_run_ret) + return -ENXIO; + + vcpu->arch.fault.esr_el2 = rec->run->exit.esr; + vcpu->arch.fault.far_el2 = rec->run->exit.far; + vcpu->arch.fault.hpfar_el2 = rec->run->exit.hpfar; + + update_arch_timer_irq_lines(vcpu); + + /* Reset the emulation flags for the next run of the REC */ + rec->run->enter.flags = 0; + + switch (rec->run->exit.exit_reason) { + case RMI_EXIT_SYNC: + return rec_exit_handlers[esr_ec](vcpu); + case RMI_EXIT_IRQ: + case RMI_EXIT_FIQ: + return 1; + case RMI_EXIT_PSCI: + return rec_exit_psci(vcpu); + case RMI_EXIT_RIPAS_CHANGE: + return rec_exit_ripas_change(vcpu); + } + + kvm_pr_unimpl("Unsupported exit reason: %u\n", + rec->run->exit.exit_reason); + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return 0; +} diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c index 1fa9991d708b..fd4162af551d 100644 --- a/arch/arm64/kvm/rme.c +++ b/arch/arm64/kvm/rme.c @@ -899,6 +899,17 @@ void kvm_destroy_realm(struct kvm *kvm) kvm_free_stage2_pgd(&kvm->arch.mmu); } +int kvm_rec_enter(struct kvm_vcpu *vcpu) +{ + struct realm_rec *rec = 
&vcpu->arch.rec; + + if (kvm_realm_state(vcpu->kvm) != REALM_STATE_ACTIVE) + return -EINVAL; + + return rmi_rec_enter(virt_to_phys(rec->rec_page), + virt_to_phys(rec->run)); +} + static void free_rec_aux(struct page **aux_pages, unsigned int num_aux) {
Entering a realm is done using an SMC call to the RMM. On exit the exit-codes need to be handled slightly differently to the normal KVM path so define our own functions for realm enter/exit and hook them in if the guest is a realm guest. Signed-off-by: Steven Price <steven.price@arm.com> --- Changes since v2: * realm_set_ipa_state() now provides an output parameter for the top_ipa that was changed. Use this to signal the VMM with the correct range that has been transitioned. * Adapt to previous patch changes. --- arch/arm64/include/asm/kvm_rme.h | 3 + arch/arm64/kvm/Makefile | 2 +- arch/arm64/kvm/arm.c | 19 +++- arch/arm64/kvm/rme-exit.c | 181 +++++++++++++++++++++++++++++++ arch/arm64/kvm/rme.c | 11 ++ 5 files changed, 210 insertions(+), 6 deletions(-) create mode 100644 arch/arm64/kvm/rme-exit.c