diff mbox series

[v4,18/43] arm64: RME: Handle realm enter/exit

Message ID 20240821153844.60084-19-steven.price@arm.com (mailing list archive)
State New, archived
Headers show
Series arm64: Support for Arm CCA in KVM | expand

Commit Message

Steven Price Aug. 21, 2024, 3:38 p.m. UTC
Entering a realm is done using a SMC call to the RMM. On exit the
exit-codes need to be handled slightly differently to the normal KVM
path so define our own functions for realm enter/exit and hook them
in if the guest is a realm guest.

Signed-off-by: Steven Price <steven.price@arm.com>
---
Changes since v2:
 * realm_set_ipa_state() now provides an output parameter for the
   top_iap that was changed. Use this to signal the VMM with the correct
   range that has been transitioned.
 * Adapt to previous patch changes.
---
 arch/arm64/include/asm/kvm_rme.h |   3 +
 arch/arm64/kvm/Makefile          |   2 +-
 arch/arm64/kvm/arm.c             |  19 +++-
 arch/arm64/kvm/rme-exit.c        | 181 +++++++++++++++++++++++++++++++
 arch/arm64/kvm/rme.c             |  11 ++
 5 files changed, 210 insertions(+), 6 deletions(-)
 create mode 100644 arch/arm64/kvm/rme-exit.c

Comments

Aneesh Kumar K.V (Arm) Aug. 22, 2024, 3:53 a.m. UTC | #1
Steven Price <steven.price@arm.com> writes:

> Entering a realm is done using a SMC call to the RMM. On exit the
> exit-codes need to be handled slightly differently to the normal KVM
> path so define our own functions for realm enter/exit and hook them
> in if the guest is a realm guest.
>
> Signed-off-by: Steven Price <steven.price@arm.com>
> ---
> Changes since v2:
>  * realm_set_ipa_state() now provides an output parameter for the
>    top_iap that was changed. Use this to signal the VMM with the correct
>    range that has been transitioned.
>  * Adapt to previous patch changes.
> ---
>  arch/arm64/include/asm/kvm_rme.h |   3 +
>  arch/arm64/kvm/Makefile          |   2 +-
>  arch/arm64/kvm/arm.c             |  19 +++-
>  arch/arm64/kvm/rme-exit.c        | 181 +++++++++++++++++++++++++++++++
>  arch/arm64/kvm/rme.c             |  11 ++
>  5 files changed, 210 insertions(+), 6 deletions(-)
>  create mode 100644 arch/arm64/kvm/rme-exit.c
>
> diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h
> index c064bfb080ad..0e44b20cfa48 100644
> --- a/arch/arm64/include/asm/kvm_rme.h
> +++ b/arch/arm64/include/asm/kvm_rme.h
> @@ -96,6 +96,9 @@ void kvm_realm_destroy_rtts(struct kvm *kvm, u32 ia_bits);
>  int kvm_create_rec(struct kvm_vcpu *vcpu);
>  void kvm_destroy_rec(struct kvm_vcpu *vcpu);
>  
> +int kvm_rec_enter(struct kvm_vcpu *vcpu);
> +int handle_rme_exit(struct kvm_vcpu *vcpu, int rec_run_status);
> +
>  void kvm_realm_unmap_range(struct kvm *kvm,
>  			   unsigned long ipa,
>  			   u64 size,
> diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
> index 5e79e5eee88d..9f893e86cac9 100644
> --- a/arch/arm64/kvm/Makefile
> +++ b/arch/arm64/kvm/Makefile
> @@ -21,7 +21,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
>  	 vgic/vgic-mmio.o vgic/vgic-mmio-v2.o \
>  	 vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \
>  	 vgic/vgic-its.o vgic/vgic-debug.o \
> -	 rme.o
> +	 rme.o rme-exit.o
>  
>  kvm-$(CONFIG_HW_PERF_EVENTS)  += pmu-emul.o pmu.o
>  kvm-$(CONFIG_ARM64_PTR_AUTH)  += pauth.o
> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> index 568e9e6e5a4e..e8dabb996705 100644
> --- a/arch/arm64/kvm/arm.c
> +++ b/arch/arm64/kvm/arm.c
> @@ -1282,7 +1282,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
>  		trace_kvm_entry(*vcpu_pc(vcpu));
>  		guest_timing_enter_irqoff();
>  
> -		ret = kvm_arm_vcpu_enter_exit(vcpu);
> +		if (vcpu_is_rec(vcpu))
> +			ret = kvm_rec_enter(vcpu);
> +		else
> +			ret = kvm_arm_vcpu_enter_exit(vcpu);
>  
>  		vcpu->mode = OUTSIDE_GUEST_MODE;
>  		vcpu->stat.exits++;
> @@ -1336,10 +1339,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
>  
>  		local_irq_enable();
>  
> -		trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
> -
>  		/* Exit types that need handling before we can be preempted */
> -		handle_exit_early(vcpu, ret);
> +		if (!vcpu_is_rec(vcpu)) {
> +			trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu),
> +				       *vcpu_pc(vcpu));
> +
> +			handle_exit_early(vcpu, ret);
> +		}
>  
>  		preempt_enable();
>  
> @@ -1362,7 +1368,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
>  			ret = ARM_EXCEPTION_IL;
>  		}
>  
> -		ret = handle_exit(vcpu, ret);
> +		if (vcpu_is_rec(vcpu))
> +			ret = handle_rme_exit(vcpu, ret);
> +		else
> +			ret = handle_exit(vcpu, ret);
>  	}
>

like kvm_rec_enter, should we name this as handle_rec_exit()?

 arch/arm64/include/asm/kvm_rme.h | 2 +-
 arch/arm64/kvm/arm.c             | 2 +-
 arch/arm64/kvm/rme-exit.c        | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h
index a72e06cf4ea6..cd42c19ca21d 100644
--- a/arch/arm64/include/asm/kvm_rme.h
+++ b/arch/arm64/include/asm/kvm_rme.h
@@ -102,7 +102,7 @@ int kvm_create_rec(struct kvm_vcpu *vcpu);
 void kvm_destroy_rec(struct kvm_vcpu *vcpu);
 
 int kvm_rec_enter(struct kvm_vcpu *vcpu);
-int handle_rme_exit(struct kvm_vcpu *vcpu, int rec_run_status);
+int handle_rec_exit(struct kvm_vcpu *vcpu, int rec_run_status);
 
 void kvm_realm_unmap_range(struct kvm *kvm,
 			   unsigned long ipa,
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 05d9062470c2..1e34541d88db 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1391,7 +1391,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 		}
 
 		if (vcpu_is_rec(vcpu))
-			ret = handle_rme_exit(vcpu, ret);
+			ret = handle_rec_exit(vcpu, ret);
 		else
 			ret = handle_exit(vcpu, ret);
 	}
diff --git a/arch/arm64/kvm/rme-exit.c b/arch/arm64/kvm/rme-exit.c
index 0940575b0a14..f888cfe72dfa 100644
--- a/arch/arm64/kvm/rme-exit.c
+++ b/arch/arm64/kvm/rme-exit.c
@@ -156,7 +156,7 @@ static void update_arch_timer_irq_lines(struct kvm_vcpu *vcpu)
  * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
  * proper exit to userspace.
  */
-int handle_rme_exit(struct kvm_vcpu *vcpu, int rec_run_ret)
+int handle_rec_exit(struct kvm_vcpu *vcpu, int rec_run_ret)
 {
 	struct realm_rec *rec = &vcpu->arch.rec;
 	u8 esr_ec = ESR_ELx_EC(rec->run->exit.esr);
--
Aneesh Kumar K.V (Arm) Aug. 22, 2024, 3:58 a.m. UTC | #2
Steven Price <steven.price@arm.com> writes:

> +	/* Exit to VMM to complete the change */
> +	kvm_prepare_memory_fault_exit(vcpu, base, top_ipa - base, false, false,
> +				      ripas == 1);
> +


s/1/RMI_RAM ?

May be we can make it an enum like rsi_ripas

modified   arch/arm64/include/asm/rmi_smc.h
@@ -62,9 +62,11 @@
 #define RMI_ERROR_REC		3
 #define RMI_ERROR_RTT		4
 
-#define RMI_EMPTY		0
-#define RMI_RAM			1
-#define RMI_DESTROYED		2
+enum rmi_ripas {
+	RMI_EMPTY,
+	RMI_RAM,
+	RMI_DESTROYED,
+};
 
 #define RMI_NO_MEASURE_CONTENT	0
 #define RMI_MEASURE_CONTENT	1
modified   arch/arm64/kvm/rme-exit.c
@@ -112,7 +112,7 @@ static int rec_exit_ripas_change(struct kvm_vcpu *vcpu)
 
 	/* Exit to VMM to complete the change */
 	kvm_prepare_memory_fault_exit(vcpu, base, top_ipa - base, false, false,
-				      ripas == 1);
+				      ripas == RMI_RAM);
 
 	return 0;
 }
Aneesh Kumar K.V (Arm) Aug. 22, 2024, 4:04 a.m. UTC | #3
Steven Price <steven.price@arm.com> writes:

....

> +static int rec_exit_ripas_change(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm *kvm = vcpu->kvm;
> +	struct realm *realm = &kvm->arch.realm;
> +	struct realm_rec *rec = &vcpu->arch.rec;
> +	unsigned long base = rec->run->exit.ripas_base;
> +	unsigned long top = rec->run->exit.ripas_top;
> +	unsigned long ripas = rec->run->exit.ripas_value & 1;
> +	unsigned long top_ipa;
> +	int ret = -EINVAL;
> +
> +	if (realm_is_addr_protected(realm, base) &&
> +	    realm_is_addr_protected(realm, top - 1)) {
> +		kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_page_cache,
> +					   kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu));
> +		write_lock(&kvm->mmu_lock);
> +		ret = realm_set_ipa_state(vcpu, base, top, ripas, &top_ipa);
> +		write_unlock(&kvm->mmu_lock);
> +	}
> +
> +	WARN(ret && ret != -ENOMEM,
> +	     "Unable to satisfy SET_IPAS for %#lx - %#lx, ripas: %#lx\n",
> +	     base, top, ripas);
> +
> +	/* Exit to VMM to complete the change */
> +	kvm_prepare_memory_fault_exit(vcpu, base, top_ipa - base, false, false,
> +				      ripas == 1);
> +
> +	return 0;
> +}
> +

arch/arm64/kvm/rme-exit.c:100:6: warning: variable 'top_ipa' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized]
        if (realm_is_addr_protected(realm, base) &&
            ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
arch/arm64/kvm/rme-exit.c:114:44: note: uninitialized use occurs here
        kvm_prepare_memory_fault_exit(vcpu, base, top_ipa - base, false, false,
                                                  ^~~~~~~

-aneesh
kernel test robot Aug. 22, 2024, 2:14 p.m. UTC | #4
Hi Steven,

kernel test robot noticed the following build warnings:

[auto build test WARNING on kvmarm/next]
[also build test WARNING on kvm/queue arm64/for-next/core linus/master v6.11-rc4 next-20240822]
[cannot apply to kvm/linux-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Steven-Price/KVM-Prepare-for-handling-only-shared-mappings-in-mmu_notifier-events/20240821-235327
base:   https://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git next
patch link:    https://lore.kernel.org/r/20240821153844.60084-19-steven.price%40arm.com
patch subject: [PATCH v4 18/43] arm64: RME: Handle realm enter/exit
config: arm64-allmodconfig (https://download.01.org/0day-ci/archive/20240822/202408222119.6YF3UR6O-lkp@intel.com/config)
compiler: clang version 20.0.0git (https://github.com/llvm/llvm-project 26670e7fa4f032a019d23d56c6a02926e854e8af)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240822/202408222119.6YF3UR6O-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202408222119.6YF3UR6O-lkp@intel.com/

All warnings (new ones prefixed by >>):

   In file included from arch/arm64/kvm/rme-exit.c:6:
   In file included from include/linux/kvm_host.h:16:
   In file included from include/linux/mm.h:2228:
   include/linux/vmstat.h:500:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion]
     500 |         return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
         |                            ~~~~~~~~~~~~~~~~~~~~~ ^
     501 |                            item];
         |                            ~~~~
   include/linux/vmstat.h:507:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion]
     507 |         return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
         |                            ~~~~~~~~~~~~~~~~~~~~~ ^
     508 |                            NR_VM_NUMA_EVENT_ITEMS +
         |                            ~~~~~~~~~~~~~~~~~~~~~~
   include/linux/vmstat.h:514:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
     514 |         return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
         |                               ~~~~~~~~~~~ ^ ~~~
   include/linux/vmstat.h:519:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion]
     519 |         return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
         |                            ~~~~~~~~~~~~~~~~~~~~~ ^
     520 |                            NR_VM_NUMA_EVENT_ITEMS +
         |                            ~~~~~~~~~~~~~~~~~~~~~~
   include/linux/vmstat.h:528:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion]
     528 |         return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
         |                            ~~~~~~~~~~~~~~~~~~~~~ ^
     529 |                            NR_VM_NUMA_EVENT_ITEMS +
         |                            ~~~~~~~~~~~~~~~~~~~~~~
   arch/arm64/kvm/rme-exit.c:61:23: warning: initializer overrides prior initialization of this subobject [-Winitializer-overrides]
      61 |         [ESR_ELx_EC_SYS64]      = rec_exit_sys_reg,
         |                                   ^~~~~~~~~~~~~~~~
   arch/arm64/kvm/rme-exit.c:60:27: note: previous initialization is here
      60 |         [0 ... ESR_ELx_EC_MAX]  = rec_exit_reason_notimpl,
         |                                   ^~~~~~~~~~~~~~~~~~~~~~~
   arch/arm64/kvm/rme-exit.c:62:26: warning: initializer overrides prior initialization of this subobject [-Winitializer-overrides]
      62 |         [ESR_ELx_EC_DABT_LOW]   = rec_exit_sync_dabt,
         |                                   ^~~~~~~~~~~~~~~~~~
   arch/arm64/kvm/rme-exit.c:60:27: note: previous initialization is here
      60 |         [0 ... ESR_ELx_EC_MAX]  = rec_exit_reason_notimpl,
         |                                   ^~~~~~~~~~~~~~~~~~~~~~~
   arch/arm64/kvm/rme-exit.c:63:26: warning: initializer overrides prior initialization of this subobject [-Winitializer-overrides]
      63 |         [ESR_ELx_EC_IABT_LOW]   = rec_exit_sync_iabt
         |                                   ^~~~~~~~~~~~~~~~~~
   arch/arm64/kvm/rme-exit.c:60:27: note: previous initialization is here
      60 |         [0 ... ESR_ELx_EC_MAX]  = rec_exit_reason_notimpl,
         |                                   ^~~~~~~~~~~~~~~~~~~~~~~
>> arch/arm64/kvm/rme-exit.c:94:6: warning: variable 'top_ipa' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized]
      94 |         if (realm_is_addr_protected(realm, base) &&
         |             ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      95 |             realm_is_addr_protected(realm, top - 1)) {
         |             ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   arch/arm64/kvm/rme-exit.c:108:44: note: uninitialized use occurs here
     108 |         kvm_prepare_memory_fault_exit(vcpu, base, top_ipa - base, false, false,
         |                                                   ^~~~~~~
   arch/arm64/kvm/rme-exit.c:94:2: note: remove the 'if' if its condition is always true
      94 |         if (realm_is_addr_protected(realm, base) &&
         |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      95 |             realm_is_addr_protected(realm, top - 1)) {
         |             ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>> arch/arm64/kvm/rme-exit.c:94:6: warning: variable 'top_ipa' is used uninitialized whenever '&&' condition is false [-Wsometimes-uninitialized]
      94 |         if (realm_is_addr_protected(realm, base) &&
         |             ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   arch/arm64/kvm/rme-exit.c:108:44: note: uninitialized use occurs here
     108 |         kvm_prepare_memory_fault_exit(vcpu, base, top_ipa - base, false, false,
         |                                                   ^~~~~~~
   arch/arm64/kvm/rme-exit.c:94:6: note: remove the '&&' if its condition is always true
      94 |         if (realm_is_addr_protected(realm, base) &&
         |             ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   arch/arm64/kvm/rme-exit.c:91:23: note: initialize the variable 'top_ipa' to silence this warning
      91 |         unsigned long top_ipa;
         |                              ^
         |                               = 0
   10 warnings generated.


vim +94 arch/arm64/kvm/rme-exit.c

    58	
    59	static exit_handler_fn rec_exit_handlers[] = {
  > 60		[0 ... ESR_ELx_EC_MAX]	= rec_exit_reason_notimpl,
    61		[ESR_ELx_EC_SYS64]	= rec_exit_sys_reg,
    62		[ESR_ELx_EC_DABT_LOW]	= rec_exit_sync_dabt,
    63		[ESR_ELx_EC_IABT_LOW]	= rec_exit_sync_iabt
    64	};
    65	
    66	static int rec_exit_psci(struct kvm_vcpu *vcpu)
    67	{
    68		struct realm_rec *rec = &vcpu->arch.rec;
    69		int i;
    70		int ret;
    71	
    72		for (i = 0; i < REC_RUN_GPRS; i++)
    73			vcpu_set_reg(vcpu, i, rec->run->exit.gprs[i]);
    74	
    75		ret = kvm_smccc_call_handler(vcpu);
    76	
    77		for (i = 0; i < REC_RUN_GPRS; i++)
    78			rec->run->enter.gprs[i] = vcpu_get_reg(vcpu, i);
    79	
    80		return ret;
    81	}
    82	
    83	static int rec_exit_ripas_change(struct kvm_vcpu *vcpu)
    84	{
    85		struct kvm *kvm = vcpu->kvm;
    86		struct realm *realm = &kvm->arch.realm;
    87		struct realm_rec *rec = &vcpu->arch.rec;
    88		unsigned long base = rec->run->exit.ripas_base;
    89		unsigned long top = rec->run->exit.ripas_top;
    90		unsigned long ripas = rec->run->exit.ripas_value & 1;
    91		unsigned long top_ipa;
    92		int ret = -EINVAL;
    93	
  > 94		if (realm_is_addr_protected(realm, base) &&
    95		    realm_is_addr_protected(realm, top - 1)) {
    96			kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_page_cache,
    97						   kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu));
    98			write_lock(&kvm->mmu_lock);
    99			ret = realm_set_ipa_state(vcpu, base, top, ripas, &top_ipa);
   100			write_unlock(&kvm->mmu_lock);
   101		}
   102	
   103		WARN(ret && ret != -ENOMEM,
   104		     "Unable to satisfy SET_IPAS for %#lx - %#lx, ripas: %#lx\n",
   105		     base, top, ripas);
   106	
   107		/* Exit to VMM to complete the change */
 > 108		kvm_prepare_memory_fault_exit(vcpu, base, top_ipa - base, false, false,
   109					      ripas == 1);
   110	
   111		return 0;
   112	}
   113
Steven Price Aug. 22, 2024, 3:05 p.m. UTC | #5
On 22/08/2024 04:53, Aneesh Kumar K.V wrote:
> Steven Price <steven.price@arm.com> writes:
> 
>> Entering a realm is done using a SMC call to the RMM. On exit the
>> exit-codes need to be handled slightly differently to the normal KVM
>> path so define our own functions for realm enter/exit and hook them
>> in if the guest is a realm guest.
>>
>> Signed-off-by: Steven Price <steven.price@arm.com>
>> ---
>> Changes since v2:
>>  * realm_set_ipa_state() now provides an output parameter for the
>>    top_iap that was changed. Use this to signal the VMM with the correct
>>    range that has been transitioned.
>>  * Adapt to previous patch changes.
>> ---
>>  arch/arm64/include/asm/kvm_rme.h |   3 +
>>  arch/arm64/kvm/Makefile          |   2 +-
>>  arch/arm64/kvm/arm.c             |  19 +++-
>>  arch/arm64/kvm/rme-exit.c        | 181 +++++++++++++++++++++++++++++++
>>  arch/arm64/kvm/rme.c             |  11 ++
>>  5 files changed, 210 insertions(+), 6 deletions(-)
>>  create mode 100644 arch/arm64/kvm/rme-exit.c
>>
>> diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h
>> index c064bfb080ad..0e44b20cfa48 100644
>> --- a/arch/arm64/include/asm/kvm_rme.h
>> +++ b/arch/arm64/include/asm/kvm_rme.h
>> @@ -96,6 +96,9 @@ void kvm_realm_destroy_rtts(struct kvm *kvm, u32 ia_bits);
>>  int kvm_create_rec(struct kvm_vcpu *vcpu);
>>  void kvm_destroy_rec(struct kvm_vcpu *vcpu);
>>  
>> +int kvm_rec_enter(struct kvm_vcpu *vcpu);
>> +int handle_rme_exit(struct kvm_vcpu *vcpu, int rec_run_status);
>> +
>>  void kvm_realm_unmap_range(struct kvm *kvm,
>>  			   unsigned long ipa,
>>  			   u64 size,
>> diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
>> index 5e79e5eee88d..9f893e86cac9 100644
>> --- a/arch/arm64/kvm/Makefile
>> +++ b/arch/arm64/kvm/Makefile
>> @@ -21,7 +21,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
>>  	 vgic/vgic-mmio.o vgic/vgic-mmio-v2.o \
>>  	 vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \
>>  	 vgic/vgic-its.o vgic/vgic-debug.o \
>> -	 rme.o
>> +	 rme.o rme-exit.o
>>  
>>  kvm-$(CONFIG_HW_PERF_EVENTS)  += pmu-emul.o pmu.o
>>  kvm-$(CONFIG_ARM64_PTR_AUTH)  += pauth.o
>> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
>> index 568e9e6e5a4e..e8dabb996705 100644
>> --- a/arch/arm64/kvm/arm.c
>> +++ b/arch/arm64/kvm/arm.c
>> @@ -1282,7 +1282,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
>>  		trace_kvm_entry(*vcpu_pc(vcpu));
>>  		guest_timing_enter_irqoff();
>>  
>> -		ret = kvm_arm_vcpu_enter_exit(vcpu);
>> +		if (vcpu_is_rec(vcpu))
>> +			ret = kvm_rec_enter(vcpu);
>> +		else
>> +			ret = kvm_arm_vcpu_enter_exit(vcpu);
>>  
>>  		vcpu->mode = OUTSIDE_GUEST_MODE;
>>  		vcpu->stat.exits++;
>> @@ -1336,10 +1339,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
>>  
>>  		local_irq_enable();
>>  
>> -		trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
>> -
>>  		/* Exit types that need handling before we can be preempted */
>> -		handle_exit_early(vcpu, ret);
>> +		if (!vcpu_is_rec(vcpu)) {
>> +			trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu),
>> +				       *vcpu_pc(vcpu));
>> +
>> +			handle_exit_early(vcpu, ret);
>> +		}
>>  
>>  		preempt_enable();
>>  
>> @@ -1362,7 +1368,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
>>  			ret = ARM_EXCEPTION_IL;
>>  		}
>>  
>> -		ret = handle_exit(vcpu, ret);
>> +		if (vcpu_is_rec(vcpu))
>> +			ret = handle_rme_exit(vcpu, ret);
>> +		else
>> +			ret = handle_exit(vcpu, ret);
>>  	}
>>
> 
> like kvm_rec_enter, should we name this as handle_rec_exit()?

Fair enough, will rename.

Steve

> 
>  arch/arm64/include/asm/kvm_rme.h | 2 +-
>  arch/arm64/kvm/arm.c             | 2 +-
>  arch/arm64/kvm/rme-exit.c        | 2 +-
>  3 files changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h
> index a72e06cf4ea6..cd42c19ca21d 100644
> --- a/arch/arm64/include/asm/kvm_rme.h
> +++ b/arch/arm64/include/asm/kvm_rme.h
> @@ -102,7 +102,7 @@ int kvm_create_rec(struct kvm_vcpu *vcpu);
>  void kvm_destroy_rec(struct kvm_vcpu *vcpu);
>  
>  int kvm_rec_enter(struct kvm_vcpu *vcpu);
> -int handle_rme_exit(struct kvm_vcpu *vcpu, int rec_run_status);
> +int handle_rec_exit(struct kvm_vcpu *vcpu, int rec_run_status);
>  
>  void kvm_realm_unmap_range(struct kvm *kvm,
>  			   unsigned long ipa,
> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> index 05d9062470c2..1e34541d88db 100644
> --- a/arch/arm64/kvm/arm.c
> +++ b/arch/arm64/kvm/arm.c
> @@ -1391,7 +1391,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
>  		}
>  
>  		if (vcpu_is_rec(vcpu))
> -			ret = handle_rme_exit(vcpu, ret);
> +			ret = handle_rec_exit(vcpu, ret);
>  		else
>  			ret = handle_exit(vcpu, ret);
>  	}
> diff --git a/arch/arm64/kvm/rme-exit.c b/arch/arm64/kvm/rme-exit.c
> index 0940575b0a14..f888cfe72dfa 100644
> --- a/arch/arm64/kvm/rme-exit.c
> +++ b/arch/arm64/kvm/rme-exit.c
> @@ -156,7 +156,7 @@ static void update_arch_timer_irq_lines(struct kvm_vcpu *vcpu)
>   * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
>   * proper exit to userspace.
>   */
> -int handle_rme_exit(struct kvm_vcpu *vcpu, int rec_run_ret)
> +int handle_rec_exit(struct kvm_vcpu *vcpu, int rec_run_ret)
>  {
>  	struct realm_rec *rec = &vcpu->arch.rec;
>  	u8 esr_ec = ESR_ELx_EC(rec->run->exit.esr);
Steven Price Aug. 22, 2024, 3:05 p.m. UTC | #6
On 22/08/2024 04:58, Aneesh Kumar K.V wrote:
> Steven Price <steven.price@arm.com> writes:
> 
>> +	/* Exit to VMM to complete the change */
>> +	kvm_prepare_memory_fault_exit(vcpu, base, top_ipa - base, false, false,
>> +				      ripas == 1);
>> +
> 
> 
> s/1/RMI_RAM ?

Ah, good spot - I must have missed that when I added the definitions.

> May be we can make it an enum like rsi_ripas

I guess that makes sense - I tend to avoid enums where the value is
controlled by something external, but here it makes some sense.

Thanks,

Steve

> modified   arch/arm64/include/asm/rmi_smc.h
> @@ -62,9 +62,11 @@
>  #define RMI_ERROR_REC		3
>  #define RMI_ERROR_RTT		4
>  
> -#define RMI_EMPTY		0
> -#define RMI_RAM			1
> -#define RMI_DESTROYED		2
> +enum rmi_ripas {
> +	RMI_EMPTY,
> +	RMI_RAM,
> +	RMI_DESTROYED,
> +};
>  
>  #define RMI_NO_MEASURE_CONTENT	0
>  #define RMI_MEASURE_CONTENT	1
> modified   arch/arm64/kvm/rme-exit.c
> @@ -112,7 +112,7 @@ static int rec_exit_ripas_change(struct kvm_vcpu *vcpu)
>  
>  	/* Exit to VMM to complete the change */
>  	kvm_prepare_memory_fault_exit(vcpu, base, top_ipa - base, false, false,
> -				      ripas == 1);
> +				      ripas == RMI_RAM);
>  
>  	return 0;
>  }
Steven Price Aug. 22, 2024, 3:06 p.m. UTC | #7
On 22/08/2024 05:04, Aneesh Kumar K.V wrote:
> Steven Price <steven.price@arm.com> writes:
> 
> ....
> 
>> +static int rec_exit_ripas_change(struct kvm_vcpu *vcpu)
>> +{
>> +	struct kvm *kvm = vcpu->kvm;
>> +	struct realm *realm = &kvm->arch.realm;
>> +	struct realm_rec *rec = &vcpu->arch.rec;
>> +	unsigned long base = rec->run->exit.ripas_base;
>> +	unsigned long top = rec->run->exit.ripas_top;
>> +	unsigned long ripas = rec->run->exit.ripas_value & 1;
>> +	unsigned long top_ipa;
>> +	int ret = -EINVAL;
>> +
>> +	if (realm_is_addr_protected(realm, base) &&
>> +	    realm_is_addr_protected(realm, top - 1)) {
>> +		kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_page_cache,
>> +					   kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu));
>> +		write_lock(&kvm->mmu_lock);
>> +		ret = realm_set_ipa_state(vcpu, base, top, ripas, &top_ipa);
>> +		write_unlock(&kvm->mmu_lock);
>> +	}
>> +
>> +	WARN(ret && ret != -ENOMEM,
>> +	     "Unable to satisfy SET_IPAS for %#lx - %#lx, ripas: %#lx\n",
>> +	     base, top, ripas);
>> +
>> +	/* Exit to VMM to complete the change */
>> +	kvm_prepare_memory_fault_exit(vcpu, base, top_ipa - base, false, false,
>> +				      ripas == 1);
>> +
>> +	return 0;
>> +}
>> +
> 
> arch/arm64/kvm/rme-exit.c:100:6: warning: variable 'top_ipa' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized]
>         if (realm_is_addr_protected(realm, base) &&
>             ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> arch/arm64/kvm/rme-exit.c:114:44: note: uninitialized use occurs here
>         kvm_prepare_memory_fault_exit(vcpu, base, top_ipa - base, false, false,
>                                                   ^~~~~~~

I'm not sure why I didn't notice this before, if the condition is false
then we're hitting the WARN (i.e. this shouldn't happen), but I should
fix that up to handle the situation better.

Steve
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h
index c064bfb080ad..0e44b20cfa48 100644
--- a/arch/arm64/include/asm/kvm_rme.h
+++ b/arch/arm64/include/asm/kvm_rme.h
@@ -96,6 +96,9 @@  void kvm_realm_destroy_rtts(struct kvm *kvm, u32 ia_bits);
 int kvm_create_rec(struct kvm_vcpu *vcpu);
 void kvm_destroy_rec(struct kvm_vcpu *vcpu);
 
+int kvm_rec_enter(struct kvm_vcpu *vcpu);
+int handle_rme_exit(struct kvm_vcpu *vcpu, int rec_run_status);
+
 void kvm_realm_unmap_range(struct kvm *kvm,
 			   unsigned long ipa,
 			   u64 size,
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 5e79e5eee88d..9f893e86cac9 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -21,7 +21,7 @@  kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
 	 vgic/vgic-mmio.o vgic/vgic-mmio-v2.o \
 	 vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \
 	 vgic/vgic-its.o vgic/vgic-debug.o \
-	 rme.o
+	 rme.o rme-exit.o
 
 kvm-$(CONFIG_HW_PERF_EVENTS)  += pmu-emul.o pmu.o
 kvm-$(CONFIG_ARM64_PTR_AUTH)  += pauth.o
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 568e9e6e5a4e..e8dabb996705 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1282,7 +1282,10 @@  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 		trace_kvm_entry(*vcpu_pc(vcpu));
 		guest_timing_enter_irqoff();
 
-		ret = kvm_arm_vcpu_enter_exit(vcpu);
+		if (vcpu_is_rec(vcpu))
+			ret = kvm_rec_enter(vcpu);
+		else
+			ret = kvm_arm_vcpu_enter_exit(vcpu);
 
 		vcpu->mode = OUTSIDE_GUEST_MODE;
 		vcpu->stat.exits++;
@@ -1336,10 +1339,13 @@  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 
 		local_irq_enable();
 
-		trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
-
 		/* Exit types that need handling before we can be preempted */
-		handle_exit_early(vcpu, ret);
+		if (!vcpu_is_rec(vcpu)) {
+			trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu),
+				       *vcpu_pc(vcpu));
+
+			handle_exit_early(vcpu, ret);
+		}
 
 		preempt_enable();
 
@@ -1362,7 +1368,10 @@  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 			ret = ARM_EXCEPTION_IL;
 		}
 
-		ret = handle_exit(vcpu, ret);
+		if (vcpu_is_rec(vcpu))
+			ret = handle_rme_exit(vcpu, ret);
+		else
+			ret = handle_exit(vcpu, ret);
 	}
 
 	/* Tell userspace about in-kernel device output levels */
diff --git a/arch/arm64/kvm/rme-exit.c b/arch/arm64/kvm/rme-exit.c
new file mode 100644
index 000000000000..f4e45d475798
--- /dev/null
+++ b/arch/arm64/kvm/rme-exit.c
@@ -0,0 +1,181 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 ARM Ltd.
+ */
+
+#include <linux/kvm_host.h>
+#include <kvm/arm_hypercalls.h>
+#include <kvm/arm_psci.h>
+
+#include <asm/rmi_smc.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_rme.h>
+#include <asm/kvm_mmu.h>
+
+typedef int (*exit_handler_fn)(struct kvm_vcpu *vcpu);
+
+static int rec_exit_reason_notimpl(struct kvm_vcpu *vcpu)
+{
+	struct realm_rec *rec = &vcpu->arch.rec;
+
+	pr_err("[vcpu %d] Unhandled exit reason from realm (ESR: %#llx)\n",
+	       vcpu->vcpu_id, rec->run->exit.esr);
+	return -ENXIO;
+}
+
+static int rec_exit_sync_dabt(struct kvm_vcpu *vcpu)
+{
+	return kvm_handle_guest_abort(vcpu);
+}
+
+static int rec_exit_sync_iabt(struct kvm_vcpu *vcpu)
+{
+	struct realm_rec *rec = &vcpu->arch.rec;
+
+	pr_err("[vcpu %d] Unhandled instruction abort (ESR: %#llx).\n",
+	       vcpu->vcpu_id, rec->run->exit.esr);
+	return -ENXIO;
+}
+
+static int rec_exit_sys_reg(struct kvm_vcpu *vcpu)
+{
+	struct realm_rec *rec = &vcpu->arch.rec;
+	unsigned long esr = kvm_vcpu_get_esr(vcpu);
+	int rt = kvm_vcpu_sys_get_rt(vcpu);
+	bool is_write = !(esr & 1);
+	int ret;
+
+	if (is_write)
+		vcpu_set_reg(vcpu, rt, rec->run->exit.gprs[0]);
+
+	ret = kvm_handle_sys_reg(vcpu);
+
+	if (ret >= 0 && !is_write)
+		rec->run->enter.gprs[0] = vcpu_get_reg(vcpu, rt);
+
+	return ret;
+}
+
+static exit_handler_fn rec_exit_handlers[] = {
+	[0 ... ESR_ELx_EC_MAX]	= rec_exit_reason_notimpl,
+	[ESR_ELx_EC_SYS64]	= rec_exit_sys_reg,
+	[ESR_ELx_EC_DABT_LOW]	= rec_exit_sync_dabt,
+	[ESR_ELx_EC_IABT_LOW]	= rec_exit_sync_iabt
+};
+
+static int rec_exit_psci(struct kvm_vcpu *vcpu)
+{
+	struct realm_rec *rec = &vcpu->arch.rec;
+	int i;
+	int ret;
+
+	for (i = 0; i < REC_RUN_GPRS; i++)
+		vcpu_set_reg(vcpu, i, rec->run->exit.gprs[i]);
+
+	ret = kvm_smccc_call_handler(vcpu);
+
+	for (i = 0; i < REC_RUN_GPRS; i++)
+		rec->run->enter.gprs[i] = vcpu_get_reg(vcpu, i);
+
+	return ret;
+}
+
+static int rec_exit_ripas_change(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct realm *realm = &kvm->arch.realm;
+	struct realm_rec *rec = &vcpu->arch.rec;
+	unsigned long base = rec->run->exit.ripas_base;
+	unsigned long top = rec->run->exit.ripas_top;
+	unsigned long ripas = rec->run->exit.ripas_value & 1;
+	unsigned long top_ipa;
+	int ret = -EINVAL;
+
+	if (realm_is_addr_protected(realm, base) &&
+	    realm_is_addr_protected(realm, top - 1)) {
+		kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_page_cache,
+					   kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu));
+		write_lock(&kvm->mmu_lock);
+		ret = realm_set_ipa_state(vcpu, base, top, ripas, &top_ipa);
+		write_unlock(&kvm->mmu_lock);
+	}
+
+	WARN(ret && ret != -ENOMEM,
+	     "Unable to satisfy SET_IPAS for %#lx - %#lx, ripas: %#lx\n",
+	     base, top, ripas);
+
+	/* Exit to VMM to complete the change */
+	kvm_prepare_memory_fault_exit(vcpu, base, top_ipa - base, false, false,
+				      ripas == 1);
+
+	return 0;
+}
+
+static void update_arch_timer_irq_lines(struct kvm_vcpu *vcpu)
+{
+	struct realm_rec *rec = &vcpu->arch.rec;
+
+	__vcpu_sys_reg(vcpu, CNTV_CTL_EL0) = rec->run->exit.cntv_ctl;
+	__vcpu_sys_reg(vcpu, CNTV_CVAL_EL0) = rec->run->exit.cntv_cval;
+	__vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = rec->run->exit.cntp_ctl;
+	__vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = rec->run->exit.cntp_cval;
+
+	kvm_realm_timers_update(vcpu);
+}
+
+/*
+ * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
+ * proper exit to userspace.
+ */
+int handle_rme_exit(struct kvm_vcpu *vcpu, int rec_run_ret)
+{
+	struct realm_rec *rec = &vcpu->arch.rec;
+	u8 esr_ec = ESR_ELx_EC(rec->run->exit.esr);
+	unsigned long status, index;
+
+	status = RMI_RETURN_STATUS(rec_run_ret);
+	index = RMI_RETURN_INDEX(rec_run_ret);
+
+	/*
+	 * If a PSCI_SYSTEM_OFF request raced with a vcpu executing, we might
+	 * see the following status code and index indicating an attempt to run
+	 * a REC when the RD state is SYSTEM_OFF.  In this case, we just need to
+	 * return to user space which can deal with the system event or will try
+	 * to run the KVM VCPU again, at which point we will no longer attempt
+	 * to enter the Realm because we will have a sleep request pending on
+	 * the VCPU as a result of KVM's PSCI handling.
+	 */
+	if (status == RMI_ERROR_REALM && index == 1) {
+		vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
+		return 0;
+	}
+
+	if (rec_run_ret)
+		return -ENXIO;
+
+	vcpu->arch.fault.esr_el2 = rec->run->exit.esr;
+	vcpu->arch.fault.far_el2 = rec->run->exit.far;
+	vcpu->arch.fault.hpfar_el2 = rec->run->exit.hpfar;
+
+	update_arch_timer_irq_lines(vcpu);
+
+	/* Reset the emulation flags for the next run of the REC */
+	rec->run->enter.flags = 0;
+
+	switch (rec->run->exit.exit_reason) {
+	case RMI_EXIT_SYNC:
+		return rec_exit_handlers[esr_ec](vcpu);
+	case RMI_EXIT_IRQ:
+	case RMI_EXIT_FIQ:
+		return 1;
+	case RMI_EXIT_PSCI:
+		return rec_exit_psci(vcpu);
+	case RMI_EXIT_RIPAS_CHANGE:
+		return rec_exit_ripas_change(vcpu);
+	}
+
+	kvm_pr_unimpl("Unsupported exit reason: %u\n",
+		      rec->run->exit.exit_reason);
+	vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+	return 0;
+}
diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c
index 1fa9991d708b..fd4162af551d 100644
--- a/arch/arm64/kvm/rme.c
+++ b/arch/arm64/kvm/rme.c
@@ -899,6 +899,17 @@  void kvm_destroy_realm(struct kvm *kvm)
 	kvm_free_stage2_pgd(&kvm->arch.mmu);
 }
 
+int kvm_rec_enter(struct kvm_vcpu *vcpu)
+{
+	struct realm_rec *rec = &vcpu->arch.rec;
+
+	if (kvm_realm_state(vcpu->kvm) != REALM_STATE_ACTIVE)
+		return -EINVAL;
+
+	return rmi_rec_enter(virt_to_phys(rec->rec_page),
+			     virt_to_phys(rec->run));
+}
+
 static void free_rec_aux(struct page **aux_pages,
 			 unsigned int num_aux)
 {