@@ -80,6 +80,7 @@ enum __kvm_host_smccc_func {
__KVM_HOST_SMCCC_FUNC___pkvm_teardown_shadow,
__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load,
__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_put,
+ __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_sync_state,
};
#define DECLARE_KVM_VHE_SYM(sym) extern char sym[]
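The new entry is what lets the host issue kvm_call_hyp_nvhe(__pkvm_vcpu_sync_state): the enum name is token-pasted into an SMCCC vendor-hyp function ID and issued as an HVC. A rough sketch of what that call boils down to, simplified from the base tree's kvm_call_hyp_nvhe()/KVM_HOST_SMCCC_FUNC() helpers (return-value plumbing omitted, illustration only):

#include <linux/arm-smccc.h>
#include <asm/kvm_asm.h>

static inline void pkvm_vcpu_sync_state_hvc(void)
{
	struct arm_smccc_res res;

	/* KVM_HOST_SMCCC_FUNC() encodes the enum value into an SMCCC fast-call ID */
	arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__pkvm_vcpu_sync_state), &res);
	WARN_ON(res.a0 != SMCCC_RET_SUCCESS);
}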
@@ -512,6 +512,7 @@ struct kvm_vcpu_arch {
#define KVM_ARM64_DEBUG_STATE_SAVE_TRBE (1 << 13) /* Save TRBE context if active */
#define KVM_ARM64_FP_FOREIGN_FPSTATE (1 << 14)
#define KVM_ARM64_ON_UNSUPPORTED_CPU (1 << 15) /* Physical CPU not in supported_cpus */
+#define KVM_ARM64_PKVM_STATE_DIRTY (1 << 16)
#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
KVM_GUESTDBG_USE_SW_BP | \
@@ -448,6 +448,10 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
kvm_call_hyp(__vgic_v3_save_vmcr_aprs,
&vcpu->arch.vgic_cpu.vgic_v3);
kvm_call_hyp_nvhe(__pkvm_vcpu_put);
+
+ /* __pkvm_vcpu_put implies a sync of the state back to the host copy */
+ if (!kvm_vm_is_protected(vcpu->kvm))
+ vcpu->arch.flags |= KVM_ARM64_PKVM_STATE_DIRTY;
}
kvm_arch_vcpu_put_debug_state_flags(vcpu);
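KVM_ARM64_PKVM_STATE_DIRTY is the glue between the two copies of the context. A hypothetical helper (not part of the patch, which open-codes the flag tests) spells out what the flag means for a non-protected VM:

static inline bool pkvm_host_state_is_authoritative(const struct kvm_vcpu *vcpu)
{
	/*
	 * Once the flag is set, the host copy of the vCPU context is the
	 * one to trust; the hypervisor will flush it back into the shadow
	 * vCPU the next time the vCPU is run.
	 */
	return vcpu->arch.flags & KVM_ARM64_PKVM_STATE_DIRTY;
}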
@@ -575,6 +579,9 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
return ret;
if (is_protected_kvm_enabled()) {
+ /* Start with the vcpu dirty so the host context gets flushed into the shadow on first run */
+ if (!kvm_vm_is_protected(vcpu->kvm))
+ vcpu->arch.flags |= KVM_ARM64_PKVM_STATE_DIRTY;
ret = kvm_shadow_create(kvm);
if (ret)
return ret;
@@ -203,6 +203,21 @@ static int handle_trap_exceptions(struct kvm_vcpu *vcpu)
{
int handled;
+ /*
+ * If we run a non-protected VM when protection is enabled
+ * system-wide, resync the state from the hypervisor and mark it
+ * dirty on the host side, unless it is dirty already (as happens
+ * when the vcpu was preempted and __pkvm_vcpu_put synced it).
+ */
+ if (is_protected_kvm_enabled() && !kvm_vm_is_protected(vcpu->kvm)) {
+ preempt_disable();
+ if (!(vcpu->arch.flags & KVM_ARM64_PKVM_STATE_DIRTY)) {
+ kvm_call_hyp_nvhe(__pkvm_vcpu_sync_state);
+ vcpu->arch.flags |= KVM_ARM64_PKVM_STATE_DIRTY;
+ }
+ preempt_enable();
+ }
+
/*
* See ARM ARM B1.14.1: "Hyp traps on instructions
* that fail their condition code check"
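The block added above could equally be factored into a helper; a sketch of such a (hypothetical) pkvm_sync_host_copy(), keeping the hypercall and the flag update in one preemption-disabled window so a vcpu_put cannot slip in between:

static void pkvm_sync_host_copy(struct kvm_vcpu *vcpu)
{
	if (!is_protected_kvm_enabled() || kvm_vm_is_protected(vcpu->kvm))
		return;

	preempt_disable();
	if (!(vcpu->arch.flags & KVM_ARM64_PKVM_STATE_DIRTY)) {
		/* Pull the shadow state back; the host copy is now authoritative */
		kvm_call_hyp_nvhe(__pkvm_vcpu_sync_state);
		vcpu->arch.flags |= KVM_ARM64_PKVM_STATE_DIRTY;
	}
	preempt_enable();
}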
@@ -270,6 +285,13 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index)
/* For exit types that need handling before we can be preempted */
void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index)
{
+ /*
+ * We just exited, so the state is clean from a hypervisor
+ * perspective.
+ */
+ if (is_protected_kvm_enabled())
+ vcpu->arch.flags &= ~KVM_ARM64_PKVM_STATE_DIRTY;
+
if (ARM_SERROR_PENDING(exception_index)) {
if (this_cpu_has_cap(ARM64_HAS_RAS_EXTN)) {
u64 disr = kvm_vcpu_get_disr(vcpu);
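Because the flag is cleared on every exit, host code that inspects the vCPU context of a non-protected VM between this point and the resync in handle_trap_exceptions() would see stale values. A hypothetical debug check of that invariant (illustration only, not in the patch):

static inline void pkvm_check_host_state_is_current(const struct kvm_vcpu *vcpu)
{
	/* The host copy is only trustworthy once it has been marked dirty again */
	if (is_protected_kvm_enabled() && !kvm_vm_is_protected(vcpu->kvm))
		WARN_ON_ONCE(!(vcpu->arch.flags & KVM_ARM64_PKVM_STATE_DIRTY));
}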
@@ -140,6 +140,38 @@ static void sync_timer_state(struct kvm_shadow_vcpu_state *shadow_state)
__vcpu_sys_reg(shadow_vcpu, CNTV_CTL_EL0) = read_sysreg_el0(SYS_CNTV_CTL);
}
+static void __copy_vcpu_state(const struct kvm_vcpu *from_vcpu,
+ struct kvm_vcpu *to_vcpu)
+{
+ int i;
+
+ to_vcpu->arch.ctxt.regs = from_vcpu->arch.ctxt.regs;
+ to_vcpu->arch.ctxt.spsr_abt = from_vcpu->arch.ctxt.spsr_abt;
+ to_vcpu->arch.ctxt.spsr_und = from_vcpu->arch.ctxt.spsr_und;
+ to_vcpu->arch.ctxt.spsr_irq = from_vcpu->arch.ctxt.spsr_irq;
+ to_vcpu->arch.ctxt.spsr_fiq = from_vcpu->arch.ctxt.spsr_fiq;
+
+ /*
+ * Copy the sysregs, but don't mess with the timer state which
+ * is directly handled by EL1 and is expected to be preserved.
+ */
+ for (i = 1; i < NR_SYS_REGS; i++) {
+ if (i >= CNTVOFF_EL2 && i <= CNTP_CTL_EL0)
+ continue;
+ to_vcpu->arch.ctxt.sys_regs[i] = from_vcpu->arch.ctxt.sys_regs[i];
+ }
+}
+
+static void __sync_vcpu_state(struct kvm_shadow_vcpu_state *shadow_state)
+{
+ __copy_vcpu_state(&shadow_state->shadow_vcpu, shadow_state->host_vcpu);
+}
+
+static void __flush_vcpu_state(struct kvm_shadow_vcpu_state *shadow_state)
+{
+ __copy_vcpu_state(shadow_state->host_vcpu, &shadow_state->shadow_vcpu);
+}
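/*
 * Illustration only (not part of the patch): the skip window in
 * __copy_vcpu_state() assumes that CNTVOFF_EL2 .. CNTP_CTL_EL0 form a
 * single contiguous block of timer registers in the sysreg enum. A
 * minimal compile-time sanity check of the bounds could look like this;
 * full contiguity still relies on the enum layout itself.
 */
static inline void pkvm_check_timer_sysreg_layout(void)
{
	BUILD_BUG_ON(CNTVOFF_EL2 > CNTP_CTL_EL0);
}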
+
static void flush_shadow_state(struct kvm_shadow_vcpu_state *shadow_state)
{
struct kvm_vcpu *shadow_vcpu = &shadow_state->shadow_vcpu;
@@ -147,7 +179,16 @@ static void flush_shadow_state(struct kvm_shadow_vcpu_state *shadow_state)
shadow_entry_exit_handler_fn ec_handler;
u8 esr_ec;
- shadow_vcpu->arch.ctxt = host_vcpu->arch.ctxt;
+ /*
+ * For a non-protected guest whose state is potentially dirty (from a
+ * host perspective), copy the host state back into the shadow.
+ */
+ if (!shadow_state_is_protected(shadow_state)) {
+ unsigned long host_flags = READ_ONCE(host_vcpu->arch.flags);
+
+ if (host_flags & KVM_ARM64_PKVM_STATE_DIRTY)
+ __flush_vcpu_state(shadow_state);
+ }
shadow_vcpu->arch.sve_state = kern_hyp_va(host_vcpu->arch.sve_state);
shadow_vcpu->arch.sve_max_vl = host_vcpu->arch.sve_max_vl;
@@ -268,10 +309,31 @@ static void handle___pkvm_vcpu_put(struct kvm_cpu_context *host_ctxt)
shadow_state = pkvm_loaded_shadow_vcpu_state();
if (shadow_state) {
+ struct kvm_vcpu *host_vcpu = shadow_state->host_vcpu;
+
+ if (!shadow_state_is_protected(shadow_state) &&
+ !(READ_ONCE(host_vcpu->arch.flags) & KVM_ARM64_PKVM_STATE_DIRTY))
+ __sync_vcpu_state(shadow_state);
+
pkvm_put_shadow_vcpu_state(shadow_state);
}
}
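/*
 * Hypothetical predicate (not in the patch) equivalent to the condition
 * above: only refresh the host copy from the shadow if the host hasn't
 * already been made the owner of the state, in which case a sync would
 * overwrite changes the host may have made since.
 */
static bool host_copy_needs_refresh(struct kvm_shadow_vcpu_state *shadow_state)
{
	struct kvm_vcpu *host_vcpu = shadow_state->host_vcpu;

	return !shadow_state_is_protected(shadow_state) &&
	       !(READ_ONCE(host_vcpu->arch.flags) & KVM_ARM64_PKVM_STATE_DIRTY);
}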
+static void handle___pkvm_vcpu_sync_state(struct kvm_cpu_context *host_ctxt)
+{
+ struct kvm_shadow_vcpu_state *shadow_state;
+
+ if (!is_protected_kvm_enabled())
+ return;
+
+ shadow_state = pkvm_loaded_shadow_vcpu_state();
+
+ if (!shadow_state || shadow_state_is_protected(shadow_state))
+ return;
+
+ __sync_vcpu_state(shadow_state);
+}
+
static struct kvm_vcpu *__get_current_vcpu(struct kvm_vcpu *vcpu,
struct kvm_shadow_vcpu_state **state)
{
@@ -579,6 +641,7 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__pkvm_teardown_shadow),
HANDLE_FUNC(__pkvm_vcpu_load),
HANDLE_FUNC(__pkvm_vcpu_put),
+ HANDLE_FUNC(__pkvm_vcpu_sync_state),
};
static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
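For completeness, the new table entry is reached through the existing handle_host_hcall() dispatcher; roughly (range and NULL checks trimmed, illustration only), the SMCCC function ID passed in x0 is turned back into a table index:

static void handle_host_hcall_sketch(struct kvm_cpu_context *host_ctxt)
{
	unsigned long id = cpu_reg(host_ctxt, 0) - KVM_HOST_SMCCC_ID(0);

	if (id < ARRAY_SIZE(host_hcall) && host_hcall[id]) {
		cpu_reg(host_ctxt, 0) = SMCCC_RET_SUCCESS;
		host_hcall[id](host_ctxt);	/* e.g. handle___pkvm_vcpu_sync_state() */
	} else {
		cpu_reg(host_ctxt, 0) = SMCCC_RET_NOT_SUPPORTED;
	}
}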