@@ -62,6 +62,8 @@ extern void __kvm_flush_vm_context(void);
extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa);
extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu);
+extern void __kvm_tlb_vae2is(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding);
+extern void __kvm_tlb_el1_instr(struct kvm_s2_mmu *mmu, u64 val, u64 sys_encoding);
extern void __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high);
extern void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
@@ -549,6 +549,42 @@
#define OP_AT_S12E0R sys_insn(AT_Op0, 4, AT_CRn, 8, 6)
#define OP_AT_S12E0W sys_insn(AT_Op0, 4, AT_CRn, 8, 7)
+/* TLBI instructions */
+#define TLBI_Op0 1
+#define TLBI_Op1_EL1 0 /* Accessible from EL1 or higher */
+#define TLBI_Op1_EL2 4 /* Accessible from EL2 or higher */
+#define TLBI_CRn 8
+#define tlbi_insn_el1(CRm, Op2) sys_insn(TLBI_Op0, TLBI_Op1_EL1, TLBI_CRn, (CRm), (Op2))
+#define tlbi_insn_el2(CRm, Op2) sys_insn(TLBI_Op0, TLBI_Op1_EL2, TLBI_CRn, (CRm), (Op2))
+
+#define OP_TLBI_VMALLE1IS tlbi_insn_el1(3, 0)
+#define OP_TLBI_VAE1IS tlbi_insn_el1(3, 1)
+#define OP_TLBI_ASIDE1IS tlbi_insn_el1(3, 2)
+#define OP_TLBI_VAAE1IS tlbi_insn_el1(3, 3)
+#define OP_TLBI_VALE1IS tlbi_insn_el1(3, 5)
+#define OP_TLBI_VAALE1IS tlbi_insn_el1(3, 7)
+#define OP_TLBI_VMALLE1 tlbi_insn_el1(7, 0)
+#define OP_TLBI_VAE1 tlbi_insn_el1(7, 1)
+#define OP_TLBI_ASIDE1 tlbi_insn_el1(7, 2)
+#define OP_TLBI_VAAE1 tlbi_insn_el1(7, 3)
+#define OP_TLBI_VALE1 tlbi_insn_el1(7, 5)
+#define OP_TLBI_VAALE1 tlbi_insn_el1(7, 7)
+
+#define OP_TLBI_IPAS2E1IS tlbi_insn_el2(0, 1)
+#define OP_TLBI_IPAS2LE1IS tlbi_insn_el2(0, 5)
+#define OP_TLBI_ALLE2IS tlbi_insn_el2(3, 0)
+#define OP_TLBI_VAE2IS tlbi_insn_el2(3, 1)
+#define OP_TLBI_ALLE1IS tlbi_insn_el2(3, 4)
+#define OP_TLBI_VALE2IS tlbi_insn_el2(3, 5)
+#define OP_TLBI_VMALLS12E1IS tlbi_insn_el2(3, 6)
+#define OP_TLBI_IPAS2E1 tlbi_insn_el2(4, 1)
+#define OP_TLBI_IPAS2LE1 tlbi_insn_el2(4, 5)
+#define OP_TLBI_ALLE2 tlbi_insn_el2(7, 0)
+#define OP_TLBI_VAE2 tlbi_insn_el2(7, 1)
+#define OP_TLBI_ALLE1 tlbi_insn_el2(7, 4)
+#define OP_TLBI_VALE2 tlbi_insn_el2(7, 5)
+#define OP_TLBI_VMALLS12E1 tlbi_insn_el2(7, 6)
+
/* Common SCTLR_ELx flags. */
#define SCTLR_ELx_DSSBS (BIT(44))
#define SCTLR_ELx_ENIA (BIT(31))
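
For reference, the OP_TLBI_* values defined above are just packed Op0/Op1/CRn/CRm/Op2 tuples; the trap handlers later rebuild the same value from the trapped instruction and switch on it. Below is a stand-alone sketch of that arithmetic; the shift values are assumed to mirror the kernel's sys_reg()/sys_insn() layout and are written out only so the example compiles on its own.

/*
 * Stand-alone sketch of the OP_TLBI_* encoding arithmetic. The shifts
 * are assumed to match the kernel's sys_reg() layout (Op0 at bit 19,
 * Op1 at 16, CRn at 12, CRm at 8, Op2 at 5); illustrative only.
 */
#include <stdint.h>
#include <stdio.h>

#define SYS_INSN(op0, op1, crn, crm, op2)				\
	(((uint32_t)(op0) << 19) | ((uint32_t)(op1) << 16) |		\
	 ((uint32_t)(crn) << 12) | ((uint32_t)(crm) << 8)  |		\
	 ((uint32_t)(op2) << 5))

int main(void)
{
	/* TLBI VMALLE1IS: Op0=1, Op1=0, CRn=8, CRm=3, Op2=0 */
	printf("OP_TLBI_VMALLE1IS = 0x%x\n", SYS_INSN(1, 0, 8, 3, 0));
	/* TLBI VAE2IS:     Op0=1, Op1=4, CRn=8, CRm=3, Op2=1 */
	printf("OP_TLBI_VAE2IS    = 0x%x\n", SYS_INSN(1, 4, 8, 3, 1));
	/* The non-IS EL1 variants only differ by CRm (7 instead of 3) */
	printf("OP_TLBI_VMALLE1   = 0x%x\n", SYS_INSN(1, 0, 8, 7, 0));
	return 0;
}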
@@ -160,7 +160,7 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
* the EL1 virtual memory control register accesses
* as well as the AT S1 operations.
*/
- hcr |= HCR_TVM | HCR_TRVM | HCR_AT | HCR_NV1;
+ hcr |= HCR_TVM | HCR_TRVM | HCR_AT | HCR_TTLB | HCR_NV1;
} else {
/*
* For a guest hypervisor on v8.1 (VHE), allow to
@@ -190,7 +190,7 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
* trapped too.
*/
if (!vcpu_el2_tge_is_set(vcpu))
- hcr |= HCR_AT;
+ hcr |= HCR_AT | HCR_TTLB;
}
}
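
The two hunks above only add HCR_TTLB to the trap configuration for virtual EL2. As a reading aid, here is a minimal stand-alone model of which extra trap bits end up set in the two cases this patch touches; the HCR_* constants follow the architectural HCR_EL2 bit positions and are defined locally so the sketch compiles, and the real __activate_traps() of course also handles NV, the guest's virtual HCR_EL2 state, and more.

/*
 * Minimal model of the extra trap bits for a vCPU in virtual EL2, as
 * touched by this patch. Not kernel code; HCR_* bit positions follow
 * the architecture and are defined here for self-containment.
 */
#include <stdbool.h>
#include <stdint.h>

#define HCR_TTLB	(1ULL << 25)
#define HCR_TVM		(1ULL << 26)
#define HCR_TRVM	(1ULL << 30)
#define HCR_NV1		(1ULL << 43)
#define HCR_AT		(1ULL << 44)

static uint64_t vel2_extra_traps(bool vhe_guest_hyp, bool vtge_set)
{
	if (!vhe_guest_hyp)
		/* Trap VM controls, AT S1 and EL1 TLBI for emulation */
		return HCR_TVM | HCR_TRVM | HCR_AT | HCR_TTLB | HCR_NV1;

	/* VHE guest hypervisor: only trap AT/TLBI while TGE is clear */
	return vtge_set ? 0 : (HCR_AT | HCR_TTLB);
}

int main(void)
{
	return (vel2_extra_traps(false, false) & HCR_TTLB) ? 0 : 1;
}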
@@ -237,3 +237,84 @@ void __hyp_text __kvm_flush_vm_context(void)
dsb(ish);
}
+
+void __hyp_text __kvm_tlb_vae2is(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding)
+{
+ struct tlb_inv_context cxt;
+
+ dsb(ishst);
+
+ /* Switch to requested VMID */
+ __tlb_switch_to_guest(mmu, &cxt);
+
+ /*
+ * Execute the EL1 version of the TLBI VAE2* instruction, forcing
+ * an upgrade to the Inner Shareable domain in order to
+ * perform the invalidation on all CPUs.
+ */
+ switch (sys_encoding) {
+ case OP_TLBI_VAE2:
+ case OP_TLBI_VAE2IS:
+ __tlbi(vae1is, va);
+ break;
+ case OP_TLBI_VALE2:
+ case OP_TLBI_VALE2IS:
+ __tlbi(vale1is, va);
+ break;
+ default:
+ break;
+ }
+ dsb(ish);
+ isb();
+
+ __tlb_switch_to_host(mmu, &cxt);
+}
+
+void __hyp_text __kvm_tlb_el1_instr(struct kvm_s2_mmu *mmu, u64 val, u64 sys_encoding)
+{
+ struct tlb_inv_context cxt;
+
+ dsb(ishst);
+
+ /* Switch to requested VMID */
+ __tlb_switch_to_guest(mmu, &cxt);
+
+ /*
+ * Execute the same instruction as the guest hypervisor did,
+ * expanding the scope of local TLB invalidations to the Inner
+ * Shareable domain so that they take place on all CPUs. This
+ * is equivalent to having HCR_EL2.FB set.
+ */
+ switch (sys_encoding) {
+ case OP_TLBI_VMALLE1:
+ case OP_TLBI_VMALLE1IS:
+ __tlbi(vmalle1is);
+ break;
+ case OP_TLBI_VAE1:
+ case OP_TLBI_VAE1IS:
+ __tlbi(vae1is, val);
+ break;
+ case OP_TLBI_ASIDE1:
+ case OP_TLBI_ASIDE1IS:
+ __tlbi(aside1is, val);
+ break;
+ case OP_TLBI_VAAE1:
+ case OP_TLBI_VAAE1IS:
+ __tlbi(vaae1is, val);
+ break;
+ case OP_TLBI_VALE1:
+ case OP_TLBI_VALE1IS:
+ __tlbi(vale1is, val);
+ break;
+ case OP_TLBI_VAALE1:
+ case OP_TLBI_VAALE1IS:
+ __tlbi(vaale1is, val);
+ break;
+ default:
+ break;
+ }
+ dsb(ish);
+ isb();
+
+ __tlb_switch_to_host(mmu, &cxt);
+}
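
Both helpers above rely on the same observation: whatever form the guest hypervisor used, the host executes the broadcast (Inner Shareable) form under the right VMID, which is the software counterpart of HCR_EL2.FB. For the EL1 encodings handled here the non-IS and IS forms differ only in CRm (7 vs 3), which is why each switch folds the two cases together. A small stand-alone illustration of that relationship follows (same assumed encoding layout as the earlier sketch; not kernel code).

/*
 * Stand-alone illustration: for the EL1 TLBI encodings handled by
 * __kvm_tlb_el1_instr(), the IS form of an instruction is the local
 * form with CRm 7 replaced by CRm 3. Illustrative only.
 */
#include <assert.h>
#include <stdint.h>

#define SYS_INSN(op0, op1, crn, crm, op2)				\
	(((uint32_t)(op0) << 19) | ((uint32_t)(op1) << 16) |		\
	 ((uint32_t)(crn) << 12) | ((uint32_t)(crm) << 8)  |		\
	 ((uint32_t)(op2) << 5))

#define CRM_MASK	(0xfU << 8)

static uint32_t el1_tlbi_to_is(uint32_t enc)
{
	if ((enc & CRM_MASK) == (7U << 8))
		return (enc & ~CRM_MASK) | (3U << 8);
	return enc;		/* already the IS form */
}

int main(void)
{
	uint32_t vae1   = SYS_INSN(1, 0, 8, 7, 1);	/* TLBI VAE1   */
	uint32_t vae1is = SYS_INSN(1, 0, 8, 3, 1);	/* TLBI VAE1IS */

	assert(el1_tlbi_to_is(vae1) == vae1is);
	assert(el1_tlbi_to_is(vae1is) == vae1is);
	return 0;
}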
@@ -1683,6 +1683,11 @@ static bool forward_at_traps(struct kvm_vcpu *vcpu)
return forward_traps(vcpu, HCR_AT);
}
+static bool forward_ttlb_traps(struct kvm_vcpu *vcpu)
+{
+ return forward_traps(vcpu, HCR_TTLB);
+}
+
static bool access_elr(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
@@ -2204,6 +2209,189 @@ static bool handle_s12w(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
return handle_s12(vcpu, p, r, true);
}
+static bool handle_alle2is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ /*
+ * To emulate invalidating all EL2 regime stage 1 TLB entries for all
+ * PEs, executing TLBI VMALLE1IS is enough. But we reuse the existing
+ * interface for simplicity; invalidating stage 2 entries doesn't
+ * affect correctness.
+ */
+ kvm_call_hyp(__kvm_tlb_flush_vmid, &vcpu->kvm->arch.mmu);
+ return true;
+}
+
+static bool handle_vae2is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ int sys_encoding = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
+
+ /*
+ * Based on the same principle as TLBI ALLE2 instruction
+ * emulation, we emulate TLBI VAE2* instructions by executing
+ * corresponding TLBI VAE1* instructions with the virtual
+ * EL2's VMID assigned by the host hypervisor.
+ */
+ kvm_call_hyp(__kvm_tlb_vae2is, &vcpu->kvm->arch.mmu,
+ p->regval, sys_encoding);
+ return true;
+}
+
+static bool handle_alle1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ struct kvm_s2_mmu *mmu = &vcpu->kvm->arch.mmu;
+ spin_lock(&vcpu->kvm->mmu_lock);
+
+ /*
+ * Clear all mappings in the shadow page tables and invalidate the stage
+ * 1 and 2 TLB entries via kvm_tlb_flush_vmid_ipa().
+ */
+ kvm_nested_s2_clear(vcpu->kvm);
+
+ if (mmu->vmid.vmid_gen) {
+ /*
+ * Invalidate the stage 1 and 2 TLB entries for the host OS
+ * in a VM, but only if a VMID has been allocated for it.
+ */
+ kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
+ }
+
+ spin_unlock(&vcpu->kvm->mmu_lock);
+
+ return true;
+}
+
+static bool handle_vmalls12e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u64 vttbr = vcpu_read_sys_reg(vcpu, VTTBR_EL2);
+ struct kvm_s2_mmu *mmu;
+
+ spin_lock(&vcpu->kvm->mmu_lock);
+
+ mmu = lookup_s2_mmu(vcpu->kvm, vttbr, HCR_VM);
+ if (mmu)
+ kvm_unmap_stage2_range(mmu, 0, kvm_phys_size(vcpu->kvm));
+
+ mmu = lookup_s2_mmu(vcpu->kvm, vttbr, 0);
+ if (mmu)
+ kvm_unmap_stage2_range(mmu, 0, kvm_phys_size(vcpu->kvm));
+
+ spin_unlock(&vcpu->kvm->mmu_lock);
+
+ return true;
+}
+
+static bool handle_ipas2e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u64 vttbr = vcpu_read_sys_reg(vcpu, VTTBR_EL2);
+ u64 vtcr = vcpu_read_sys_reg(vcpu, VTCR_EL2);
+ struct kvm_s2_mmu *mmu;
+ u64 base_addr;
+ int max_size;
+
+ /*
+ * We drop a number of things from the supplied value:
+ *
+ * - NS bit: we're non-secure only.
+ *
+ * - TTL field: We already have the granule size from the
+ * VTCR_EL2.TG0 field, and the level is only relevant to the
+ * guest's S2PT.
+ *
+ * - IPA[51:48]: We don't support 52bit IPA just yet...
+ *
+ * And of course, adjust the IPA to be on an actual address.
+ */
+ base_addr = (p->regval & GENMASK_ULL(35, 0)) << 12;
+
+ /* Compute the maximum extent of the invalidation */
+ switch ((vtcr & VTCR_EL2_TG0_MASK)) {
+ case VTCR_EL2_TG0_4K:
+ max_size = SZ_1G;
+ break;
+ case VTCR_EL2_TG0_16K:
+ max_size = SZ_32M;
+ break;
+ case VTCR_EL2_TG0_64K:
+ /*
+ * No, we do not support 52bit IPA in nested yet. Once
+ * we do, this should be 4TB.
+ */
+ /* FIXME: remove the 52bit PA support from the IDregs */
+ max_size = SZ_512M;
+ break;
+ default:
+ BUG();
+ }
+
+ spin_lock(&vcpu->kvm->mmu_lock);
+
+ mmu = lookup_s2_mmu(vcpu->kvm, vttbr, HCR_VM);
+ if (mmu)
+ kvm_unmap_stage2_range(mmu, base_addr, max_size);
+
+ mmu = lookup_s2_mmu(vcpu->kvm, vttbr, 0);
+ if (mmu)
+ kvm_unmap_stage2_range(mmu, base_addr, max_size);
+
+ spin_unlock(&vcpu->kvm->mmu_lock);
+
+ return true;
+}
+
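
The address handling in handle_ipas2e1is() is worth spelling out: the TLBI IPAS2*E1* argument carries IPA[47:12] in bits [35:0], so masking and shifting left by 12 recovers a byte address, and max_size is simply the largest block mapping a single stage 2 entry can describe at the configured granule (1GB, 32MB or 512MB without 52bit PA), which bounds the extent of the shadow unmap. Here is a stand-alone sketch of just that arithmetic, with the size constants written out for self-containment; illustrative only.

/*
 * Stand-alone sketch of the address/range computation done by
 * handle_ipas2e1is(). Constants are written out so the example is
 * self-contained; illustrative only.
 */
#include <stdint.h>
#include <stdio.h>

#define SZ_32M		(32ULL << 20)
#define SZ_512M		(512ULL << 20)
#define SZ_1G		(1ULL << 30)

enum granule { TG_4K, TG_16K, TG_64K };

/* TLBI IPAS2*E1* argument: IPA[47:12] lives in bits [35:0]. */
static uint64_t ipas2_base_addr(uint64_t regval)
{
	return (regval & ((1ULL << 36) - 1)) << 12;
}

/*
 * Largest block mapping a single stage 2 entry can describe for each
 * granule (1GB L1 block for 4K, 32MB L2 block for 16K, 512MB L2 block
 * for 64K without 52bit PA); this bounds the shadow unmap.
 */
static uint64_t ipas2_max_size(enum granule tg)
{
	switch (tg) {
	case TG_4K:	return SZ_1G;
	case TG_16K:	return SZ_32M;
	case TG_64K:	return SZ_512M;
	}
	return 0;
}

int main(void)
{
	/* e.g. a guest TLBI IPAS2E1IS Xt for IPA 0x40080000 */
	uint64_t regval = 0x40080000ULL >> 12;

	printf("base=0x%llx max=0x%llx\n",
	       (unsigned long long)ipas2_base_addr(regval),
	       (unsigned long long)ipas2_max_size(TG_4K));
	return 0;
}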
+static bool handle_tlbi_el1(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u32 sys_encoding = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
+
+ /*
+ * If we're here, this is because we've trapped on an EL1 TLBI
+ * instruction that affects the EL1 translation regime while
+ * we're running in a context that doesn't allow us to let the
+ * HW do its thing (aka vEL2):
+ *
+ * - HCR_EL2.E2H == 0 : a non-VHE guest
+ * - HCR_EL2.{E2H,TGE} == { 1, 0 } : a VHE guest in guest mode
+ *
+ * We don't expect these helpers to ever be called when running
+ * in a vEL1 context.
+ */
+
+ WARN_ON(!vcpu_mode_el2(vcpu));
+
+ mutex_lock(&vcpu->kvm->lock);
+
+ if ((__vcpu_sys_reg(vcpu, HCR_EL2) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE)) {
+ u64 virtual_vttbr = vcpu_read_sys_reg(vcpu, VTTBR_EL2);
+ struct kvm_s2_mmu *mmu;
+
+ mmu = lookup_s2_mmu(vcpu->kvm, virtual_vttbr, HCR_VM);
+ if (mmu)
+ kvm_call_hyp(__kvm_tlb_el1_instr,
+ mmu, p->regval, sys_encoding);
+
+ mmu = lookup_s2_mmu(vcpu->kvm, virtual_vttbr, 0);
+ if (mmu)
+ kvm_call_hyp(__kvm_tlb_el1_instr,
+ mmu, p->regval, sys_encoding);
+ } else {
+ /*
+ * ARMv8.4-NV allows the guest to change TGE behind
+ * our back, so we always trap EL1 TLBIs from vEL2...
+ */
+ kvm_call_hyp(__kvm_tlb_el1_instr,
+ &vcpu->kvm->arch.mmu, p->regval, sys_encoding);
+ }
+
+ mutex_unlock(&vcpu->kvm->lock);
+
+ return true;
+}
+
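
The only real decision in handle_tlbi_el1() is which physical stage 2 context(s) the invalidation must run under: when the guest hypervisor is not running with both E2H and TGE set, the trapped EL1 TLBI targets the nested guest's EL1 regime and both shadow MMUs (looked up by VTTBR, with and without HCR_VM) are flushed; otherwise it targets the guest hypervisor's own EL1/EL0 and the VM's canonical MMU is used. A hedged stand-alone model of just that check, with the HCR bit positions written out so it compiles on its own:

/*
 * Stand-alone model of the context selection in handle_tlbi_el1().
 * HCR_E2H/HCR_TGE follow the architectural bit positions and are
 * defined here only to keep the sketch self-contained.
 */
#include <stdbool.h>
#include <stdint.h>

#define HCR_TGE	(1ULL << 27)
#define HCR_E2H	(1ULL << 34)

/*
 * True when a trapped EL1 TLBI issued from vEL2 targets the nested
 * guest's EL1 regime (i.e. the shadow stage 2 MMUs); false when it
 * targets the guest hypervisor's own EL1/EL0, backed by the VM's
 * canonical MMU.
 */
static bool el1_tlbi_hits_shadow_mmus(uint64_t vhcr_el2)
{
	return (vhcr_el2 & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE);
}

int main(void)
{
	return el1_tlbi_hits_shadow_mmus(HCR_E2H) ? 0 : 1;
}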
/*
* AT instruction emulation
*
@@ -2286,12 +2474,40 @@ static struct sys_reg_desc sys_insn_descs[] = {
{ SYS_DESC(SYS_DC_CSW), access_dcsw },
{ SYS_DESC(SYS_DC_CISW), access_dcsw },
+ SYS_INSN_TO_DESC(TLBI_VMALLE1IS, handle_tlbi_el1, forward_ttlb_traps),
+ SYS_INSN_TO_DESC(TLBI_VAE1IS, handle_tlbi_el1, forward_ttlb_traps),
+ SYS_INSN_TO_DESC(TLBI_ASIDE1IS, handle_tlbi_el1, forward_ttlb_traps),
+ SYS_INSN_TO_DESC(TLBI_VAAE1IS, handle_tlbi_el1, forward_ttlb_traps),
+ SYS_INSN_TO_DESC(TLBI_VALE1IS, handle_tlbi_el1, forward_ttlb_traps),
+ SYS_INSN_TO_DESC(TLBI_VAALE1IS, handle_tlbi_el1, forward_ttlb_traps),
+ SYS_INSN_TO_DESC(TLBI_VMALLE1, handle_tlbi_el1, forward_ttlb_traps),
+ SYS_INSN_TO_DESC(TLBI_VAE1, handle_tlbi_el1, forward_ttlb_traps),
+ SYS_INSN_TO_DESC(TLBI_ASIDE1, handle_tlbi_el1, forward_ttlb_traps),
+ SYS_INSN_TO_DESC(TLBI_VAAE1, handle_tlbi_el1, forward_ttlb_traps),
+ SYS_INSN_TO_DESC(TLBI_VALE1, handle_tlbi_el1, forward_ttlb_traps),
+ SYS_INSN_TO_DESC(TLBI_VAALE1, handle_tlbi_el1, forward_ttlb_traps),
+
SYS_INSN_TO_DESC(AT_S1E2R, handle_s1e2, forward_nv_traps),
SYS_INSN_TO_DESC(AT_S1E2W, handle_s1e2, forward_nv_traps),
SYS_INSN_TO_DESC(AT_S12E1R, handle_s12r, forward_nv_traps),
SYS_INSN_TO_DESC(AT_S12E1W, handle_s12w, forward_nv_traps),
SYS_INSN_TO_DESC(AT_S12E0R, handle_s12r, forward_nv_traps),
SYS_INSN_TO_DESC(AT_S12E0W, handle_s12w, forward_nv_traps),
+
+ SYS_INSN_TO_DESC(TLBI_IPAS2E1IS, handle_ipas2e1is, forward_nv_traps),
+ SYS_INSN_TO_DESC(TLBI_IPAS2LE1IS, handle_ipas2e1is, forward_nv_traps),
+ SYS_INSN_TO_DESC(TLBI_ALLE2IS, handle_alle2is, forward_nv_traps),
+ SYS_INSN_TO_DESC(TLBI_VAE2IS, handle_vae2is, forward_nv_traps),
+ SYS_INSN_TO_DESC(TLBI_ALLE1IS, handle_alle1is, forward_nv_traps),
+ SYS_INSN_TO_DESC(TLBI_VALE2IS, handle_vae2is, forward_nv_traps),
+ SYS_INSN_TO_DESC(TLBI_VMALLS12E1IS, handle_vmalls12e1is, forward_nv_traps),
+ SYS_INSN_TO_DESC(TLBI_IPAS2E1, handle_ipas2e1is, forward_nv_traps),
+ SYS_INSN_TO_DESC(TLBI_IPAS2LE1, handle_ipas2e1is, forward_nv_traps),
+ SYS_INSN_TO_DESC(TLBI_ALLE2, handle_alle2is, forward_nv_traps),
+ SYS_INSN_TO_DESC(TLBI_VAE2, handle_vae2is, forward_nv_traps),
+ SYS_INSN_TO_DESC(TLBI_ALLE1, handle_alle1is, forward_nv_traps),
+ SYS_INSN_TO_DESC(TLBI_VALE2, handle_vae2is, forward_nv_traps),
+ SYS_INSN_TO_DESC(TLBI_VMALLS12E1, handle_vmalls12e1is, forward_nv_traps),
};
static bool trap_dbgidr(struct kvm_vcpu *vcpu,
@@ -55,7 +55,23 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot)
*/
void kvm_flush_remote_tlbs(struct kvm *kvm)
{
- kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu);
+ struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
+
+ if (!kvm->arch.nested_mmus) {
+ /*
+ * No shadow stage 2 MMUs have been allocated, i.e. this is a
+ * normal (non-nested) guest: flush entries for the given VMID.
+ */
+ kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
+ } else {
+ /*
+ * When supporting nested virtualization, we can have multiple
+ * VMIDs in play for each VCPU in the VM, so it's really not
+ * worth it to try to quiesce the system and flush all the
+ * VMIDs that may be in use, instead just nuke the whole thing.
+ */
+ kvm_call_hyp(__kvm_flush_vm_context);
+ }
}
static void kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa)