@@ -34,6 +34,7 @@
#define SMCHYP_HVBAR_W 0xfffffff0
#ifndef __ASSEMBLY__
+struct kvm;
struct kvm_vcpu;
extern char __kvm_hyp_init[];
@@ -48,6 +49,7 @@ extern char __kvm_hyp_code_start[];
extern char __kvm_hyp_code_end[];
extern void __kvm_flush_vm_context(void);
+extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
#endif
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index d7e3398..d86ce39 100644
@@ -157,4 +157,22 @@ struct kvm_vcpu_stat {
struct kvm_vcpu_init;
int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
const struct kvm_vcpu_init *init);
+
+#define KVM_ARCH_WANT_MMU_NOTIFIER
+struct kvm;
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+int kvm_unmap_hva_range(struct kvm *kvm,
+			unsigned long start, unsigned long end);
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+
+/* We do not have shadow page tables, hence the empty hooks */
+static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+ return 0;
+}
+
+static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+ return 0;
+}
#endif /* __ARM_KVM_HOST_H__ */
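
For orientation only (this dispatch lives in the generic KVM code, not in this patch): with KVM_ARCH_WANT_MMU_NOTIFIER defined and MMU_NOTIFIER selected, common KVM registers an mmu_notifier on the VM's address space and forwards host MM events to the hooks declared above, roughly as paraphrased below.

	/*
	 * Simplified sketch of the generic-side dispatch (paraphrased,
	 * not verbatim from virt/kvm/kvm_main.c):
	 *
	 *   mmu_notifier invalidate of hva       -> kvm_unmap_hva(kvm, hva)
	 *   mmu_notifier invalidate of a range   -> kvm_unmap_hva_range(kvm, start, end)
	 *   mmu_notifier change_pte of hva       -> kvm_set_spte_hva(kvm, hva, pte)
	 *   mmu_notifier young/accessed queries  -> kvm_age_hva()/kvm_test_age_hva(),
	 *                                           which simply return 0 here since
	 *                                           there are no shadow page tables.
	 */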
@@ -33,4 +33,13 @@ int create_hyp_mappings(void *from, void *to);
int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
void free_hyp_pmds(void);
+int kvm_alloc_stage2_pgd(struct kvm *kvm);
+void kvm_free_stage2_pgd(struct kvm *kvm);
+int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
+			  phys_addr_t pa, unsigned long size);
+
+int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run);
+
+void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
+
#endif /* __ARM_KVM_MMU_H__ */
@@ -102,6 +102,15 @@
*/
#define L_PGD_SWAPPER (_AT(pgdval_t, 1) << 55) /* swapper_pg_dir entry */
+/*
+ * 2-nd stage PTE definitions for LPAE.
+ */
+#define L_PTE2_SHARED L_PTE_SHARED
+#define L_PTE2_READ (_AT(pteval_t, 1) << 6) /* HAP[0] */
+#define L_PTE2_WRITE (_AT(pteval_t, 1) << 7) /* HAP[1] */
+#define L_PTE2_NORM_WB (_AT(pteval_t, 3) << 4) /* MemAttr[3:2] */
+#define L_PTE2_INNER_WB (_AT(pteval_t, 3) << 2) /* MemAttr[1:0] */
+
#ifndef __ASSEMBLY__
#define pud_none(pud) (!pud_val(pud))
@@ -70,6 +70,7 @@ extern void __pgd_error(const char *file, int line, pgd_t);
extern pgprot_t pgprot_user;
extern pgprot_t pgprot_kernel;
+extern pgprot_t pgprot_guest;
#define _MOD_PROT(p, b) __pgprot(pgprot_val(p) | (b))
@@ -83,6 +84,9 @@ extern pgprot_t pgprot_kernel;
#define PAGE_KERNEL _MOD_PROT(pgprot_kernel, L_PTE_XN)
#define PAGE_KERNEL_EXEC pgprot_kernel
#define PAGE_HYP _MOD_PROT(pgprot_kernel, L_PTE_USER)
+#define PAGE_KVM_GUEST _MOD_PROT(pgprot_guest, L_PTE2_READ | \
+ L_PTE2_NORM_WB | L_PTE2_INNER_WB | \
+ L_PTE2_SHARED)
#define __PAGE_NONE __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN)
#define __PAGE_SHARED __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN)
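
A rough picture of how PAGE_KVM_GUEST composes the stage-2 bits defined above; the breakdown below is illustrative, not taken from a build:

	/*
	 * PAGE_KVM_GUEST = pgprot_val(pgprot_guest)
	 *		  | L_PTE2_READ      HAP read permission (bit 6)
	 *		  | L_PTE2_NORM_WB   MemAttr[3:2] = 0b11
	 *		  | L_PTE2_INNER_WB  MemAttr[1:0] = 0b11
	 *		  | L_PTE2_SHARED    same encoding as L_PTE_SHARED
	 *
	 * A writable guest mapping would additionally OR in L_PTE2_WRITE
	 * (bit 7), as the I/O mapping path in this patch does.
	 */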
@@ -36,6 +36,7 @@ config KVM_ARM_HOST
depends on KVM
depends on MMU
depends on CPU_V7 && ARM_VIRT_EXT
+ select MMU_NOTIFIER
---help---
Provides host support for ARM processors.
@@ -82,12 +82,34 @@ void kvm_arch_sync_events(struct kvm *kvm)
{
}
+/**
+ * kvm_arch_init_vm - initializes a VM data structure
+ * @kvm: pointer to the KVM struct
+ */
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
+ int ret = 0;
+
if (type)
return -EINVAL;
- return 0;
+ ret = kvm_alloc_stage2_pgd(kvm);
+ if (ret)
+ goto out_fail_alloc;
+ spin_lock_init(&kvm->arch.pgd_lock);
+
+ ret = create_hyp_mappings(kvm, kvm + 1);
+ if (ret)
+ goto out_free_stage2_pgd;
+
+ /* Mark the initial VMID generation invalid */
+ kvm->arch.vmid_gen = 0;
+
+ return ret;
+out_free_stage2_pgd:
+ kvm_free_stage2_pgd(kvm);
+out_fail_alloc:
+ return ret;
}
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
@@ -105,10 +127,16 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
return 0;
}
+/**
+ * kvm_arch_destroy_vm - destroy the VM data structure
+ * @kvm: pointer to the KVM struct
+ */
void kvm_arch_destroy_vm(struct kvm *kvm)
{
int i;
+ kvm_free_stage2_pgd(kvm);
+
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
if (kvm->vcpus[i]) {
kvm_arch_vcpu_free(kvm->vcpus[i]);
@@ -184,7 +212,13 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
if (err)
goto free_vcpu;
+ err = create_hyp_mappings(vcpu, vcpu + 1);
+ if (err)
+ goto vcpu_uninit;
+
return vcpu;
+vcpu_uninit:
+ kvm_vcpu_uninit(vcpu);
free_vcpu:
kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
@@ -193,6 +227,8 @@ out:
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
{
+ kvm_mmu_free_memory_caches(vcpu);
+ kmem_cache_free(kvm_vcpu_cache, vcpu);
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
diff --git a/arch/arm/kvm/exports.c b/arch/arm/kvm/exports.c
index 8ebdf07..f39f823 100644
@@ -33,5 +33,6 @@ EXPORT_SYMBOL_GPL(__kvm_hyp_code_end);
EXPORT_SYMBOL_GPL(__kvm_vcpu_run);
EXPORT_SYMBOL_GPL(__kvm_flush_vm_context);
+EXPORT_SYMBOL_GPL(__kvm_tlb_flush_vmid);
EXPORT_SYMBOL_GPL(smp_send_reschedule);
@@ -31,6 +31,14 @@ __kvm_hyp_code_start:
.globl __kvm_hyp_code_start
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@ Flush per-VMID TLBs
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+ENTRY(__kvm_tlb_flush_vmid)
+ bx lr
+ENDPROC(__kvm_tlb_flush_vmid)
+
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@ Flush TLBs and instruction caches of current CPU for all VMIDs
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@ -23,10 +23,43 @@
#include <asm/pgalloc.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>
+#include <asm/kvm_asm.h>
#include <asm/mach/map.h>
static DEFINE_MUTEX(kvm_hyp_pgd_mutex);
+static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
+ int min, int max)
+{
+ void *page;
+
+ BUG_ON(max > KVM_NR_MEM_OBJS);
+ if (cache->nobjs >= min)
+ return 0;
+ while (cache->nobjs < max) {
+ page = (void *)__get_free_page(PGALLOC_GFP);
+ if (!page)
+ return -ENOMEM;
+ cache->objects[cache->nobjs++] = page;
+ }
+ return 0;
+}
+
+static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
+{
+	while (mc->nobjs)
+		free_page((unsigned long)mc->objects[--mc->nobjs]);
+}
+
+static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
+{
+ void *p;
+
+ BUG_ON(!mc || !mc->nobjs);
+ p = mc->objects[--mc->nobjs];
+ return p;
+}
+
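
The three cache helpers above follow the usual KVM pattern: fill the cache with enough pages for a worst-case table walk while sleeping is still allowed, then consume them under the lock. A minimal sketch of that pattern, mirroring kvm_phys_addr_ioremap() later in this file (kvm, gpa and pfn stand in for caller context):

	struct kvm_mmu_memory_cache cache = { 0, };
	pte_t pte = pfn_pte(pfn, PAGE_KVM_GUEST);
	int ret;

	ret = mmu_topup_memory_cache(&cache, 2, 2);	/* room for a pmd and a pte table */
	if (!ret) {
		spin_lock(&kvm->arch.pgd_lock);
		stage2_set_pte(kvm, &cache, gpa, &pte);
		spin_unlock(&kvm->arch.pgd_lock);
	}
	mmu_free_memory_cache(&cache);			/* return any unused pages */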
static void free_ptes(pmd_t *pmd, unsigned long addr)
{
pte_t *pte;
@@ -200,7 +233,347 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t addr)
	return __create_hyp_mappings(from, to, &pfn);
}
+/**
+ * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
+ * @kvm: The KVM struct pointer for the VM.
+ *
+ * Allocates the 1st level table only of size defined by PGD2_ORDER (can
+ * support either full 40-bit input addresses or limited to 32-bit input
+ * addresses). Clears the allocated pages.
+ *
+ * Note we don't need locking here as this is only called when the VM is
+ * created, which can only be done once.
+ */
+int kvm_alloc_stage2_pgd(struct kvm *kvm)
+{
+ pgd_t *pgd;
+
+ if (kvm->arch.pgd != NULL) {
+ kvm_err("kvm_arch already initialized?\n");
+ return -EINVAL;
+ }
+
+ pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, PGD2_ORDER);
+ if (!pgd)
+ return -ENOMEM;
+
+ memset(pgd, 0, PTRS_PER_PGD2 * sizeof(pgd_t));
+ kvm->arch.pgd = pgd;
+
+ return 0;
+}
+
+static void free_guest_pages(pte_t *pte, unsigned long addr)
+{
+ unsigned int i;
+ struct page *page, *pte_page;
+
+ pte_page = virt_to_page(pte);
+
+ for (i = 0; i < PTRS_PER_PTE; i++) {
+ if (pte_present(*pte)) {
+ unsigned long pfn = pte_pfn(*pte);
+
+ if (pfn_valid(pfn)) { /* Skip over device memory */
+ page = pfn_to_page(pfn);
+ put_page(page);
+ }
+ put_page(pte_page);
+ }
+ pte++;
+ }
+}
+
+static void free_stage2_ptes(pmd_t *pmd, unsigned long addr)
+{
+ unsigned int i;
+ pte_t *pte;
+ struct page *page, *pmd_page;
+
+ pmd_page = virt_to_page(pmd);
+
+ for (i = 0; i < PTRS_PER_PMD; i++, addr += PMD_SIZE) {
+ BUG_ON(pmd_sect(*pmd));
+ if (!pmd_none(*pmd) && pmd_table(*pmd)) {
+ pte = pte_offset_kernel(pmd, addr);
+ free_guest_pages(pte, addr);
+ page = virt_to_page((void *)pte);
+ WARN_ON(page_count(page) != 1);
+ pte_free_kernel(NULL, pte);
+
+ put_page(pmd_page);
+ }
+ pmd++;
+ }
+}
+
+/**
+ * kvm_free_stage2_pgd - free all stage-2 tables
+ * @kvm: The KVM struct pointer for the VM.
+ *
+ * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
+ * underlying level-2 and level-3 tables before freeing the actual level-1
+ * table and setting the struct pointer to NULL.
+ *
+ * Note we don't need locking here as this is only called when the VM is
+ * destroyed, which can only be done once.
+ */
+void kvm_free_stage2_pgd(struct kvm *kvm)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ unsigned long long i, addr;
+ struct page *page, *pud_page;
+
+ if (kvm->arch.pgd == NULL)
+ return;
+
+ /*
+ * We do this slightly different than other places, since we need more
+ * than 32 bits and for instance pgd_addr_end converts to unsigned long.
+ */
+ addr = 0;
+ for (i = 0; i < PTRS_PER_PGD2; i++) {
+ addr = i * (unsigned long long)PGDIR_SIZE;
+ pgd = kvm->arch.pgd + i;
+ pud = pud_offset(pgd, addr);
+ pud_page = virt_to_page(pud);
+
+ if (pud_none(*pud))
+ continue;
+
+ BUG_ON(pud_bad(*pud));
+
+ pmd = pmd_offset(pud, addr);
+ free_stage2_ptes(pmd, addr);
+ page = virt_to_page((void *)pmd);
+ WARN_ON(page_count(page) != 1);
+ pmd_free(NULL, pmd);
+ put_page(pud_page);
+ }
+
+ WARN_ON(page_count(pud_page) != 1);
+ free_pages((unsigned long)kvm->arch.pgd, PGD2_ORDER);
+ kvm->arch.pgd = NULL;
+}
+
+/*
+ * Clear a stage-2 PTE, lowering the various ref-counts. Also takes
+ * care of invalidating the TLBs. Must be called while holding
+ * pgd_lock, otherwise another faulting VCPU may come in and mess
+ * things behind our back.
+ */
+static void stage2_clear_pte(struct kvm *kvm, phys_addr_t addr)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ struct page *page;
+
+ kvm_debug("Clearing PTE&%08llx\n", addr);
+ pgd = kvm->arch.pgd + pgd_index(addr);
+ pud = pud_offset(pgd, addr);
+ BUG_ON(pud_none(*pud));
+
+ pmd = pmd_offset(pud, addr);
+ BUG_ON(pmd_none(*pmd));
+
+ pte = pte_offset_kernel(pmd, addr);
+ set_pte_ext(pte, __pte(0), 0);
+
+ page = virt_to_page(pte);
+ put_page(page);
+ if (page_count(page) != 1) {
+ __kvm_tlb_flush_vmid(kvm);
+ return;
+ }
+
+ /* Need to remove pte page */
+ pmd_clear(pmd);
+ __kvm_tlb_flush_vmid(kvm);
+ pte_free_kernel(NULL, (pte_t *)((unsigned long)pte & PAGE_MASK));
+
+ page = virt_to_page(pmd);
+ put_page(page);
+ if (page_count(page) != 1)
+ return;
+
+ /*
+ * Need to remove pmd page. This is the worst case, and we end
+ * up invalidating the TLB twice. No big deal.
+ */
+ pud_clear(pud);
+ __kvm_tlb_flush_vmid(kvm);
+ pmd_free(NULL, (pmd_t *)((unsigned long)pmd & PAGE_MASK));
+
+ page = virt_to_page(pud);
+ put_page(page);
+}
+
+static void stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+			   phys_addr_t addr, const pte_t *new_pte)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ /* Create 2nd stage page table mapping - Level 1 */
+ pgd = kvm->arch.pgd + pgd_index(addr);
+ pud = pud_offset(pgd, addr);
+ if (pud_none(*pud)) {
+ if (!cache)
+ return; /* ignore calls from kvm_set_spte_hva */
+ pmd = mmu_memory_cache_alloc(cache);
+ pud_populate(NULL, pud, pmd);
+ pmd += pmd_index(addr);
+ get_page(virt_to_page(pud));
+ } else
+ pmd = pmd_offset(pud, addr);
+
+ /* Create 2nd stage page table mapping - Level 2 */
+ if (pmd_none(*pmd)) {
+ if (!cache)
+ return; /* ignore calls from kvm_set_spte_hva */
+ pte = mmu_memory_cache_alloc(cache);
+ clean_pte_table(pte);
+ pmd_populate_kernel(NULL, pmd, pte);
+ pte += pte_index(addr);
+ get_page(virt_to_page(pmd));
+ } else
+ pte = pte_offset_kernel(pmd, addr);
+
+ /* Create 2nd stage page table mapping - Level 3 */
+ BUG_ON(pte_none(pte));
+ set_pte_ext(pte, *new_pte, 0);
+ get_page(virt_to_page(pte));
+}
+
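
A note on the get_page()/put_page() pairs in stage2_set_pte() and stage2_clear_pte(): each present entry installed in a table page takes one extra reference on that page, so a table page's count is one (for the allocation) plus its number of live entries. That invariant is what lets the clear and teardown paths free a table as soon as its count drops back to one. A hypothetical helper, purely to state the invariant (not part of the patch):

	static inline bool stage2_table_empty(void *table)
	{
		/* only the allocation reference remains: no live entries */
		return page_count(virt_to_page(table)) == 1;
	}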
+/**
+ * kvm_phys_addr_ioremap - map a device range to guest IPA
+ *
+ * @kvm: The KVM pointer
+ * @guest_ipa: The IPA at which to insert the mapping
+ * @pa: The physical address of the device
+ * @size: The size of the mapping
+ */
+int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
+			  phys_addr_t pa, unsigned long size)
+{
+ phys_addr_t addr, end;
+ pgprot_t prot;
+ int ret = 0;
+ unsigned long pfn;
+ struct kvm_mmu_memory_cache cache = { 0, };
+
+ end = (guest_ipa + size + PAGE_SIZE - 1) & PAGE_MASK;
+ prot = __pgprot(get_mem_type_prot_pte(MT_DEVICE) | L_PTE_USER |
+ L_PTE2_READ | L_PTE2_WRITE);
+ pfn = __phys_to_pfn(pa);
+
+ for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
+ pte_t pte = pfn_pte(pfn, prot);
+
+ ret = mmu_topup_memory_cache(&cache, 2, 2);
+ if (ret)
+ goto out;
+ spin_lock(&kvm->arch.pgd_lock);
+ stage2_set_pte(kvm, &cache, addr, &pte);
+ spin_unlock(&kvm->arch.pgd_lock);
+
+ pfn++;
+ }
+
+out:
+ mmu_free_memory_cache(&cache);
+ return ret;
+}
+
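
A hypothetical caller of kvm_phys_addr_ioremap(), only to show the intended use; GUEST_DEV_IPA and dev_pa are made-up names, and nothing in this patch adds such a user:

	ret = kvm_phys_addr_ioremap(kvm, GUEST_DEV_IPA, dev_pa, PAGE_SIZE);
	if (ret)
		kvm_err("failed to map device into guest: %d\n", ret);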
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
return -EINVAL;
}
+
+static bool hva_to_gpa(struct kvm *kvm, unsigned long hva, gpa_t *gpa)
+{
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *memslot;
+ bool found = false;
+
+ slots = kvm_memslots(kvm);
+
+ /* we only care about the pages that the guest sees */
+ kvm_for_each_memslot(memslot, slots) {
+ unsigned long start = memslot->userspace_addr;
+ unsigned long end;
+
+ end = start + (memslot->npages << PAGE_SHIFT);
+ if (hva >= start && hva < end) {
+ gpa_t gpa_offset = hva - start;
+ *gpa = (memslot->base_gfn << PAGE_SHIFT) + gpa_offset;
+ found = true;
+ /* no overlapping memslots allowed: break */
+ break;
+ }
+ }
+
+ return found;
+}
+
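A worked example of the translation done by hva_to_gpa(), with made-up numbers:

	/*
	 * memslot: userspace_addr = 0x40000000, base_gfn = 0x80000,
	 *          npages = 0x1000  =>  covers hva 0x40000000..0x40ffffff
	 *
	 * hva 0x40003000 lies inside the slot, so:
	 *   gpa = (0x80000 << PAGE_SHIFT) + (0x40003000 - 0x40000000)
	 *       = 0x80000000 + 0x3000 = 0x80003000
	 */
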
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+{
+ bool found;
+ gpa_t gpa;
+
+ if (!kvm->arch.pgd)
+ return 0;
+
+ found = hva_to_gpa(kvm, hva, &gpa);
+ if (found) {
+ spin_lock(&kvm->arch.pgd_lock);
+ stage2_clear_pte(kvm, gpa);
+ spin_unlock(&kvm->arch.pgd_lock);
+ }
+ return 0;
+}
+
+int kvm_unmap_hva_range(struct kvm *kvm,
+			unsigned long start, unsigned long end)
+{
+ unsigned long addr;
+ int ret;
+
+ BUG_ON((start | end) & (~PAGE_MASK));
+
+ for (addr = start; addr < end; addr += PAGE_SIZE) {
+ ret = kvm_unmap_hva(kvm, addr);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+{
+ gpa_t gpa;
+ bool found;
+
+ if (!kvm->arch.pgd)
+ return;
+
+ found = hva_to_gpa(kvm, hva, &gpa);
+ if (found) {
+ spin_lock(&kvm->arch.pgd_lock);
+ stage2_set_pte(kvm, NULL, gpa, &pte);
+ spin_unlock(&kvm->arch.pgd_lock);
+ __kvm_tlb_flush_vmid(kvm);
+ }
+}
+
+void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
+{
+ mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
+}
@@ -56,9 +56,11 @@ static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK;
static unsigned int ecc_mask __initdata = 0;
pgprot_t pgprot_user;
pgprot_t pgprot_kernel;
+pgprot_t pgprot_guest;
EXPORT_SYMBOL(pgprot_user);
EXPORT_SYMBOL(pgprot_kernel);
+EXPORT_SYMBOL(pgprot_guest);
struct cachepolicy {
const char policy[16];
@@ -514,6 +516,7 @@ static void __init build_mem_type_table(void)
pgprot_user = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |