
[v4,08/10] ARM: KVM: Handle guest faults in KVM

Message ID 20110806103957.27198.28802.stgit@localhost6.localdomain6 (mailing list archive)
State New, archived

Commit Message

Christoffer Dall Aug. 6, 2011, 10:39 a.m. UTC
Handles guest faults in KVM by mapping the corresponding user pages
into the 2nd stage page tables.

Introduces a new ARM-specific kernel memory type, PAGE_KVM_GUEST, and
a pgprot_guest variable, used to map 2nd stage memory for KVM guests.

Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
---
 arch/arm/include/asm/pgtable-3level.h |    9 +++
 arch/arm/include/asm/pgtable.h        |    4 +
 arch/arm/kvm/arm_mmu.c                |  107 ++++++++++++++++++++++++++++++++-
 arch/arm/mm/mmu.c                     |    3 +
 4 files changed, 121 insertions(+), 2 deletions(-)
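
A note on the address arithmetic in kvm_handle_guest_abort below: on
ARMv7 with the Virtualization Extensions, HPFAR[31:4] holds bits
[39:12] of the faulting IPA, so the fault address is recovered by
masking off the low four bits and shifting left by 8. A standalone
sketch of that conversion (the helper name is illustrative, not part
of the patch):

	#include <stdint.h>

	/* HPFAR[31:4] = FIPA[39:12]; bits [3:0] are reserved.  Masking
	 * them off and shifting left by 8 yields the page-aligned
	 * 40-bit intermediate physical address. */
	static inline uint64_t hpfar_to_fault_ipa(uint32_t hpfar)
	{
		return (uint64_t)(hpfar & ~0xfUL) << 8;
	}

	/* Example: hpfar == 0x00123450 gives fault_ipa == 0x12345000,
	 * matching fault_ipa = (hpfar & HPFAR_MASK) << 8 in the patch. */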



Comments

Avi Kivity Aug. 9, 2011, 11:24 a.m. UTC | #1
On 08/06/2011 01:39 PM, Christoffer Dall wrote:
> Handles guest faults in KVM by mapping the corresponding user pages
> into the 2nd stage page tables.
>
> Introduces a new ARM-specific kernel memory type, PAGE_KVM_GUEST, and
> a pgprot_guest variable, used to map 2nd stage memory for KVM guests.
>
>
>
> +static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
> +			  gfn_t gfn, struct kvm_memory_slot *memslot)
> +{
> +	pfn_t pfn;
> +	pgd_t *pgd;
> +	pud_t *pud;
> +	pmd_t *pmd;
> +	pte_t *pte, new_pte;
> +
> +	pfn = gfn_to_pfn(vcpu->kvm, gfn);
> +
> +	if (is_error_pfn(pfn)) {
> +		kvm_err(-EFAULT, "Guest gfn %u (0x%08lx) does not have "
> +				"corresponding host mapping",
> +				gfn, gfn << PAGE_SHIFT);
> +		return -EFAULT;
> +	}
> +
> +	/* Create 2nd stage page table mapping - Level 1 */
> +	pgd = vcpu->kvm->arch.pgd + pgd_index(fault_ipa);
> +	pud = pud_offset(pgd, fault_ipa);
> +	if (pud_none(*pud)) {
> +		pmd = pmd_alloc_one(NULL, fault_ipa);
> +		if (!pmd) {
> +			kvm_err(-ENOMEM, "Cannot allocate 2nd stage pmd");
> +			return -ENOMEM;
> +		}
> +		pud_populate(NULL, pud, pmd);
> +		pmd += pmd_index(fault_ipa);

Don't we need locking here?  Another vcpu may have executed pud_populate 
concurrently.
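
Perhaps something along these lines would do (an untested sketch,
assuming kvm->mmu_lock is used to guard the stage-2 tables): allocate
outside the lock, then re-check the entry under the lock so a
concurrent vcpu's populate wins:

	pmd_t *new_pmd = pmd_alloc_one(NULL, fault_ipa);
	if (!new_pmd)
		return -ENOMEM;
	spin_lock(&vcpu->kvm->mmu_lock);
	if (pud_none(*pud))
		pud_populate(NULL, pud, new_pmd);
	else
		pmd_free(NULL, new_pmd);	/* lost the race; drop ours */
	pmd = pmd_offset(pud, fault_ipa);
	/* ... repeat the allocate-then-recheck pattern at the pmd
	 * level, set the pte, then spin_unlock(&vcpu->kvm->mmu_lock). */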

> +	} else
> +		pmd = pmd_offset(pud, fault_ipa);
> +
> +	/* Create 2nd stage page table mapping - Level 2 */
> +	if (pmd_none(*pmd)) {
> +		pte = pte_alloc_one_kernel(NULL, fault_ipa);
> +		if (!pte) {
> +			kvm_err(-ENOMEM, "Cannot allocate 2nd stage pte");
> +			return -ENOMEM;
> +		}
> +		pmd_populate_kernel(NULL, pmd, pte);
> +		pte += pte_index(fault_ipa);
> +	} else
> +		pte = pte_offset_kernel(pmd, fault_ipa);
> +
> +	/* Create 2nd stage page table mapping - Level 3 */
> +	new_pte = pfn_pte(pfn, PAGE_KVM_GUEST);
> +	set_pte_ext(pte, new_pte, 0);
> +
> +	return 0;
> +}
> +
> +#define HSR_ABT_FS	(0x3f)
>
Christoffer Dall Aug. 9, 2011, 11:35 a.m. UTC | #2
On Aug 9, 2011, at 1:24 PM, Avi Kivity wrote:

> On 08/06/2011 01:39 PM, Christoffer Dall wrote:
>> Handles guest faults in KVM by mapping the corresponding user pages
>> into the 2nd stage page tables.
>> 
>> Introduces a new ARM-specific kernel memory type, PAGE_KVM_GUEST, and
>> a pgprot_guest variable, used to map 2nd stage memory for KVM guests.
>> 
>> 
>> 
>> +static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>> +			  gfn_t gfn, struct kvm_memory_slot *memslot)
>> +{
>> +	pfn_t pfn;
>> +	pgd_t *pgd;
>> +	pud_t *pud;
>> +	pmd_t *pmd;
>> +	pte_t *pte, new_pte;
>> +
>> +	pfn = gfn_to_pfn(vcpu->kvm, gfn);
>> +
>> +	if (is_error_pfn(pfn)) {
>> +		kvm_err(-EFAULT, "Guest gfn %u (0x%08lx) does not have "
>> +				"corresponding host mapping",
>> +		gfn, gfn << PAGE_SHIFT);
>> +		return -EFAULT;
>> +	}
>> +
>> +	/* Create 2nd stage page table mapping - Level 1 */
>> +	pgd = vcpu->kvm->arch.pgd + pgd_index(fault_ipa);
>> +	pud = pud_offset(pgd, fault_ipa);
>> +	if (pud_none(*pud)) {
>> +		pmd = pmd_alloc_one(NULL, fault_ipa);
>> +		if (!pmd) {
>> +			kvm_err(-ENOMEM, "Cannot allocate 2nd stage pmd");
>> +			return -ENOMEM;
>> +		}
>> +		pud_populate(NULL, pud, pmd);
>> +		pmd += pmd_index(fault_ipa);
> 
> Don't we need locking here?  Another vcpu may have executed pud_populate concurrently.

Absolutely, but there is no SMP support yet and only a single VCPU is supported. Again, this is a focus area for the next patch series; thanks for pointing it out.

> 
>> +	} else
>> +		pmd = pmd_offset(pud, fault_ipa);
>> +
>> +	/* Create 2nd stage page table mapping - Level 2 */
>> +	if (pmd_none(*pmd)) {
>> +		pte = pte_alloc_one_kernel(NULL, fault_ipa);
>> +		if (!pte) {
>> +			kvm_err(-ENOMEM, "Cannot allocate 2nd stage pte");
>> +			return -ENOMEM;
>> +		}
>> +		pmd_populate_kernel(NULL, pmd, pte);
>> +		pte += pte_index(fault_ipa);
>> +	} else
>> +		pte = pte_offset_kernel(pmd, fault_ipa);
>> +
>> +	/* Create 2nd stage page table mapping - Level 3 */
>> +	new_pte = pfn_pte(pfn, PAGE_KVM_GUEST);
>> +	set_pte_ext(pte, new_pte, 0);
>> +
>> +	return 0;
>> +}
>> +
>> +#define HSR_ABT_FS	(0x3f)
>> 
> 
> -- 
> error compiling committee.c: too many arguments to function
> 


Patch

diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index a6261f5..d8c5c14 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -104,4 +104,13 @@ 
  */
 #define L_PGD_SWAPPER		(_AT(pgdval_t, 1) << 55)	/* swapper_pg_dir entry */
 
+/*
+ * 2-nd stage PTE definitions for LPAE.
+ */
+#define L_PTE2_READ		(_AT(pteval_t, 1) << 6)	/* HAP[0] */
+#define L_PTE2_WRITE		(_AT(pteval_t, 1) << 7)	/* HAP[1] */
+#define L_PTE2_NORM_WB		(_AT(pteval_t, 3) << 4)	/* MemAttr[3:2] */
+#define L_PTE2_INNER_WB		(_AT(pteval_t, 3) << 2)	/* MemAttr[1:0] */
+
+
 #endif /* _ASM_PGTABLE_3LEVEL_H */
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index db3b6e8..0e0ca21 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -76,6 +76,7 @@  extern void __pgd_error(const char *file, int line, pgd_t);
 
 extern pgprot_t		pgprot_user;
 extern pgprot_t		pgprot_kernel;
+extern pgprot_t		pgprot_guest;
 
 #define _MOD_PROT(p, b)	__pgprot(pgprot_val(p) | (b))
 
@@ -89,6 +90,9 @@  extern pgprot_t		pgprot_kernel;
 #define PAGE_KERNEL		_MOD_PROT(pgprot_kernel, L_PTE_XN)
 #define PAGE_KERNEL_EXEC	pgprot_kernel
 #define PAGE_HYP		_MOD_PROT(pgprot_kernel, L_PTE_USER)
+#define PAGE_KVM_GUEST		_MOD_PROT(pgprot_guest, L_PTE2_READ | \
+					  L_PTE2_WRITE | L_PTE2_NORM_WB | \
+					  L_PTE2_INNER_WB)
 
 #define __PAGE_NONE		__pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN)
 #define __PAGE_SHARED		__pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN)
diff --git a/arch/arm/kvm/arm_mmu.c b/arch/arm/kvm/arm_mmu.c
index 5af0a7c..6040aff 100644
--- a/arch/arm/kvm/arm_mmu.c
+++ b/arch/arm/kvm/arm_mmu.c
@@ -290,8 +290,111 @@  void kvm_free_stage2_pgd(struct kvm *kvm)
 	kvm->arch.pgd = NULL;
 }
 
+static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+			  gfn_t gfn, struct kvm_memory_slot *memslot)
+{
+	pfn_t pfn;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte, new_pte;
+
+	pfn = gfn_to_pfn(vcpu->kvm, gfn);
+
+	if (is_error_pfn(pfn)) {
+		kvm_err(-EFAULT, "Guest gfn %u (0x%08lx) does not have "
+				"corresponding host mapping",
+				gfn, gfn << PAGE_SHIFT);
+		return -EFAULT;
+	}
+
+	/* Create 2nd stage page table mapping - Level 1 */
+	pgd = vcpu->kvm->arch.pgd + pgd_index(fault_ipa);
+	pud = pud_offset(pgd, fault_ipa);
+	if (pud_none(*pud)) {
+		pmd = pmd_alloc_one(NULL, fault_ipa);
+		if (!pmd) {
+			kvm_err(-ENOMEM, "Cannot allocate 2nd stage pmd");
+			return -ENOMEM;
+		}
+		pud_populate(NULL, pud, pmd);
+		pmd += pmd_index(fault_ipa);
+	} else
+		pmd = pmd_offset(pud, fault_ipa);
+
+	/* Create 2nd stage page table mapping - Level 2 */
+	if (pmd_none(*pmd)) {
+		pte = pte_alloc_one_kernel(NULL, fault_ipa);
+		if (!pte) {
+			kvm_err(-ENOMEM, "Cannot allocate 2nd stage pte");
+			return -ENOMEM;
+		}
+		pmd_populate_kernel(NULL, pmd, pte);
+		pte += pte_index(fault_ipa);
+	} else
+		pte = pte_offset_kernel(pmd, fault_ipa);
+
+	/* Create 2nd stage page table mapping - Level 3 */
+	new_pte = pfn_pte(pfn, PAGE_KVM_GUEST);
+	set_pte_ext(pte, new_pte, 0);
+
+	return 0;
+}
+
+#define HSR_ABT_FS	(0x3f)
+#define HPFAR_MASK	(~0xf)
+
+/**
+ * kvm_handle_guest_abort - handles all 2nd stage aborts
+ * @vcpu:	the VCPU pointer
+ * @run:	the kvm_run structure
+ *
+ * Any abort that gets to the host is almost guaranteed to be caused by a
+ * missing second stage translation table entry, which can mean that either the
+ * guest simply needs more memory and we must allocate an appropriate page or it
+ * can mean that the guest tried to access I/O memory, which is emulated by user
+ * space. The distinction is based on the IPA causing the fault and whether this
+ * memory region has been registered as standard RAM by user space.
+ */
 int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	KVMARM_NOT_IMPLEMENTED();
-	return -EINVAL;
+	unsigned long hsr_ec;
+	unsigned long fault_status;
+	phys_addr_t fault_ipa;
+	struct kvm_memory_slot *memslot = NULL;
+	bool is_iabt;
+	gfn_t gfn;
+
+	hsr_ec = vcpu->arch.hsr >> HSR_EC_SHIFT;
+	is_iabt = (hsr_ec == HSR_EC_IABT);
+
+	/* Check that the second stage fault is a translation fault */
+	fault_status = vcpu->arch.hsr & HSR_ABT_FS;
+	if ((fault_status & 0x3c) != 0x4) {
+		kvm_err(-EFAULT, "Unsupported fault status: %x",
+				fault_status & 0x3c);
+		return -EFAULT;
+	}
+
+	fault_ipa = ((phys_addr_t)vcpu->arch.hpfar & HPFAR_MASK) << 8;
+
+	gfn = fault_ipa >> PAGE_SHIFT;
+	if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+		if (is_iabt) {
+			kvm_err(-EFAULT, "Inst. abort on I/O address");
+			return -EFAULT;
+		}
+
+		kvm_msg("I/O address abort...");
+		KVMARM_NOT_IMPLEMENTED();
+		return -EINVAL;
+	}
+
+	memslot = gfn_to_memslot(vcpu->kvm, gfn);
+	if (!memslot->user_alloc) {
+		kvm_err(-EINVAL, "non user-alloc memslots not supported");
+		return -EINVAL;
+	}
+
+	return user_mem_abort(vcpu, fault_ipa, gfn, memslot);
 }
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 749475e..c025e65 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -55,9 +55,11 @@  static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK;
 static unsigned int ecc_mask __initdata = 0;
 pgprot_t pgprot_user;
 pgprot_t pgprot_kernel;
+pgprot_t pgprot_guest;
 
 EXPORT_SYMBOL(pgprot_user);
 EXPORT_SYMBOL(pgprot_kernel);
+EXPORT_SYMBOL(pgprot_guest);
 
 struct cachepolicy {
 	const char	policy[16];
@@ -497,6 +499,7 @@  static void __init build_mem_type_table(void)
 	pgprot_user   = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
 	pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
 				 L_PTE_DIRTY | kern_pgprot);
+	pgprot_guest  = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG);
 
 	mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
 	mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;