Message ID | 20110603150414.17011.72525.stgit@ubuntu (mailing list archive)
---|---
State | New, archived
On 06/03/2011 06:04 PM, Christoffer Dall wrote:
> Handles the guest faults in KVM by mapping in corresponding user pages
> in the 2nd stage page tables.
>
> +static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
> +                          gfn_t gfn, struct kvm_memory_slot *memslot)
> +{
> +        pfn_t pfn;
> +        pgd_t *pgd;
> +        pmd_t *pmd;
> +        pte_t *pte, new_pte;
> +
> +        pfn = gfn_to_pfn(vcpu->kvm, gfn);
> +
> +        if (is_error_pfn(pfn)) {
> +                kvm_err(-EFAULT, "Guest gfn %u (0x%08lx) does not have "
> +                                "corresponding host mapping",
> +                                gfn, gfn << PAGE_SHIFT);
> +                return -EFAULT;
> +        }
> +
> +        /* Create 2nd stage page table mapping - Level 1 */
> +        pgd = vcpu->kvm->arch.pgd + pgd_index(fault_ipa);
> +        if (pgd_none(*pgd)) {
> +                pmd = pmd_alloc_one(NULL, fault_ipa);
> +                if (!pmd) {
> +                        kvm_err(-ENOMEM, "Cannot allocate 2nd stage pmd");
> +                        return -ENOMEM;
> +                }
> +                pgd_populate(NULL, pgd, pmd);
> +                pmd += pmd_index(fault_ipa);
> +        } else
> +                pmd = pmd_offset(pgd, fault_ipa);
> +
> +        /* Create 2nd stage page table mapping - Level 2 */
> +        if (pmd_none(*pmd)) {
> +                pte = pte_alloc_one_kernel(NULL, fault_ipa);
> +                if (!pte) {
> +                        kvm_err(-ENOMEM, "Cannot allocate 2nd stage pte");
> +                        return -ENOMEM;
> +                }
> +                pmd_populate_kernel(NULL, pmd, pte);
> +                pte += pte_index(fault_ipa);
> +        } else
> +                pte = pte_offset_kernel(pmd, fault_ipa);
> +
> +        /* Create 2nd stage page table mapping - Level 3 */
> +        new_pte = pfn_pte(pfn, PAGE_KVM_GUEST);
> +        set_pte_ext(pte, new_pte, 0);
> +
> +        return 0;
> +}
> +
> +#define HSR_ABT_FS        (0x3f)
> +#define HPFAR_MASK        (~0xf)
>  int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
>  {
> +        unsigned long hsr_ec;
> +        unsigned long fault_status;
> +        phys_addr_t fault_ipa;
> +        struct kvm_memory_slot *memslot = NULL;
> +        bool is_iabt;
> +        gfn_t gfn;
> +
> +        hsr_ec = vcpu->arch.hsr >> HSR_EC_SHIFT;
> +        is_iabt = (hsr_ec == HSR_EC_IABT);
> +
> +        /* Check that the second stage fault is a translation fault */
> +        fault_status = vcpu->arch.hsr & HSR_ABT_FS;
> +        if ((fault_status & 0x3c) != 0x4) {
> +                kvm_err(-EFAULT, "Unsupported fault status: %x",
> +                        fault_status & 0x3c);
> +                return -EFAULT;
> +        }
> +
> +        fault_ipa = ((phys_addr_t)vcpu->arch.hpfar & HPFAR_MASK) << 8;
> +
> +        gfn = fault_ipa >> PAGE_SHIFT;
> +        if (!kvm_is_visible_gfn(vcpu->kvm, gfn))
> +                goto io_mem_abort;
> +
> +        memslot = gfn_to_memslot(vcpu->kvm, gfn);
> +        if (memslot->user_alloc)
> +                return user_mem_abort(vcpu, fault_ipa, gfn, memslot);

Non-user_alloc should not exist for ARM (and are not supported for x86
these days, except for a few implementation internal slots).

> +
> +io_mem_abort:
> +        if (is_iabt) {
> +                kvm_err(-EFAULT, "Inst. abort on I/O address");
> +                return -EFAULT;
> +        }
> +
> +        kvm_msg("I/O address abort...");
>          KVMARM_NOT_IMPLEMENTED();
>          return -EINVAL;
>  }

Okay, this is about a zillion times simpler than x86. Congratulations.

What are your thoughts about mmu notifier support?
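(Editorial aside: the abort path Avi quotes above derives everything from two Hyp-mode registers, HSR (the syndrome) and HPFAR (the faulting IPA). Below is a standalone C sketch of the same arithmetic with invented register values. HSR_EC_SHIFT is not defined in the posted hunks, so the architectural field position of 26 is assumed here, as are the EC codes 0x20/0x24 for instruction/data aborts.)

```c
#include <stdint.h>
#include <stdio.h>

#define HSR_EC_SHIFT    26      /* assumed: EC field lives in HSR[31:26] */
#define HSR_EC_IABT     0x20    /* assumed architectural EC codes */
#define HSR_EC_DABT     0x24
#define HSR_ABT_FS      (0x3f)  /* from the patch */
#define HPFAR_MASK      (~0xf)  /* from the patch */

int main(void)
{
    /* Invented register values for illustration only: a stage-2 data
     * abort, translation fault at level 3 (FS = 0b000111). */
    uint32_t hsr   = ((uint32_t)HSR_EC_DABT << HSR_EC_SHIFT) | 0x07;
    uint32_t hpfar = 0x00012340;

    uint32_t hsr_ec = hsr >> HSR_EC_SHIFT;
    uint32_t fault_status = hsr & HSR_ABT_FS;

    /* The handler only accepts translation faults: FS[5:2] == 0b0001 */
    if ((fault_status & 0x3c) != 0x4) {
        printf("unsupported fault status: %#x\n", fault_status & 0x3c);
        return 1;
    }

    /* HPFAR[31:4] holds IPA[39:12]; masking the low nibble and shifting
     * left by 8 recovers the page-aligned faulting IPA. */
    uint64_t fault_ipa = (uint64_t)(hpfar & HPFAR_MASK) << 8;

    printf("is_iabt=%d fault_ipa=%#llx\n", hsr_ec == HSR_EC_IABT,
           (unsigned long long)fault_ipa);
    return 0;
}
```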
On Sun, Jun 5, 2011 at 2:48 PM, Avi Kivity <avi@redhat.com> wrote:
> On 06/03/2011 06:04 PM, Christoffer Dall wrote:
>>
>> Handles the guest faults in KVM by mapping in corresponding user pages
>> in the 2nd stage page tables.
>>
>> [full patch quote snipped; see Avi's mail above]
>>
>> +        memslot = gfn_to_memslot(vcpu->kvm, gfn);
>> +        if (memslot->user_alloc)
>> +                return user_mem_abort(vcpu, fault_ipa, gfn, memslot);
>
> Non-user_alloc should not exist for ARM (and are not supported for x86
> these days, except for a few implementation internal slots).

ok, I raise an error when (!memslot->user_alloc) instead now. thanks.

>> +
>> +io_mem_abort:
>> +        if (is_iabt) {
>> +                kvm_err(-EFAULT, "Inst. abort on I/O address");
>> +                return -EFAULT;
>> +        }
>> +
>> +        kvm_msg("I/O address abort...");
>>          KVMARM_NOT_IMPLEMENTED();
>>          return -EINVAL;
>>  }
>
> Okay, this is about a zillion times simpler than x86. Congratulations.

Well, I need to handle the I/O aborts, but it's quite simple. What makes
it much more complicated on x86?

> What are your thoughts about mmu notifier support?

For what purpose? There is no swapping on ARM, so the only case that
jumps to my mind is KSM. And I'm not quite there yet :)
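(Christoffer doesn't show the revised check in the thread; a minimal sketch of what rejecting non-user_alloc slots inside kvm_handle_guest_abort might look like follows. The error message and exact placement are assumptions, not quoted from his next revision.)

```c
        memslot = gfn_to_memslot(vcpu->kvm, gfn);
        if (!memslot->user_alloc) {
                /* On ARM every memslot should come from userspace via
                 * KVM_SET_USER_MEMORY_REGION, so anything else is a bug.
                 * Sketch only; the actual v2 code is not in this thread. */
                kvm_err(-EINVAL, "non user-alloc memslot for gfn %u", gfn);
                return -EINVAL;
        }
        return user_mem_abort(vcpu, fault_ipa, gfn, memslot);
```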
On 06/11/2011 01:37 PM, Christoffer Dall wrote:
> > Okay, this is about a zillion times simpler than x86. Congratulations.
>
> Well, I need to handle the I/O aborts, but it's quite simple. What makes
> it much more complicated on x86?

- lack of nested paging on earlier processors
- 97 different paging modes
- lots of extra bits bringing in weird functionality
- lots of optimizations

> > What are your thoughts about mmu notifier support?
>
> For what purpose? There is no swapping on ARM, so the only case that
> jumps to my mind is KSM. And I'm not quite there yet :)

Really? I imagine swapping will be needed for server workloads. mmu
notifiers are also useful for transparent hugepages and page migration.
I imagine these will all follow if ARM servers take off.
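(For readers wondering what "mmu notifier support" would entail here: with CONFIG_MMU_NOTIFIER, core KVM invokes arch callbacks whenever the host kernel unmaps or rewrites a page the guest may hold in its stage-2 tables. The sketch below is modeled on the hooks x86 implemented at the time, kvm_unmap_hva and friends; all the stage2_*/hva_to_gfn helpers in the bodies are hypothetical, invented only to show the shape of the work.)

```c
/* Host is unmapping hva (swap-out, page migration, munmap): drop the
 * stage-2 mapping so the next guest access faults back into
 * user_mem_abort() and picks up whatever page the host installs. */
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
{
        gfn_t gfn = hva_to_gfn(kvm, hva);               /* hypothetical */

        stage2_clear_pte(kvm, gfn << PAGE_SHIFT);       /* hypothetical */
        stage2_flush_tlb_vmid(kvm);                     /* hypothetical */
        return 0;
}

/* Host changed the PTE backing hva (e.g. KSM replacing the page with a
 * shared copy): the simplest correct response is to unmap and refault. */
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
{
        kvm_unmap_hva(kvm, hva);
}
```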
>> > What are your thoughts about mmu notifier support?
>>
>> For what purpose? There is no swapping on ARM, so the only case that
>> jumps to my mind is KSM. And I'm not quite there yet :)
>
> Really? I imagine swapping will be needed for server workloads. mmu
> notifiers are also useful for transparent hugepages and page migration.
> I imagine these will all follow if ARM servers take off.

You may be right, but I guess it depends on how ARM servers are going to
be used. I agree though, swapping could very well be useful in an ARM
server scenario, and at that time mmu notifier integration should be
looked into, but it's not on my critical path yet. Thanks for the input.
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index 14a3e28..f90d120 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -103,4 +103,13 @@
  */
 #define L_PGD_SWAPPER        (_AT(pgdval_t, 1) << 55)        /* swapper_pg_dir entry */

+/*
+ * 2-nd stage PTE definitions for LPAE.
+ */
+#define L_PTE2_READ          (_AT(pteval_t, 1) << 6)         /* HAP[0] */
+#define L_PTE2_WRITE         (_AT(pteval_t, 1) << 7)         /* HAP[1] */
+#define L_PTE2_NORM_WB       (_AT(pteval_t, 3) << 4)         /* MemAttr[3:2] */
+#define L_PTE2_INNER_WB      (_AT(pteval_t, 3) << 2)         /* MemAttr[1:0] */
+
+
 #endif /* _ASM_PGTABLE_3LEVEL_H */
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 2906f35..c4e71ff 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -75,6 +75,7 @@ extern void __pgd_error(const char *file, int line, pgd_t);

 extern pgprot_t              pgprot_user;
 extern pgprot_t              pgprot_kernel;
+extern pgprot_t              pgprot_guest;

 #define _MOD_PROT(p, b)      __pgprot(pgprot_val(p) | (b))

@@ -88,6 +89,9 @@ extern pgprot_t pgprot_kernel;
 #define PAGE_KERNEL          _MOD_PROT(pgprot_kernel, L_PTE_XN)
 #define PAGE_KERNEL_EXEC     pgprot_kernel
 #define PAGE_HYP             _MOD_PROT(pgprot_kernel, L_PTE_USER)
+#define PAGE_KVM_GUEST       _MOD_PROT(pgprot_guest, L_PTE2_READ | \
+                                       L_PTE2_WRITE | L_PTE2_NORM_WB | \
+                                       L_PTE2_INNER_WB)

 #define __PAGE_NONE          __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN)
 #define __PAGE_SHARED        __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN)
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 769fa97..9f485aa 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -139,6 +139,9 @@ int main(void)
   DEFINE(VCPU_PC,            offsetof(struct kvm_vcpu, arch.regs.pc));
   DEFINE(VCPU_CPSR,          offsetof(struct kvm_vcpu, arch.regs.cpsr));
   DEFINE(VCPU_HSR,           offsetof(struct kvm_vcpu, arch.hsr));
+  DEFINE(VCPU_HDFAR,         offsetof(struct kvm_vcpu, arch.hdfar));
+  DEFINE(VCPU_HIFAR,         offsetof(struct kvm_vcpu, arch.hifar));
+  DEFINE(VCPU_HPFAR,         offsetof(struct kvm_vcpu, arch.hpfar));
   DEFINE(KVM_VTTBR,          offsetof(struct kvm, arch.vttbr));
 #endif
   return 0;
diff --git a/arch/arm/kvm/arm_interrupts.S b/arch/arm/kvm/arm_interrupts.S
index 6d3044c..689b337 100644
--- a/arch/arm/kvm/arm_interrupts.S
+++ b/arch/arm/kvm/arm_interrupts.S
@@ -483,7 +483,19 @@ guest_trap:
         stmia   r1, {r3, r4, r5}
         sub     r1, r1, #VCPU_USR_REG(0)

-        mov     r0, #ARM_EXCEPTION_HVC
+        @ Check if we need the fault information
+        lsr     r0, r0, #HSR_EC_SHIFT
+        cmp     r0, #HSR_EC_IABT
+        beq     2f
+        cmp     r0, #HSR_EC_DABT
+        beq     2f
+        b       1f
+2:      mrc     p15, 4, r2, c6, c0, 0   @ HDFAR
+        mrc     p15, 4, r3, c6, c0, 2   @ HIFAR
+        mrc     p15, 4, r4, c6, c0, 4   @ HPFAR
+        add     r5, r1, #VCPU_HDFAR
+        stmia   r5, {r2, r3, r4}
+1:      mov     r0, #ARM_EXCEPTION_HVC
         b       __kvm_vcpu_return

         .align
diff --git a/arch/arm/kvm/arm_mmu.c b/arch/arm/kvm/arm_mmu.c
index 683f971..fe27e59 100644
--- a/arch/arm/kvm/arm_mmu.c
+++ b/arch/arm/kvm/arm_mmu.c
@@ -248,8 +248,94 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
         KVMARM_NOT_IMPLEMENTED();
 }

+static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+                          gfn_t gfn, struct kvm_memory_slot *memslot)
+{
+        pfn_t pfn;
+        pgd_t *pgd;
+        pmd_t *pmd;
+        pte_t *pte, new_pte;
+
+        pfn = gfn_to_pfn(vcpu->kvm, gfn);
+
+        if (is_error_pfn(pfn)) {
+                kvm_err(-EFAULT, "Guest gfn %u (0x%08lx) does not have "
+                                "corresponding host mapping",
+                                gfn, gfn << PAGE_SHIFT);
+                return -EFAULT;
+        }
+
+        /* Create 2nd stage page table mapping - Level 1 */
+        pgd = vcpu->kvm->arch.pgd + pgd_index(fault_ipa);
+        if (pgd_none(*pgd)) {
+                pmd = pmd_alloc_one(NULL, fault_ipa);
+                if (!pmd) {
+                        kvm_err(-ENOMEM, "Cannot allocate 2nd stage pmd");
+                        return -ENOMEM;
+                }
+                pgd_populate(NULL, pgd, pmd);
+                pmd += pmd_index(fault_ipa);
+        } else
+                pmd = pmd_offset(pgd, fault_ipa);
+
+        /* Create 2nd stage page table mapping - Level 2 */
+        if (pmd_none(*pmd)) {
+                pte = pte_alloc_one_kernel(NULL, fault_ipa);
+                if (!pte) {
+                        kvm_err(-ENOMEM, "Cannot allocate 2nd stage pte");
+                        return -ENOMEM;
+                }
+                pmd_populate_kernel(NULL, pmd, pte);
+                pte += pte_index(fault_ipa);
+        } else
+                pte = pte_offset_kernel(pmd, fault_ipa);
+
+        /* Create 2nd stage page table mapping - Level 3 */
+        new_pte = pfn_pte(pfn, PAGE_KVM_GUEST);
+        set_pte_ext(pte, new_pte, 0);
+
+        return 0;
+}
+
+#define HSR_ABT_FS        (0x3f)
+#define HPFAR_MASK        (~0xf)
 int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
+        unsigned long hsr_ec;
+        unsigned long fault_status;
+        phys_addr_t fault_ipa;
+        struct kvm_memory_slot *memslot = NULL;
+        bool is_iabt;
+        gfn_t gfn;
+
+        hsr_ec = vcpu->arch.hsr >> HSR_EC_SHIFT;
+        is_iabt = (hsr_ec == HSR_EC_IABT);
+
+        /* Check that the second stage fault is a translation fault */
+        fault_status = vcpu->arch.hsr & HSR_ABT_FS;
+        if ((fault_status & 0x3c) != 0x4) {
+                kvm_err(-EFAULT, "Unsupported fault status: %x",
+                        fault_status & 0x3c);
+                return -EFAULT;
+        }
+
+        fault_ipa = ((phys_addr_t)vcpu->arch.hpfar & HPFAR_MASK) << 8;
+
+        gfn = fault_ipa >> PAGE_SHIFT;
+        if (!kvm_is_visible_gfn(vcpu->kvm, gfn))
+                goto io_mem_abort;
+
+        memslot = gfn_to_memslot(vcpu->kvm, gfn);
+        if (memslot->user_alloc)
+                return user_mem_abort(vcpu, fault_ipa, gfn, memslot);
+
+io_mem_abort:
+        if (is_iabt) {
+                kvm_err(-EFAULT, "Inst. abort on I/O address");
+                return -EFAULT;
+        }
+
+        kvm_msg("I/O address abort...");
         KVMARM_NOT_IMPLEMENTED();
         return -EINVAL;
 }
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index d1da559..c5cbcd3 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -55,9 +55,11 @@ static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK;
 static unsigned int ecc_mask __initdata = 0;
 pgprot_t pgprot_user;
 pgprot_t pgprot_kernel;
+pgprot_t pgprot_guest;

 EXPORT_SYMBOL(pgprot_user);
 EXPORT_SYMBOL(pgprot_kernel);
+EXPORT_SYMBOL(pgprot_guest);

 struct cachepolicy {
         const char      policy[16];
@@ -497,6 +499,7 @@ static void __init build_mem_type_table(void)
         pgprot_user   = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
         pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
                                  L_PTE_DIRTY | kern_pgprot);
+        pgprot_guest  = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG);

         mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
         mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
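(A final note on the PAGE_KVM_GUEST protection value introduced above: the four L_PTE2_* constants set the stage-2 access permissions (HAP read/write) and mark the memory write-back cacheable at both attribute levels. The throwaway user-space harness below shows what they OR together to; the constants are copied from the hunk, but the harness itself is illustrative, not kernel code.)

```c
#include <stdint.h>
#include <stdio.h>

typedef uint64_t pteval_t;
#define _AT(t, x)       ((t)(x))

/* Copied from the pgtable-3level.h hunk above */
#define L_PTE2_READ     (_AT(pteval_t, 1) << 6)         /* HAP[0] */
#define L_PTE2_WRITE    (_AT(pteval_t, 1) << 7)         /* HAP[1] */
#define L_PTE2_NORM_WB  (_AT(pteval_t, 3) << 4)         /* MemAttr[3:2] */
#define L_PTE2_INNER_WB (_AT(pteval_t, 3) << 2)         /* MemAttr[1:0] */

int main(void)
{
    pteval_t attrs = L_PTE2_READ | L_PTE2_WRITE |
                     L_PTE2_NORM_WB | L_PTE2_INNER_WB;

    /* 0x40|0x80 (read+write) | 0x30|0x0c (write-back) == 0xfc */
    printf("PAGE_KVM_GUEST stage-2 bits: %#llx\n",
           (unsigned long long)attrs);
    return 0;
}
```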