[v3,09/11] KVM: MMU: split kvm_mmu_pte_write function

Message ID	4E5C4CE8.90500@cn.fujitsu.com (mailing list archive)
State	New, archived
Headers	show Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter1.kernel.org (8.14.4/8.14.4) with ESMTP id p7U2ZfOR012293 for <patchwork-kvm@patchwork.kernel.org>; Tue, 30 Aug 2011 02:35:42 GMT Message-ID: <4E5C4CE8.90500@cn.fujitsu.com> Date: Tue, 30 Aug 2011 10:37:28 +0800 From: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com> User-Agent: Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.17) Gecko/20110428 Fedora/3.1.10-1.fc15 Thunderbird/3.1.10 MIME-Version: 1.0 To: Avi Kivity <avi@redhat.com> CC: Marcelo Tosatti <mtosatti@redhat.com>, LKML <linux-kernel@vger.kernel.org>, KVM <kvm@vger.kernel.org> Subject: [PATCH v3 09/11] KVM: MMU: split kvm_mmu_pte_write function References: <4E5C4C20.3000403@cn.fujitsu.com> In-Reply-To: <4E5C4C20.3000403@cn.fujitsu.com> Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset=UTF-8 Sender: kvm-owner@vger.kernel.org Precedence: bulk

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9ac0dc8..cfe24fe 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3529,48 +3529,28 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu)
 	return !!(spte && (*spte & shadow_accessed_mask));
 }

-void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
-		       const u8 *new, int bytes)
+static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
+				    const u8 *new, int *bytes)
 {
-	gfn_t gfn = gpa >> PAGE_SHIFT;
-	union kvm_mmu_page_role mask = { .word = 0 };
-	struct kvm_mmu_page *sp;
-	struct hlist_node *node;
-	LIST_HEAD(invalid_list);
-	u64 entry, gentry, *spte;
-	unsigned pte_size, page_offset, misaligned, quadrant, offset;
-	int level, npte, r, flooded = 0;
-	bool remote_flush, local_flush, zap_page;
-
-	/*
-	 * If we don't have indirect shadow pages, it means no page is
-	 * write-protected, so we can exit simply.
-	 */
-	if (!ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages))
-		return;
-
-	zap_page = remote_flush = local_flush = false;
-	offset = offset_in_page(gpa);
-
-	pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
+	u64 gentry;
+	int r;

 	/*
 	 * Assume that the pte write on a page table of the same type
 	 * as the current vcpu paging mode since we update the sptes only
 	 * when they have the same mode.
 	 */
-	if (is_pae(vcpu) && bytes == 4) {
+	if (is_pae(vcpu) && *bytes == 4) {
 		/* Handle a 32-bit guest writing two halves of a 64-bit gpte */
-		gpa &= ~(gpa_t)7;
-		bytes = 8;
-
-		r = kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8));
+		*gpa &= ~(gpa_t)7;
+		*bytes = 8;
+		r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, min(*bytes, 8));
 		if (r)
 			gentry = 0;
 		new = (const u8 *)&gentry;
 	}

-	switch (bytes) {
+	switch (*bytes) {
 	case 4:
 		gentry = *(const u32 *)new;
 		break;
@@ -3582,71 +3562,128 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 		break;
 	}

-	/*
-	 * No need to care whether allocation memory is successful
-	 * or not since pte prefetch is skiped if it does not have
-	 * enough objects in the cache.
-	 */
-	mmu_topup_memory_caches(vcpu);
-	spin_lock(&vcpu->kvm->mmu_lock);
-	++vcpu->kvm->stat.mmu_pte_write;
-	trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
+	return gentry;
+}
+
+/*
+ * If we're seeing too many writes to a page, it may no longer be a page table,
+ * or we may be forking, in which case it is better to unmap the page.
+ */
+static bool detect_write_flooding(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+	bool flooded = false;
+
 	if (gfn == vcpu->arch.last_pt_write_gfn
 	    && !last_updated_pte_accessed(vcpu)) {
 		++vcpu->arch.last_pt_write_count;
 		if (vcpu->arch.last_pt_write_count >= 3)
-			flooded = 1;
+			flooded = true;
 	} else {
 		vcpu->arch.last_pt_write_gfn = gfn;
 		vcpu->arch.last_pt_write_count = 1;
 		vcpu->arch.last_pte_updated = NULL;
 	}

+	return flooded;
+}
+
+/*
+ * Misaligned accesses are too much trouble to fix up; also, they usually
+ * indicate a page is not used as a page table.
+ */
+static bool detect_write_misaligned(struct kvm_mmu_page *sp, gpa_t gpa,
+				    int bytes)
+{
+	unsigned offset, pte_size, misaligned;
+
+	pgprintk("misaligned: gpa %llx bytes %d role %x\n",
+		 gpa, bytes, sp->role.word);
+
+	offset = offset_in_page(gpa);
+	pte_size = sp->role.cr4_pae ? 8 : 4;
+	misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
+	misaligned |= bytes < 4;
+
+	return misaligned;
+}
+
+static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte)
+{
+	unsigned page_offset, quadrant;
+	u64 *spte;
+	int level;
+
+	page_offset = offset_in_page(gpa);
+	level = sp->role.level;
+	*nspte = 1;
+	if (!sp->role.cr4_pae) {
+		page_offset <<= 1; /* 32->64 */
+		/*
+		 * A 32-bit pde maps 4MB while the shadow pdes map
+		 * only 2MB. So we need to double the offset again
+		 * and zap two pdes instead of one.
+		 */
+		if (level == PT32_ROOT_LEVEL) {
+			page_offset &= ~7; /* kill rounding error */
+			page_offset <<= 1;
+			*nspte = 2;
+		}
+		quadrant = page_offset >> PAGE_SHIFT;
+		page_offset &= ~PAGE_MASK;
+		if (quadrant != sp->role.quadrant)
+			return NULL;
+	}
+
+	spte = &sp->spt[page_offset / sizeof(*spte)];
+	return spte;
+}
+
+void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
+		       const u8 *new, int bytes)
+{
+	gfn_t gfn = gpa >> PAGE_SHIFT;
+	union kvm_mmu_page_role mask = { .word = 0 };
+	struct kvm_mmu_page *sp;
+	struct hlist_node *node;
+	LIST_HEAD(invalid_list);
+	u64 entry, gentry, *spte;
+	int npte;
+	bool remote_flush, local_flush, zap_page, flooded, misaligned;
+
+	/*
+	 * If we don't have indirect shadow pages, it means no page is
+	 * write-protected, so we can exit simply.
+	 */
+	if (!ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages))
+		return;
+
+	zap_page = remote_flush = local_flush = false;
+
+	pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
+
+	gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, new, &bytes);
+	mmu_topup_memory_caches(vcpu);
+	spin_lock(&vcpu->kvm->mmu_lock);
+	++vcpu->kvm->stat.mmu_pte_write;
+	trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
+
+	flooded = detect_write_flooding(vcpu, gfn);
 	mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
 	for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) {
-		pte_size = sp->role.cr4_pae ? 8 : 4;
-		misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
-		misaligned |= bytes < 4;
+		misaligned = detect_write_misaligned(sp, gpa, bytes);
+
 		if (misaligned || flooded) {
-			/*
-			 * Misaligned accesses are too much trouble to fix
-			 * up; also, they usually indicate a page is not used
-			 * as a page table.
-			 *
-			 * If we're seeing too many writes to a page,
-			 * it may no longer be a page table, or we may be
-			 * forking, in which case it is better to unmap the
-			 * page.
-			 */
-			pgprintk("misaligned: gpa %llx bytes %d role %x\n",
-				 gpa, bytes, sp->role.word);
 			zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
 						     &invalid_list);
 			++vcpu->kvm->stat.mmu_flooded;
 			continue;
 		}
-		page_offset = offset;
-		level = sp->role.level;
-		npte = 1;
-		if (!sp->role.cr4_pae) {
-			page_offset <<= 1; /* 32->64 */
-			/*
-			 * A 32-bit pde maps 4MB while the shadow pdes map
-			 * only 2MB. So we need to double the offset again
-			 * and zap two pdes instead of one.
-			 */
-			if (level == PT32_ROOT_LEVEL) {
-				page_offset &= ~7; /* kill rounding error */
-				page_offset <<= 1;
-				npte = 2;
-			}
-			quadrant = page_offset >> PAGE_SHIFT;
-			page_offset &= ~PAGE_MASK;
-			if (quadrant != sp->role.quadrant)
-				continue;
-		}
+
+		spte = get_written_sptes(sp, gpa, &npte);
+		if (!spte)
+			continue;
+
 		local_flush = true;
-		spte = &sp->spt[page_offset / sizeof(*spte)];
 		while (npte--) {
 			entry = *spte;
 			mmu_page_zap_pte(vcpu->kvm, sp, spte);

[v3,09/11] KVM: MMU: split kvm_mmu_pte_write function

Commit Message

Patch