From patchwork Wed Jun 30 08:08:05 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Xiao Guangrong X-Patchwork-Id: 108765 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter.kernel.org (8.14.4/8.14.3) with ESMTP id o5U8CMci026427 for ; Wed, 30 Jun 2010 08:12:22 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753872Ab0F3IL6 (ORCPT ); Wed, 30 Jun 2010 04:11:58 -0400 Received: from cn.fujitsu.com ([222.73.24.84]:63829 "EHLO song.cn.fujitsu.com" rhost-flags-OK-FAIL-OK-OK) by vger.kernel.org with ESMTP id S1753791Ab0F3IL4 (ORCPT ); Wed, 30 Jun 2010 04:11:56 -0400 Received: from tang.cn.fujitsu.com (tang.cn.fujitsu.com [10.167.250.3]) by song.cn.fujitsu.com (Postfix) with ESMTP id 0EB00170127; Wed, 30 Jun 2010 16:11:51 +0800 (CST) Received: from fnst.cn.fujitsu.com (tang.cn.fujitsu.com [127.0.0.1]) by tang.cn.fujitsu.com (8.14.3/8.13.1) with ESMTP id o5U89GRE012096; Wed, 30 Jun 2010 16:09:16 +0800 Received: from [10.167.141.99] (unknown [10.167.141.99]) by fnst.cn.fujitsu.com (Postfix) with ESMTPA id C28AB1CC1E7; Wed, 30 Jun 2010 16:11:55 +0800 (CST) Message-ID: <4C2AFB65.2030807@cn.fujitsu.com> Date: Wed, 30 Jun 2010 16:08:05 +0800 From: Xiao Guangrong User-Agent: Thunderbird 2.0.0.24 (Windows/20100228) MIME-Version: 1.0 To: Avi Kivity CC: Marcelo Tosatti , LKML , KVM list Subject: [PATCH v3 9/11] KVM: MMU: prefetch ptes when intercepted guest #PF References: <4C2AF9FA.9020601@cn.fujitsu.com> In-Reply-To: <4C2AF9FA.9020601@cn.fujitsu.com> Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.3 (demeter.kernel.org [140.211.167.41]); Wed, 30 Jun 2010 08:12:22 +0000 (UTC) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 6673484..fadfafe 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2002,6 
+2002,88 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) { } +static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *sp, + u64 *start, u64 *end) +{ + gfn_t gfn; + struct page *pages[PTE_PREFETCH_NUM]; + + gfn = sp->gfn + start - sp->spt; + while (start < end) { + unsigned long addr; + int entry, j, ret; + + addr = gfn_to_hva_many(vcpu->kvm, gfn, &entry); + if (kvm_is_error_hva(addr)) + return -1; + + entry = min(entry, (int)(end - start)); + ret = __get_user_pages_fast(addr, entry, 1, pages); + if (ret <= 0) + return -1; + + for (j = 0; j < ret; j++, gfn++, start++) + mmu_set_spte(vcpu, start, ACC_ALL, + sp->role.access, 0, 0, 1, NULL, + sp->role.level, gfn, + page_to_pfn(pages[j]), true, false); + + if (ret < entry) + return -1; + } + return 0; +} + +static void __direct_pte_prefetch(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *sp, u64 *sptep) +{ + u64 *start = NULL; + int index, i, max; + + WARN_ON(!sp->role.direct); + + if (pte_prefetch_topup_memory_cache(vcpu)) + return; + + index = sptep - sp->spt; + i = index & ~(PTE_PREFETCH_NUM - 1); + max = index | (PTE_PREFETCH_NUM - 1); + + for (; i < max; i++) { /* NOTE(review): assuming PTE_PREFETCH_NUM is a power of two, max is the last index of the aligned window, so the i < max bound never visits that last slot - confirm this is intended */ + u64 *spte = sp->spt + i; + + if (*spte != shadow_trap_nonpresent_pte || spte == sptep) { + if (!start) + continue; + if (direct_pte_prefetch_many(vcpu, sp, start, spte) < 0) + break; + start = NULL; + } else if (!start) + start = spte; + } /* NOTE(review): a run still open in start when the loop ends is never passed to direct_pte_prefetch_many() - confirm this is intended */ +} + +static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) +{ + struct kvm_mmu_page *sp; + + /* + * EPT has no accessed bit, so there is no way to + * distinguish translations that were really accessed + * from prefetched ones; therefore pte prefetch is + * disabled when EPT is enabled. 
+ */ + if (!shadow_accessed_mask) + return; + + sp = page_header(__pa(sptep)); + if (sp->role.level > PT_PAGE_TABLE_LEVEL) + return; + + __direct_pte_prefetch(vcpu, sp, sptep); +} + static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, int level, gfn_t gfn, pfn_t pfn) { @@ -2015,6 +2097,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL, 0, write, 1, &pt_write, level, gfn, pfn, false, true); + direct_pte_prefetch(vcpu, iterator.sptep); ++vcpu->stat.pf_fixed; break; } diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 3350c02..d8c3be8 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -291,6 +291,81 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, gpte_to_gfn(gpte), pfn, true, true); } +static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, u64 *sptep) +{ + struct kvm_mmu_page *sp; + pt_element_t gptep[PTE_PREFETCH_NUM]; + gpa_t first_pte_gpa; + int offset = 0, index, i, j, max; + + sp = page_header(__pa(sptep)); + index = sptep - sp->spt; + + if (sp->role.level > PT_PAGE_TABLE_LEVEL) + return; + + if (sp->role.direct) + return __direct_pte_prefetch(vcpu, sp, sptep); + + index = sptep - sp->spt; + i = index & ~(PTE_PREFETCH_NUM - 1); + max = index | (PTE_PREFETCH_NUM - 1); + + if (PTTYPE == 32) + offset = sp->role.quadrant << PT64_LEVEL_BITS; + + first_pte_gpa = gfn_to_gpa(sp->gfn) + + (offset + i) * sizeof(pt_element_t); + + if (kvm_read_guest_atomic(vcpu->kvm, first_pte_gpa, gptep, + sizeof(gptep)) < 0) + return; + + if (pte_prefetch_topup_memory_cache(vcpu)) + return; + + for (j = 0; i < max; i++, j++) { /* NOTE(review): same i < max bound as __direct_pte_prefetch; assuming a power-of-two PTE_PREFETCH_NUM, the last gptep[] entry read above is never examined - confirm this is intended */ + pt_element_t gpte; + unsigned pte_access; + u64 *spte = sp->spt + i; + gfn_t gfn; + pfn_t pfn; + + if (spte == sptep) + continue; + + if (*spte != shadow_trap_nonpresent_pte) + continue; + + gpte = gptep[j]; + + if (is_rsvd_bits_set(vcpu, gpte, PT_PAGE_TABLE_LEVEL)) + break; + + if (!(gpte & 
PT_ACCESSED_MASK)) + continue; + + if (!is_present_gpte(gpte)) { + if (!sp->unsync) + __set_spte(spte, shadow_notrap_nonpresent_pte); + continue; + } + + gfn = gpte_to_gfn(gpte); + + pfn = gfn_to_pfn_atomic(vcpu->kvm, gfn); + if (is_error_pfn(pfn)) { + kvm_release_pfn_clean(pfn); + break; + } + + pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); + mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0, + is_dirty_gpte(gpte), NULL, sp->role.level, gfn, + pfn, true, false); + } +} + /* * Fetch a shadow pte for a specific level in the paging hierarchy. */ @@ -327,6 +402,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, user_fault, write_fault, dirty, ptwrite, level, gw->gfn, pfn, false, true); + FNAME(pte_prefetch)(vcpu, sptep); break; }