From patchwork Mon Mar 30 01:53:43 2009
X-Patchwork-Submitter: "Dong, Eddie"
X-Patchwork-Id: 15078
From: "Dong, Eddie"
To: Avi Kivity
Cc: kvm@vger.kernel.org, "Dong, Eddie"
Date: Mon, 30 Mar 2009 09:53:43 +0800
Subject: RE: RFC: Add reserved bits check
Message-ID: <9832F13BD22FB94A829F798DA4A8280501A3C01DDE@pdsmsx503.ccr.corp.intel.com>
In-Reply-To: <49CF4C1B.3010404@redhat.com>

>> +static bool is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 gpte, int level)
>> +{
>> +        int ps = 0;
>> +
>> +        if (level == PT_DIRECTORY_LEVEL)
>> +                ps = !!(gpte & PT_PAGE_SIZE_MASK);
>>
>
> No need for this.  If you set rsvd_bits_mask[1][0] ==
> rsvd_bits_mask[0][0], then you get the same behaviour.  The first
> index is not the page size, it's just bit 7.

Sure, fixed.

>
> You'll need to fill all the indexes for bit 7 == 1, but it's worth it,
> with the 1GB pages patch.
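As an aside, here is a standalone sketch (not part of the patch, with hypothetical names) of the bit-7 indexing suggested above. It ignores the per-level extras such as bits 7..8 and the NX handling, and assumes a MAXPHYADDR of 36; the point is only that the table is indexed by raw bit 7 of the guest PTE, and that making row [1][0] mirror row [0][0] keeps the check correct for 4K PTEs, where bit 7 is PAT rather than PS:

/* Standalone illustration (not kernel code): index the reserved-bit masks
 * by raw bit 7 of the guest PTE, not by a computed "large page" flag. */
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

#define MAX_LEVELS 4

/* rsvd_mask[bit7][level - 1]; filled per paging mode from MAXPHYADDR */
static uint64_t rsvd_mask[2][MAX_LEVELS];

static bool pte_rsvd_bits_set(uint64_t pte, int level)
{
        int bit7 = (pte >> 7) & 1;      /* PS in a PDE/PDPTE, PAT in a 4K PTE */

        return (pte & rsvd_mask[bit7][level - 1]) != 0;
}

int main(void)
{
        /* Simplified: with an assumed MAXPHYADDR of 36, physical-address
         * bits 36..51 are reserved in every 4-level paging entry. */
        uint64_t pa_rsvd = ((1ULL << (51 - 36 + 1)) - 1) << 36;
        int level;

        for (level = 0; level < MAX_LEVELS; level++) {
                rsvd_mask[0][level] = pa_rsvd;
                rsvd_mask[1][level] = pa_rsvd;  /* row [1][0] mirrors [0][0] */
        }

        printf("%d\n", pte_rsvd_bits_set(1ULL << 40, 1));    /* 1: bit 40 is reserved */
        printf("%d\n", pte_rsvd_bits_set(0x1000 | 0x80, 1)); /* 0: bit 7 is PAT in a 4K PTE */
        return 0;
}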
>
>> +        return (gpte & vcpu->arch.mmu.rsvd_bits_mask[ps][level-1]) != 0;
>> +}
>> +
>> #define PTTYPE 64
>> #include "paging_tmpl.h"
>> #undef PTTYPE
>>
>> +int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
>> +{
>> +        struct kvm_cpuid_entry2 *best;
>> +
>> +        best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
>> +        if (best)
>> +                return best->eax & 0xff;
>> +        return 32;
>> +}
>> +
>>
>
> Best to return 36 if the cpu doesn't support cpuid 80000008 but does
> support pae.

Mmm, I noticed some conflicting information in the SDM, but you are right :)
One more modification: after double-checking with our internal architect, the
RSVD bit of the error code is not set if P=0.

Thanks and reposted.
Eddie

Emulate #PF error code of reserved bits violation.

Signed-off-by: Eddie Dong
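For illustration only, and not part of the patch below: a small user-space sketch of the arithmetic, showing how a reserved-bit range turns into a mask and using the assumed fallback MAXPHYADDR of 36 for when CPUID leaf 0x80000008 is unavailable:

/* Demonstration only: mirrors the rsvd_bits() helper added by the patch. */
#include <stdint.h>
#include <stdio.h>

/* Bits s..e inclusive set to 1, e.g. rsvd_bits(13, 20) == 0x1fe000. */
static uint64_t rsvd_bits(int s, int e)
{
        return ((1ULL << (e - s + 1)) - 1) << s;
}

int main(void)
{
        int maxphyaddr = 36;    /* assumed fallback when CPUID 0x80000008 is absent */

        /* Reserved physical-address bits in a PAE PTE: maxphyaddr..62. */
        printf("PAE PTE rsvd mask:  %#llx\n",
               (unsigned long long)rsvd_bits(maxphyaddr, 62));
        /* A PAE 2MB PDE additionally reserves bits 13..20. */
        printf("2MB PDE extra bits: %#llx\n",
               (unsigned long long)rsvd_bits(13, 20));
        return 0;
}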
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 55fd4c5..4fe2742 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -261,6 +261,7 @@ struct kvm_mmu {
         union kvm_mmu_page_role base_role;
 
         u64 *pae_root;
+        u64 rsvd_bits_mask[2][4];
 };
 
 struct kvm_vcpu_arch {
@@ -791,5 +792,6 @@ asmlinkage void kvm_handle_fault_on_reboot(void);
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
 
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ef060ec..0a6f109 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -126,6 +126,7 @@ module_param(oos_shadow, bool, 0644);
 #define PFERR_PRESENT_MASK (1U << 0)
 #define PFERR_WRITE_MASK (1U << 1)
 #define PFERR_USER_MASK (1U << 2)
+#define PFERR_RSVD_MASK (1U << 3)
 #define PFERR_FETCH_MASK (1U << 4)
 
 #define PT_DIRECTORY_LEVEL 2
@@ -179,6 +180,11 @@ static u64 __read_mostly shadow_accessed_mask;
 static u64 __read_mostly shadow_dirty_mask;
 static u64 __read_mostly shadow_mt_mask;
 
+static inline u64 rsvd_bits(int s, int e)
+{
+        return ((1ULL << (e - s + 1)) - 1) << s;
+}
+
 void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
 {
         shadow_trap_nonpresent_pte = trap_pte;
@@ -2155,6 +2161,14 @@ static void paging_free(struct kvm_vcpu *vcpu)
         nonpaging_free(vcpu);
 }
 
+static bool is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 gpte, int level)
+{
+        int bit7;
+
+        bit7 = (gpte >> 7) & 1;
+        return (gpte & vcpu->arch.mmu.rsvd_bits_mask[bit7][level-1]) != 0;
+}
+
 #define PTTYPE 64
 #include "paging_tmpl.h"
 #undef PTTYPE
@@ -2183,6 +2197,25 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
 
 static int paging64_init_context(struct kvm_vcpu *vcpu)
 {
+        struct kvm_mmu *context = &vcpu->arch.mmu;
+        int maxphyaddr = cpuid_maxphyaddr(vcpu);
+        u64 exb_bit_rsvd = 0;
+
+        if (!is_nx(vcpu))
+                exb_bit_rsvd = rsvd_bits(63, 63);
+
+        context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
+                rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+        context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
+                rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+        context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+                rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+        context->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
+        context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
+        context->rsvd_bits_mask[1][2] = context->rsvd_bits_mask[0][2];
+        context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
+                rsvd_bits(maxphyaddr, 51) | rsvd_bits(13, 20);
+        context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
         return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL);
 }
 
@@ -2190,6 +2223,16 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
 {
         struct kvm_mmu *context = &vcpu->arch.mmu;
 
+        /* no rsvd bits for 2 level 4K page table entries */
+        context->rsvd_bits_mask[0][1] = 0;
+        context->rsvd_bits_mask[0][0] = 0;
+        if (is_cpuid_PSE36())
+                /* 36bits PSE 4MB page */
+                context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21);
+        else
+                /* 32 bits PSE 4MB page */
+                context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21);
+        context->rsvd_bits_mask[1][0] = 0;
         context->new_cr3 = paging_new_cr3;
         context->page_fault = paging32_page_fault;
         context->gva_to_gpa = paging32_gva_to_gpa;
@@ -2205,6 +2248,22 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
 
 static int paging32E_init_context(struct kvm_vcpu *vcpu)
 {
+        struct kvm_mmu *context = &vcpu->arch.mmu;
+        int maxphyaddr = cpuid_maxphyaddr(vcpu);
+        u64 exb_bit_rsvd = 0;
+
+        if (!is_nx(vcpu))
+                exb_bit_rsvd = rsvd_bits(63, 63);
+
+        context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+                rsvd_bits(maxphyaddr, 62);      /* PDE */
+        context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
+                rsvd_bits(maxphyaddr, 62);      /* PTE */
+        context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
+                rsvd_bits(maxphyaddr, 62) |
+                rsvd_bits(13, 20);              /* large page */
+        context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+
         return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL);
 }
 
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 7314c09..0d9110a 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -123,6 +123,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
         gfn_t table_gfn;
         unsigned index, pt_access, pte_access;
         gpa_t pte_gpa;
+        int rsvd_fault = 0;
 
         pgprintk("%s: addr %lx\n", __func__, addr);
 walk:
@@ -157,6 +158,10 @@ walk:
                 if (!is_present_pte(pte))
                         goto not_present;
 
+                rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level);
+                if (rsvd_fault)
+                        goto access_error;
+
                 if (write_fault && !is_writeble_pte(pte))
                         if (user_fault || is_write_protection(vcpu))
                                 goto access_error;
@@ -233,6 +238,8 @@ err:
                 walker->error_code |= PFERR_USER_MASK;
         if (fetch_fault)
                 walker->error_code |= PFERR_FETCH_MASK;
+        if (rsvd_fault)
+                walker->error_code |= PFERR_RSVD_MASK;
         return 0;
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e96edda..bf6683a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2899,6 +2899,16 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
         return best;
 }
 
+int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
+{
+        struct kvm_cpuid_entry2 *best;
+
+        best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
+        if (best)
+                return best->eax & 0xff;
+        return 36;
+}
+
 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 {
         u32 function, index;
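For completeness, and again not part of the patch: a sketch of how the PFERR_* bits defined above combine in the error code injected for a reserved-bit violation. Since reserved bits are only checked in present entries, the RSVD bit is reported together with P=1, which matches the note above that the RSVD bit is not set when P=0.

/* Demonstration only: composing an x86 #PF error code for a reserved-bit
 * violation, using the same bit layout as the PFERR_* masks in the patch. */
#include <stdint.h>
#include <stdio.h>

#define PFERR_PRESENT_MASK (1U << 0)    /* P: translation was present */
#define PFERR_WRITE_MASK   (1U << 1)    /* W/R: fault on a write */
#define PFERR_USER_MASK    (1U << 2)    /* U/S: fault in user mode */
#define PFERR_RSVD_MASK    (1U << 3)    /* RSVD: reserved bit set in a paging entry */
#define PFERR_FETCH_MASK   (1U << 4)    /* I/D: instruction fetch */

int main(void)
{
        /* Example: a guest user-mode write that hit a reserved bit.  P=1 by
         * definition, since reserved bits are only checked in present entries. */
        uint32_t error_code = PFERR_PRESENT_MASK | PFERR_WRITE_MASK |
                              PFERR_USER_MASK | PFERR_RSVD_MASK;

        printf("injected #PF error code: %#x\n", error_code);
        return 0;
}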