From patchwork Thu May 27 09:47:54 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Sheng Yang X-Patchwork-Id: 102591 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter.kernel.org (8.14.3/8.14.3) with ESMTP id o4R9lJHc026789 for ; Thu, 27 May 2010 09:47:20 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1757336Ab0E0JrR (ORCPT ); Thu, 27 May 2010 05:47:17 -0400 Received: from mga11.intel.com ([192.55.52.93]:28697 "EHLO mga11.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1757184Ab0E0JrQ (ORCPT ); Thu, 27 May 2010 05:47:16 -0400 Received: from fmsmga002.fm.intel.com ([10.253.24.26]) by fmsmga102.fm.intel.com with ESMTP; 27 May 2010 02:44:28 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="4.53,310,1272870000"; d="scan'208";a="571009605" Received: from syang10-desktop.sh.intel.com (HELO syang10-desktop) ([10.239.36.64]) by fmsmga002.fm.intel.com with ESMTP; 27 May 2010 02:46:02 -0700 Received: from yasker by syang10-desktop with local (Exim 4.71) (envelope-from ) id 1OHZgz-0001IU-7Z; Thu, 27 May 2010 17:47:57 +0800 From: Sheng Yang To: Avi Kivity Cc: Marcelo Tosatti , kvm@vger.kernel.org, Dexuan Cui , Sheng Yang Subject: [PATCH v6] KVM: x86: Enable XSAVE/XRSTOR for guest Date: Thu, 27 May 2010 17:47:54 +0800 Message-Id: <1274953674-4961-1-git-send-email-sheng@linux.intel.com> X-Mailer: git-send-email 1.7.0.4 Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.3 (demeter.kernel.org [140.211.167.41]); Thu, 27 May 2010 09:47:20 +0000 (UTC) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d08bb4a..b16356b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -302,6 +302,7 @@ struct kvm_vcpu_arch { } update_pte; struct fpu guest_fpu; + u64 xcr0; gva_t mmio_fault_cr2; struct kvm_pio_request pio; @@ -605,6 +606,7 @@ int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val); unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw); void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); +void kvm_set_xcr0(struct kvm_vcpu *vcpu, u64 xcr0); int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data); diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 9e6779f..346ea66 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -266,6 +266,7 @@ enum vmcs_field { #define EXIT_REASON_EPT_VIOLATION 48 #define EXIT_REASON_EPT_MISCONFIG 49 #define EXIT_REASON_WBINVD 54 +#define EXIT_REASON_XSETBV 55 /* * Interruption-information format diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index d2a98f8..6491ac8 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -71,4 +71,10 @@ static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu) return kvm_read_cr4_bits(vcpu, ~0UL); } +static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu) +{ + return (kvm_register_read(vcpu, VCPU_REGS_RAX) & -1u) + | ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32); +} + #endif diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 99ae513..c55d57d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -36,6 +36,8 @@ #include #include #include +#include +#include #include "trace.h" @@ -3354,6 +3356,19 @@ static int handle_wbinvd(struct kvm_vcpu *vcpu) return 1; } +static int handle_xsetbv(struct kvm_vcpu *vcpu) +{ + u64 new_bv = kvm_read_edx_eax(vcpu); + + if (kvm_register_read(vcpu, VCPU_REGS_RCX) != 0) { + kvm_inject_gp(vcpu, 0); + return 1; + } + kvm_set_xcr0(vcpu, new_bv); + skip_emulated_instruction(vcpu); + return 1; +} + static int handle_apic_access(struct kvm_vcpu *vcpu) { return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE; @@ -3632,6 +3647,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, [EXIT_REASON_APIC_ACCESS] = handle_apic_access, [EXIT_REASON_WBINVD] = handle_wbinvd, + [EXIT_REASON_XSETBV] = handle_xsetbv, [EXIT_REASON_TASK_SWITCH] = handle_task_switch, [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7be1d36..e7acc9d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -64,6 +64,7 @@ (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ + | X86_CR4_OSXSAVE \ | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) @@ -149,6 +150,13 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; +u64 __read_mostly host_xcr0; + +static inline u32 bit(int bitno) +{ + return 1 << (bitno & 31); +} + static void kvm_on_user_return(struct user_return_notifier *urn) { unsigned slot; @@ -473,6 +481,52 @@ void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) } EXPORT_SYMBOL_GPL(kvm_lmsw); +int __kvm_set_xcr0(struct kvm_vcpu *vcpu, u64 xcr0) +{ + if (kvm_x86_ops->get_cpl(vcpu) != 0) + return 1; + if (!(xcr0 & XSTATE_FP)) + return 1; + if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE)) + return 1; + if (xcr0 & ~host_xcr0) + return 1; + vcpu->arch.xcr0 = xcr0; + xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0); + return 0; +} + +void kvm_set_xcr0(struct kvm_vcpu *vcpu, u64 xcr0) +{ + if (__kvm_set_xcr0(vcpu, xcr0)) + kvm_inject_gp(vcpu, 0); +} +EXPORT_SYMBOL_GPL(kvm_set_xcr0); + +static bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 1, 0); + return best && (best->ecx & bit(X86_FEATURE_XSAVE)); +} + +static void update_cpuid(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 1, 0); + if (!best) + return; + + /* Update OSXSAVE bit */ + if (cpu_has_xsave && best->function == 0x1) { + best->ecx &= ~(bit(X86_FEATURE_OSXSAVE)); + if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) + best->ecx |= bit(X86_FEATURE_OSXSAVE); + } +} + int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { unsigned long old_cr4 = kvm_read_cr4(vcpu); @@ -481,6 +535,9 @@ int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) if (cr4 & CR4_RESERVED_BITS) return 1; + if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE)) + return 1; + if (is_long_mode(vcpu)) { if (!(cr4 & X86_CR4_PAE)) return 1; @@ -497,6 +554,9 @@ int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) if ((cr4 ^ old_cr4) & pdptr_bits) kvm_mmu_reset_context(vcpu); + if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE) + update_cpuid(vcpu); + return 0; } @@ -665,11 +725,6 @@ int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) } EXPORT_SYMBOL_GPL(kvm_get_dr); -static inline u32 bit(int bitno) -{ - return 1 << (bitno & 31); -} - /* * List of msr numbers which we expose to userspace through KVM_GET_MSRS * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. @@ -1813,6 +1868,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, r = 0; kvm_apic_set_version(vcpu); kvm_x86_ops->cpuid_update(vcpu); + update_cpuid(vcpu); out_free: vfree(cpuid_entries); @@ -1836,6 +1892,7 @@ static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, vcpu->arch.cpuid_nent = cpuid->nent; kvm_apic_set_version(vcpu); kvm_x86_ops->cpuid_update(vcpu); + update_cpuid(vcpu); return 0; out: @@ -1916,7 +1973,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ | 0 /* Reserved, DCA */ | F(XMM4_1) | F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | - 0 /* Reserved, XSAVE, OSXSAVE */; + 0 /* Reserved, AES */ | F(XSAVE) | 0 /* OSXSAVE */; /* cpuid 0x80000001.ecx */ const u32 kvm_supported_word6_x86_features = F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ | @@ -1931,7 +1988,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, switch (function) { case 0: - entry->eax = min(entry->eax, (u32)0xb); + entry->eax = min(entry->eax, (u32)0xd); break; case 1: entry->edx &= kvm_supported_word0_x86_features; @@ -1989,6 +2046,20 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, } break; } + case 0xd: { + int i; + + entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + for (i = 1; *nent < maxnent; ++i) { + if (entry[i - 1].eax == 0 && i != 2) + break; + do_cpuid_1_ent(&entry[i], function, i); + entry[i].flags |= + KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + ++*nent; + } + break; + } case KVM_CPUID_SIGNATURE: { char signature[12] = "KVMKVMKVM\0\0"; u32 *sigptr = (u32 *)signature; @@ -4124,6 +4195,9 @@ int kvm_arch_init(void *opaque) perf_register_guest_info_callbacks(&kvm_guest_cbs); + if (cpu_has_xsave) + host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); + return 0; out: @@ -4522,6 +4596,24 @@ static void inject_pending_event(struct kvm_vcpu *vcpu) } } +static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu) +{ + if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) && + !vcpu->guest_xcr0_loaded) { + xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0); + vcpu->guest_xcr0_loaded = 1; + } +} + +static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) +{ + if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) && + vcpu->guest_xcr0_loaded) { + xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0); + vcpu->guest_xcr0_loaded = 0; + } +} + static int vcpu_enter_guest(struct kvm_vcpu *vcpu) { int r; @@ -4567,6 +4659,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_x86_ops->prepare_guest_switch(vcpu); if (vcpu->fpu_active) kvm_load_guest_fpu(vcpu); + kvm_load_guest_xcr0(vcpu); atomic_set(&vcpu->guest_mode, 1); smp_wmb(); @@ -5118,6 +5211,11 @@ void fx_init(struct kvm_vcpu *vcpu) fpu_alloc(&vcpu->arch.guest_fpu); fpu_finit(&vcpu->arch.guest_fpu); + /* + * Ensure guest xcr0 is valid for loading + */ + vcpu->arch.xcr0 = XSTATE_FP; + vcpu->arch.cr0 |= X86_CR0_ET; } EXPORT_SYMBOL_GPL(fx_init); @@ -5132,6 +5230,12 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) if (vcpu->guest_fpu_loaded) return; + /* + * Restore all possible states in the guest, + * and assume host would use all available bits. + * Guest xcr0 would be loaded later. + */ + kvm_put_guest_xcr0(vcpu); vcpu->guest_fpu_loaded = 1; unlazy_fpu(current); fpu_restore_checking(&vcpu->arch.guest_fpu); @@ -5140,6 +5244,8 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) { + kvm_put_guest_xcr0(vcpu); + if (!vcpu->guest_fpu_loaded) return; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4e8fdbf..3784d58 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -88,7 +88,7 @@ struct kvm_vcpu { int srcu_idx; int fpu_active; - int guest_fpu_loaded; + int guest_fpu_loaded, guest_xcr0_loaded; wait_queue_head_t wq; int sigset_active; sigset_t sigset;