Message ID: 1526957507-123937-1-git-send-email-jingqi.liu@intel.com (mailing list archive)
State:      New, archived
On 22/05/2018 04:51, Jingqi Liu wrote:
> A new control bit(bit 29) in the TEST_CTL MSR will be introduced
> to enable detection of split locks.
>
> When bit 29 of the TEST_CTL(33H) MSR is set, the processor
> causes an #AC exception to be issued instead of suppressing LOCK on
> bus(during split lock access). A previous control bit (bit 31)
> in this MSR causes the processor to disable LOCK# assertion for
> split locked accesses when set. When bits 29 and 31 are both set,
> bit 29 takes precedence.
>
> The release document ref below link:
> https://software.intel.com/sites/default/files/managed/c5/15/\
> architecture-instruction-set-extensions-programming-reference.pdf
> This patch has a dependency on https://lkml.org/lkml/2018/5/14/1157

Please follow the way spec_ctrl is implemented, so that there is no
wrmsr/rdmsr unless the guest is using the feature (and also no wrmsr
unless the guest and host settings don't match).

How should the guest detect that the bits are available?  Is there a
CPUID bit?

Paolo

> Signed-off-by: Jingqi Liu <jingqi.liu@intel.com>
> [...]
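For reference, the spec_ctrl-style handling Paolo describes might look roughly like the sketch below. This is illustrative only: it reuses the msr_write_intercepted() helper that the existing SPEC_CTRL code in vmx_vcpu_run() relies on, the guest/host_split_lock_ctrl fields from this patch, and the MSR_TEST_CTL definition from the dependency patch.

	/*
	 * Illustrative sketch, modeled on the MSR_IA32_SPEC_CTRL handling in
	 * vmx_vcpu_run(): the MSR stays intercepted until the guest first
	 * writes it, so guests that never touch it never pay for the
	 * rdmsr/wrmsr pair, and the MSR is only written when the guest and
	 * host values actually differ.
	 */

	/* before vmentry */
	if (unlikely(!msr_write_intercepted(vcpu, MSR_TEST_CTL)) &&
	    vmx->guest_split_lock_ctrl != vmx->host_split_lock_ctrl)
		native_wrmsrl(MSR_TEST_CTL, vmx->guest_split_lock_ctrl);

	/* ... VMLAUNCH/VMRESUME ... */

	/* after vmexit: pick up the guest's latest value, restore the host's */
	if (unlikely(!msr_write_intercepted(vcpu, MSR_TEST_CTL))) {
		vmx->guest_split_lock_ctrl = native_read_msr(MSR_TEST_CTL);
		if (vmx->guest_split_lock_ctrl != vmx->host_split_lock_ctrl)
			native_wrmsrl(MSR_TEST_CTL, vmx->host_split_lock_ctrl);
	}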
On Tue, May 22, 2018 at 10:51:47AM +0800, Jingqi Liu wrote:
> A new control bit(bit 29) in the TEST_CTL MSR will be introduced
> to enable detection of split locks.
>
> When bit 29 of the TEST_CTL(33H) MSR is set, the processor
> causes an #AC exception to be issued instead of suppressing LOCK on
> bus(during split lock access). A previous control bit (bit 31)
> in this MSR causes the processor to disable LOCK# assertion for
> split locked accesses when set. When bits 29 and 31 are both set,
> bit 29 takes precedence.

Migration?

> The release document ref below link:
> https://software.intel.com/sites/default/files/managed/c5/15/\
> architecture-instruction-set-extensions-programming-reference.pdf
> This patch has a dependency on https://lkml.org/lkml/2018/5/14/1157
>
> Signed-off-by: Jingqi Liu <jingqi.liu@intel.com>
> [...]
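The "Migration?" question is about save/restore of the guest's TEST_CTL value across live migration. A rough sketch of the usual mechanism (illustrative, not part of this patch) is to list the MSR in msrs_to_save in arch/x86/kvm/x86.c and handle it in vmx_get_msr()/vmx_set_msr(), so userspace can transfer it with KVM_GET_MSRS/KVM_SET_MSRS:

	/*
	 * Illustrative sketch only; MSR_TEST_CTL comes from the dependency
	 * patch and guest_split_lock_ctrl from this patch.
	 */

	/* arch/x86/kvm/x86.c: advertise the MSR via KVM_GET_MSR_INDEX_LIST */
	static u32 msrs_to_save[] = {
		/* ... existing entries ... */
		MSR_TEST_CTL,
	};

	/* arch/x86/kvm/vmx.c: vmx_get_msr() */
	case MSR_TEST_CTL:
		msr_info->data = to_vmx(vcpu)->guest_split_lock_ctrl;
		break;

	/* arch/x86/kvm/vmx.c: vmx_set_msr() */
	case MSR_TEST_CTL:
		to_vmx(vcpu)->guest_split_lock_ctrl = data;
		break;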
Hi Jingqi,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on kvm/linux-next]
[also build test ERROR on v4.17-rc6 next-20180517]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:      https://github.com/0day-ci/linux/commits/Jingqi-Liu/KVM-x86-Expose-the-split-lock-detection-feature-to-guest-VM/20180522-004806
base:     https://git.kernel.org/pub/scm/virt/kvm/kvm.git linux-next
config:   i386-randconfig-x010-201820 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-16) 7.3.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386

All errors (new ones prefixed by >>):

   arch/x86/kvm/vmx.c: In function 'vmx_vcpu_run':
>> arch/x86/kvm/vmx.c:9756:46: error: 'MSR_TEST_CTL' undeclared (first use in this function); did you mean 'MSR_THERM2_CTL'?
      vmx->host_split_lock_ctrl = native_read_msr(MSR_TEST_CTL);
                                                  ^~~~~~~~~~~~
                                                  MSR_THERM2_CTL
   arch/x86/kvm/vmx.c:9756:46: note: each undeclared identifier is reported only once for each function it appears in
   arch/x86/kvm/vmx.c: In function 'vmx_create_vcpu':
   arch/x86/kvm/vmx.c:10074:44: error: 'MSR_TEST_CTL' undeclared (first use in this function); did you mean 'MSR_THERM2_CTL'?
      vmx_disable_intercept_for_msr(msr_bitmap, MSR_TEST_CTL, MSR_TYPE_RW);
                                                ^~~~~~~~~~~~
                                                MSR_THERM2_CTL

vim +9756 arch/x86/kvm/vmx.c

   [...]
  9745		vmx_arm_hv_timer(vcpu);
  9746	
  9747		/*
  9748		 * If this vCPU has touched SPEC_CTRL, restore the guest's value if
  9749		 * it's non-zero. Since vmentry is serialising on affected CPUs, there
  9750		 * is no need to worry about the conditional branch over the wrmsr
  9751		 * being speculatively taken.
  9752		 */
  9753		if (vmx->spec_ctrl)
  9754			native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
  9755	
> 9756		vmx->host_split_lock_ctrl = native_read_msr(MSR_TEST_CTL);
  9757		native_wrmsrl(MSR_TEST_CTL, vmx->guest_split_lock_ctrl);
  9758	
  9759		vmx->__launched = vmx->loaded_vmcs->launched;
  9760	
   [...]

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
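Both errors are the same missing definition: the series relies on the dependency patch (https://lkml.org/lkml/2018/5/14/1157) to declare the MSR, so it does not build on a tree without that patch applied. The sketch below shows what the presumed definitions look like; the macro names are assumptions, but the MSR address and bit positions match the commit message (TEST_CTL is MSR 33H, bit 29 raises #AC on split locks, bit 31 disables LOCK# assertion for split locked accesses).

	/* arch/x86/include/asm/msr-index.h (presumed, from the dependency patch) */
	#define MSR_TEST_CTL				0x00000033
	#define TEST_CTL_ENABLE_AC_SPLIT_LOCK		(1ULL << 29)
	#define TEST_CTL_DISABLE_LOCK_ASSERTION		(1ULL << 31)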
On 5/21/2018 10:32 PM, Paolo Bonzini wrote:
> On 22/05/2018 04:51, Jingqi Liu wrote:
>> A new control bit(bit 29) in the TEST_CTL MSR will be introduced
>> to enable detection of split locks.
>> [...]
> Please follow the way spec_ctrl is implemented, so that there is no
> wrmsr/rdmsr unless the guest is using the feature (and also no wrmsr
> unless the guest and host settings don't match).
>
> How should the guest detect that the bits are available?  Is there a
> CPUID bit?
>
> Paolo

Thanks for your review.
The bit is in a MSR register(33H), and there isn't a corresponding
CPUID bit.
This patch has a dependency on https://lkml.org/lkml/2018/5/14/1157,
which could enable or disable this feature in kernel.
The bit could be modified in guest or host, so need to rdmsr before
vmentry and after vmexit.
And wrmsr if the guest and host settings don't match.
Will improve in next version.

Thanks
Jingqi

>> Signed-off-by: Jingqi Liu <jingqi.liu@intel.com>
>> [...]
On 5/21/2018 10:32 PM, Paolo Bonzini wrote:
> On 22/05/2018 04:51, Jingqi Liu wrote:
>> A new control bit(bit 29) in the TEST_CTL MSR will be introduced
>> to enable detection of split locks.
>> [...]
> Please follow the way spec_ctrl is implemented,

Hi Paolo,

It could not follow the way spec_ctrl is implemented.
The guest will use the setting in TEST_CTL MSR which is set by host.
If the bit is set in the host, an #AC exception will be caused when
the guest starts.
Actually, the bit is disabled when guest starts.
And it can be enabled in guest kernel.

Thanks
Jingqi

> so that there is no
> wrmsr/rdmsr unless the guest is using the feature (and also no wrmsr
> unless the guest and host settings don't match).
>
> How should the guest detect that the bits are available?  Is there a
> CPUID bit?
>
> Paolo
>
>> Signed-off-by: Jingqi Liu <jingqi.liu@intel.com>
>> [...]
On 22/05/2018 08:20, Liu, Jingqi wrote:
>> How should the guest detect that the bits are available?  Is there a
>> CPUID bit?
>>
>> Paolo
> Thanks for your review.
> The bit is in a MSR register(33H), and there isn't a corresponding
> CPUID bit.
> This patch has a dependency on https://lkml.org/lkml/2018/5/14/1157,
> which could enable or disable this feature in kernel.
> The bit could be modified in guest or host, so need to rdmsr before
> vmentry and after vmexit.

Yes, but only do that after the first time the guest uses the MSR, or
perhaps we could use some trick to limit the cost of vmexits for guests
that write to the MSR very rarely.

Maybe even require userspace to do a ioctl, for example KVM_ENABLE_CAP,
in order to let the guest see the 0x33 MSR (in which case, the guest
would pay the price on every vmentry/vmexit).  Another optimization
possibility is to use a static key so that, if no guest can see the
0x33 MSR, the cost is really zero.

Note that this is not premature optimization.  vmexit time is really
the hottest path in KVM, even removing a local_irq_save/restore can
provide a measurable improvement there!  So you cannot add 200 clock
cycles or worse for an MSR that is essentially a debugging tool.

Paolo

> And wrmsr if the guest and host settings don't match.
> Will improve in next version.
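To make the static-key / KVM_ENABLE_CAP idea concrete, a rough sketch might look like the following. All of the names introduced here (the capability, the static key, the kvm->arch flag) are made up for illustration; only kvm_vm_ioctl_enable_cap() and the static-key primitives are existing kernel infrastructure.

	/* Illustrative only: KVM_CAP_SPLIT_LOCK_AC, split_lock_ac_key and
	 * kvm->arch.split_lock_ac are hypothetical names. */
	DEFINE_STATIC_KEY_FALSE(split_lock_ac_key);

	/* arch/x86/kvm/x86.c, kvm_vm_ioctl_enable_cap(): userspace opts a VM in */
	case KVM_CAP_SPLIT_LOCK_AC:
		kvm->arch.split_lock_ac = true;
		static_branch_inc(&split_lock_ac_key);
		r = 0;
		break;

	/* arch/x86/kvm/vmx.c, around vmentry/vmexit: zero cost unless some VM
	 * has enabled the capability */
	if (static_branch_unlikely(&split_lock_ac_key) &&
	    vcpu->kvm->arch.split_lock_ac) {
		vmx->host_split_lock_ctrl = native_read_msr(MSR_TEST_CTL);
		if (vmx->guest_split_lock_ctrl != vmx->host_split_lock_ctrl)
			native_wrmsrl(MSR_TEST_CTL, vmx->guest_split_lock_ctrl);
	}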
On 22/05/2018 09:16, Liu, Jingqi wrote:
> Hi Paolo,
> It could not follow the way spec_ctrl is implemented.
> The guest will use the setting in TEST_CTL MSR which is set by host.
> If the bit is set in the host, an #AC exception will be caused when
> the guest starts.
> Actually, the bit is disabled when guest starts.
> And it can be enabled in guest kernel.

Check out the new code that was added for SSBD.  It now supports
different SPEC_CTRL settings in the guest and in the host.

Paolo
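The SSBD precedent Paolo points to reconciles the guest and host MSR values around vmentry/vmexit instead of letting the host setting leak into the guest. Translated to TEST_CTL, it might look like the hypothetical helper below, which keeps the guest value separate from the host's so a host that enables bit 29 does not force #AC inside a guest that has not enabled it (and vice versa):

	/*
	 * Hypothetical helper, in the spirit of the SSBD/SPEC_CTRL
	 * reconciliation: called with setguest=true before vmentry and
	 * setguest=false after vmexit, and only touches the MSR when the
	 * two values actually differ.
	 */
	static void vmx_switch_test_ctl(struct vcpu_vmx *vmx, bool setguest)
	{
		u64 guest = vmx->guest_split_lock_ctrl;
		u64 host  = vmx->host_split_lock_ctrl;

		if (guest == host)
			return;		/* no MSR access needed at all */

		native_wrmsrl(MSR_TEST_CTL, setguest ? guest : host);
	}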
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3f16965..07986e0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -610,6 +610,8 @@ struct vcpu_vmx {
 
 	u64 arch_capabilities;
 	u64 spec_ctrl;
+	u64 guest_split_lock_ctrl;
+	u64 host_split_lock_ctrl;
 
 	u32 vm_entry_controls_shadow;
 	u32 vm_exit_controls_shadow;
@@ -6013,6 +6015,8 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
 	vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
 
+	vmx->guest_split_lock_ctrl = 0;
+
 	if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
 		vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
 
@@ -6062,6 +6066,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 
 	vmx->rmode.vm86_active = 0;
 	vmx->spec_ctrl = 0;
+	vmx->guest_split_lock_ctrl = 0;
 
 	vcpu->arch.microcode_version = 0x100000000ULL;
 	vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
@@ -9725,6 +9730,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	if (vmx->spec_ctrl)
 		native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
 
+	vmx->host_split_lock_ctrl = native_read_msr(MSR_TEST_CTL);
+	native_wrmsrl(MSR_TEST_CTL, vmx->guest_split_lock_ctrl);
+
 	vmx->__launched = vmx->loaded_vmcs->launched;
 
 	evmcs_rsp = static_branch_unlikely(&enable_evmcs) ?
@@ -9874,6 +9882,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	if (vmx->spec_ctrl)
 		native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
 
+	vmx->guest_split_lock_ctrl = native_read_msr(MSR_TEST_CTL);
+	native_wrmsrl(MSR_TEST_CTL, vmx->host_split_lock_ctrl);
+
 	/* Eliminate branch target predictions from guest mode */
 	vmexit_fill_RSB();
 
@@ -10037,6 +10048,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
 	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
 	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
+	vmx_disable_intercept_for_msr(msr_bitmap, MSR_TEST_CTL, MSR_TYPE_RW);
+
 	vmx->msr_bitmap_mode = 0;
 
 	vmx->loaded_vmcs = &vmx->vmcs01;
A new control bit (bit 29) in the TEST_CTL MSR will be introduced
to enable detection of split locks.

When bit 29 of the TEST_CTL (33H) MSR is set, the processor raises
an #AC exception on a split lock access instead of suppressing LOCK
on the bus. An earlier control bit (bit 31) in this MSR causes the
processor to disable LOCK# assertion for split locked accesses when
set. When bits 29 and 31 are both set, bit 29 takes precedence.

The feature is documented in the Intel Architecture Instruction Set
Extensions Programming Reference:
https://software.intel.com/sites/default/files/managed/c5/15/\
architecture-instruction-set-extensions-programming-reference.pdf

This patch has a dependency on https://lkml.org/lkml/2018/5/14/1157.

Signed-off-by: Jingqi Liu <jingqi.liu@intel.com>
---
 arch/x86/kvm/vmx.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)
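For context, once the MSR is passed through to the guest as this patch does, a guest kernel would enable split-lock detection with a plain MSR write. The snippet below is an illustrative sketch only; the macro names are assumptions, while the MSR address (33H) and bit 29 come from the commit message above.

	#include <asm/msr.h>

	#define MSR_TEST_CTL			0x00000033	/* assumed name */
	#define TEST_CTL_AC_SPLIT_LOCK		(1ULL << 29)	/* assumed name */

	static void enable_split_lock_detect(void)
	{
		u64 val;

		rdmsrl(MSR_TEST_CTL, val);
		val |= TEST_CTL_AC_SPLIT_LOCK;	/* raise #AC on split-locked accesses */
		wrmsrl(MSR_TEST_CTL, val);
	}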