Message ID: 1358952446-17416-3-git-send-email-yang.z.zhang@intel.com (mailing list archive)
State: New, archived
On Wed, Jan 23, 2013 at 10:47:25PM +0800, Yang Zhang wrote:
> From: Yang Zhang <yang.z.zhang@Intel.com>
>
> basically to benefit from apicv, we need to enable virtualized x2apic mode.
> Currently, we only enable it when guest is really using x2apic.
>
> Also, clear MSR bitmap for corresponding x2apic MSRs when guest enabled x2apic:
> 0x800 - 0x8ff: no read intercept for apicv register virtualization,
>                except APIC ID and TMCCT which need software's assistance to
>                get right value.
>
> Signed-off-by: Kevin Tian <kevin.tian@intel.com>
> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
> ---
>  arch/x86/include/asm/kvm_host.h |    1 +
>  arch/x86/include/asm/vmx.h      |    1 +
>  arch/x86/kvm/lapic.c            |   14 ++-
>  arch/x86/kvm/svm.c              |    6 +
>  arch/x86/kvm/vmx.c              |  203 +++++++++++++++++++++++++++++++++++----
>  5 files changed, 201 insertions(+), 24 deletions(-)
>
[...]
> @@ -1324,11 +1325,16 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
> 		value &= ~MSR_IA32_APICBASE_BSP;
>
> 	vcpu->arch.apic_base = value;
> -	if (apic_x2apic_mode(apic)) {
> -		u32 id = kvm_apic_id(apic);
> -		u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
> -		kvm_apic_set_ldr(apic, ldr);
> +	if ((old_value ^ value) & X2APIC_ENABLE) {
> +		if (value & X2APIC_ENABLE) {
> +			u32 id = kvm_apic_id(apic);
> +			u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
> +			kvm_apic_set_ldr(apic, ldr);
> +			kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true);
> +		} else
> +			kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false);
> 	}
[...]
> +static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
> +{
> +	unsigned long *msr_bitmap;
> +
> +	if (vcpu->arch.apic_base & X2APIC_ENABLE)

    if (irqchip_in_kernel(vcpu->kvm) && apic_x2apic_mode(vcpu->apic))

> +		if (is_long_mode(vcpu))
> +			msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
> +		else
> +			msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
> +	else
> +		if (is_long_mode(vcpu))
> +			msr_bitmap = vmx_msr_bitmap_longmode;
> +		else
> +			msr_bitmap = vmx_msr_bitmap_legacy;
> +
> +	vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
> +}
[...]
> +static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
> +{
> +	u32 exec_control, sec_exec_control;
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +
> +	/* There is not point to enable virtualize x2apic without enable
> +	 * apicv
> +	 */
> +	if (!cpu_has_vmx_virtualize_x2apic_mode() || !enable_apicv_reg)
> +		return;
> +
> +	if (set) {

Just add vm_need_tpr_shadow() to the if above.

> +		exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
> +		/* virtualize x2apic mode relies on tpr shadow */
> +		if (!(exec_control & CPU_BASED_TPR_SHADOW))
> +			return;
> +	}
> +
> +	sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
> +
> +	if (set) {
> +		sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
> +		sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
> +	} else {
> +		sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
> +		if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))

How enable_apicv_reg can be true without virtualized apic access?

> +			sec_exec_control |=
> +				SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
> +	}
> +	vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
> +
> +	vmx_set_msr_bitmap(vcpu);
> +}
[...]

--
			Gleb.
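For context on the LDR lines quoted in the lapic.c hunk above: in x2apic mode the logical destination register is read-only and derived from the APIC ID, with the cluster number (id / 16) in bits 31:16 and a one-hot member bit (id % 16) in bits 15:0. A standalone sketch of the same derivation (illustrative only, mirroring the quoted kernel code; not part of the patch):

static u32 x2apic_ldr(u32 id)
{
	/* cluster in bits 31:16, one-hot member position in bits 15:0 */
	return ((id >> 4) << 16) | (1 << (id & 0xf));
}

/* Example: APIC ID 0x23 -> cluster 2, member bit 3 -> LDR 0x00020008. */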
Gleb Natapov wrote on 2013-01-24:
> On Wed, Jan 23, 2013 at 10:47:25PM +0800, Yang Zhang wrote:
>> From: Yang Zhang <yang.z.zhang@Intel.com>
>>
>> basically to benefit from apicv, we need to enable virtualized x2apic mode.
>> Currently, we only enable it when guest is really using x2apic.
[...]
>> +static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
>> +{
>> +	unsigned long *msr_bitmap;
>> +
>> +	if (vcpu->arch.apic_base & X2APIC_ENABLE)
> if (irqchip_in_kernel(vcpu->kvm) && apic_x2apic_mode(vcpu->apic))
Sure.
[...]
>> +	if (!cpu_has_vmx_virtualize_x2apic_mode() || !enable_apicv_reg)
>> +		return;
>> +
>> +	if (set) {
> Just add vm_need_tpr_shadow() to the if above.
Sure.
[...]
>> +	} else {
>> +		sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
>> +		if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))
> How enable_apicv_reg can be true without virtualized apic access?
Right. This check always true here. will drop it.


Best regards,
Yang
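Folding both review comments into the next revision, the VMX helper would plausibly reduce to the following (untested sketch against this patch, not necessarily the code that was eventually merged; vm_need_tpr_shadow() is the existing vmx.c helper that combines cpu_has_vmx_tpr_shadow() with irqchip_in_kernel()):

static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
{
	u32 sec_exec_control;

	/*
	 * Virtualized x2apic mode is pointless without apicv register
	 * virtualization, and it architecturally requires a TPR shadow.
	 */
	if (!cpu_has_vmx_virtualize_x2apic_mode() || !enable_apicv_reg ||
	    !vm_need_tpr_shadow(vcpu->kvm))
		return;

	sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
	if (set) {
		sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
		sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
	} else {
		sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
		/* apic accesses are always virtualized when apicv is on */
		sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
	}
	vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);

	vmx_set_msr_bitmap(vcpu);
}

Restoring SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES unconditionally in the else branch follows Gleb's observation that enable_apicv_reg already implies virtualized apic accesses, which is what made the dropped check always true.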
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f75e1fe..e1306c1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -692,6 +692,7 @@ struct kvm_x86_ops {
 	void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
 	void (*enable_irq_window)(struct kvm_vcpu *vcpu);
 	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
+	void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
 	int (*get_tdp_level)(void);
 	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 44c3f7e..0a54df0 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -139,6 +139,7 @@
 #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
 #define SECONDARY_EXEC_ENABLE_EPT		0x00000002
 #define SECONDARY_EXEC_RDTSCP			0x00000008
+#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE	0x00000010
 #define SECONDARY_EXEC_ENABLE_VPID		0x00000020
 #define SECONDARY_EXEC_WBINVD_EXITING		0x00000040
 #define SECONDARY_EXEC_UNRESTRICTED_GUEST	0x00000080
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 0664c13..83a9547 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1303,6 +1303,7 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
 
 void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 {
+	u64 old_value = vcpu->arch.apic_base;
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
 	if (!apic) {
@@ -1324,11 +1325,16 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 		value &= ~MSR_IA32_APICBASE_BSP;
 
 	vcpu->arch.apic_base = value;
-	if (apic_x2apic_mode(apic)) {
-		u32 id = kvm_apic_id(apic);
-		u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
-		kvm_apic_set_ldr(apic, ldr);
+	if ((old_value ^ value) & X2APIC_ENABLE) {
+		if (value & X2APIC_ENABLE) {
+			u32 id = kvm_apic_id(apic);
+			u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
+			kvm_apic_set_ldr(apic, ldr);
+			kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true);
+		} else
+			kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false);
 	}
+
 	apic->base_address = apic->vcpu->arch.apic_base &
 			     MSR_IA32_APICBASE_BASE;
 
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d29d3cd..38407e9 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3571,6 +3571,11 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
 		set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
 }
 
+static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
+{
+	return;
+}
+
 static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -4290,6 +4295,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.enable_nmi_window = enable_nmi_window,
 	.enable_irq_window = enable_irq_window,
 	.update_cr8_intercept = update_cr8_intercept,
+	.set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
 
 	.set_tss_addr = svm_set_tss_addr,
 	.get_tdp_level = get_npt_level,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 8a8116a..c2bc989 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -643,6 +643,8 @@ static unsigned long *vmx_io_bitmap_a;
 static unsigned long *vmx_io_bitmap_b;
 static unsigned long *vmx_msr_bitmap_legacy;
 static unsigned long *vmx_msr_bitmap_longmode;
+static unsigned long *vmx_msr_bitmap_legacy_x2apic;
+static unsigned long *vmx_msr_bitmap_longmode_x2apic;
 
 static bool cpu_has_load_ia32_efer;
 static bool cpu_has_load_perf_global_ctrl;
@@ -767,6 +769,12 @@ static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
 		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
 }
 
+static inline bool cpu_has_vmx_virtualize_x2apic_mode(void)
+{
+	return vmcs_config.cpu_based_2nd_exec_ctrl &
+		SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
+}
+
 static inline bool cpu_has_vmx_apic_register_virt(void)
 {
 	return vmcs_config.cpu_based_2nd_exec_ctrl &
@@ -1830,6 +1838,24 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
 	vmx->guest_msrs[from] = tmp;
 }
 
+static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
+{
+	unsigned long *msr_bitmap;
+
+	if (vcpu->arch.apic_base & X2APIC_ENABLE)
+		if (is_long_mode(vcpu))
+			msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
+		else
+			msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
+	else
+		if (is_long_mode(vcpu))
+			msr_bitmap = vmx_msr_bitmap_longmode;
+		else
+			msr_bitmap = vmx_msr_bitmap_legacy;
+
+	vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
+}
+
 /*
  * Set up the vmcs to automatically save and restore system
  * msrs.  Don't touch the 64-bit msrs if the guest is in legacy
@@ -1838,7 +1864,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
 static void setup_msrs(struct vcpu_vmx *vmx)
 {
 	int save_nmsrs, index;
-	unsigned long *msr_bitmap;
 
 	save_nmsrs = 0;
 #ifdef CONFIG_X86_64
@@ -1870,14 +1895,8 @@ static void setup_msrs(struct vcpu_vmx *vmx)
 
 	vmx->save_nmsrs = save_nmsrs;
 
-	if (cpu_has_vmx_msr_bitmap()) {
-		if (is_long_mode(&vmx->vcpu))
-			msr_bitmap = vmx_msr_bitmap_longmode;
-		else
-			msr_bitmap = vmx_msr_bitmap_legacy;
-
-		vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
-	}
+	if (cpu_has_vmx_msr_bitmap())
+		vmx_set_msr_bitmap(&vmx->vcpu);
 }
 
 /*
@@ -2543,6 +2562,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 	if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
 		min2 = 0;
 		opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+			SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
 			SECONDARY_EXEC_WBINVD_EXITING |
 			SECONDARY_EXEC_ENABLE_VPID |
 			SECONDARY_EXEC_ENABLE_EPT |
@@ -2564,7 +2584,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 
 	if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
 		_cpu_based_2nd_exec_control &= ~(
-				SECONDARY_EXEC_APIC_REGISTER_VIRT);
+				SECONDARY_EXEC_APIC_REGISTER_VIRT |
+				SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
 
 	if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
 		/* CR3 accesses and invlpg don't need to cause VM Exits when EPT
@@ -3724,7 +3745,10 @@ static void free_vpid(struct vcpu_vmx *vmx)
 	spin_unlock(&vmx_vpid_lock);
 }
 
-static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
+#define MSR_TYPE_R	1
+#define MSR_TYPE_W	2
+static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
+						u32 msr, int type)
 {
 	int f = sizeof(unsigned long);
 
@@ -3737,20 +3761,93 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
 	 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
 	 */
 	if (msr <= 0x1fff) {
-		__clear_bit(msr, msr_bitmap + 0x000 / f); /* read-low */
-		__clear_bit(msr, msr_bitmap + 0x800 / f); /* write-low */
+		if (type & MSR_TYPE_R)
+			/* read-low */
+			__clear_bit(msr, msr_bitmap + 0x000 / f);
+
+		if (type & MSR_TYPE_W)
+			/* write-low */
+			__clear_bit(msr, msr_bitmap + 0x800 / f);
+
 	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
 		msr &= 0x1fff;
-		__clear_bit(msr, msr_bitmap + 0x400 / f); /* read-high */
-		__clear_bit(msr, msr_bitmap + 0xc00 / f); /* write-high */
+		if (type & MSR_TYPE_R)
+			/* read-high */
+			__clear_bit(msr, msr_bitmap + 0x400 / f);
+
+		if (type & MSR_TYPE_W)
+			/* write-high */
+			__clear_bit(msr, msr_bitmap + 0xc00 / f);
+
+	}
+}
+
+static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
+						u32 msr, int type)
+{
+	int f = sizeof(unsigned long);
+
+	if (!cpu_has_vmx_msr_bitmap())
+		return;
+
+	/*
+	 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
+	 * have the write-low and read-high bitmap offsets the wrong way round.
+	 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
+	 */
+	if (msr <= 0x1fff) {
+		if (type & MSR_TYPE_R)
+			/* read-low */
+			__set_bit(msr, msr_bitmap + 0x000 / f);
+
+		if (type & MSR_TYPE_W)
+			/* write-low */
+			__set_bit(msr, msr_bitmap + 0x800 / f);
+
+	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
+		msr &= 0x1fff;
+		if (type & MSR_TYPE_R)
+			/* read-high */
+			__set_bit(msr, msr_bitmap + 0x400 / f);
+
+		if (type & MSR_TYPE_W)
+			/* write-high */
+			__set_bit(msr, msr_bitmap + 0xc00 / f);
+
 	}
 }
 
 static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
 {
 	if (!longmode_only)
-		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr);
-	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr);
+		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
+						msr, MSR_TYPE_R | MSR_TYPE_W);
+	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
+					msr, MSR_TYPE_R | MSR_TYPE_W);
+}
+
+static void vmx_enable_intercept_msr_read_x2apic(u32 msr)
+{
+	__vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
+			msr, MSR_TYPE_R);
+	__vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
+			msr, MSR_TYPE_R);
+}
+
+static void vmx_disable_intercept_msr_read_x2apic(u32 msr)
+{
+	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
+			msr, MSR_TYPE_R);
+	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
+			msr, MSR_TYPE_R);
+}
+
+static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
+{
+	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
+			msr, MSR_TYPE_W);
+	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
+			msr, MSR_TYPE_W);
 }
 
 /*
@@ -3848,6 +3945,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
 		exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
 	if (!enable_apicv_reg || !irqchip_in_kernel(vmx->vcpu.kvm))
 		exec_control &= ~SECONDARY_EXEC_APIC_REGISTER_VIRT;
+	exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
 	return exec_control;
 }
 
@@ -6103,6 +6201,40 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
 	vmcs_write32(TPR_THRESHOLD, irr);
 }
 
+static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
+{
+	u32 exec_control, sec_exec_control;
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	/* There is not point to enable virtualize x2apic without enable
+	 * apicv
+	 */
+	if (!cpu_has_vmx_virtualize_x2apic_mode() || !enable_apicv_reg)
+		return;
+
+	if (set) {
+		exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+		/* virtualize x2apic mode relies on tpr shadow */
+		if (!(exec_control & CPU_BASED_TPR_SHADOW))
+			return;
+	}
+
+	sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+
+	if (set) {
+		sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+		sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
+	} else {
+		sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
+		if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))
+			sec_exec_control |=
+				SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+	}
+	vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
+
+	vmx_set_msr_bitmap(vcpu);
+}
+
 static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
 {
 	u32 exit_intr_info;
@@ -7366,6 +7498,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.enable_nmi_window = enable_nmi_window,
 	.enable_irq_window = enable_irq_window,
 	.update_cr8_intercept = update_cr8_intercept,
+	.set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode,
 
 	.set_tss_addr = vmx_set_tss_addr,
 	.get_tdp_level = get_ept_level,
@@ -7398,7 +7531,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 
 static int __init vmx_init(void)
 {
-	int r, i;
+	int r, i, msr;
 
 	rdmsrl_safe(MSR_EFER, &host_efer);
 
@@ -7419,11 +7552,19 @@ static int __init vmx_init(void)
 	if (!vmx_msr_bitmap_legacy)
 		goto out1;
 
+	vmx_msr_bitmap_legacy_x2apic =
+				(unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_msr_bitmap_legacy_x2apic)
+		goto out2;
 
 	vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
 	if (!vmx_msr_bitmap_longmode)
-		goto out2;
+		goto out3;
 
+	vmx_msr_bitmap_longmode_x2apic =
+				(unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_msr_bitmap_longmode_x2apic)
+		goto out4;
 
 	/*
 	 * Allow direct access to the PC debug port (it is often used for I/O
@@ -7455,6 +7596,24 @@ static int __init vmx_init(void)
 	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
 	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
 	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
+	memcpy(vmx_msr_bitmap_legacy_x2apic,
+			vmx_msr_bitmap_legacy, PAGE_SIZE);
+	memcpy(vmx_msr_bitmap_longmode_x2apic,
+			vmx_msr_bitmap_longmode, PAGE_SIZE);
+
+	if (enable_apicv_reg) {
+		for (msr = 0x800; msr <= 0x8ff; msr++)
+			vmx_disable_intercept_msr_read_x2apic(msr);
+
+		/* According SDM, in x2apic mode, the whole id reg is used.
+		 * But in KVM, it only use the highest eight bits. Need to
+		 * intercept it */
+		vmx_enable_intercept_msr_read_x2apic(0x802);
+		/* TMCCT */
+		vmx_enable_intercept_msr_read_x2apic(0x839);
+		/* TPR */
+		vmx_disable_intercept_msr_write_x2apic(0x808);
+	}
 
 	if (enable_ept) {
 		kvm_mmu_set_mask_ptes(0ull,
@@ -7468,8 +7627,10 @@ static int __init vmx_init(void)
 
 	return 0;
 
-out3:
+out4:
 	free_page((unsigned long)vmx_msr_bitmap_longmode);
+out3:
+	free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
 out2:
 	free_page((unsigned long)vmx_msr_bitmap_legacy);
 out1:
@@ -7481,6 +7642,8 @@ out:
 
 static void __exit vmx_exit(void)
 {
+	free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
+	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
 	free_page((unsigned long)vmx_msr_bitmap_legacy);
 	free_page((unsigned long)vmx_msr_bitmap_longmode);
 	free_page((unsigned long)vmx_io_bitmap_b);
--
1.7.1
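An aside for readers tracing the bitmap arithmetic in this patch: the VMX MSR bitmap is a single 4 KB page split into four 1 KB regions indexed by MSR number, and the x2apic MSRs 0x800-0x8ff map one-to-one onto xAPIC register offsets. A minimal user-space sketch of both facts (illustrative only; msr_intercepted() and X2APIC_MSR() are names invented here for clarity, not KVM symbols):

#include <stdbool.h>
#include <stdint.h>

/*
 * Layout walked by __vmx_{enable,disable}_intercept_for_msr():
 *   0x000-0x3ff  read bits  for MSRs 0x00000000-0x00001fff
 *   0x400-0x7ff  read bits  for MSRs 0xc0000000-0xc0001fff
 *   0x800-0xbff  write bits for MSRs 0x00000000-0x00001fff
 *   0xc00-0xfff  write bits for MSRs 0xc0000000-0xc0001fff
 * A set bit means "intercept (VM exit)", a clear bit means "pass through".
 */
static bool msr_intercepted(const uint8_t bitmap[4096], uint32_t msr, bool write)
{
	uint32_t base;

	if (msr <= 0x1fff) {
		base = write ? 0x800 : 0x000;
	} else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
		base = write ? 0xc00 : 0x400;
		msr &= 0x1fff;
	} else {
		return true;	/* MSRs outside both ranges always exit */
	}
	return bitmap[base + msr / 8] & (1u << (msr % 8));
}

/*
 * x2APIC MSR 0x800+n is the xAPIC register at offset n << 4, which is
 * where the special cases in vmx_init() come from:
 *   0x802 -> APIC ID (0x020): KVM models only eight id bits, keep read exits
 *   0x808 -> TPR     (0x080): the CPU virtualizes it, writes pass through too
 *   0x839 -> TMCCT   (0x390): the current count must be computed by KVM
 */
#define X2APIC_MSR(reg)	(0x800 + ((reg) >> 4))

This is why vmx_init() can first clear every read bit in 0x800-0x8ff and then re-enable read intercepts only for 0x802 and 0x839, while additionally clearing the write bit for 0x808.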