diff mbox

[RFC] kvm: x86: vmx: move some vmx setup from vmx_init() to hardware_setup()

Message ID 1414142304-6635-1-git-send-email-tiejun.chen@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Tiejun Chen Oct. 24, 2014, 9:18 a.m. UTC
It makes more sense to do all of the vmx hardware-specific setup in
vmx_x86_ops->hardware_setup() rather than in vmx_init().

Signed-off-by: Tiejun Chen <tiejun.chen@intel.com>
---
 arch/x86/kvm/vmx.c | 720 +++++++++++++++++++++++++++--------------------------
 1 file changed, 361 insertions(+), 359 deletions(-)

Comments

Paolo Bonzini Oct. 24, 2014, 10:48 a.m. UTC | #1
On 10/24/2014 11:18 AM, Tiejun Chen wrote:
> It makes more sense to do all of the vmx hardware-specific setup in
> vmx_x86_ops->hardware_setup() rather than in vmx_init().
> 
> Signed-off-by: Tiejun Chen <tiejun.chen@intel.com>

Please split this patch into multiple parts.  It is quite hard to review
this way.

Paolo

> ---
>  arch/x86/kvm/vmx.c | 720 +++++++++++++++++++++++++++--------------------------
>  1 file changed, 361 insertions(+), 359 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 04fa1b8..9270076 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -3106,10 +3106,302 @@ static __init int alloc_kvm_area(void)
>  	return 0;
>  }
>  
> +#define MSR_TYPE_R	1
> +#define MSR_TYPE_W	2
> +static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
> +						u32 msr, int type)
> +{
> +	int f = sizeof(unsigned long);
> +
> +	if (!cpu_has_vmx_msr_bitmap())
> +		return;
> +
> +	/*
> +	 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
> +	 * have the write-low and read-high bitmap offsets the wrong way round.
> +	 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
> +	 */
> +	if (msr <= 0x1fff) {
> +		if (type & MSR_TYPE_R)
> +			/* read-low */
> +			__clear_bit(msr, msr_bitmap + 0x000 / f);
> +
> +		if (type & MSR_TYPE_W)
> +			/* write-low */
> +			__clear_bit(msr, msr_bitmap + 0x800 / f);
> +
> +	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
> +		msr &= 0x1fff;
> +		if (type & MSR_TYPE_R)
> +			/* read-high */
> +			__clear_bit(msr, msr_bitmap + 0x400 / f);
> +
> +		if (type & MSR_TYPE_W)
> +			/* write-high */
> +			__clear_bit(msr, msr_bitmap + 0xc00 / f);
> +
> +	}
> +}
> +
> +static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
> +						u32 msr, int type)
> +{
> +	int f = sizeof(unsigned long);
> +
> +	if (!cpu_has_vmx_msr_bitmap())
> +		return;
> +
> +	/*
> +	 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
> +	 * have the write-low and read-high bitmap offsets the wrong way round.
> +	 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
> +	 */
> +	if (msr <= 0x1fff) {
> +		if (type & MSR_TYPE_R)
> +			/* read-low */
> +			__set_bit(msr, msr_bitmap + 0x000 / f);
> +
> +		if (type & MSR_TYPE_W)
> +			/* write-low */
> +			__set_bit(msr, msr_bitmap + 0x800 / f);
> +
> +	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
> +		msr &= 0x1fff;
> +		if (type & MSR_TYPE_R)
> +			/* read-high */
> +			__set_bit(msr, msr_bitmap + 0x400 / f);
> +
> +		if (type & MSR_TYPE_W)
> +			/* write-high */
> +			__set_bit(msr, msr_bitmap + 0xc00 / f);
> +
> +	}
> +}
> +
> +static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
> +{
> +	if (!longmode_only)
> +		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
> +						msr, MSR_TYPE_R | MSR_TYPE_W);
> +	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
> +						msr, MSR_TYPE_R | MSR_TYPE_W);
> +}
> +
> +static void vmx_enable_intercept_msr_read_x2apic(u32 msr)
> +{
> +	__vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> +			msr, MSR_TYPE_R);
> +	__vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> +			msr, MSR_TYPE_R);
> +}
> +
> +static void vmx_disable_intercept_msr_read_x2apic(u32 msr)
> +{
> +	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> +			msr, MSR_TYPE_R);
> +	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> +			msr, MSR_TYPE_R);
> +}
> +
> +static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
> +{
> +	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> +			msr, MSR_TYPE_W);
> +	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> +			msr, MSR_TYPE_W);
> +}
> +
> +static int vmx_vm_has_apicv(struct kvm *kvm)
> +{
> +	return enable_apicv && irqchip_in_kernel(kvm);
> +}
> +
> +static void ept_set_mmio_spte_mask(void)
> +{
> +	/*
> +	 * EPT Misconfigurations can be generated if the value of bits 2:0
> +	 * of an EPT paging-structure entry is 110b (write/execute).
> +	 * Also, magic bits (0x3ull << 62) is set to quickly identify mmio
> +	 * spte.
> +	 */
> +	kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
> +}
> +
> +static int __grow_ple_window(int val)
> +{
> +	if (ple_window_grow < 1)
> +		return ple_window;
> +
> +	val = min(val, ple_window_actual_max);
> +
> +	if (ple_window_grow < ple_window)
> +		val *= ple_window_grow;
> +	else
> +		val += ple_window_grow;
> +
> +	return val;
> +}
> +
> +static int __shrink_ple_window(int val, int modifier, int minimum)
> +{
> +	if (modifier < 1)
> +		return ple_window;
> +
> +	if (modifier < ple_window)
> +		val /= modifier;
> +	else
> +		val -= modifier;
> +
> +	return max(val, minimum);
> +}
> +
> +static void grow_ple_window(struct kvm_vcpu *vcpu)
> +{
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +	int old = vmx->ple_window;
> +
> +	vmx->ple_window = __grow_ple_window(old);
> +
> +	if (vmx->ple_window != old)
> +		vmx->ple_window_dirty = true;
> +
> +	trace_kvm_ple_window_grow(vcpu->vcpu_id, vmx->ple_window, old);
> +}
> +
> +static void shrink_ple_window(struct kvm_vcpu *vcpu)
> +{
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +	int old = vmx->ple_window;
> +
> +	vmx->ple_window = __shrink_ple_window(old,
> +	                                      ple_window_shrink, ple_window);
> +
> +	if (vmx->ple_window != old)
> +		vmx->ple_window_dirty = true;
> +
> +	trace_kvm_ple_window_shrink(vcpu->vcpu_id, vmx->ple_window, old);
> +}
> +
> +/*
> + * ple_window_actual_max is computed to be one grow_ple_window() below
> + * ple_window_max. (See __grow_ple_window for the reason.)
> + * This prevents overflows, because ple_window_max is int.
> + * ple_window_max effectively rounded down to a multiple of ple_window_grow in
> + * this process.
> + * ple_window_max is also prevented from setting vmx->ple_window < ple_window.
> + */
> +static void update_ple_window_actual_max(void)
> +{
> +	ple_window_actual_max =
> +			__shrink_ple_window(max(ple_window_max, ple_window),
> +			                    ple_window_grow, INT_MIN);
> +}
> +
> +
>  static __init int hardware_setup(void)
>  {
> -	if (setup_vmcs_config(&vmcs_config) < 0)
> -		return -EIO;
> +	int r = -ENOMEM, i, msr;
> +
> +	rdmsrl_safe(MSR_EFER, &host_efer);
> +
> +	for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
> +		kvm_define_shared_msr(i, vmx_msr_index[i]);
> +
> +	vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
> +	if (!vmx_io_bitmap_a)
> +		return r;
> +
> +	vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
> +	if (!vmx_io_bitmap_b)
> +		goto out;
> +
> +	vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
> +	if (!vmx_msr_bitmap_legacy)
> +		goto out1;
> +
> +	vmx_msr_bitmap_legacy_x2apic =
> +				(unsigned long *)__get_free_page(GFP_KERNEL);
> +	if (!vmx_msr_bitmap_legacy_x2apic)
> +		goto out2;
> +
> +	vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
> +	if (!vmx_msr_bitmap_longmode)
> +		goto out3;
> +
> +	vmx_msr_bitmap_longmode_x2apic =
> +				(unsigned long *)__get_free_page(GFP_KERNEL);
> +	if (!vmx_msr_bitmap_longmode_x2apic)
> +		goto out4;
> +	vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
> +	if (!vmx_vmread_bitmap)
> +		goto out5;
> +
> +	vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
> +	if (!vmx_vmwrite_bitmap)
> +		goto out6;
> +
> +	memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
> +	memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
> +
> +	/*
> +	 * Allow direct access to the PC debug port (it is often used for I/O
> +	 * delays, but the vmexits simply slow things down).
> +	 */
> +	memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
> +	clear_bit(0x80, vmx_io_bitmap_a);
> +
> +	memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
> +
> +	memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
> +	memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
> +
> +	vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
> +	vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
> +	vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
> +	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
> +	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
> +	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
> +	vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
> +
> +	memcpy(vmx_msr_bitmap_legacy_x2apic,
> +			vmx_msr_bitmap_legacy, PAGE_SIZE);
> +	memcpy(vmx_msr_bitmap_longmode_x2apic,
> +			vmx_msr_bitmap_longmode, PAGE_SIZE);
> +
> +	if (enable_apicv) {
> +		for (msr = 0x800; msr <= 0x8ff; msr++)
> +			vmx_disable_intercept_msr_read_x2apic(msr);
> +
> +		/* According SDM, in x2apic mode, the whole id reg is used.
> +		 * But in KVM, it only use the highest eight bits. Need to
> +		 * intercept it */
> +		vmx_enable_intercept_msr_read_x2apic(0x802);
> +		/* TMCCT */
> +		vmx_enable_intercept_msr_read_x2apic(0x839);
> +		/* TPR */
> +		vmx_disable_intercept_msr_write_x2apic(0x808);
> +		/* EOI */
> +		vmx_disable_intercept_msr_write_x2apic(0x80b);
> +		/* SELF-IPI */
> +		vmx_disable_intercept_msr_write_x2apic(0x83f);
> +	}
> +
> +	if (enable_ept) {
> +		kvm_mmu_set_mask_ptes(0ull,
> +			(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
> +			(enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
> +			0ull, VMX_EPT_EXECUTABLE_MASK);
> +		ept_set_mmio_spte_mask();
> +		kvm_enable_tdp();
> +	} else
> +		kvm_disable_tdp();
> +
> +	update_ple_window_actual_max();
> +
> +	if (setup_vmcs_config(&vmcs_config) < 0) {
> +		r = -EIO;
> +		goto out7;
> +    }
>  
>  	if (boot_cpu_has(X86_FEATURE_NX))
>  		kvm_enable_efer_bits(EFER_NX);
> @@ -3169,10 +3461,38 @@ static __init int hardware_setup(void)
>  		nested_vmx_setup_ctls_msrs();
>  
>  	return alloc_kvm_area();
> +
> +out7:
> +	free_page((unsigned long)vmx_vmwrite_bitmap);
> +out6:
> +	free_page((unsigned long)vmx_vmread_bitmap);
> +out5:
> +	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
> +out4:
> +	free_page((unsigned long)vmx_msr_bitmap_longmode);
> +out3:
> +	free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
> +out2:
> +	free_page((unsigned long)vmx_msr_bitmap_legacy);
> +out1:
> +	free_page((unsigned long)vmx_io_bitmap_b);
> +out:
> +	free_page((unsigned long)vmx_io_bitmap_a);
> +
> +    return r;
>  }
>  
>  static __exit void hardware_unsetup(void)
>  {
> +	free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
> +	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
> +	free_page((unsigned long)vmx_msr_bitmap_legacy);
> +	free_page((unsigned long)vmx_msr_bitmap_longmode);
> +	free_page((unsigned long)vmx_io_bitmap_b);
> +	free_page((unsigned long)vmx_io_bitmap_a);
> +	free_page((unsigned long)vmx_vmwrite_bitmap);
> +	free_page((unsigned long)vmx_vmread_bitmap);
> +
>  	free_kvm_area();
>  }
>  
> @@ -4057,162 +4377,52 @@ static int alloc_apic_access_page(struct kvm *kvm)
>  	kvm->arch.apic_access_page_done = true;
>  out:
>  	mutex_unlock(&kvm->slots_lock);
> -	return r;
> -}
> -
> -static int alloc_identity_pagetable(struct kvm *kvm)
> -{
> -	/* Called with kvm->slots_lock held. */
> -
> -	struct kvm_userspace_memory_region kvm_userspace_mem;
> -	int r = 0;
> -
> -	BUG_ON(kvm->arch.ept_identity_pagetable_done);
> -
> -	kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
> -	kvm_userspace_mem.flags = 0;
> -	kvm_userspace_mem.guest_phys_addr =
> -		kvm->arch.ept_identity_map_addr;
> -	kvm_userspace_mem.memory_size = PAGE_SIZE;
> -	r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
> -
> -	return r;
> -}
> -
> -static void allocate_vpid(struct vcpu_vmx *vmx)
> -{
> -	int vpid;
> -
> -	vmx->vpid = 0;
> -	if (!enable_vpid)
> -		return;
> -	spin_lock(&vmx_vpid_lock);
> -	vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
> -	if (vpid < VMX_NR_VPIDS) {
> -		vmx->vpid = vpid;
> -		__set_bit(vpid, vmx_vpid_bitmap);
> -	}
> -	spin_unlock(&vmx_vpid_lock);
> -}
> -
> -static void free_vpid(struct vcpu_vmx *vmx)
> -{
> -	if (!enable_vpid)
> -		return;
> -	spin_lock(&vmx_vpid_lock);
> -	if (vmx->vpid != 0)
> -		__clear_bit(vmx->vpid, vmx_vpid_bitmap);
> -	spin_unlock(&vmx_vpid_lock);
> -}
> -
> -#define MSR_TYPE_R	1
> -#define MSR_TYPE_W	2
> -static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
> -						u32 msr, int type)
> -{
> -	int f = sizeof(unsigned long);
> -
> -	if (!cpu_has_vmx_msr_bitmap())
> -		return;
> -
> -	/*
> -	 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
> -	 * have the write-low and read-high bitmap offsets the wrong way round.
> -	 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
> -	 */
> -	if (msr <= 0x1fff) {
> -		if (type & MSR_TYPE_R)
> -			/* read-low */
> -			__clear_bit(msr, msr_bitmap + 0x000 / f);
> -
> -		if (type & MSR_TYPE_W)
> -			/* write-low */
> -			__clear_bit(msr, msr_bitmap + 0x800 / f);
> -
> -	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
> -		msr &= 0x1fff;
> -		if (type & MSR_TYPE_R)
> -			/* read-high */
> -			__clear_bit(msr, msr_bitmap + 0x400 / f);
> -
> -		if (type & MSR_TYPE_W)
> -			/* write-high */
> -			__clear_bit(msr, msr_bitmap + 0xc00 / f);
> -
> -	}
> -}
> -
> -static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
> -						u32 msr, int type)
> -{
> -	int f = sizeof(unsigned long);
> -
> -	if (!cpu_has_vmx_msr_bitmap())
> -		return;
> -
> -	/*
> -	 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
> -	 * have the write-low and read-high bitmap offsets the wrong way round.
> -	 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
> -	 */
> -	if (msr <= 0x1fff) {
> -		if (type & MSR_TYPE_R)
> -			/* read-low */
> -			__set_bit(msr, msr_bitmap + 0x000 / f);
> -
> -		if (type & MSR_TYPE_W)
> -			/* write-low */
> -			__set_bit(msr, msr_bitmap + 0x800 / f);
> -
> -	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
> -		msr &= 0x1fff;
> -		if (type & MSR_TYPE_R)
> -			/* read-high */
> -			__set_bit(msr, msr_bitmap + 0x400 / f);
> -
> -		if (type & MSR_TYPE_W)
> -			/* write-high */
> -			__set_bit(msr, msr_bitmap + 0xc00 / f);
> -
> -	}
> -}
> -
> -static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
> -{
> -	if (!longmode_only)
> -		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
> -						msr, MSR_TYPE_R | MSR_TYPE_W);
> -	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
> -						msr, MSR_TYPE_R | MSR_TYPE_W);
> -}
> -
> -static void vmx_enable_intercept_msr_read_x2apic(u32 msr)
> -{
> -	__vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> -			msr, MSR_TYPE_R);
> -	__vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> -			msr, MSR_TYPE_R);
> +	return r;
>  }
>  
> -static void vmx_disable_intercept_msr_read_x2apic(u32 msr)
> +static int alloc_identity_pagetable(struct kvm *kvm)
>  {
> -	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> -			msr, MSR_TYPE_R);
> -	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> -			msr, MSR_TYPE_R);
> +	/* Called with kvm->slots_lock held. */
> +
> +	struct kvm_userspace_memory_region kvm_userspace_mem;
> +	int r = 0;
> +
> +	BUG_ON(kvm->arch.ept_identity_pagetable_done);
> +
> +	kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
> +	kvm_userspace_mem.flags = 0;
> +	kvm_userspace_mem.guest_phys_addr =
> +		kvm->arch.ept_identity_map_addr;
> +	kvm_userspace_mem.memory_size = PAGE_SIZE;
> +	r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
> +
> +	return r;
>  }
>  
> -static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
> +static void allocate_vpid(struct vcpu_vmx *vmx)
>  {
> -	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> -			msr, MSR_TYPE_W);
> -	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> -			msr, MSR_TYPE_W);
> +	int vpid;
> +
> +	vmx->vpid = 0;
> +	if (!enable_vpid)
> +		return;
> +	spin_lock(&vmx_vpid_lock);
> +	vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
> +	if (vpid < VMX_NR_VPIDS) {
> +		vmx->vpid = vpid;
> +		__set_bit(vpid, vmx_vpid_bitmap);
> +	}
> +	spin_unlock(&vmx_vpid_lock);
>  }
>  
> -static int vmx_vm_has_apicv(struct kvm *kvm)
> +static void free_vpid(struct vcpu_vmx *vmx)
>  {
> -	return enable_apicv && irqchip_in_kernel(kvm);
> +	if (!enable_vpid)
> +		return;
> +	spin_lock(&vmx_vpid_lock);
> +	if (vmx->vpid != 0)
> +		__clear_bit(vmx->vpid, vmx_vpid_bitmap);
> +	spin_unlock(&vmx_vpid_lock);
>  }
>  
>  /*
> @@ -4376,17 +4586,6 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
>  	return exec_control;
>  }
>  
> -static void ept_set_mmio_spte_mask(void)
> -{
> -	/*
> -	 * EPT Misconfigurations can be generated if the value of bits 2:0
> -	 * of an EPT paging-structure entry is 110b (write/execute).
> -	 * Also, magic bits (0x3ull << 62) is set to quickly identify mmio
> -	 * spte.
> -	 */
> -	kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
> -}
> -
>  /*
>   * Sets up the vmcs for emulated real mode.
>   */
> @@ -5706,76 +5905,6 @@ out:
>  	return ret;
>  }
>  
> -static int __grow_ple_window(int val)
> -{
> -	if (ple_window_grow < 1)
> -		return ple_window;
> -
> -	val = min(val, ple_window_actual_max);
> -
> -	if (ple_window_grow < ple_window)
> -		val *= ple_window_grow;
> -	else
> -		val += ple_window_grow;
> -
> -	return val;
> -}
> -
> -static int __shrink_ple_window(int val, int modifier, int minimum)
> -{
> -	if (modifier < 1)
> -		return ple_window;
> -
> -	if (modifier < ple_window)
> -		val /= modifier;
> -	else
> -		val -= modifier;
> -
> -	return max(val, minimum);
> -}
> -
> -static void grow_ple_window(struct kvm_vcpu *vcpu)
> -{
> -	struct vcpu_vmx *vmx = to_vmx(vcpu);
> -	int old = vmx->ple_window;
> -
> -	vmx->ple_window = __grow_ple_window(old);
> -
> -	if (vmx->ple_window != old)
> -		vmx->ple_window_dirty = true;
> -
> -	trace_kvm_ple_window_grow(vcpu->vcpu_id, vmx->ple_window, old);
> -}
> -
> -static void shrink_ple_window(struct kvm_vcpu *vcpu)
> -{
> -	struct vcpu_vmx *vmx = to_vmx(vcpu);
> -	int old = vmx->ple_window;
> -
> -	vmx->ple_window = __shrink_ple_window(old,
> -	                                      ple_window_shrink, ple_window);
> -
> -	if (vmx->ple_window != old)
> -		vmx->ple_window_dirty = true;
> -
> -	trace_kvm_ple_window_shrink(vcpu->vcpu_id, vmx->ple_window, old);
> -}
> -
> -/*
> - * ple_window_actual_max is computed to be one grow_ple_window() below
> - * ple_window_max. (See __grow_ple_window for the reason.)
> - * This prevents overflows, because ple_window_max is int.
> - * ple_window_max effectively rounded down to a multiple of ple_window_grow in
> - * this process.
> - * ple_window_max is also prevented from setting vmx->ple_window < ple_window.
> - */
> -static void update_ple_window_actual_max(void)
> -{
> -	ple_window_actual_max =
> -			__shrink_ple_window(max(ple_window_max, ple_window),
> -			                    ple_window_grow, INT_MIN);
> -}
> -
>  /*
>   * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
>   * exiting, so only get here on cpu with PAUSE-Loop-Exiting.
> @@ -9158,150 +9287,23 @@ static struct kvm_x86_ops vmx_x86_ops = {
>  
>  static int __init vmx_init(void)
>  {
> -	int r, i, msr;
> -
> -	rdmsrl_safe(MSR_EFER, &host_efer);
> -
> -	for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
> -		kvm_define_shared_msr(i, vmx_msr_index[i]);
> -
> -	vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
> -	if (!vmx_io_bitmap_a)
> -		return -ENOMEM;
> -
> -	r = -ENOMEM;
> -
> -	vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
> -	if (!vmx_io_bitmap_b)
> -		goto out;
> -
> -	vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
> -	if (!vmx_msr_bitmap_legacy)
> -		goto out1;
> -
> -	vmx_msr_bitmap_legacy_x2apic =
> -				(unsigned long *)__get_free_page(GFP_KERNEL);
> -	if (!vmx_msr_bitmap_legacy_x2apic)
> -		goto out2;
> -
> -	vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
> -	if (!vmx_msr_bitmap_longmode)
> -		goto out3;
> -
> -	vmx_msr_bitmap_longmode_x2apic =
> -				(unsigned long *)__get_free_page(GFP_KERNEL);
> -	if (!vmx_msr_bitmap_longmode_x2apic)
> -		goto out4;
> -	vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
> -	if (!vmx_vmread_bitmap)
> -		goto out5;
> -
> -	vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
> -	if (!vmx_vmwrite_bitmap)
> -		goto out6;
> -
> -	memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
> -	memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
> -
> -	/*
> -	 * Allow direct access to the PC debug port (it is often used for I/O
> -	 * delays, but the vmexits simply slow things down).
> -	 */
> -	memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
> -	clear_bit(0x80, vmx_io_bitmap_a);
> -
> -	memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
> -
> -	memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
> -	memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
> -
> -	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
> +	int r = -ENOMEM;
>  
>  	r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
>  		     __alignof__(struct vcpu_vmx), THIS_MODULE);
>  	if (r)
> -		goto out7;
> +		return r;
>  
>  #ifdef CONFIG_KEXEC
>  	rcu_assign_pointer(crash_vmclear_loaded_vmcss,
>  			   crash_vmclear_local_loaded_vmcss);
>  #endif
>  
> -	vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
> -	vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
> -	vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
> -	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
> -	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
> -	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
> -	vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
> -
> -	memcpy(vmx_msr_bitmap_legacy_x2apic,
> -			vmx_msr_bitmap_legacy, PAGE_SIZE);
> -	memcpy(vmx_msr_bitmap_longmode_x2apic,
> -			vmx_msr_bitmap_longmode, PAGE_SIZE);
> -
> -	if (enable_apicv) {
> -		for (msr = 0x800; msr <= 0x8ff; msr++)
> -			vmx_disable_intercept_msr_read_x2apic(msr);
> -
> -		/* According SDM, in x2apic mode, the whole id reg is used.
> -		 * But in KVM, it only use the highest eight bits. Need to
> -		 * intercept it */
> -		vmx_enable_intercept_msr_read_x2apic(0x802);
> -		/* TMCCT */
> -		vmx_enable_intercept_msr_read_x2apic(0x839);
> -		/* TPR */
> -		vmx_disable_intercept_msr_write_x2apic(0x808);
> -		/* EOI */
> -		vmx_disable_intercept_msr_write_x2apic(0x80b);
> -		/* SELF-IPI */
> -		vmx_disable_intercept_msr_write_x2apic(0x83f);
> -	}
> -
> -	if (enable_ept) {
> -		kvm_mmu_set_mask_ptes(0ull,
> -			(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
> -			(enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
> -			0ull, VMX_EPT_EXECUTABLE_MASK);
> -		ept_set_mmio_spte_mask();
> -		kvm_enable_tdp();
> -	} else
> -		kvm_disable_tdp();
> -
> -	update_ple_window_actual_max();
> -
>  	return 0;
> -
> -out7:
> -	free_page((unsigned long)vmx_vmwrite_bitmap);
> -out6:
> -	free_page((unsigned long)vmx_vmread_bitmap);
> -out5:
> -	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
> -out4:
> -	free_page((unsigned long)vmx_msr_bitmap_longmode);
> -out3:
> -	free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
> -out2:
> -	free_page((unsigned long)vmx_msr_bitmap_legacy);
> -out1:
> -	free_page((unsigned long)vmx_io_bitmap_b);
> -out:
> -	free_page((unsigned long)vmx_io_bitmap_a);
> -	return r;
>  }
>  
>  static void __exit vmx_exit(void)
>  {
> -	free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
> -	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
> -	free_page((unsigned long)vmx_msr_bitmap_legacy);
> -	free_page((unsigned long)vmx_msr_bitmap_longmode);
> -	free_page((unsigned long)vmx_io_bitmap_b);
> -	free_page((unsigned long)vmx_io_bitmap_a);
> -	free_page((unsigned long)vmx_vmwrite_bitmap);
> -	free_page((unsigned long)vmx_vmread_bitmap);
> -
>  #ifdef CONFIG_KEXEC
>  	RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
>  	synchronize_rcu();
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 04fa1b8..9270076 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3106,10 +3106,302 @@  static __init int alloc_kvm_area(void)
 	return 0;
 }
 
+#define MSR_TYPE_R	1
+#define MSR_TYPE_W	2
+static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
+						u32 msr, int type)
+{
+	int f = sizeof(unsigned long);
+
+	if (!cpu_has_vmx_msr_bitmap())
+		return;
+
+	/*
+	 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
+	 * have the write-low and read-high bitmap offsets the wrong way round.
+	 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
+	 */
+	if (msr <= 0x1fff) {
+		if (type & MSR_TYPE_R)
+			/* read-low */
+			__clear_bit(msr, msr_bitmap + 0x000 / f);
+
+		if (type & MSR_TYPE_W)
+			/* write-low */
+			__clear_bit(msr, msr_bitmap + 0x800 / f);
+
+	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
+		msr &= 0x1fff;
+		if (type & MSR_TYPE_R)
+			/* read-high */
+			__clear_bit(msr, msr_bitmap + 0x400 / f);
+
+		if (type & MSR_TYPE_W)
+			/* write-high */
+			__clear_bit(msr, msr_bitmap + 0xc00 / f);
+
+	}
+}
+
+static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
+						u32 msr, int type)
+{
+	int f = sizeof(unsigned long);
+
+	if (!cpu_has_vmx_msr_bitmap())
+		return;
+
+	/*
+	 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
+	 * have the write-low and read-high bitmap offsets the wrong way round.
+	 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
+	 */
+	if (msr <= 0x1fff) {
+		if (type & MSR_TYPE_R)
+			/* read-low */
+			__set_bit(msr, msr_bitmap + 0x000 / f);
+
+		if (type & MSR_TYPE_W)
+			/* write-low */
+			__set_bit(msr, msr_bitmap + 0x800 / f);
+
+	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
+		msr &= 0x1fff;
+		if (type & MSR_TYPE_R)
+			/* read-high */
+			__set_bit(msr, msr_bitmap + 0x400 / f);
+
+		if (type & MSR_TYPE_W)
+			/* write-high */
+			__set_bit(msr, msr_bitmap + 0xc00 / f);
+
+	}
+}
+
+static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
+{
+	if (!longmode_only)
+		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
+						msr, MSR_TYPE_R | MSR_TYPE_W);
+	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
+						msr, MSR_TYPE_R | MSR_TYPE_W);
+}
+
+static void vmx_enable_intercept_msr_read_x2apic(u32 msr)
+{
+	__vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
+			msr, MSR_TYPE_R);
+	__vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
+			msr, MSR_TYPE_R);
+}
+
+static void vmx_disable_intercept_msr_read_x2apic(u32 msr)
+{
+	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
+			msr, MSR_TYPE_R);
+	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
+			msr, MSR_TYPE_R);
+}
+
+static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
+{
+	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
+			msr, MSR_TYPE_W);
+	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
+			msr, MSR_TYPE_W);
+}
+
+static int vmx_vm_has_apicv(struct kvm *kvm)
+{
+	return enable_apicv && irqchip_in_kernel(kvm);
+}
+
+static void ept_set_mmio_spte_mask(void)
+{
+	/*
+	 * EPT Misconfigurations can be generated if the value of bits 2:0
+	 * of an EPT paging-structure entry is 110b (write/execute).
+	 * Also, magic bits (0x3ull << 62) is set to quickly identify mmio
+	 * spte.
+	 */
+	kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
+}
+
+static int __grow_ple_window(int val)
+{
+	if (ple_window_grow < 1)
+		return ple_window;
+
+	val = min(val, ple_window_actual_max);
+
+	if (ple_window_grow < ple_window)
+		val *= ple_window_grow;
+	else
+		val += ple_window_grow;
+
+	return val;
+}
+
+static int __shrink_ple_window(int val, int modifier, int minimum)
+{
+	if (modifier < 1)
+		return ple_window;
+
+	if (modifier < ple_window)
+		val /= modifier;
+	else
+		val -= modifier;
+
+	return max(val, minimum);
+}
+
+static void grow_ple_window(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	int old = vmx->ple_window;
+
+	vmx->ple_window = __grow_ple_window(old);
+
+	if (vmx->ple_window != old)
+		vmx->ple_window_dirty = true;
+
+	trace_kvm_ple_window_grow(vcpu->vcpu_id, vmx->ple_window, old);
+}
+
+static void shrink_ple_window(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	int old = vmx->ple_window;
+
+	vmx->ple_window = __shrink_ple_window(old,
+	                                      ple_window_shrink, ple_window);
+
+	if (vmx->ple_window != old)
+		vmx->ple_window_dirty = true;
+
+	trace_kvm_ple_window_shrink(vcpu->vcpu_id, vmx->ple_window, old);
+}
+
+/*
+ * ple_window_actual_max is computed to be one grow_ple_window() below
+ * ple_window_max. (See __grow_ple_window for the reason.)
+ * This prevents overflows, because ple_window_max is int.
+ * ple_window_max effectively rounded down to a multiple of ple_window_grow in
+ * this process.
+ * ple_window_max is also prevented from setting vmx->ple_window < ple_window.
+ */
+static void update_ple_window_actual_max(void)
+{
+	ple_window_actual_max =
+			__shrink_ple_window(max(ple_window_max, ple_window),
+			                    ple_window_grow, INT_MIN);
+}
+
+
 static __init int hardware_setup(void)
 {
-	if (setup_vmcs_config(&vmcs_config) < 0)
-		return -EIO;
+	int r = -ENOMEM, i, msr;
+
+	rdmsrl_safe(MSR_EFER, &host_efer);
+
+	for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
+		kvm_define_shared_msr(i, vmx_msr_index[i]);
+
+	vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_io_bitmap_a)
+		return r;
+
+	vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_io_bitmap_b)
+		goto out;
+
+	vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_msr_bitmap_legacy)
+		goto out1;
+
+	vmx_msr_bitmap_legacy_x2apic =
+				(unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_msr_bitmap_legacy_x2apic)
+		goto out2;
+
+	vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_msr_bitmap_longmode)
+		goto out3;
+
+	vmx_msr_bitmap_longmode_x2apic =
+				(unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_msr_bitmap_longmode_x2apic)
+		goto out4;
+	vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_vmread_bitmap)
+		goto out5;
+
+	vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_vmwrite_bitmap)
+		goto out6;
+
+	memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
+	memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
+
+	/*
+	 * Allow direct access to the PC debug port (it is often used for I/O
+	 * delays, but the vmexits simply slow things down).
+	 */
+	memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
+	clear_bit(0x80, vmx_io_bitmap_a);
+
+	memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
+
+	memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
+	memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
+
+	vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
+	vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
+	vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
+	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
+	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
+	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
+	vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
+
+	memcpy(vmx_msr_bitmap_legacy_x2apic,
+			vmx_msr_bitmap_legacy, PAGE_SIZE);
+	memcpy(vmx_msr_bitmap_longmode_x2apic,
+			vmx_msr_bitmap_longmode, PAGE_SIZE);
+
+	if (enable_apicv) {
+		for (msr = 0x800; msr <= 0x8ff; msr++)
+			vmx_disable_intercept_msr_read_x2apic(msr);
+
+		/* According SDM, in x2apic mode, the whole id reg is used.
+		 * But in KVM, it only use the highest eight bits. Need to
+		 * intercept it */
+		vmx_enable_intercept_msr_read_x2apic(0x802);
+		/* TMCCT */
+		vmx_enable_intercept_msr_read_x2apic(0x839);
+		/* TPR */
+		vmx_disable_intercept_msr_write_x2apic(0x808);
+		/* EOI */
+		vmx_disable_intercept_msr_write_x2apic(0x80b);
+		/* SELF-IPI */
+		vmx_disable_intercept_msr_write_x2apic(0x83f);
+	}
+
+	if (enable_ept) {
+		kvm_mmu_set_mask_ptes(0ull,
+			(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
+			(enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
+			0ull, VMX_EPT_EXECUTABLE_MASK);
+		ept_set_mmio_spte_mask();
+		kvm_enable_tdp();
+	} else
+		kvm_disable_tdp();
+
+	update_ple_window_actual_max();
+
+	if (setup_vmcs_config(&vmcs_config) < 0) {
+		r = -EIO;
+		goto out7;
+    }
 
 	if (boot_cpu_has(X86_FEATURE_NX))
 		kvm_enable_efer_bits(EFER_NX);
@@ -3169,10 +3461,38 @@  static __init int hardware_setup(void)
 		nested_vmx_setup_ctls_msrs();
 
 	return alloc_kvm_area();
+
+out7:
+	free_page((unsigned long)vmx_vmwrite_bitmap);
+out6:
+	free_page((unsigned long)vmx_vmread_bitmap);
+out5:
+	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
+out4:
+	free_page((unsigned long)vmx_msr_bitmap_longmode);
+out3:
+	free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
+out2:
+	free_page((unsigned long)vmx_msr_bitmap_legacy);
+out1:
+	free_page((unsigned long)vmx_io_bitmap_b);
+out:
+	free_page((unsigned long)vmx_io_bitmap_a);
+
+	return r;
 }
 
 static __exit void hardware_unsetup(void)
 {
+	free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
+	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
+	free_page((unsigned long)vmx_msr_bitmap_legacy);
+	free_page((unsigned long)vmx_msr_bitmap_longmode);
+	free_page((unsigned long)vmx_io_bitmap_b);
+	free_page((unsigned long)vmx_io_bitmap_a);
+	free_page((unsigned long)vmx_vmwrite_bitmap);
+	free_page((unsigned long)vmx_vmread_bitmap);
+
 	free_kvm_area();
 }
 
@@ -4057,162 +4377,52 @@  static int alloc_apic_access_page(struct kvm *kvm)
 	kvm->arch.apic_access_page_done = true;
 out:
 	mutex_unlock(&kvm->slots_lock);
-	return r;
-}
-
-static int alloc_identity_pagetable(struct kvm *kvm)
-{
-	/* Called with kvm->slots_lock held. */
-
-	struct kvm_userspace_memory_region kvm_userspace_mem;
-	int r = 0;
-
-	BUG_ON(kvm->arch.ept_identity_pagetable_done);
-
-	kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
-	kvm_userspace_mem.flags = 0;
-	kvm_userspace_mem.guest_phys_addr =
-		kvm->arch.ept_identity_map_addr;
-	kvm_userspace_mem.memory_size = PAGE_SIZE;
-	r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
-
-	return r;
-}
-
-static void allocate_vpid(struct vcpu_vmx *vmx)
-{
-	int vpid;
-
-	vmx->vpid = 0;
-	if (!enable_vpid)
-		return;
-	spin_lock(&vmx_vpid_lock);
-	vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
-	if (vpid < VMX_NR_VPIDS) {
-		vmx->vpid = vpid;
-		__set_bit(vpid, vmx_vpid_bitmap);
-	}
-	spin_unlock(&vmx_vpid_lock);
-}
-
-static void free_vpid(struct vcpu_vmx *vmx)
-{
-	if (!enable_vpid)
-		return;
-	spin_lock(&vmx_vpid_lock);
-	if (vmx->vpid != 0)
-		__clear_bit(vmx->vpid, vmx_vpid_bitmap);
-	spin_unlock(&vmx_vpid_lock);
-}
-
-#define MSR_TYPE_R	1
-#define MSR_TYPE_W	2
-static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
-						u32 msr, int type)
-{
-	int f = sizeof(unsigned long);
-
-	if (!cpu_has_vmx_msr_bitmap())
-		return;
-
-	/*
-	 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
-	 * have the write-low and read-high bitmap offsets the wrong way round.
-	 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
-	 */
-	if (msr <= 0x1fff) {
-		if (type & MSR_TYPE_R)
-			/* read-low */
-			__clear_bit(msr, msr_bitmap + 0x000 / f);
-
-		if (type & MSR_TYPE_W)
-			/* write-low */
-			__clear_bit(msr, msr_bitmap + 0x800 / f);
-
-	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
-		msr &= 0x1fff;
-		if (type & MSR_TYPE_R)
-			/* read-high */
-			__clear_bit(msr, msr_bitmap + 0x400 / f);
-
-		if (type & MSR_TYPE_W)
-			/* write-high */
-			__clear_bit(msr, msr_bitmap + 0xc00 / f);
-
-	}
-}
-
-static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
-						u32 msr, int type)
-{
-	int f = sizeof(unsigned long);
-
-	if (!cpu_has_vmx_msr_bitmap())
-		return;
-
-	/*
-	 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
-	 * have the write-low and read-high bitmap offsets the wrong way round.
-	 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
-	 */
-	if (msr <= 0x1fff) {
-		if (type & MSR_TYPE_R)
-			/* read-low */
-			__set_bit(msr, msr_bitmap + 0x000 / f);
-
-		if (type & MSR_TYPE_W)
-			/* write-low */
-			__set_bit(msr, msr_bitmap + 0x800 / f);
-
-	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
-		msr &= 0x1fff;
-		if (type & MSR_TYPE_R)
-			/* read-high */
-			__set_bit(msr, msr_bitmap + 0x400 / f);
-
-		if (type & MSR_TYPE_W)
-			/* write-high */
-			__set_bit(msr, msr_bitmap + 0xc00 / f);
-
-	}
-}
-
-static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
-{
-	if (!longmode_only)
-		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
-						msr, MSR_TYPE_R | MSR_TYPE_W);
-	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
-						msr, MSR_TYPE_R | MSR_TYPE_W);
-}
-
-static void vmx_enable_intercept_msr_read_x2apic(u32 msr)
-{
-	__vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
-			msr, MSR_TYPE_R);
-	__vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
-			msr, MSR_TYPE_R);
+	return r;
 }
 
-static void vmx_disable_intercept_msr_read_x2apic(u32 msr)
+static int alloc_identity_pagetable(struct kvm *kvm)
 {
-	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
-			msr, MSR_TYPE_R);
-	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
-			msr, MSR_TYPE_R);
+	/* Called with kvm->slots_lock held. */
+
+	struct kvm_userspace_memory_region kvm_userspace_mem;
+	int r = 0;
+
+	BUG_ON(kvm->arch.ept_identity_pagetable_done);
+
+	kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
+	kvm_userspace_mem.flags = 0;
+	kvm_userspace_mem.guest_phys_addr =
+		kvm->arch.ept_identity_map_addr;
+	kvm_userspace_mem.memory_size = PAGE_SIZE;
+	r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
+
+	return r;
 }
 
-static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
+static void allocate_vpid(struct vcpu_vmx *vmx)
 {
-	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
-			msr, MSR_TYPE_W);
-	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
-			msr, MSR_TYPE_W);
+	int vpid;
+
+	vmx->vpid = 0;
+	if (!enable_vpid)
+		return;
+	spin_lock(&vmx_vpid_lock);
+	vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
+	if (vpid < VMX_NR_VPIDS) {
+		vmx->vpid = vpid;
+		__set_bit(vpid, vmx_vpid_bitmap);
+	}
+	spin_unlock(&vmx_vpid_lock);
 }
 
-static int vmx_vm_has_apicv(struct kvm *kvm)
+static void free_vpid(struct vcpu_vmx *vmx)
 {
-	return enable_apicv && irqchip_in_kernel(kvm);
+	if (!enable_vpid)
+		return;
+	spin_lock(&vmx_vpid_lock);
+	if (vmx->vpid != 0)
+		__clear_bit(vmx->vpid, vmx_vpid_bitmap);
+	spin_unlock(&vmx_vpid_lock);
 }
 
 /*
@@ -4376,17 +4586,6 @@  static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
 	return exec_control;
 }
 
-static void ept_set_mmio_spte_mask(void)
-{
-	/*
-	 * EPT Misconfigurations can be generated if the value of bits 2:0
-	 * of an EPT paging-structure entry is 110b (write/execute).
-	 * Also, magic bits (0x3ull << 62) is set to quickly identify mmio
-	 * spte.
-	 */
-	kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
-}
-
 /*
  * Sets up the vmcs for emulated real mode.
  */
@@ -5706,76 +5905,6 @@  out:
 	return ret;
 }
 
-static int __grow_ple_window(int val)
-{
-	if (ple_window_grow < 1)
-		return ple_window;
-
-	val = min(val, ple_window_actual_max);
-
-	if (ple_window_grow < ple_window)
-		val *= ple_window_grow;
-	else
-		val += ple_window_grow;
-
-	return val;
-}
-
-static int __shrink_ple_window(int val, int modifier, int minimum)
-{
-	if (modifier < 1)
-		return ple_window;
-
-	if (modifier < ple_window)
-		val /= modifier;
-	else
-		val -= modifier;
-
-	return max(val, minimum);
-}
-
-static void grow_ple_window(struct kvm_vcpu *vcpu)
-{
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	int old = vmx->ple_window;
-
-	vmx->ple_window = __grow_ple_window(old);
-
-	if (vmx->ple_window != old)
-		vmx->ple_window_dirty = true;
-
-	trace_kvm_ple_window_grow(vcpu->vcpu_id, vmx->ple_window, old);
-}
-
-static void shrink_ple_window(struct kvm_vcpu *vcpu)
-{
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	int old = vmx->ple_window;
-
-	vmx->ple_window = __shrink_ple_window(old,
-	                                      ple_window_shrink, ple_window);
-
-	if (vmx->ple_window != old)
-		vmx->ple_window_dirty = true;
-
-	trace_kvm_ple_window_shrink(vcpu->vcpu_id, vmx->ple_window, old);
-}
-
-/*
- * ple_window_actual_max is computed to be one grow_ple_window() below
- * ple_window_max. (See __grow_ple_window for the reason.)
- * This prevents overflows, because ple_window_max is int.
- * ple_window_max effectively rounded down to a multiple of ple_window_grow in
- * this process.
- * ple_window_max is also prevented from setting vmx->ple_window < ple_window.
- */
-static void update_ple_window_actual_max(void)
-{
-	ple_window_actual_max =
-			__shrink_ple_window(max(ple_window_max, ple_window),
-			                    ple_window_grow, INT_MIN);
-}
-
 /*
  * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
  * exiting, so only get here on cpu with PAUSE-Loop-Exiting.
@@ -9158,150 +9287,23 @@  static struct kvm_x86_ops vmx_x86_ops = {
 
 static int __init vmx_init(void)
 {
-	int r, i, msr;
-
-	rdmsrl_safe(MSR_EFER, &host_efer);
-
-	for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
-		kvm_define_shared_msr(i, vmx_msr_index[i]);
-
-	vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_io_bitmap_a)
-		return -ENOMEM;
-
-	r = -ENOMEM;
-
-	vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_io_bitmap_b)
-		goto out;
-
-	vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_msr_bitmap_legacy)
-		goto out1;
-
-	vmx_msr_bitmap_legacy_x2apic =
-				(unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_msr_bitmap_legacy_x2apic)
-		goto out2;
-
-	vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_msr_bitmap_longmode)
-		goto out3;
-
-	vmx_msr_bitmap_longmode_x2apic =
-				(unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_msr_bitmap_longmode_x2apic)
-		goto out4;
-	vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_vmread_bitmap)
-		goto out5;
-
-	vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_vmwrite_bitmap)
-		goto out6;
-
-	memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
-	memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
-
-	/*
-	 * Allow direct access to the PC debug port (it is often used for I/O
-	 * delays, but the vmexits simply slow things down).
-	 */
-	memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
-	clear_bit(0x80, vmx_io_bitmap_a);
-
-	memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
-
-	memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
-	memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
-
-	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
+	int r = -ENOMEM;
 
 	r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
 		     __alignof__(struct vcpu_vmx), THIS_MODULE);
 	if (r)
-		goto out7;
+		return r;
 
 #ifdef CONFIG_KEXEC
 	rcu_assign_pointer(crash_vmclear_loaded_vmcss,
 			   crash_vmclear_local_loaded_vmcss);
 #endif
 
-	vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
-	vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
-	vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
-	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
-	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
-	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
-	vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
-
-	memcpy(vmx_msr_bitmap_legacy_x2apic,
-			vmx_msr_bitmap_legacy, PAGE_SIZE);
-	memcpy(vmx_msr_bitmap_longmode_x2apic,
-			vmx_msr_bitmap_longmode, PAGE_SIZE);
-
-	if (enable_apicv) {
-		for (msr = 0x800; msr <= 0x8ff; msr++)
-			vmx_disable_intercept_msr_read_x2apic(msr);
-
-		/* According SDM, in x2apic mode, the whole id reg is used.
-		 * But in KVM, it only use the highest eight bits. Need to
-		 * intercept it */
-		vmx_enable_intercept_msr_read_x2apic(0x802);
-		/* TMCCT */
-		vmx_enable_intercept_msr_read_x2apic(0x839);
-		/* TPR */
-		vmx_disable_intercept_msr_write_x2apic(0x808);
-		/* EOI */
-		vmx_disable_intercept_msr_write_x2apic(0x80b);
-		/* SELF-IPI */
-		vmx_disable_intercept_msr_write_x2apic(0x83f);
-	}
-
-	if (enable_ept) {
-		kvm_mmu_set_mask_ptes(0ull,
-			(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
-			(enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
-			0ull, VMX_EPT_EXECUTABLE_MASK);
-		ept_set_mmio_spte_mask();
-		kvm_enable_tdp();
-	} else
-		kvm_disable_tdp();
-
-	update_ple_window_actual_max();
-
 	return 0;
-
-out7:
-	free_page((unsigned long)vmx_vmwrite_bitmap);
-out6:
-	free_page((unsigned long)vmx_vmread_bitmap);
-out5:
-	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
-out4:
-	free_page((unsigned long)vmx_msr_bitmap_longmode);
-out3:
-	free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
-out2:
-	free_page((unsigned long)vmx_msr_bitmap_legacy);
-out1:
-	free_page((unsigned long)vmx_io_bitmap_b);
-out:
-	free_page((unsigned long)vmx_io_bitmap_a);
-	return r;
 }
 
 static void __exit vmx_exit(void)
 {
-	free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
-	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
-	free_page((unsigned long)vmx_msr_bitmap_legacy);
-	free_page((unsigned long)vmx_msr_bitmap_longmode);
-	free_page((unsigned long)vmx_io_bitmap_b);
-	free_page((unsigned long)vmx_io_bitmap_a);
-	free_page((unsigned long)vmx_vmwrite_bitmap);
-	free_page((unsigned long)vmx_vmread_bitmap);
-
 #ifdef CONFIG_KEXEC
 	RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
 	synchronize_rcu();