diff mbox series

[v19,022/130] KVM: x86/vmx: Refactor KVM VMX module init/exit functions

Message ID 11d5ae6a1102a50b0e773fc7efd949bb0bd2b776.1708933498.git.isaku.yamahata@intel.com (mailing list archive)
State New, archived
Headers show
Series [v19,001/130] x86/virt/tdx: Rename _offset to _member for TD_SYSINFO_MAP() macro | expand

Commit Message

Isaku Yamahata Feb. 26, 2024, 8:25 a.m. UTC
From: Isaku Yamahata <isaku.yamahata@intel.com>

Currently, KVM VMX module initialization/exit functions are a single
function each.  Refactor KVM VMX module initialization functions into KVM
common part and VMX part so that TDX specific part can be added cleanly.
Opportunistically refactor module exit function as well.

The current module initialization flow is,
0.) Check if VMX is supported,
1.) hyper-v specific initialization,
2.) system-wide x86 specific and vendor specific initialization,
3.) Final VMX specific system-wide initialization,
4.) calculate the sizes of VMX kvm structure and VMX vcpu structure,
5.) report those sizes to the KVM common layer and KVM common
    initialization

Refactor the KVM VMX module initialization function into functions with a
wrapper function to separate VMX logic in vmx.c from a file, main.c, common
among VMX and TDX.  Introduce a wrapper function for vmx_init().

The KVM architecture common layer allocates struct kvm with reported size
for architecture-specific code.  The KVM VMX module defines its structure
as struct vmx_kvm { struct kvm; VMX specific members;} and uses it as
struct vmx kvm.  Similar for vcpu structure. TDX KVM patches will define
TDX specific kvm and vcpu structures.

The current module exit function is also a single function, a combination
of VMX specific logic and common KVM logic.  Refactor it into VMX specific
logic and KVM common logic.  This is just refactoring to keep the VMX
specific logic in vmx.c from main.c.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
---
v19:
- Eliminate the unnecessary churn with vmx_hardware_setup() by Xiaoyao

v18:
- Move loaded_vmcss_on_cpu initialization to vt_init() before
  kvm_x86_vendor_init().
- added __init to an empty stub fucntion, hv_init_evmcs().

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
---
 arch/x86/kvm/vmx/main.c    | 54 ++++++++++++++++++++++++++++++++++
 arch/x86/kvm/vmx/vmx.c     | 60 +++++---------------------------------
 arch/x86/kvm/vmx/x86_ops.h | 14 +++++++++
 3 files changed, 75 insertions(+), 53 deletions(-)

Comments

Yin Fengwei March 11, 2024, 5:32 a.m. UTC | #1
On 2/26/2024 4:25 PM, isaku.yamahata@intel.com wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
> 
> Currently, KVM VMX module initialization/exit functions are a single
> function each.  Refactor KVM VMX module initialization functions into KVM
> common part and VMX part so that TDX specific part can be added cleanly.
> Opportunistically refactor module exit function as well.
> 
> The current module initialization flow is,
> 0.) Check if VMX is supported,
> 1.) hyper-v specific initialization,
> 2.) system-wide x86 specific and vendor specific initialization,
> 3.) Final VMX specific system-wide initialization,
> 4.) calculate the sizes of VMX kvm structure and VMX vcpu structure,
> 5.) report those sizes to the KVM common layer and KVM common
>      initialization
> 
> Refactor the KVM VMX module initialization function into functions with a
> wrapper function to separate VMX logic in vmx.c from a file, main.c, common
> among VMX and TDX.  Introduce a wrapper function for vmx_init().
> 
> The KVM architecture common layer allocates struct kvm with reported size
> for architecture-specific code.  The KVM VMX module defines its structure
> as struct vmx_kvm { struct kvm; VMX specific members;} and uses it as
> struct vmx kvm.  Similar for vcpu structure. TDX KVM patches will define
> TDX specific kvm and vcpu structures.
> 
> The current module exit function is also a single function, a combination
> of VMX specific logic and common KVM logic.  Refactor it into VMX specific
> logic and KVM common logic.  This is just refactoring to keep the VMX
> specific logic in vmx.c from main.c.
> 
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> ---
> v19:
> - Eliminate the unnecessary churn with vmx_hardware_setup() by Xiaoyao
> 
> v18:
> - Move loaded_vmcss_on_cpu initialization to vt_init() before
>    kvm_x86_vendor_init().
> - added __init to an empty stub fucntion, hv_init_evmcs().
> 
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Reviewed-by: Yin Fengwei <fengwei.yin@intel.com>

With one minor comment. See below.

> ---
>   arch/x86/kvm/vmx/main.c    | 54 ++++++++++++++++++++++++++++++++++
>   arch/x86/kvm/vmx/vmx.c     | 60 +++++---------------------------------
>   arch/x86/kvm/vmx/x86_ops.h | 14 +++++++++
>   3 files changed, 75 insertions(+), 53 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
> index eeb7a43b271d..18cecf12c7c8 100644
> --- a/arch/x86/kvm/vmx/main.c
> +++ b/arch/x86/kvm/vmx/main.c
> @@ -167,3 +167,57 @@ struct kvm_x86_init_ops vt_init_ops __initdata = {
>   	.runtime_ops = &vt_x86_ops,
>   	.pmu_ops = &intel_pmu_ops,
>   };
> +
> +static int __init vt_init(void)
> +{
> +	unsigned int vcpu_size, vcpu_align;
> +	int cpu, r;
> +
> +	if (!kvm_is_vmx_supported())
> +		return -EOPNOTSUPP;
> +
> +	/*
> +	 * Note, hv_init_evmcs() touches only VMX knobs, i.e. there's nothing
> +	 * to unwind if a later step fails.
> +	 */
> +	hv_init_evmcs();
> +
> +	/* vmx_hardware_disable() accesses loaded_vmcss_on_cpu. */
> +	for_each_possible_cpu(cpu)
> +		INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
> +
> +	r = kvm_x86_vendor_init(&vt_init_ops);
> +	if (r)
> +		return r;
> +
> +	r = vmx_init();
> +	if (r)
> +		goto err_vmx_init;
> +
> +	/*
> +	 * Common KVM initialization _must_ come last, after this, /dev/kvm is
> +	 * exposed to userspace!
> +	 */
> +	vcpu_size = sizeof(struct vcpu_vmx);
> +	vcpu_align = __alignof__(struct vcpu_vmx);
> +	r = kvm_init(vcpu_size, vcpu_align, THIS_MODULE);
> +	if (r)
> +		goto err_kvm_init;
> +
> +	return 0;
> +
> +err_kvm_init:
> +	vmx_exit();
> +err_vmx_init:
> +	kvm_x86_vendor_exit();
> +	return r;
> +}
> +module_init(vt_init);
> +
> +static void vt_exit(void)
> +{
> +	kvm_exit();
> +	kvm_x86_vendor_exit();
> +	vmx_exit();
> +}
> +module_exit(vt_exit);
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 8af0668e4dca..2fb1cd2e28a2 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -477,7 +477,7 @@ DEFINE_PER_CPU(struct vmcs *, current_vmcs);
>    * We maintain a per-CPU linked-list of VMCS loaded on that CPU. This is needed
>    * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it.
>    */
> -static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
> +DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
>   
>   static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
>   static DEFINE_SPINLOCK(vmx_vpid_lock);
> @@ -537,7 +537,7 @@ static int hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu)
>   	return 0;
>   }
>   
> -static __init void hv_init_evmcs(void)
> +__init void hv_init_evmcs(void)
>   {
>   	int cpu;
>   
> @@ -573,7 +573,7 @@ static __init void hv_init_evmcs(void)
>   	}
>   }
>   
> -static void hv_reset_evmcs(void)
> +void hv_reset_evmcs(void)
>   {
>   	struct hv_vp_assist_page *vp_ap;
>   
> @@ -597,10 +597,6 @@ static void hv_reset_evmcs(void)
>   	vp_ap->current_nested_vmcs = 0;
>   	vp_ap->enlighten_vmentry = 0;
>   }
> -
> -#else /* IS_ENABLED(CONFIG_HYPERV) */
> -static void hv_init_evmcs(void) {}
> -static void hv_reset_evmcs(void) {}
>   #endif /* IS_ENABLED(CONFIG_HYPERV) */
>   
>   /*
> @@ -2743,7 +2739,7 @@ static bool __kvm_is_vmx_supported(void)
>   	return true;
>   }
>   
> -static bool kvm_is_vmx_supported(void)
> +bool kvm_is_vmx_supported(void)
>   {
>   	bool supported;
>   
> @@ -8508,7 +8504,7 @@ static void vmx_cleanup_l1d_flush(void)
>   	l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
>   }
>   
> -static void __vmx_exit(void)
> +void vmx_exit(void)
>   {
>   	allow_smaller_maxphyaddr = false;
>   
> @@ -8517,36 +8513,10 @@ static void __vmx_exit(void)
>   	vmx_cleanup_l1d_flush();
>   }
>   
> -static void vmx_exit(void)
> -{
> -	kvm_exit();
> -	kvm_x86_vendor_exit();
> -
> -	__vmx_exit();
> -}
> -module_exit(vmx_exit);
> -
> -static int __init vmx_init(void)
> +int __init vmx_init(void)
>   {
>   	int r, cpu;
>   
> -	if (!kvm_is_vmx_supported())
> -		return -EOPNOTSUPP;
> -
> -	/*
> -	 * Note, hv_init_evmcs() touches only VMX knobs, i.e. there's nothing
> -	 * to unwind if a later step fails.
> -	 */
> -	hv_init_evmcs();
> -
> -	/* vmx_hardware_disable() accesses loaded_vmcss_on_cpu. */
> -	for_each_possible_cpu(cpu)
> -		INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
> -
> -	r = kvm_x86_vendor_init(&vt_init_ops);
> -	if (r)
> -		return r;
> -
>   	/*
>   	 * Must be called after common x86 init so enable_ept is properly set
>   	 * up. Hand the parameter mitigation value in which was stored in
I am wondering whether the first sentence of above comment should be
moved to vt_init()? So vt_init() has whole information about the init
sequence.


Regards
Yin, Fengwei

> @@ -8556,7 +8526,7 @@ static int __init vmx_init(void)
>   	 */
>   	r = vmx_setup_l1d_flush(vmentry_l1d_flush_param);
>   	if (r)
> -		goto err_l1d_flush;
> +		return r;
>   
>   	for_each_possible_cpu(cpu)
>   		pi_init_cpu(cpu);
> @@ -8573,21 +8543,5 @@ static int __init vmx_init(void)
>   	if (!enable_ept)
>   		allow_smaller_maxphyaddr = true;
>   
> -	/*
> -	 * Common KVM initialization _must_ come last, after this, /dev/kvm is
> -	 * exposed to userspace!
> -	 */
> -	r = kvm_init(sizeof(struct vcpu_vmx), __alignof__(struct vcpu_vmx),
> -		     THIS_MODULE);
> -	if (r)
> -		goto err_kvm_init;
> -
>   	return 0;
> -
> -err_kvm_init:
> -	__vmx_exit();
> -err_l1d_flush:
> -	kvm_x86_vendor_exit();
> -	return r;
>   }
> -module_init(vmx_init);
> diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
> index 2f8b6c43fe0f..b936388853ab 100644
> --- a/arch/x86/kvm/vmx/x86_ops.h
> +++ b/arch/x86/kvm/vmx/x86_ops.h
> @@ -6,6 +6,20 @@
>   
>   #include "x86.h"
>   
> +#if IS_ENABLED(CONFIG_HYPERV)
> +__init void hv_init_evmcs(void);
> +void hv_reset_evmcs(void);
> +#else /* IS_ENABLED(CONFIG_HYPERV) */
> +static inline __init void hv_init_evmcs(void) {}
> +static inline void hv_reset_evmcs(void) {}
> +#endif /* IS_ENABLED(CONFIG_HYPERV) */
> +
> +DECLARE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
> +
> +bool kvm_is_vmx_supported(void);
> +int __init vmx_init(void);
> +void vmx_exit(void);
> +
>   extern struct kvm_x86_ops vt_x86_ops __initdata;
>   extern struct kvm_x86_init_ops vt_init_ops __initdata;
>
Isaku Yamahata March 12, 2024, 2:15 a.m. UTC | #2
On Mon, Mar 11, 2024 at 01:32:08PM +0800,
"Yin, Fengwei" <fengwei.yin@intel.com> wrote:

> 
> 
> On 2/26/2024 4:25 PM, isaku.yamahata@intel.com wrote:
> > From: Isaku Yamahata <isaku.yamahata@intel.com>
> > 
> > Currently, KVM VMX module initialization/exit functions are a single
> > function each.  Refactor KVM VMX module initialization functions into KVM
> > common part and VMX part so that TDX specific part can be added cleanly.
> > Opportunistically refactor module exit function as well.
> > 
> > The current module initialization flow is,
> > 0.) Check if VMX is supported,
> > 1.) hyper-v specific initialization,
> > 2.) system-wide x86 specific and vendor specific initialization,
> > 3.) Final VMX specific system-wide initialization,
> > 4.) calculate the sizes of VMX kvm structure and VMX vcpu structure,
> > 5.) report those sizes to the KVM common layer and KVM common
> >      initialization
> > 
> > Refactor the KVM VMX module initialization function into functions with a
> > wrapper function to separate VMX logic in vmx.c from a file, main.c, common
> > among VMX and TDX.  Introduce a wrapper function for vmx_init().
> > 
> > The KVM architecture common layer allocates struct kvm with reported size
> > for architecture-specific code.  The KVM VMX module defines its structure
> > as struct vmx_kvm { struct kvm; VMX specific members;} and uses it as
> > struct vmx kvm.  Similar for vcpu structure. TDX KVM patches will define
> > TDX specific kvm and vcpu structures.
> > 
> > The current module exit function is also a single function, a combination
> > of VMX specific logic and common KVM logic.  Refactor it into VMX specific
> > logic and KVM common logic.  This is just refactoring to keep the VMX
> > specific logic in vmx.c from main.c.
> > 
> > Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> > ---
> > v19:
> > - Eliminate the unnecessary churn with vmx_hardware_setup() by Xiaoyao
> > 
> > v18:
> > - Move loaded_vmcss_on_cpu initialization to vt_init() before
> >    kvm_x86_vendor_init().
> > - added __init to an empty stub fucntion, hv_init_evmcs().
> > 
> > Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> Reviewed-by: Yin Fengwei <fengwei.yin@intel.com>
> 
> With one minor comment. See below.
> 
> > ---
> >   arch/x86/kvm/vmx/main.c    | 54 ++++++++++++++++++++++++++++++++++
> >   arch/x86/kvm/vmx/vmx.c     | 60 +++++---------------------------------
> >   arch/x86/kvm/vmx/x86_ops.h | 14 +++++++++
> >   3 files changed, 75 insertions(+), 53 deletions(-)
> > 
> > diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
> > index eeb7a43b271d..18cecf12c7c8 100644
> > --- a/arch/x86/kvm/vmx/main.c
> > +++ b/arch/x86/kvm/vmx/main.c
> > @@ -167,3 +167,57 @@ struct kvm_x86_init_ops vt_init_ops __initdata = {
> >   	.runtime_ops = &vt_x86_ops,
> >   	.pmu_ops = &intel_pmu_ops,
> >   };
> > +
> > +static int __init vt_init(void)
> > +{
> > +	unsigned int vcpu_size, vcpu_align;
> > +	int cpu, r;
> > +
> > +	if (!kvm_is_vmx_supported())
> > +		return -EOPNOTSUPP;
> > +
> > +	/*
> > +	 * Note, hv_init_evmcs() touches only VMX knobs, i.e. there's nothing
> > +	 * to unwind if a later step fails.
> > +	 */
> > +	hv_init_evmcs();
> > +
> > +	/* vmx_hardware_disable() accesses loaded_vmcss_on_cpu. */
> > +	for_each_possible_cpu(cpu)
> > +		INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
> > +
> > +	r = kvm_x86_vendor_init(&vt_init_ops);
> > +	if (r)
> > +		return r;
> > +
> > +	r = vmx_init();
> > +	if (r)
> > +		goto err_vmx_init;
> > +
> > +	/*
> > +	 * Common KVM initialization _must_ come last, after this, /dev/kvm is
> > +	 * exposed to userspace!
> > +	 */
> > +	vcpu_size = sizeof(struct vcpu_vmx);
> > +	vcpu_align = __alignof__(struct vcpu_vmx);
> > +	r = kvm_init(vcpu_size, vcpu_align, THIS_MODULE);
> > +	if (r)
> > +		goto err_kvm_init;
> > +
> > +	return 0;
> > +
> > +err_kvm_init:
> > +	vmx_exit();
> > +err_vmx_init:
> > +	kvm_x86_vendor_exit();
> > +	return r;
> > +}
> > +module_init(vt_init);
> > +
> > +static void vt_exit(void)
> > +{
> > +	kvm_exit();
> > +	kvm_x86_vendor_exit();
> > +	vmx_exit();
> > +}
> > +module_exit(vt_exit);
> > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> > index 8af0668e4dca..2fb1cd2e28a2 100644
> > --- a/arch/x86/kvm/vmx/vmx.c
> > +++ b/arch/x86/kvm/vmx/vmx.c
> > @@ -477,7 +477,7 @@ DEFINE_PER_CPU(struct vmcs *, current_vmcs);
> >    * We maintain a per-CPU linked-list of VMCS loaded on that CPU. This is needed
> >    * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it.
> >    */
> > -static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
> > +DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
> >   static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
> >   static DEFINE_SPINLOCK(vmx_vpid_lock);
> > @@ -537,7 +537,7 @@ static int hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu)
> >   	return 0;
> >   }
> > -static __init void hv_init_evmcs(void)
> > +__init void hv_init_evmcs(void)
> >   {
> >   	int cpu;
> > @@ -573,7 +573,7 @@ static __init void hv_init_evmcs(void)
> >   	}
> >   }
> > -static void hv_reset_evmcs(void)
> > +void hv_reset_evmcs(void)
> >   {
> >   	struct hv_vp_assist_page *vp_ap;
> > @@ -597,10 +597,6 @@ static void hv_reset_evmcs(void)
> >   	vp_ap->current_nested_vmcs = 0;
> >   	vp_ap->enlighten_vmentry = 0;
> >   }
> > -
> > -#else /* IS_ENABLED(CONFIG_HYPERV) */
> > -static void hv_init_evmcs(void) {}
> > -static void hv_reset_evmcs(void) {}
> >   #endif /* IS_ENABLED(CONFIG_HYPERV) */
> >   /*
> > @@ -2743,7 +2739,7 @@ static bool __kvm_is_vmx_supported(void)
> >   	return true;
> >   }
> > -static bool kvm_is_vmx_supported(void)
> > +bool kvm_is_vmx_supported(void)
> >   {
> >   	bool supported;
> > @@ -8508,7 +8504,7 @@ static void vmx_cleanup_l1d_flush(void)
> >   	l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
> >   }
> > -static void __vmx_exit(void)
> > +void vmx_exit(void)
> >   {
> >   	allow_smaller_maxphyaddr = false;
> > @@ -8517,36 +8513,10 @@ static void __vmx_exit(void)
> >   	vmx_cleanup_l1d_flush();
> >   }
> > -static void vmx_exit(void)
> > -{
> > -	kvm_exit();
> > -	kvm_x86_vendor_exit();
> > -
> > -	__vmx_exit();
> > -}
> > -module_exit(vmx_exit);
> > -
> > -static int __init vmx_init(void)
> > +int __init vmx_init(void)
> >   {
> >   	int r, cpu;
> > -	if (!kvm_is_vmx_supported())
> > -		return -EOPNOTSUPP;
> > -
> > -	/*
> > -	 * Note, hv_init_evmcs() touches only VMX knobs, i.e. there's nothing
> > -	 * to unwind if a later step fails.
> > -	 */
> > -	hv_init_evmcs();
> > -
> > -	/* vmx_hardware_disable() accesses loaded_vmcss_on_cpu. */
> > -	for_each_possible_cpu(cpu)
> > -		INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
> > -
> > -	r = kvm_x86_vendor_init(&vt_init_ops);
> > -	if (r)
> > -		return r;
> > -
> >   	/*
> >   	 * Must be called after common x86 init so enable_ept is properly set
> >   	 * up. Hand the parameter mitigation value in which was stored in
> I am wondering whether the first sentence of above comment should be
> moved to vt_init()? So vt_init() has whole information about the init
> sequence.

If we do so, we should move the call of "vmx_setup_l1d_flush() to vt_init().
I hesitated to remove static of vmx_setup_l1d_flush().
Yin Fengwei March 12, 2024, 2:21 a.m. UTC | #3
On 3/12/24 10:15, Isaku Yamahata wrote:
>>> -
>>> -	__vmx_exit();
>>> -}
>>> -module_exit(vmx_exit);
>>> -
>>> -static int __init vmx_init(void)
>>> +int __init vmx_init(void)
>>>   {
>>>   	int r, cpu;
>>> -	if (!kvm_is_vmx_supported())
>>> -		return -EOPNOTSUPP;
>>> -
>>> -	/*
>>> -	 * Note, hv_init_evmcs() touches only VMX knobs, i.e. there's nothing
>>> -	 * to unwind if a later step fails.
>>> -	 */
>>> -	hv_init_evmcs();
>>> -
>>> -	/* vmx_hardware_disable() accesses loaded_vmcss_on_cpu. */
>>> -	for_each_possible_cpu(cpu)
>>> -		INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
>>> -
>>> -	r = kvm_x86_vendor_init(&vt_init_ops);
>>> -	if (r)
>>> -		return r;
>>> -
>>>   	/*
>>>   	 * Must be called after common x86 init so enable_ept is properly set
>>>   	 * up. Hand the parameter mitigation value in which was stored in
>> I am wondering whether the first sentence of above comment should be
>> moved to vt_init()? So vt_init() has whole information about the init
>> sequence.
> If we do so, we should move the call of "vmx_setup_l1d_flush() to vt_init().
> I hesitated to remove static of vmx_setup_l1d_flush().
I meant this one:
 "Must be called after common x86 init so enable_ept is properly set up"

Not necessary to move vmx_setup_l1d_flush().

Regards
Yin, Fengwei


> --
Isaku Yamahata March 12, 2024, 4:42 a.m. UTC | #4
On Tue, Mar 12, 2024 at 10:21:28AM +0800,
Yin Fengwei <fengwei.yin@intel.com> wrote:

> 
> 
> On 3/12/24 10:15, Isaku Yamahata wrote:
> >>> -
> >>> -	__vmx_exit();
> >>> -}
> >>> -module_exit(vmx_exit);
> >>> -
> >>> -static int __init vmx_init(void)
> >>> +int __init vmx_init(void)
> >>>   {
> >>>   	int r, cpu;
> >>> -	if (!kvm_is_vmx_supported())
> >>> -		return -EOPNOTSUPP;
> >>> -
> >>> -	/*
> >>> -	 * Note, hv_init_evmcs() touches only VMX knobs, i.e. there's nothing
> >>> -	 * to unwind if a later step fails.
> >>> -	 */
> >>> -	hv_init_evmcs();
> >>> -
> >>> -	/* vmx_hardware_disable() accesses loaded_vmcss_on_cpu. */
> >>> -	for_each_possible_cpu(cpu)
> >>> -		INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
> >>> -
> >>> -	r = kvm_x86_vendor_init(&vt_init_ops);
> >>> -	if (r)
> >>> -		return r;
> >>> -
> >>>   	/*
> >>>   	 * Must be called after common x86 init so enable_ept is properly set
> >>>   	 * up. Hand the parameter mitigation value in which was stored in
> >> I am wondering whether the first sentence of above comment should be
> >> moved to vt_init()? So vt_init() has whole information about the init
> >> sequence.
> > If we do so, we should move the call of "vmx_setup_l1d_flush() to vt_init().
> > I hesitated to remove static of vmx_setup_l1d_flush().
> I meant this one:
>  "Must be called after common x86 init so enable_ept is properly set up"
> 
> Not necessary to move vmx_setup_l1d_flush().

Ah, you mean "only" first sentence. Ok. I'll move it.
Huang, Kai March 21, 2024, 11:27 a.m. UTC | #5
On Mon, 2024-02-26 at 00:25 -0800, isaku.yamahata@intel.com wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
> 
> Currently, KVM VMX module initialization/exit functions are a single
> function each.  Refactor KVM VMX module initialization functions into KVM
> common part and VMX part so that TDX specific part can be added cleanly.
> Opportunistically refactor module exit function as well.
> 
> The current module initialization flow is,

					  ^ ',' -> ':'

And please add an empty line to make text more breathable.

> 0.) Check if VMX is supported,
> 1.) hyper-v specific initialization,
> 2.) system-wide x86 specific and vendor specific initialization,
> 3.) Final VMX specific system-wide initialization,
> 4.) calculate the sizes of VMX kvm structure and VMX vcpu structure,
> 5.) report those sizes to the KVM common layer and KVM common
>     initialization

Is there any difference between "KVM common layer" and "KVM common
initialization"?  I think you can remove the former.

> 
> Refactor the KVM VMX module initialization function into functions with a
> wrapper function to separate VMX logic in vmx.c from a file, main.c, common
> among VMX and TDX.  Introduce a wrapper function for vmx_init().

Sorry I don't quite follow what your are trying to say in the above paragraph.

You have adequately put what is the _current_ flow, and I am expecting to see
the flow _after_ the refactor here.
  
> 
> The KVM architecture common layer allocates struct kvm with reported size
> for architecture-specific code.  The KVM VMX module defines its structure
> as struct vmx_kvm { struct kvm; VMX specific members;} and uses it as
> struct vmx kvm.  Similar for vcpu structure. TDX KVM patches will define

	 ^vmx_kvm.

Please be more consistent on the words.

> TDX specific kvm and vcpu structures.

Is this paragraph related to the changes in this patch?

For instance, why do you need to point out we will have TDX-specific 'kvm and
vcpu' structures?

> 
> The current module exit function is also a single function, a combination
> of VMX specific logic and common KVM logic.  Refactor it into VMX specific
> logic and KVM common logic.  
> 

[...]

> This is just refactoring to keep the VMX
> specific logic in vmx.c from main.c.

It's better to make this as a separate paragraph, because it is a summary to
this patch.

And in other words: No functional change intended?
Isaku Yamahata March 22, 2024, 5:39 p.m. UTC | #6
On Thu, Mar 21, 2024 at 11:27:46AM +0000,
"Huang, Kai" <kai.huang@intel.com> wrote:

> On Mon, 2024-02-26 at 00:25 -0800, isaku.yamahata@intel.com wrote:
> > From: Isaku Yamahata <isaku.yamahata@intel.com>
> > 
> > Currently, KVM VMX module initialization/exit functions are a single
> > function each.  Refactor KVM VMX module initialization functions into KVM
> > common part and VMX part so that TDX specific part can be added cleanly.
> > Opportunistically refactor module exit function as well.
> > 
> > The current module initialization flow is,
> 
> 					  ^ ',' -> ':'
> 
> And please add an empty line to make text more breathable.
> 
> > 0.) Check if VMX is supported,
> > 1.) hyper-v specific initialization,
> > 2.) system-wide x86 specific and vendor specific initialization,
> > 3.) Final VMX specific system-wide initialization,
> > 4.) calculate the sizes of VMX kvm structure and VMX vcpu structure,
> > 5.) report those sizes to the KVM common layer and KVM common
> >     initialization
> 
> Is there any difference between "KVM common layer" and "KVM common
> initialization"?  I think you can remove the former.

Ok.

> > Refactor the KVM VMX module initialization function into functions with a
> > wrapper function to separate VMX logic in vmx.c from a file, main.c, common
> > among VMX and TDX.  Introduce a wrapper function for vmx_init().
> 
> Sorry I don't quite follow what your are trying to say in the above paragraph.
> 
> You have adequately put what is the _current_ flow, and I am expecting to see
> the flow _after_ the refactor here.

Will add it.


> > The KVM architecture common layer allocates struct kvm with reported size
> > for architecture-specific code.  The KVM VMX module defines its structure
> > as struct vmx_kvm { struct kvm; VMX specific members;} and uses it as
> > struct vmx kvm.  Similar for vcpu structure. TDX KVM patches will define
> 
> 	 ^vmx_kvm.
> 
> Please be more consistent on the words.
> 
> > TDX specific kvm and vcpu structures.
> 
> Is this paragraph related to the changes in this patch?
> 
> For instance, why do you need to point out we will have TDX-specific 'kvm and
> vcpu' structures?

The point of this refactoring is to make room for TDX-specific code.  The
consideration point is data size/alignment difference and VMX-dependency.
Let me re-order the sentences.


> > The current module exit function is also a single function, a combination
> > of VMX specific logic and common KVM logic.  Refactor it into VMX specific
> > logic and KVM common logic.  
> > 
> 
> [...]
> 
> > This is just refactoring to keep the VMX
> > specific logic in vmx.c from main.c.
> 
> It's better to make this as a separate paragraph, because it is a summary to
> this patch.
> 
> And in other words: No functional change intended?

Thanks for the feedback.  Here is the revised version.

KVM: x86/vmx: Refactor KVM VMX module init/exit functions

Split KVM VMX kernel module initialization into one specific to VMX in
vmx.c and one common for VMX and TDX in main.c to make room for
TDX-specific logic to fit in.  Opportunistically, refactor module exit
function as well.

The key points are data structure difference and TDX dependency on
VMX.  The data structures for TDX are different from VMX.  So are its
size and alignment.  Because TDX depends on VMX, TDX initialization
must be after VMX initialization.  TDX cleanup must be before VMX
cleanup.

The current module initialization flow is:

0.) Check if VMX is supported,
1.) Hyper-v specific initialization,
2.) System-wide x86 specific and vendor-specific initialization,
3.) Final VMX-specific system-wide initialization,
4.) Calculate the sizes of the kvm and vcpu structure for VMX,
5.) Report those sizes to the KVM-common initialization

After refactoring and TDX, the flow will be:

0.) Check if VMX is supported (main.c),
1.) Hyper-v specific initialization (main.c),
2.) System-wide x86 specific and vendor-specific initialization,
    2.1) VMX-specific initialization (vmx.c)
    2.2) TDX-specific initialization (tdx.c)
3.) Final VMX-specific system-wide initialization (vmx.c),
    TDX doesn't need this step.
4.) Calculate the sizes of the kvm and vcpu structure for both VMX and
    TDX, (main.c)
5.) Report those sizes to the KVM-common initialization (main.c)

No functional change intended.
diff mbox series

Patch

diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index eeb7a43b271d..18cecf12c7c8 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -167,3 +167,57 @@  struct kvm_x86_init_ops vt_init_ops __initdata = {
 	.runtime_ops = &vt_x86_ops,
 	.pmu_ops = &intel_pmu_ops,
 };
+
+static int __init vt_init(void)
+{
+	unsigned int vcpu_size, vcpu_align;
+	int cpu, r;
+
+	if (!kvm_is_vmx_supported())
+		return -EOPNOTSUPP;
+
+	/*
+	 * Note, hv_init_evmcs() touches only VMX knobs, i.e. there's nothing
+	 * to unwind if a later step fails.
+	 */
+	hv_init_evmcs();
+
+	/* vmx_hardware_disable() accesses loaded_vmcss_on_cpu. */
+	for_each_possible_cpu(cpu)
+		INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
+
+	r = kvm_x86_vendor_init(&vt_init_ops);
+	if (r)
+		return r;
+
+	r = vmx_init();
+	if (r)
+		goto err_vmx_init;
+
+	/*
+	 * Common KVM initialization _must_ come last, after this, /dev/kvm is
+	 * exposed to userspace!
+	 */
+	vcpu_size = sizeof(struct vcpu_vmx);
+	vcpu_align = __alignof__(struct vcpu_vmx);
+	r = kvm_init(vcpu_size, vcpu_align, THIS_MODULE);
+	if (r)
+		goto err_kvm_init;
+
+	return 0;
+
+err_kvm_init:
+	vmx_exit();
+err_vmx_init:
+	kvm_x86_vendor_exit();
+	return r;
+}
+module_init(vt_init);
+
+static void vt_exit(void)
+{
+	kvm_exit();
+	kvm_x86_vendor_exit();
+	vmx_exit();
+}
+module_exit(vt_exit);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 8af0668e4dca..2fb1cd2e28a2 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -477,7 +477,7 @@  DEFINE_PER_CPU(struct vmcs *, current_vmcs);
  * We maintain a per-CPU linked-list of VMCS loaded on that CPU. This is needed
  * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it.
  */
-static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
+DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
 
 static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
 static DEFINE_SPINLOCK(vmx_vpid_lock);
@@ -537,7 +537,7 @@  static int hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-static __init void hv_init_evmcs(void)
+__init void hv_init_evmcs(void)
 {
 	int cpu;
 
@@ -573,7 +573,7 @@  static __init void hv_init_evmcs(void)
 	}
 }
 
-static void hv_reset_evmcs(void)
+void hv_reset_evmcs(void)
 {
 	struct hv_vp_assist_page *vp_ap;
 
@@ -597,10 +597,6 @@  static void hv_reset_evmcs(void)
 	vp_ap->current_nested_vmcs = 0;
 	vp_ap->enlighten_vmentry = 0;
 }
-
-#else /* IS_ENABLED(CONFIG_HYPERV) */
-static void hv_init_evmcs(void) {}
-static void hv_reset_evmcs(void) {}
 #endif /* IS_ENABLED(CONFIG_HYPERV) */
 
 /*
@@ -2743,7 +2739,7 @@  static bool __kvm_is_vmx_supported(void)
 	return true;
 }
 
-static bool kvm_is_vmx_supported(void)
+bool kvm_is_vmx_supported(void)
 {
 	bool supported;
 
@@ -8508,7 +8504,7 @@  static void vmx_cleanup_l1d_flush(void)
 	l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
 }
 
-static void __vmx_exit(void)
+void vmx_exit(void)
 {
 	allow_smaller_maxphyaddr = false;
 
@@ -8517,36 +8513,10 @@  static void __vmx_exit(void)
 	vmx_cleanup_l1d_flush();
 }
 
-static void vmx_exit(void)
-{
-	kvm_exit();
-	kvm_x86_vendor_exit();
-
-	__vmx_exit();
-}
-module_exit(vmx_exit);
-
-static int __init vmx_init(void)
+int __init vmx_init(void)
 {
 	int r, cpu;
 
-	if (!kvm_is_vmx_supported())
-		return -EOPNOTSUPP;
-
-	/*
-	 * Note, hv_init_evmcs() touches only VMX knobs, i.e. there's nothing
-	 * to unwind if a later step fails.
-	 */
-	hv_init_evmcs();
-
-	/* vmx_hardware_disable() accesses loaded_vmcss_on_cpu. */
-	for_each_possible_cpu(cpu)
-		INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
-
-	r = kvm_x86_vendor_init(&vt_init_ops);
-	if (r)
-		return r;
-
 	/*
 	 * Must be called after common x86 init so enable_ept is properly set
 	 * up. Hand the parameter mitigation value in which was stored in
@@ -8556,7 +8526,7 @@  static int __init vmx_init(void)
 	 */
 	r = vmx_setup_l1d_flush(vmentry_l1d_flush_param);
 	if (r)
-		goto err_l1d_flush;
+		return r;
 
 	for_each_possible_cpu(cpu)
 		pi_init_cpu(cpu);
@@ -8573,21 +8543,5 @@  static int __init vmx_init(void)
 	if (!enable_ept)
 		allow_smaller_maxphyaddr = true;
 
-	/*
-	 * Common KVM initialization _must_ come last, after this, /dev/kvm is
-	 * exposed to userspace!
-	 */
-	r = kvm_init(sizeof(struct vcpu_vmx), __alignof__(struct vcpu_vmx),
-		     THIS_MODULE);
-	if (r)
-		goto err_kvm_init;
-
 	return 0;
-
-err_kvm_init:
-	__vmx_exit();
-err_l1d_flush:
-	kvm_x86_vendor_exit();
-	return r;
 }
-module_init(vmx_init);
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index 2f8b6c43fe0f..b936388853ab 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -6,6 +6,20 @@ 
 
 #include "x86.h"
 
+#if IS_ENABLED(CONFIG_HYPERV)
+__init void hv_init_evmcs(void);
+void hv_reset_evmcs(void);
+#else /* IS_ENABLED(CONFIG_HYPERV) */
+static inline __init void hv_init_evmcs(void) {}
+static inline void hv_reset_evmcs(void) {}
+#endif /* IS_ENABLED(CONFIG_HYPERV) */
+
+DECLARE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
+
+bool kvm_is_vmx_supported(void);
+int __init vmx_init(void);
+void vmx_exit(void);
+
 extern struct kvm_x86_ops vt_x86_ops __initdata;
 extern struct kvm_x86_init_ops vt_init_ops __initdata;