Message ID | 4a766983346b2c01e943348af3c5ca6691e272f9.1708933498.git.isaku.yamahata@intel.com (mailing list archive)
---|---
State | New, archived |
Series | [v19,001/130] x86/virt/tdx: Rename _offset to _member for TD_SYSINFO_MAP() macro
On 2/26/2024 4:26 PM, isaku.yamahata@intel.com wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
>
> On entering/exiting TDX vcpu, preserved or clobbered CPU state is different
> from the VMX case.

Could you add more description about the differences?

> Add TDX hooks to save/restore host/guest CPU state.

KVM doesn't save/restore guest CPU state for TDX.

> Save/restore kernel GS base MSR.
>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  arch/x86/kvm/vmx/main.c    | 30 +++++++++++++++++++++++++--
>  arch/x86/kvm/vmx/tdx.c     | 42 ++++++++++++++++++++++++++++++++++++++
>  arch/x86/kvm/vmx/tdx.h     |  4 ++++
>  arch/x86/kvm/vmx/x86_ops.h |  4 ++++
>  4 files changed, 78 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
> index d72651ce99ac..8275a242ce07 100644
> --- a/arch/x86/kvm/vmx/main.c
> +++ b/arch/x86/kvm/vmx/main.c
> @@ -158,6 +158,32 @@ static void vt_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
>  	vmx_vcpu_reset(vcpu, init_event);
>  }
>
> +static void vt_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
> +{
> +	/*
> +	 * All host state is saved/restored across SEAMCALL/SEAMRET,

It sounds confusing to me.
If all host state is saved/restored across SEAMCALL/SEAMRET, why does this
patch save/restore MSR_KERNEL_GS_BASE for the host?

> +	 * and the
> +	 * guest state of a TD is obviously off limits.  Deferring MSRs and DRs
> +	 * is pointless because the TDX module needs to load *something* so as
> +	 * not to expose guest state.
> +	 */
> +	if (is_td_vcpu(vcpu)) {
> +		tdx_prepare_switch_to_guest(vcpu);
> +		return;
> +	}
> +
> +	vmx_prepare_switch_to_guest(vcpu);
> +}
> +
> +static void vt_vcpu_put(struct kvm_vcpu *vcpu)
> +{
> +	if (is_td_vcpu(vcpu)) {
> +		tdx_vcpu_put(vcpu);
> +		return;
> +	}
> +
> +	vmx_vcpu_put(vcpu);
> +}
> +
>  static int vt_vcpu_pre_run(struct kvm_vcpu *vcpu)
>  {
>  	if (is_td_vcpu(vcpu))
> @@ -326,9 +352,9 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
>  	.vcpu_free = vt_vcpu_free,
>  	.vcpu_reset = vt_vcpu_reset,
>
> -	.prepare_switch_to_guest = vmx_prepare_switch_to_guest,
> +	.prepare_switch_to_guest = vt_prepare_switch_to_guest,
>  	.vcpu_load = vmx_vcpu_load,
> -	.vcpu_put = vmx_vcpu_put,
> +	.vcpu_put = vt_vcpu_put,
>
>  	.update_exception_bitmap = vmx_update_exception_bitmap,
>  	.get_msr_feature = vmx_get_msr_feature,
> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> index fdf9196cb592..9616b1aab6ce 100644
> --- a/arch/x86/kvm/vmx/tdx.c
> +++ b/arch/x86/kvm/vmx/tdx.c
> @@ -1,5 +1,6 @@
>  // SPDX-License-Identifier: GPL-2.0
>  #include <linux/cpu.h>
> +#include <linux/mmu_context.h>
>
>  #include <asm/tdx.h>
>
> @@ -423,6 +424,7 @@ u8 tdx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
>  int tdx_vcpu_create(struct kvm_vcpu *vcpu)
>  {
>  	struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm);
> +	struct vcpu_tdx *tdx = to_tdx(vcpu);
>
>  	WARN_ON_ONCE(vcpu->arch.cpuid_entries);
>  	WARN_ON_ONCE(vcpu->arch.cpuid_nent);
> @@ -446,9 +448,47 @@ int tdx_vcpu_create(struct kvm_vcpu *vcpu)
>  	if ((kvm_tdx->xfam & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE)
>  		vcpu->arch.xfd_no_write_intercept = true;
>
> +	tdx->host_state_need_save = true;
> +	tdx->host_state_need_restore = false;
> +
>  	return 0;
>  }
>
> +void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)

Just like vmx_prepare_switch_to_host(), the input can be "struct vcpu_tdx *",
since vcpu is not used inside the function.
And the callsites just use "to_tdx(vcpu)".

> +{
> +	struct vcpu_tdx *tdx = to_tdx(vcpu);

Then, this can be dropped.

> +
> +	if (!tdx->host_state_need_save)
> +		return;
> +
> +	if (likely(is_64bit_mm(current->mm)))
> +		tdx->msr_host_kernel_gs_base = current->thread.gsbase;
> +	else
> +		tdx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);
> +
> +	tdx->host_state_need_save = false;
> +}
> +
> +static void tdx_prepare_switch_to_host(struct kvm_vcpu *vcpu)

Ditto.

> +{
> +	struct vcpu_tdx *tdx = to_tdx(vcpu);
> +
> +	tdx->host_state_need_save = true;
> +	if (!tdx->host_state_need_restore)
> +		return;
> +
> +	++vcpu->stat.host_state_reload;
> +
> +	wrmsrl(MSR_KERNEL_GS_BASE, tdx->msr_host_kernel_gs_base);
> +	tdx->host_state_need_restore = false;
> +}
> +
> +void tdx_vcpu_put(struct kvm_vcpu *vcpu)
> +{
> +	vmx_vcpu_pi_put(vcpu);
> +	tdx_prepare_switch_to_host(vcpu);
> +}
> +
>  void tdx_vcpu_free(struct kvm_vcpu *vcpu)
>  {
>  	struct vcpu_tdx *tdx = to_tdx(vcpu);
> @@ -569,6 +609,8 @@ fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu)
>
>  	tdx_vcpu_enter_exit(tdx);
>
> +	tdx->host_state_need_restore = true;
> +
>  	vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET;
>  	trace_kvm_exit(vcpu, KVM_ISA_VMX);
>
> diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h
> index 81d301fbe638..e96c416e73bf 100644
> --- a/arch/x86/kvm/vmx/tdx.h
> +++ b/arch/x86/kvm/vmx/tdx.h
> @@ -69,6 +69,10 @@ struct vcpu_tdx {
>
>  	bool initialized;
>
> +	bool host_state_need_save;
> +	bool host_state_need_restore;
> +	u64 msr_host_kernel_gs_base;
> +
>  	/*
>  	 * Dummy to make pmu_intel not corrupt memory.
>  	 * TODO: Support PMU for TDX. Future work.
> diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
> index 3e29a6fe28ef..9fd997c79c33 100644
> --- a/arch/x86/kvm/vmx/x86_ops.h
> +++ b/arch/x86/kvm/vmx/x86_ops.h
> @@ -151,6 +151,8 @@ int tdx_vcpu_create(struct kvm_vcpu *vcpu);
>  void tdx_vcpu_free(struct kvm_vcpu *vcpu);
>  void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
>  fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu);
> +void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
> +void tdx_vcpu_put(struct kvm_vcpu *vcpu);
>  u8 tdx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
>
>  int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp);
> @@ -186,6 +188,8 @@ static inline int tdx_vcpu_create(struct kvm_vcpu *vcpu) { return -EOPNOTSUPP; }
>  static inline void tdx_vcpu_free(struct kvm_vcpu *vcpu) {}
>  static inline void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) {}
>  static inline fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu) { return EXIT_FASTPATH_NONE; }
> +static inline void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) {}
> +static inline void tdx_vcpu_put(struct kvm_vcpu *vcpu) {}
>  static inline u8 tdx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) { return 0; }
>
>  static inline int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp) { return -EOPNOTSUPP; }
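The two booleans added by the patch implement a small save/restore state
machine around TD entry/exit. As a reading aid (derived entirely from the
hunks quoted above, not additional code in the patch), their lifecycle is:

/*
 * tdx_vcpu_create():             host_state_need_save = true,
 *                                host_state_need_restore = false
 * tdx_prepare_switch_to_guest(): if need_save, snapshot the host's
 *                                MSR_KERNEL_GS_BASE; need_save = false
 * tdx_vcpu_run():                need_restore = true (the TDX module
 *                                resets the MSR on TD-exit)
 * tdx_vcpu_put() ->
 * tdx_prepare_switch_to_host():  need_save = true; if need_restore,
 *                                wrmsrl() the snapshot back;
 *                                need_restore = false
 */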
On Sun, Apr 07, 2024 at 11:02:52AM +0800,
Binbin Wu <binbin.wu@linux.intel.com> wrote:
> > diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
> > index d72651ce99ac..8275a242ce07 100644
> > --- a/arch/x86/kvm/vmx/main.c
> > +++ b/arch/x86/kvm/vmx/main.c
> > @@ -158,6 +158,32 @@ static void vt_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
> >  	vmx_vcpu_reset(vcpu, init_event);
> >  }
> >
> > +static void vt_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
> > +{
> > +	/*
> > +	 * All host state is saved/restored across SEAMCALL/SEAMRET,
>
> It sounds confusing to me.
> If all host state is saved/restored across SEAMCALL/SEAMRET, why does this
> patch save/restore MSR_KERNEL_GS_BASE for the host?

No. Probably we should update the comment, something like
"restored" => "restored or initialized to reset state".

Except for the conditionally saved/restored MSRs (e.g., perfmon, debug
registers), IA32_STAR, IA32_LSTAR, MSR_SYSCALL_MASK, IA32_TSC_AUX and
IA32_KERNEL_GS_BASE are reset to their initial state. The uret MSR
mechanism handles the first four; MSR_KERNEL_GS_BASE needs to be
restored by KVM on TD-exit.

> > +	 * and the
> > +	 * guest state of a TD is obviously off limits.  Deferring MSRs and DRs
> > +	 * is pointless because the TDX module needs to load *something* so as
> > +	 * not to expose guest state.
> > +	 */
> > +	if (is_td_vcpu(vcpu)) {
> > +		tdx_prepare_switch_to_guest(vcpu);
> > +		return;
> > +	}
> > +
> > +	vmx_prepare_switch_to_guest(vcpu);
> > +}
> > +
> > +static void vt_vcpu_put(struct kvm_vcpu *vcpu)
> > +{
> > +	if (is_td_vcpu(vcpu)) {
> > +		tdx_vcpu_put(vcpu);
> > +		return;
> > +	}
> > +
> > +	vmx_vcpu_put(vcpu);
> > +}
> > +
> >  static int vt_vcpu_pre_run(struct kvm_vcpu *vcpu)
> >  {
> >  	if (is_td_vcpu(vcpu))
> > @@ -326,9 +352,9 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
> >  	.vcpu_free = vt_vcpu_free,
> >  	.vcpu_reset = vt_vcpu_reset,
> >
> > -	.prepare_switch_to_guest = vmx_prepare_switch_to_guest,
> > +	.prepare_switch_to_guest = vt_prepare_switch_to_guest,
> >  	.vcpu_load = vmx_vcpu_load,
> > -	.vcpu_put = vmx_vcpu_put,
> > +	.vcpu_put = vt_vcpu_put,
> >
> >  	.update_exception_bitmap = vmx_update_exception_bitmap,
> >  	.get_msr_feature = vmx_get_msr_feature,
> > diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> > index fdf9196cb592..9616b1aab6ce 100644
> > --- a/arch/x86/kvm/vmx/tdx.c
> > +++ b/arch/x86/kvm/vmx/tdx.c
> > @@ -1,5 +1,6 @@
> >  // SPDX-License-Identifier: GPL-2.0
> >  #include <linux/cpu.h>
> > +#include <linux/mmu_context.h>
> >
> >  #include <asm/tdx.h>
> >
> > @@ -423,6 +424,7 @@ u8 tdx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
> >  int tdx_vcpu_create(struct kvm_vcpu *vcpu)
> >  {
> >  	struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm);
> > +	struct vcpu_tdx *tdx = to_tdx(vcpu);
> >
> >  	WARN_ON_ONCE(vcpu->arch.cpuid_entries);
> >  	WARN_ON_ONCE(vcpu->arch.cpuid_nent);
> > @@ -446,9 +448,47 @@ int tdx_vcpu_create(struct kvm_vcpu *vcpu)
> >  	if ((kvm_tdx->xfam & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE)
> >  		vcpu->arch.xfd_no_write_intercept = true;
> >
> > +	tdx->host_state_need_save = true;
> > +	tdx->host_state_need_restore = false;
> > +
> >  	return 0;
> >  }
> >
> > +void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
>
> Just like vmx_prepare_switch_to_host(), the input can be "struct vcpu_tdx
> *", since vcpu is not used inside the function.
> And the callsites just use "to_tdx(vcpu)".
>
> > +{
> > +	struct vcpu_tdx *tdx = to_tdx(vcpu);
>
> Then, this can be dropped.

prepare_switch_to_guest() is used for kvm_x86_ops.prepare_switch_to_guest().
kvm_x86_ops consistently takes struct kvm_vcpu.
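A sketch of how vt_prepare_switch_to_guest() could read with the comment
updated along those lines (the wording below is an assumption based on the
explanation above, not a revision that was actually posted):

static void vt_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
{
	/*
	 * Host state is not saved/restored wholesale across
	 * SEAMCALL/SEAMRET.  Apart from the conditionally saved/restored
	 * MSRs (e.g. perfmon, debug registers), the TDX module resets
	 * clobbered state to its initial values: IA32_STAR, IA32_LSTAR,
	 * MSR_SYSCALL_MASK and IA32_TSC_AUX are reloaded via the uret MSR
	 * mechanism, while MSR_KERNEL_GS_BASE is snapshotted by
	 * tdx_prepare_switch_to_guest() and restored by KVM after TD-exit.
	 * Guest state of a TD is obviously off limits, and deferring MSRs
	 * and DRs is pointless because the TDX module needs to load
	 * *something* so as not to expose guest state.
	 */
	if (is_td_vcpu(vcpu)) {
		tdx_prepare_switch_to_guest(vcpu);
		return;
	}

	vmx_prepare_switch_to_guest(vcpu);
}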
On 4/13/2024 4:17 AM, Isaku Yamahata wrote:
>>> +void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
>> Just like vmx_prepare_switch_to_host(), the input can be "struct vcpu_tdx
>> *", since vcpu is not used inside the function.
>> And the callsites just use "to_tdx(vcpu)".
>>
>>> +{
>>> +	struct vcpu_tdx *tdx = to_tdx(vcpu);
>> Then, this can be dropped.
> prepare_switch_to_guest() is used for kvm_x86_ops.prepare_switch_to_guest().
> kvm_x86_ops consistently takes struct kvm_vcpu.

Oh yes, it's not suitable for tdx_prepare_switch_to_guest().
Still, it can be for tdx_prepare_switch_to_host().
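For illustration, a minimal sketch of that tdx_prepare_switch_to_host()
variant, assuming struct vcpu_tdx embeds its struct kvm_vcpu as a member
named vcpu (mirroring vcpu_vmx):

static void tdx_prepare_switch_to_host(struct vcpu_tdx *tdx)
{
	tdx->host_state_need_save = true;
	if (!tdx->host_state_need_restore)
		return;

	/* The stat counter lives in the embedded kvm_vcpu. */
	++tdx->vcpu.stat.host_state_reload;

	wrmsrl(MSR_KERNEL_GS_BASE, tdx->msr_host_kernel_gs_base);
	tdx->host_state_need_restore = false;
}

void tdx_vcpu_put(struct kvm_vcpu *vcpu)
{
	vmx_vcpu_pi_put(vcpu);
	/*
	 * tdx_vcpu_put() keeps the kvm_vcpu signature because it is
	 * reachable from kvm_x86_ops, which consistently takes kvm_vcpu;
	 * only the internal helper narrows to vcpu_tdx.
	 */
	tdx_prepare_switch_to_host(to_tdx(vcpu));
}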
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index d72651ce99ac..8275a242ce07 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -158,6 +158,32 @@ static void vt_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	vmx_vcpu_reset(vcpu, init_event);
 }
 
+static void vt_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * All host state is saved/restored across SEAMCALL/SEAMRET, and the
+	 * guest state of a TD is obviously off limits.  Deferring MSRs and DRs
+	 * is pointless because the TDX module needs to load *something* so as
+	 * not to expose guest state.
+	 */
+	if (is_td_vcpu(vcpu)) {
+		tdx_prepare_switch_to_guest(vcpu);
+		return;
+	}
+
+	vmx_prepare_switch_to_guest(vcpu);
+}
+
+static void vt_vcpu_put(struct kvm_vcpu *vcpu)
+{
+	if (is_td_vcpu(vcpu)) {
+		tdx_vcpu_put(vcpu);
+		return;
+	}
+
+	vmx_vcpu_put(vcpu);
+}
+
 static int vt_vcpu_pre_run(struct kvm_vcpu *vcpu)
 {
 	if (is_td_vcpu(vcpu))
@@ -326,9 +352,9 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
 	.vcpu_free = vt_vcpu_free,
 	.vcpu_reset = vt_vcpu_reset,
 
-	.prepare_switch_to_guest = vmx_prepare_switch_to_guest,
+	.prepare_switch_to_guest = vt_prepare_switch_to_guest,
 	.vcpu_load = vmx_vcpu_load,
-	.vcpu_put = vmx_vcpu_put,
+	.vcpu_put = vt_vcpu_put,
 
 	.update_exception_bitmap = vmx_update_exception_bitmap,
 	.get_msr_feature = vmx_get_msr_feature,
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index fdf9196cb592..9616b1aab6ce 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/cpu.h>
+#include <linux/mmu_context.h>
 
 #include <asm/tdx.h>
 
@@ -423,6 +424,7 @@ u8 tdx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 int tdx_vcpu_create(struct kvm_vcpu *vcpu)
 {
 	struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm);
+	struct vcpu_tdx *tdx = to_tdx(vcpu);
 
 	WARN_ON_ONCE(vcpu->arch.cpuid_entries);
 	WARN_ON_ONCE(vcpu->arch.cpuid_nent);
@@ -446,9 +448,47 @@ int tdx_vcpu_create(struct kvm_vcpu *vcpu)
 	if ((kvm_tdx->xfam & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE)
 		vcpu->arch.xfd_no_write_intercept = true;
 
+	tdx->host_state_need_save = true;
+	tdx->host_state_need_restore = false;
+
 	return 0;
 }
 
+void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_tdx *tdx = to_tdx(vcpu);
+
+	if (!tdx->host_state_need_save)
+		return;
+
+	if (likely(is_64bit_mm(current->mm)))
+		tdx->msr_host_kernel_gs_base = current->thread.gsbase;
+	else
+		tdx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);
+
+	tdx->host_state_need_save = false;
+}
+
+static void tdx_prepare_switch_to_host(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_tdx *tdx = to_tdx(vcpu);
+
+	tdx->host_state_need_save = true;
+	if (!tdx->host_state_need_restore)
+		return;
+
+	++vcpu->stat.host_state_reload;
+
+	wrmsrl(MSR_KERNEL_GS_BASE, tdx->msr_host_kernel_gs_base);
+	tdx->host_state_need_restore = false;
+}
+
+void tdx_vcpu_put(struct kvm_vcpu *vcpu)
+{
+	vmx_vcpu_pi_put(vcpu);
+	tdx_prepare_switch_to_host(vcpu);
+}
+
 void tdx_vcpu_free(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_tdx *tdx = to_tdx(vcpu);
@@ -569,6 +609,8 @@ fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu)
 
 	tdx_vcpu_enter_exit(tdx);
 
+	tdx->host_state_need_restore = true;
+
 	vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET;
 	trace_kvm_exit(vcpu, KVM_ISA_VMX);
 
diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h
index 81d301fbe638..e96c416e73bf 100644
--- a/arch/x86/kvm/vmx/tdx.h
+++ b/arch/x86/kvm/vmx/tdx.h
@@ -69,6 +69,10 @@ struct vcpu_tdx {
 
 	bool initialized;
 
+	bool host_state_need_save;
+	bool host_state_need_restore;
+	u64 msr_host_kernel_gs_base;
+
 	/*
 	 * Dummy to make pmu_intel not corrupt memory.
 	 * TODO: Support PMU for TDX. Future work.
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index 3e29a6fe28ef..9fd997c79c33 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -151,6 +151,8 @@ int tdx_vcpu_create(struct kvm_vcpu *vcpu);
 void tdx_vcpu_free(struct kvm_vcpu *vcpu);
 void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
 fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu);
+void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
+void tdx_vcpu_put(struct kvm_vcpu *vcpu);
 u8 tdx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
 
 int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp);
@@ -186,6 +188,8 @@ static inline int tdx_vcpu_create(struct kvm_vcpu *vcpu) { return -EOPNOTSUPP; }
 static inline void tdx_vcpu_free(struct kvm_vcpu *vcpu) {}
 static inline void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) {}
 static inline fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu) { return EXIT_FASTPATH_NONE; }
+static inline void tdx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) {}
+static inline void tdx_vcpu_put(struct kvm_vcpu *vcpu) {}
 static inline u8 tdx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) { return 0; }
 
 static inline int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp) { return -EOPNOTSUPP; }