Message ID | d6547bd0c1eccdfb4a4908e330cc56ad39535f5e.1708933498.git.isaku.yamahata@intel.com (mailing list archive)
---|---
State | New, archived
Series | [v19,001/130] x86/virt/tdx: Rename _offset to _member for TD_SYSINFO_MAP() macro
On Mon, Feb 26, 2024 at 12:26:39AM -0800, isaku.yamahata@intel.com wrote:
>@@ -10162,18 +10151,49 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
>
>		WARN_ON_ONCE(vcpu->run->hypercall.flags & KVM_EXIT_HYPERCALL_MBZ);
>		vcpu->arch.complete_userspace_io = complete_hypercall_exit;
>+		/* stat is incremented on completion. */

Perhaps we could use a distinct return value to signal that the request is
redirected to userspace. This way, more cases can be supported, e.g., accesses
to MTRR MSRs, requests to service TDs, etc. And then ...

>		return 0;
>	}
>	default:
>		ret = -KVM_ENOSYS;
>		break;
>	}
>+
> out:
>+	++vcpu->stat.hypercalls;
>+	return ret;
>+}
>+EXPORT_SYMBOL_GPL(__kvm_emulate_hypercall);
>+
>+int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
>+{
>+	unsigned long nr, a0, a1, a2, a3, ret;
>+	int op_64_bit;
>+	int cpl;
>+
>+	if (kvm_xen_hypercall_enabled(vcpu->kvm))
>+		return kvm_xen_hypercall(vcpu);
>+
>+	if (kvm_hv_hypercall_enabled(vcpu))
>+		return kvm_hv_hypercall(vcpu);
>+
>+	nr = kvm_rax_read(vcpu);
>+	a0 = kvm_rbx_read(vcpu);
>+	a1 = kvm_rcx_read(vcpu);
>+	a2 = kvm_rdx_read(vcpu);
>+	a3 = kvm_rsi_read(vcpu);
>+	op_64_bit = is_64_bit_hypercall(vcpu);
>+	cpl = static_call(kvm_x86_get_cpl)(vcpu);
>+
>+	ret = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl);
>+	if (nr == KVM_HC_MAP_GPA_RANGE && !ret)
>+		/* MAP_GPA tosses the request to the user space. */

no need to check what the request is. Just checking the return value will
suffice.

>+		return 0;
>+
>	if (!op_64_bit)
>		ret = (u32)ret;
>	kvm_rax_write(vcpu, ret);
>
>-	++vcpu->stat.hypercalls;
>	return kvm_skip_emulated_instruction(vcpu);
> }
> EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
>--
>2.25.1
>
>
On Fri, Mar 29, 2024 at 11:24:55AM +0800,
Chao Gao <chao.gao@intel.com> wrote:

> On Mon, Feb 26, 2024 at 12:26:39AM -0800, isaku.yamahata@intel.com wrote:
> >@@ -10162,18 +10151,49 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
> >
> >		WARN_ON_ONCE(vcpu->run->hypercall.flags & KVM_EXIT_HYPERCALL_MBZ);
> >		vcpu->arch.complete_userspace_io = complete_hypercall_exit;
> >+		/* stat is incremented on completion. */
>
> Perhaps we could use a distinct return value to signal that the request is redirected
> to userspace. This way, more cases can be supported, e.g., accesses to MTRR
> MSRs, requests to service TDs, etc. And then ...

The convention here is the one for exit_handler that vcpu_enter_guest() already uses.
If we introduce something like KVM_VCPU_CONTINUE=1, KVM_VCPU_EXIT_TO_USER=0, it
will touch many places. So if we do (I'm not sure it's worthwhile), the
cleanup should be done independently.

> >		return 0;
> >	}
> >	default:
> >		ret = -KVM_ENOSYS;
> >		break;
> >	}
> >+
> > out:
> >+	++vcpu->stat.hypercalls;
> >+	return ret;
> >+}
> >+EXPORT_SYMBOL_GPL(__kvm_emulate_hypercall);
> >+
> >+int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
> >+{
> >+	unsigned long nr, a0, a1, a2, a3, ret;
> >+	int op_64_bit;
> >+	int cpl;
> >+
> >+	if (kvm_xen_hypercall_enabled(vcpu->kvm))
> >+		return kvm_xen_hypercall(vcpu);
> >+
> >+	if (kvm_hv_hypercall_enabled(vcpu))
> >+		return kvm_hv_hypercall(vcpu);
> >+
> >+	nr = kvm_rax_read(vcpu);
> >+	a0 = kvm_rbx_read(vcpu);
> >+	a1 = kvm_rcx_read(vcpu);
> >+	a2 = kvm_rdx_read(vcpu);
> >+	a3 = kvm_rsi_read(vcpu);
> >+	op_64_bit = is_64_bit_hypercall(vcpu);
> >+	cpl = static_call(kvm_x86_get_cpl)(vcpu);
> >+
> >+	ret = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl);
> >+	if (nr == KVM_HC_MAP_GPA_RANGE && !ret)
> >+		/* MAP_GPA tosses the request to the user space. */
>
> no need to check what the request is. Just checking the return value will suffice.

This is needed to avoid updating rax etc. KVM_HC_MAP_GPA_RANGE is the only
exception that goes to the user space. This check is a bit weird, but I couldn't
find a better way.

>
> >+		return 0;
> >+
> >	if (!op_64_bit)
> >		ret = (u32)ret;
> >	kvm_rax_write(vcpu, ret);
> >
> >-	++vcpu->stat.hypercalls;
> >	return kvm_skip_emulated_instruction(vcpu);
> > }
> > EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
> >--
> >2.25.1
> >
> >
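For readers not steeped in this convention: exit handlers reached from vcpu_enter_guest() return 1 to keep running the guest and 0 to bounce out to userspace. The named constants mentioned above do not exist in KVM today; the lines below are only a sketch of what such a cleanup would introduce.

/* Hypothetical names for KVM's existing 1/0 exit-handler convention. */
#define KVM_VCPU_CONTINUE	1	/* keep running the guest */
#define KVM_VCPU_EXIT_TO_USER	0	/* return to userspace to handle the exit */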
On Wed, Apr 03, 2024, Isaku Yamahata wrote:
> On Fri, Mar 29, 2024 at 11:24:55AM +0800,
> Chao Gao <chao.gao@intel.com> wrote:
>
> > On Mon, Feb 26, 2024 at 12:26:39AM -0800, isaku.yamahata@intel.com wrote:
> > >@@ -10162,18 +10151,49 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
> > >
> > >		WARN_ON_ONCE(vcpu->run->hypercall.flags & KVM_EXIT_HYPERCALL_MBZ);
> > >		vcpu->arch.complete_userspace_io = complete_hypercall_exit;
> > >+		/* stat is incremented on completion. */
> >
> > Perhaps we could use a distinct return value to signal that the request is redirected
> > to userspace. This way, more cases can be supported, e.g., accesses to MTRR
> > MSRs, requests to service TDs, etc. And then ...
>
> The convention here is the one for exit_handler that vcpu_enter_guest() already uses.
> If we introduce something like KVM_VCPU_CONTINUE=1, KVM_VCPU_EXIT_TO_USER=0, it
> will touch many places. So if we do (I'm not sure it's worthwhile), the
> cleanup should be done independently.

Yeah, this is far from the first time that someone has complained about KVM's
awful 1/0 return magic. And every time we've looked at it, we've come to the
conclusion that it's not worth the churn/risk.

And if we really need to further overload the return value, we can, e.g. KVM
already does this for MSR accesses:

/*
 * Internal error codes that are used to indicate that MSR emulation encountered
 * an error that should result in #GP in the guest, unless userspace
 * handles it.
 */
#define KVM_MSR_RET_INVALID	2	/* in-kernel MSR emulation #GP condition */
#define KVM_MSR_RET_FILTERED	3	/* #GP due to userspace MSR filter */
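To make the idea concrete: applied to hypercalls, that kind of overload might look roughly like the sketch below. KVM_HC_RET_EXIT_USERSPACE is an invented name used only for illustration, and a real version would have to pick a value that cannot collide with return codes the guest may legitimately see in RAX.

/* Sketch only: a dedicated "forwarded to userspace" return code for hypercalls. */
#define KVM_HC_RET_EXIT_USERSPACE	2	/* hypothetical, not in KVM today */

int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
{
	unsigned long nr, a0, a1, a2, a3, ret;
	int op_64_bit, cpl;

	/* ... Xen/Hyper-V checks and register reads as in the posted patch ... */

	ret = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl);
	if (ret == KVM_HC_RET_EXIT_USERSPACE)
		/* Userspace completes the call; don't touch RAX yet. */
		return 0;

	if (!op_64_bit)
		ret = (u32)ret;
	kvm_rax_write(vcpu, ret);
	return kvm_skip_emulated_instruction(vcpu);
}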
On 2/26/2024 4:26 PM, isaku.yamahata@intel.com wrote:
> From: Sean Christopherson <sean.j.christopherson@intel.com>
>
> By necessity, TDX will use a different register ABI for hypercalls.
> Break out the core functionality so that it may be reused for TDX.
>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  4 +++
>  arch/x86/kvm/x86.c              | 56 ++++++++++++++++++++++-----------
>  2 files changed, 42 insertions(+), 18 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index e0ffef1d377d..bb8be091f996 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -2177,6 +2177,10 @@ static inline void kvm_clear_apicv_inhibit(struct kvm *kvm,
>  	kvm_set_or_clear_apicv_inhibit(kvm, reason, false);
>  }
>
> +unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
> +				      unsigned long a0, unsigned long a1,
> +				      unsigned long a2, unsigned long a3,
> +				      int op_64_bit, int cpl);
>  int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
>
>  int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index fb7597c22f31..03950368d8db 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -10073,26 +10073,15 @@ static int complete_hypercall_exit(struct kvm_vcpu *vcpu)
>  	return kvm_skip_emulated_instruction(vcpu);
>  }
>
> -int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
> +unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
> +				      unsigned long a0, unsigned long a1,
> +				      unsigned long a2, unsigned long a3,
> +				      int op_64_bit, int cpl)
>  {
> -	unsigned long nr, a0, a1, a2, a3, ret;
> -	int op_64_bit;
> -
> -	if (kvm_xen_hypercall_enabled(vcpu->kvm))
> -		return kvm_xen_hypercall(vcpu);
> -
> -	if (kvm_hv_hypercall_enabled(vcpu))
> -		return kvm_hv_hypercall(vcpu);
> -
> -	nr = kvm_rax_read(vcpu);
> -	a0 = kvm_rbx_read(vcpu);
> -	a1 = kvm_rcx_read(vcpu);
> -	a2 = kvm_rdx_read(vcpu);
> -	a3 = kvm_rsi_read(vcpu);
> +	unsigned long ret;
>
>  	trace_kvm_hypercall(nr, a0, a1, a2, a3);
>
> -	op_64_bit = is_64_bit_hypercall(vcpu);
>  	if (!op_64_bit) {
>  		nr &= 0xFFFFFFFF;
>  		a0 &= 0xFFFFFFFF;
> @@ -10101,7 +10090,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
>  		a3 &= 0xFFFFFFFF;
>  	}
>
> -	if (static_call(kvm_x86_get_cpl)(vcpu) != 0) {
> +	if (cpl) {
>  		ret = -KVM_EPERM;
>  		goto out;
>  	}
> @@ -10162,18 +10151,49 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
>
>  		WARN_ON_ONCE(vcpu->run->hypercall.flags & KVM_EXIT_HYPERCALL_MBZ);
>  		vcpu->arch.complete_userspace_io = complete_hypercall_exit;
> +		/* stat is incremented on completion. */
>  		return 0;
>  	}
>  	default:
>  		ret = -KVM_ENOSYS;
>  		break;
>  	}
> +
>  out:
> +	++vcpu->stat.hypercalls;
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(__kvm_emulate_hypercall);
> +
> +int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
> +{
> +	unsigned long nr, a0, a1, a2, a3, ret;
> +	int op_64_bit;

Can it be opportunistically changed to bool type, as well as the argument
type of "op_64_bit" in __kvm_emulate_hypercall()?

> +	int cpl;
> +
> +	if (kvm_xen_hypercall_enabled(vcpu->kvm))
> +		return kvm_xen_hypercall(vcpu);
> +
> +	if (kvm_hv_hypercall_enabled(vcpu))
> +		return kvm_hv_hypercall(vcpu);
> +
> +	nr = kvm_rax_read(vcpu);
> +	a0 = kvm_rbx_read(vcpu);
> +	a1 = kvm_rcx_read(vcpu);
> +	a2 = kvm_rdx_read(vcpu);
> +	a3 = kvm_rsi_read(vcpu);
> +	op_64_bit = is_64_bit_hypercall(vcpu);
> +	cpl = static_call(kvm_x86_get_cpl)(vcpu);
> +
> +	ret = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl);
> +	if (nr == KVM_HC_MAP_GPA_RANGE && !ret)
> +		/* MAP_GPA tosses the request to the user space. */
> +		return 0;
> +
>  	if (!op_64_bit)
>  		ret = (u32)ret;
>  	kvm_rax_write(vcpu, ret);
>
> -	++vcpu->stat.hypercalls;
>  	return kvm_skip_emulated_instruction(vcpu);
>  }
>  EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
On Tue, Apr 09, 2024 at 05:28:05PM +0800,
Binbin Wu <binbin.wu@linux.intel.com> wrote:

> > +int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
> > +{
> > +	unsigned long nr, a0, a1, a2, a3, ret;
> > +	int op_64_bit;
>
> Can it be opportunistically changed to bool type, as well as the argument
> type of "op_64_bit" in __kvm_emulate_hypercall()?

Yes. We can also fix kvm_pv_send_ipi(op_64_bit).
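A sketch of the prototype after such a cleanup, shown only for illustration and not part of the posted patch (the op_64_bit parameter of kvm_pv_send_ipi() mentioned above would get the same treatment):

/* Sketch only: op_64_bit carried as a bool instead of an int. */
unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
				      unsigned long a0, unsigned long a1,
				      unsigned long a2, unsigned long a3,
				      bool op_64_bit, int cpl);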
On 4/4/2024 2:34 AM, Isaku Yamahata wrote:
> On Fri, Mar 29, 2024 at 11:24:55AM +0800,
> Chao Gao <chao.gao@intel.com> wrote:
>
>> On Mon, Feb 26, 2024 at 12:26:39AM -0800, isaku.yamahata@intel.com wrote:
>>> +
>>> +int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
>>> +{
>>> +	unsigned long nr, a0, a1, a2, a3, ret;
>>> +	int op_64_bit;
>>> +	int cpl;
>>> +
>>> +	if (kvm_xen_hypercall_enabled(vcpu->kvm))
>>> +		return kvm_xen_hypercall(vcpu);
>>> +
>>> +	if (kvm_hv_hypercall_enabled(vcpu))
>>> +		return kvm_hv_hypercall(vcpu);
>>> +
>>> +	nr = kvm_rax_read(vcpu);
>>> +	a0 = kvm_rbx_read(vcpu);
>>> +	a1 = kvm_rcx_read(vcpu);
>>> +	a2 = kvm_rdx_read(vcpu);
>>> +	a3 = kvm_rsi_read(vcpu);
>>> +	op_64_bit = is_64_bit_hypercall(vcpu);
>>> +	cpl = static_call(kvm_x86_get_cpl)(vcpu);
>>> +
>>> +	ret = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl);
>>> +	if (nr == KVM_HC_MAP_GPA_RANGE && !ret)
>>> +		/* MAP_GPA tosses the request to the user space. */
>> no need to check what the request is. Just checking the return value will suffice.
> This is needed to avoid updating rax etc. KVM_HC_MAP_GPA_RANGE is only an
> exception to go to the user space. This check is a bit weird, but I couldn't
> find a good way.

To be generic, I think we can use "vcpu->kvm->arch.hypercall_exit_enabled & (1 << nr)"
to check if it needs to exit to userspace. i.e.,

+	...
+	ret = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl);
+	if (!ret && (vcpu->kvm->arch.hypercall_exit_enabled & (1 << nr)))
+		/* The hypercall is requested to exit to userspace. */
+		return 0;

>
>>> +	return 0;
>>> +
>>> 	if (!op_64_bit)
>>> 		ret = (u32)ret;
>>> 	kvm_rax_write(vcpu, ret);
>>>
>>> -	++vcpu->stat.hypercalls;
>>> 	return kvm_skip_emulated_instruction(vcpu);
>>> }
>>> EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
>>> --
>>> 2.25.1
>>>
>>>
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e0ffef1d377d..bb8be091f996 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -2177,6 +2177,10 @@ static inline void kvm_clear_apicv_inhibit(struct kvm *kvm,
 	kvm_set_or_clear_apicv_inhibit(kvm, reason, false);
 }
 
+unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
+				      unsigned long a0, unsigned long a1,
+				      unsigned long a2, unsigned long a3,
+				      int op_64_bit, int cpl);
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
 
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fb7597c22f31..03950368d8db 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10073,26 +10073,15 @@ static int complete_hypercall_exit(struct kvm_vcpu *vcpu)
 	return kvm_skip_emulated_instruction(vcpu);
 }
 
-int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
+unsigned long __kvm_emulate_hypercall(struct kvm_vcpu *vcpu, unsigned long nr,
+				      unsigned long a0, unsigned long a1,
+				      unsigned long a2, unsigned long a3,
+				      int op_64_bit, int cpl)
 {
-	unsigned long nr, a0, a1, a2, a3, ret;
-	int op_64_bit;
-
-	if (kvm_xen_hypercall_enabled(vcpu->kvm))
-		return kvm_xen_hypercall(vcpu);
-
-	if (kvm_hv_hypercall_enabled(vcpu))
-		return kvm_hv_hypercall(vcpu);
-
-	nr = kvm_rax_read(vcpu);
-	a0 = kvm_rbx_read(vcpu);
-	a1 = kvm_rcx_read(vcpu);
-	a2 = kvm_rdx_read(vcpu);
-	a3 = kvm_rsi_read(vcpu);
+	unsigned long ret;
 
 	trace_kvm_hypercall(nr, a0, a1, a2, a3);
 
-	op_64_bit = is_64_bit_hypercall(vcpu);
 	if (!op_64_bit) {
 		nr &= 0xFFFFFFFF;
 		a0 &= 0xFFFFFFFF;
@@ -10101,7 +10090,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 		a3 &= 0xFFFFFFFF;
 	}
 
-	if (static_call(kvm_x86_get_cpl)(vcpu) != 0) {
+	if (cpl) {
 		ret = -KVM_EPERM;
 		goto out;
 	}
@@ -10162,18 +10151,49 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 
 		WARN_ON_ONCE(vcpu->run->hypercall.flags & KVM_EXIT_HYPERCALL_MBZ);
 		vcpu->arch.complete_userspace_io = complete_hypercall_exit;
+		/* stat is incremented on completion. */
 		return 0;
 	}
 	default:
 		ret = -KVM_ENOSYS;
 		break;
 	}
+
 out:
+	++vcpu->stat.hypercalls;
+	return ret;
+}
+EXPORT_SYMBOL_GPL(__kvm_emulate_hypercall);
+
+int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
+{
+	unsigned long nr, a0, a1, a2, a3, ret;
+	int op_64_bit;
+	int cpl;
+
+	if (kvm_xen_hypercall_enabled(vcpu->kvm))
+		return kvm_xen_hypercall(vcpu);
+
+	if (kvm_hv_hypercall_enabled(vcpu))
+		return kvm_hv_hypercall(vcpu);
+
+	nr = kvm_rax_read(vcpu);
+	a0 = kvm_rbx_read(vcpu);
+	a1 = kvm_rcx_read(vcpu);
+	a2 = kvm_rdx_read(vcpu);
+	a3 = kvm_rsi_read(vcpu);
+	op_64_bit = is_64_bit_hypercall(vcpu);
+	cpl = static_call(kvm_x86_get_cpl)(vcpu);
+
+	ret = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, op_64_bit, cpl);
+	if (nr == KVM_HC_MAP_GPA_RANGE && !ret)
+		/* MAP_GPA tosses the request to the user space. */
+		return 0;
+
 	if (!op_64_bit)
 		ret = (u32)ret;
 	kvm_rax_write(vcpu, ret);
 
-	++vcpu->stat.hypercalls;
 	return kvm_skip_emulated_instruction(vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
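For context on why the split is useful: a TDX exit handler can feed the same core helper with arguments taken from the TDVMCALL registers rather than RAX/RBX/RCX/RDX/RSI. The sketch below is illustrative only; the register layout (R10 = nr, R11-R14 = args) and the function name are assumptions rather than code from this series, and handling of the exit-to-userspace case is omitted.

/* Hypothetical TDX-side caller, shown only to illustrate the intended reuse. */
static int tdx_emulate_kvm_hypercall(struct kvm_vcpu *vcpu)
{
	unsigned long nr, a0, a1, a2, a3, ret;

	/* Assumed TDVMCALL register convention: R10 = nr, R11..R14 = a0..a3. */
	nr = kvm_r10_read(vcpu);
	a0 = kvm_r11_read(vcpu);
	a1 = kvm_r12_read(vcpu);
	a2 = kvm_r13_read(vcpu);
	a3 = kvm_r14_read(vcpu);

	/* A TD guest is 64-bit and its hypercalls are treated as CPL0. */
	ret = __kvm_emulate_hypercall(vcpu, nr, a0, a1, a2, a3, 1, 0);

	/* Return value goes back to the guest in R10 under the assumed ABI. */
	kvm_r10_write(vcpu, ret);
	return 1;
}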