Message ID | 20200804042043.3592620-2-aaronlewis@google.com (mailing list archive)
---|---
State | New, archived
Series | Allow userspace to manage MSRs
On 04.08.20 06:20, Aaron Lewis wrote:
> Add KVM_SET_EXIT_MSRS ioctl to allow userspace to pass in a list of MSRs
> that force an exit to userspace when rdmsr or wrmsr are used by the
> guest.
>
> KVM_SET_EXIT_MSRS will need to be called before any vCPUs are
> created to protect the 'user_exit_msrs' list from being mutated while
> vCPUs are running.
>
> Add KVM_CAP_SET_MSR_EXITS to identify the feature exists.
>
> Signed-off-by: Aaron Lewis <aaronlewis@google.com>
> Reviewed-by: Oliver Upton <oupton@google.com>
> ---
>  Documentation/virt/kvm/api.rst  | 24 +++++++++++++++++++
>  arch/x86/include/asm/kvm_host.h |  2 ++
>  arch/x86/kvm/x86.c              | 41 +++++++++++++++++++++++++++++++++
>  include/uapi/linux/kvm.h        |  2 ++
>  4 files changed, 69 insertions(+)
>
[...]
> +Sets the userspace MSR list which is used to track which MSRs KVM should send
> +to userspace to be serviced when the guest executes rdmsr or wrmsr.

Unfortunately this doesn't solve the whole "ignore_msrs" mess that we
have today. How can I just say "tell user space about unhandled MSRs"?
And if I add that on top of this mechanism, how do we not make the list
of MSRs that are handled in-kernel an ABI?

[...]
> +static int kvm_vm_ioctl_set_exit_msrs(struct kvm *kvm,
> +			struct kvm_msr_list __user *user_msr_list)
> +{
> +	struct kvm_msr_list *msr_list, hdr;
> +	size_t indices_size;
> +
> +	if (kvm->arch.user_exit_msrs != NULL)
> +		return -EEXIST;
> +
> +	if (kvm->created_vcpus)
> +		return -EINVAL;

What if we need to change the set of user space handled MSRs
dynamically, for example because a feature has been enabled through a
previous MSR?

> +
> +	if (copy_from_user(&hdr, user_msr_list, sizeof(hdr)))
> +		return -EFAULT;
> +
> +	if (hdr.nmsrs >= MAX_IO_MSRS)

This constant is not defined inside the scope of this patch. Is your
patchset fully bisectable? Please make sure that every patch builds
successfully on its own :).


Alex

[...]
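[Editor's note: a minimal userspace sketch, not from the patch, of how a VMM would drive the interface as documented above — query KVM_CAP_SET_MSR_EXITS, install the list once on the VM fd, then create vCPUs. It assumes the uapi proposed in this series is present in linux/kvm.h; the MSR index used is arbitrary.]

/* sketch: install an MSR exit list before any vCPU exists */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	uint32_t msrs[] = { 0x10a };	/* MSR_IA32_ARCH_CAPABILITIES, for example */
	struct kvm_msr_list *list;
	int kvm_fd, vm_fd;

	kvm_fd = open("/dev/kvm", O_RDWR);
	vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0);

	if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_MSR_EXITS) != 1) {
		fprintf(stderr, "KVM_SET_EXIT_MSRS not supported\n");
		return 1;
	}

	/* kvm_msr_list is a header followed by a variable-length index array. */
	list = calloc(1, sizeof(*list) + sizeof(msrs));
	list->nmsrs = sizeof(msrs) / sizeof(msrs[0]);
	memcpy(list->indices, msrs, sizeof(msrs));

	/* Must happen before KVM_CREATE_VCPU; a second call fails with EEXIST. */
	if (ioctl(vm_fd, KVM_SET_EXIT_MSRS, list) < 0)
		perror("KVM_SET_EXIT_MSRS");

	ioctl(vm_fd, KVM_CREATE_VCPU, 0);	/* vCPUs only after the list is installed */
	free(list);
	return 0;
}

Because the list can be set only once, a VMM has to compute the complete set of userspace-handled MSRs up front — which is exactly the constraint being debated below.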
On Fri, Aug 7, 2020 at 9:12 AM Alexander Graf <graf@amazon.com> wrote:
>
> On 04.08.20 06:20, Aaron Lewis wrote:
> > Add KVM_SET_EXIT_MSRS ioctl to allow userspace to pass in a list of MSRs
> > that force an exit to userspace when rdmsr or wrmsr are used by the
> > guest.
> >
[...]
> > +Sets the userspace MSR list which is used to track which MSRs KVM should send
> > +to userspace to be serviced when the guest executes rdmsr or wrmsr.
>
> Unfortunately this doesn't solve the whole "ignore_msrs" mess that we
> have today. How can I just say "tell user space about unhandled MSRs"?
> And if I add that on top of this mechanism, how do we not make the list
> of MSRs that are handled in-kernel an ABI?

Jumping in for Aaron, who is out this afternoon...

This patch doesn't attempt to solve your problem, "tell user space
about unhandled MSRs." It attempts to solve our problem, "tell user
space about a specific set of MSRs, even if kvm learns to handle them
in the future." This is, in fact, what we really wanted to do when
Peter Hornyack implemented the "tell user space about unhandled MSRs"
changes in 2015. We just didn't realize it at the time.

Though your proposal does partially solve our problem, it's a lot
easier to specify a small set of MSRs by inclusion than it is by
exclusion. (Where your proposal falls short of our needs is when
userspace wants to handle an MSR that kvm would not typically
intercept at all.)

[...]
> > +	if (kvm->created_vcpus)
> > +		return -EINVAL;
>
> What if we need to change the set of user space handled MSRs
> dynamically, for example because a feature has been enabled through a
> previous MSR?

It's never been an issue for us, and this approach avoids the
messiness of having to back out the old changes to the MSR permissions
bitmaps, which is fraught. It can be done, but I would question the
ROI on the additional complexity. In any case, I think a VCPU ioctl
would be more appropriate than a VM ioctl if dynamic modifications
were allowed. For instance, in your example, the previous MSR write
that enabled a feature requiring a new user space MSR exit would
likely apply only to the VCPU on which that previous MSR write
happened.

[...]
> > +	if (hdr.nmsrs >= MAX_IO_MSRS)
>
> This constant is not defined inside the scope of this patch. Is your
> patchset fully bisectable? Please make sure that every patch builds
> successfully on its own :).

I can't vouch for this patchset being fully bisectable, but this
particular constant was moved to x86.c ~13 years ago, in commit
313a3dc75da20 ("KVM: Portability: split kvm_vcpu_ioctl"). It should
not cause any build issues. :)
On 07.08.20 23:16, Jim Mattson wrote:
> On Fri, Aug 7, 2020 at 9:12 AM Alexander Graf <graf@amazon.com> wrote:
>>
>> On 04.08.20 06:20, Aaron Lewis wrote:
[...]
>>> +Sets the userspace MSR list which is used to track which MSRs KVM should send
>>> +to userspace to be serviced when the guest executes rdmsr or wrmsr.
>>
>> Unfortunately this doesn't solve the whole "ignore_msrs" mess that we
>> have today. How can I just say "tell user space about unhandled MSRs"?
>> And if I add that on top of this mechanism, how do we not make the list
>> of MSRs that are handled in-kernel an ABI?
>
> Jumping in for Aaron, who is out this afternoon...

Awesome, thanks for the super quick reply!

> This patch doesn't attempt to solve your problem, "tell user space
> about unhandled MSRs." It attempts to solve our problem, "tell user
> space about a specific set of MSRs, even if kvm learns to handle them
> in the future." This is, in fact, what we really wanted to do when
> Peter Hornyack implemented the "tell user space about unhandled MSRs"
> changes in 2015. We just didn't realize it at the time.

Ok, let's take a step back then. What exactly are you trying to solve
and which MSRs do you care about?

My main problem with a deny list approach is that it's (practically)
impossible to map the allow list use case onto it. It is however trivial
to only deny a few select MSRs explicitly with an allow list approach. I
don't want to introduce yet another ABI to KVM in 2 years to then have
both :).

> Though your proposal does partially solve our problem, it's a lot
> easier to specify a small set of MSRs by inclusion than it is by
> exclusion. (Where your proposal falls short of our needs is when
> userspace wants to handle an MSR that kvm would not typically
> intercept at all.)

The only MSRs that KVM does not intercept are the ones explicitly
specified as pass-through.

[...]
>> What if we need to change the set of user space handled MSRs
>> dynamically, for example because a feature has been enabled through a
>> previous MSR?
>
> It's never been an issue for us, and this approach avoids the
> messiness of having to back out the old changes to the MSR permissions
> bitmaps, which is fraught. It can be done, but I would question the
> ROI on the additional complexity. In any case, I think a VCPU ioctl
> would be more appropriate than a VM ioctl if dynamic modifications
> were allowed. For instance, in your example, the previous MSR write
> that enabled a feature requiring a new user space MSR exit would
> likely apply only to the VCPU on which that previous MSR write
> happened.

My general rule of thumb with PPC has always been to make as much vCPU
specific as possible. X86 KVM is a bit different there - and I guess it
does help for memory consumption :).

>> This constant is not defined inside the scope of this patch. Is your
>> patchset fully bisectable? Please make sure that every patch builds
>> successfully on its own :).
>
> I can't vouch for this patchset being fully bisectable, but this
> particular constant was moved to x86.c ~13 years ago, in commit
> 313a3dc75da20 ("KVM: Portability: split kvm_vcpu_ioctl"). It should
> not cause any build issues. :)

Ugh, sorry, my bad :).


Alex
On Fri, Aug 7, 2020 at 3:49 PM Alexander Graf <graf@amazon.com> wrote:
>
> On 07.08.20 23:16, Jim Mattson wrote:
> > This patch doesn't attempt to solve your problem, "tell user space
> > about unhandled MSRs." It attempts to solve our problem, "tell user
> > space about a specific set of MSRs, even if kvm learns to handle them
> > in the future." This is, in fact, what we really wanted to do when
> > Peter Hornyack implemented the "tell user space about unhandled MSRs"
> > changes in 2015. We just didn't realize it at the time.
>
> Ok, let's take a step back then. What exactly are you trying to solve
> and which MSRs do you care about?

Our userspace handles a handful of MSRs, including some of the
synthetic PV time-related MSRs (both kvm and Hyper-V), which we want
to exit to userspace even though kvm thinks it knows what to do with
them. We also have a range of Google-specific MSRs that kvm will
probably never know about. And, finally, we find that as time goes on,
there are situations where we want to add support for a new MSR in
userspace because we can roll out a new userspace more quickly than a
new kernel. An example of this last category is
MSR_IA32_ARCH_CAPABILITIES. We don't necessarily want to cede control
of such MSRs to kvm as soon as it knows about them. For instance, we
may want to wait until the new kernel reaches its rollback horizon.

There may be a few more MSRs we handle in userspace, but I think that
covers the bulk of them. I don't believe we have yet seen a case where
we wanted to take control of an MSR that kvm didn't intercept itself.
That part of Aaron's patch may be overkill, but it gives the API clean
semantics. It seems a bit awkward to have a userspace MSR list where
accesses to some of the MSRs exit to userspace and accesses to others
do not. (I believe that your implementation of the allow list suffers
from this awkwardness, though I haven't yet had time to review it in
depth.)

> My main problem with a deny list approach is that it's (practically)
> impossible to map the allow list use case onto it. It is however trivial
> to only deny a few select MSRs explicitly with an allow list approach. I
> don't want to introduce yet another ABI to KVM in 2 years to then have
> both :).

I agree in part. A deny list does not support your use case well. But
does an allow list support it that much better? I suspect that an
allow list that specifies "every MSR that kvm knows about today" is
far from trivial to construct, especially if you consider different
microarchitectures and different VM configurations (e.g. vPMU vs. no
vPMU).

Can we back up and ask what it is that you want to accomplish? If you
simply want ignore_msrs to be per-VM, wouldn't it be easier to add an
ioctl to simply set a boolean that potentially overrides the module
parameter, and then modify the ignore_msrs code to consult the per-VM
boolean?

Getting back to one of the issues I raised earlier: your "exit instead
of #GP" patch exits to userspace on both unknown MSRs and illegal
WRMSR values. If userspace wants to implement "ignore MSRs" for a
particular VM, it's absolutely trivial when the reason for the exit is
"unknown MSR." However, it becomes more complicated when the reason
for the exit is "#GP." Now, userspace has to know which MSRs are known
to kvm before it can decide whether or not the access should #GP.

I hope this doesn't come across sounding antagonistic. Like you, I
want to avoid revisiting this design a few years down the road. We're
already in that boat with the 2015 "exit on unknown MSR" change!
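[Editor's note: the per-VM ignore_msrs override Jim floats above would look roughly like the fragment below. This is purely illustrative: KVM_CAP_IGNORE_MSRS_VM, the kvm_arch field, and the kvm_ignore_msrs() helper are invented names; upstream x86.c has only the global ignore_msrs module parameter.]

/* arch/x86/include/asm/kvm_host.h: per-VM override alongside the module param */
struct kvm_arch {
	/* ... existing fields elided ... */
	bool ignore_msrs;
};

/* arch/x86/kvm/x86.c: every existing ignore_msrs check would consult this */
static bool kvm_ignore_msrs(struct kvm *kvm)
{
	return kvm->arch.ignore_msrs || ignore_msrs;
}

/* arch/x86/kvm/x86.c: hypothetical new case in kvm_vm_ioctl_enable_cap() */
	case KVM_CAP_IGNORE_MSRS_VM:
		kvm->arch.ignore_msrs = !!cap->args[0];
		r = 0;
		break;

Userspace would flip the flag with KVM_ENABLE_CAP on the VM fd, which covers the plain "ignore unknown MSRs for this VM" case without defining any MSR list at all.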
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 320788f81a05..7d8167c165aa 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -1006,6 +1006,30 @@ such as migration.
 :Parameters: struct kvm_vcpu_event (out)
 :Returns: 0 on success, -1 on error

+4.32 KVM_SET_EXIT_MSRS
+------------------
+
+:Capability: KVM_CAP_SET_MSR_EXITS
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: struct kvm_msr_list (in)
+:Returns: 0 on success, -1 on error
+
+Sets the userspace MSR list which is used to track which MSRs KVM should send
+to userspace to be serviced when the guest executes rdmsr or wrmsr.
+
+This ioctl needs to be called before vCPUs are setup otherwise the list of MSRs
+will not be accepted and an EINVAL error will be returned.  Also, if a list of
+MSRs has already been supplied, and this ioctl is called again an EEXIST error
+will be returned.
+
+::
+
+  struct kvm_msr_list {
+	__u32 nmsrs;
+	__u32 indices[0];
+};
+
 X86:
 ^^^^

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index be5363b21540..510055471dd0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1004,6 +1004,8 @@ struct kvm_arch {

 	struct kvm_pmu_event_filter *pmu_event_filter;
 	struct task_struct *nx_lpage_recovery_thread;
+
+	struct kvm_msr_list *user_exit_msrs;
 };

 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 88c593f83b28..46a0fb9e0869 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3419,6 +3419,42 @@ static inline bool kvm_can_mwait_in_guest(void)
 		boot_cpu_has(X86_FEATURE_ARAT);
 }

+static int kvm_vm_ioctl_set_exit_msrs(struct kvm *kvm,
+			struct kvm_msr_list __user *user_msr_list)
+{
+	struct kvm_msr_list *msr_list, hdr;
+	size_t indices_size;
+
+	if (kvm->arch.user_exit_msrs != NULL)
+		return -EEXIST;
+
+	if (kvm->created_vcpus)
+		return -EINVAL;
+
+	if (copy_from_user(&hdr, user_msr_list, sizeof(hdr)))
+		return -EFAULT;
+
+	if (hdr.nmsrs >= MAX_IO_MSRS)
+		return -E2BIG;
+
+	indices_size = sizeof(hdr.indices[0]) * hdr.nmsrs;
+	msr_list = kvzalloc(sizeof(struct kvm_msr_list) + indices_size,
+			    GFP_KERNEL_ACCOUNT);
+	if (!msr_list)
+		return -ENOMEM;
+	msr_list->nmsrs = hdr.nmsrs;
+
+	if (copy_from_user(msr_list->indices, user_msr_list->indices,
+			   indices_size)) {
+		kvfree(msr_list);
+		return -EFAULT;
+	}
+
+	kvm->arch.user_exit_msrs = msr_list;
+
+	return 0;
+}
+
 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 	int r = 0;
@@ -3476,6 +3512,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_MSR_PLATFORM_INFO:
 	case KVM_CAP_EXCEPTION_PAYLOAD:
 	case KVM_CAP_SET_GUEST_DEBUG:
+	case KVM_CAP_SET_MSR_EXITS:
 		r = 1;
 		break;
 	case KVM_CAP_SYNC_REGS:
@@ -5261,6 +5298,10 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = 0;
 		break;
 	}
+	case KVM_SET_EXIT_MSRS: {
+		r = kvm_vm_ioctl_set_exit_msrs(kvm, argp);
+		break;
+	}
 	case KVM_MEMORY_ENCRYPT_OP: {
 		r = -ENOTTY;
 		if (kvm_x86_ops.mem_enc_op)
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 4fdf30316582..de4638c1bd15 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1031,6 +1031,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_PPC_SECURE_GUEST 181
 #define KVM_CAP_HALT_POLL 182
 #define KVM_CAP_ASYNC_PF_INT 183
+#define KVM_CAP_SET_MSR_EXITS 185

 #ifdef KVM_CAP_IRQ_ROUTING

@@ -1371,6 +1372,7 @@ struct kvm_s390_ucas_mapping {
 /* Available with KVM_CAP_PMU_EVENT_FILTER */
 #define KVM_SET_PMU_EVENT_FILTER _IOW(KVMIO, 0xb2, struct kvm_pmu_event_filter)
 #define KVM_PPC_SVM_OFF		_IO(KVMIO, 0xb3)
+#define KVM_SET_EXIT_MSRS _IOW(KVMIO, 0xb4, struct kvm_msr_list)

 /* ioctl for vm fd */
 #define KVM_CREATE_DEVICE	_IOWR(KVMIO, 0xe0, struct kvm_create_device)
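[Editor's note: the lookup side is not part of this patch. Presumably a later patch in the series consults kvm->arch.user_exit_msrs on an intercepted RDMSR/WRMSR roughly as sketched below; the helper name and the simple linear scan are guesses, not taken from the series.]

/* Sketch only: how the stored list might be consulted on an MSR intercept. */
static bool kvm_msr_user_exit(struct kvm *kvm, u32 index)
{
	struct kvm_msr_list *exit_msrs = kvm->arch.user_exit_msrs;
	u32 i;

	if (!exit_msrs)
		return false;

	/* Linear scan; the list is installed once and expected to stay small. */
	for (i = 0; i < exit_msrs->nmsrs; i++)
		if (exit_msrs->indices[i] == index)
			return true;

	return false;
}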