Message ID | 20200803211423.29398-3-graf@amazon.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Allow user space to restrict and augment MSR emulation | expand |
On Mon, Aug 3, 2020 at 2:14 PM Alexander Graf <graf@amazon.com> wrote: > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -901,6 +901,13 @@ struct kvm_hv { > struct kvm_hv_syndbg hv_syndbg; > }; > > +struct msr_bitmap_range { > + u32 flags; > + u32 nmsrs; > + u32 base; > + unsigned long *bitmap; > +}; > + > enum kvm_irqchip_mode { > KVM_IRQCHIP_NONE, > KVM_IRQCHIP_KERNEL, /* created with KVM_CREATE_IRQCHIP */ > @@ -1005,6 +1012,9 @@ struct kvm_arch { > /* Deflect RDMSR and WRMSR to user space when they trigger a #GP */ > bool user_space_msr_enabled; > > + struct msr_bitmap_range msr_allowlist_ranges[10]; Why 10? I think this is the only use of this constant, but a macro would still be nice, especially since the number appears to be arbitrary. > diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h > index 0780f97c1850..c33fb1d72d52 100644 > --- a/arch/x86/include/uapi/asm/kvm.h > +++ b/arch/x86/include/uapi/asm/kvm.h > @@ -192,6 +192,21 @@ struct kvm_msr_list { > __u32 indices[0]; > }; > > +#define KVM_MSR_ALLOW_READ (1 << 0) > +#define KVM_MSR_ALLOW_WRITE (1 << 1) > + > +/* Maximum size of the of the bitmap in bytes */ > +#define KVM_MSR_ALLOWLIST_MAX_LEN 0x600 Wouldn't 0x400 be a more natural size, since both Intel and AMD MSR permission bitmaps cover ranges of 8192 MSRs? > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index e1139124350f..25e58ceb19de 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -1472,6 +1472,38 @@ void kvm_enable_efer_bits(u64 mask) > } > EXPORT_SYMBOL_GPL(kvm_enable_efer_bits); > > +static bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type) In another thread, when I suggested that a function should return bool, you said, "'I'm not a big fan of bool returning APIs unless they have an "is" in their name.' This function doesn't have "is" in its name. 
:-) > +{ > + struct kvm *kvm = vcpu->kvm; > + struct msr_bitmap_range *ranges = kvm->arch.msr_allowlist_ranges; > + u32 count = kvm->arch.msr_allowlist_ranges_count; Shouldn't the read of kvm->arch.msr_allowlist_ranges_count be guarded by the mutex, below? > + u32 i; > + bool r = false; > + > + /* MSR allowlist not set up, allow everything */ > + if (!count) > + return true; > + > + /* Prevent collision with clear_msr_allowlist */ > + mutex_lock(&kvm->lock); > + > + for (i = 0; i < count; i++) { > + u32 start = ranges[i].base; > + u32 end = start + ranges[i].nmsrs; > + u32 flags = ranges[i].flags; > + unsigned long *bitmap = ranges[i].bitmap; > + > + if ((index >= start) && (index < end) && (flags & type)) { > + r = !!test_bit(index - start, bitmap); The !! seems gratuitous, since r is of type bool. > @@ -1483,6 +1515,9 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data, > { > struct msr_data msr; > > + if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_ALLOW_WRITE)) > + return -ENOENT; Perhaps -EPERM is more appropriate here? > switch (index) { > case MSR_FS_BASE: > case MSR_GS_BASE: > @@ -1528,6 +1563,9 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, > struct msr_data msr; > int ret; > > + if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_ALLOW_READ)) > + return -ENOENT; ...and here? > +static bool msr_range_overlaps(struct kvm *kvm, struct msr_bitmap_range *range) Another bool function with no "is"? :-) > +{ > + struct msr_bitmap_range *ranges = kvm->arch.msr_allowlist_ranges; > + u32 i, count = kvm->arch.msr_allowlist_ranges_count; > + bool r = false; > + > + for (i = 0; i < count; i++) { > + u32 start = max(range->base, ranges[i].base); > + u32 end = min(range->base + range->nmsrs, > + ranges[i].base + ranges[i].nmsrs); > + > + if ((start < end) && (range->flags & ranges[i].flags)) { > + r = true; > + break; > + } > + } > + > + return r; > +} This seems like an awkward constraint. 
Would it be possible to allow overlapping ranges as long as the access types don't clash? So, for example, could I specify an allow list for READ of MSRs 0-0x1ffff and an allow list for WRITE of MSRs 0-0x1ffff? Actually, I don't see why you have to prohibit overlapping ranges at all. > +static int kvm_vm_ioctl_clear_msr_allowlist(struct kvm *kvm) > +{ > + int i; Nit: In earlier code, you use u32 for this index. (I'm actually a fan of int, myself.) > @@ -10086,6 +10235,8 @@ void kvm_arch_pre_destroy_vm(struct kvm *kvm) > > void kvm_arch_destroy_vm(struct kvm *kvm) > { > + int i; It's 50/50 now, u32 vs. int. :-)
On 20.08.20 00:49, Jim Mattson wrote: > > On Mon, Aug 3, 2020 at 2:14 PM Alexander Graf <graf@amazon.com> wrote: > >> --- a/arch/x86/include/asm/kvm_host.h >> +++ b/arch/x86/include/asm/kvm_host.h >> @@ -901,6 +901,13 @@ struct kvm_hv { >> struct kvm_hv_syndbg hv_syndbg; >> }; >> >> +struct msr_bitmap_range { >> + u32 flags; >> + u32 nmsrs; >> + u32 base; >> + unsigned long *bitmap; >> +}; >> + >> enum kvm_irqchip_mode { >> KVM_IRQCHIP_NONE, >> KVM_IRQCHIP_KERNEL, /* created with KVM_CREATE_IRQCHIP */ >> @@ -1005,6 +1012,9 @@ struct kvm_arch { >> /* Deflect RDMSR and WRMSR to user space when they trigger a #GP */ >> bool user_space_msr_enabled; >> >> + struct msr_bitmap_range msr_allowlist_ranges[10]; > > Why 10? I think this is the only use of this constant, but a macro > would still be nice, especially since the number appears to be > arbitrary. > >> diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h >> index 0780f97c1850..c33fb1d72d52 100644 >> --- a/arch/x86/include/uapi/asm/kvm.h >> +++ b/arch/x86/include/uapi/asm/kvm.h >> @@ -192,6 +192,21 @@ struct kvm_msr_list { >> __u32 indices[0]; >> }; >> >> +#define KVM_MSR_ALLOW_READ (1 << 0) >> +#define KVM_MSR_ALLOW_WRITE (1 << 1) >> + >> +/* Maximum size of the of the bitmap in bytes */ >> +#define KVM_MSR_ALLOWLIST_MAX_LEN 0x600 > > Wouldn't 0x400 be a more natural size, since both Intel and AMD MSR > permission bitmaps cover ranges of 8192 MSRs? You can always make your bitmaps 0x400 :). I had to choose something that limits our memory footprint, so that user space can't allocate infinite amounts of memory. 
> >> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c >> index e1139124350f..25e58ceb19de 100644 >> --- a/arch/x86/kvm/x86.c >> +++ b/arch/x86/kvm/x86.c >> @@ -1472,6 +1472,38 @@ void kvm_enable_efer_bits(u64 mask) >> } >> EXPORT_SYMBOL_GPL(kvm_enable_efer_bits); >> >> +static bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type) > > In another thread, when I suggested that a function should return > bool, you said, "'I'm not a big fan of bool returning APIs unless they > have an "is" in their name.' This function doesn't have "is" in its > name. :-) I've left this unanswered for way too long :). IMHO, passive is fine too, as it implies an "is" in my brain. Or to put it differently: bad: bool kvm_get_msr() bad: bool kvm_get_msr_user_space() good: bool kvm_msr_blocked() good: bool kvm_msr_allowed() good: bool is_kvm_msr_allowed() > >> +{ >> + struct kvm *kvm = vcpu->kvm; >> + struct msr_bitmap_range *ranges = kvm->arch.msr_allowlist_ranges; >> + u32 count = kvm->arch.msr_allowlist_ranges_count; > > Shouldn't the read of kvm->arch.msr_allowlist_ranges_count be guarded > by the mutex, below? > >> + u32 i; >> + bool r = false; >> + >> + /* MSR allowlist not set up, allow everything */ >> + if (!count) >> + return true; >> + >> + /* Prevent collision with clear_msr_allowlist */ >> + mutex_lock(&kvm->lock); >> + >> + for (i = 0; i < count; i++) { >> + u32 start = ranges[i].base; >> + u32 end = start + ranges[i].nmsrs; >> + u32 flags = ranges[i].flags; >> + unsigned long *bitmap = ranges[i].bitmap; >> + >> + if ((index >= start) && (index < end) && (flags & type)) { >> + r = !!test_bit(index - start, bitmap); > > The !! seems gratuitous, since r is of type bool. > >> @@ -1483,6 +1515,9 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data, >> { >> struct msr_data msr; >> >> + if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_ALLOW_WRITE)) >> + return -ENOENT; > > Perhaps -EPERM is more appropriate here? 
> >> switch (index) { >> case MSR_FS_BASE: >> case MSR_GS_BASE: >> @@ -1528,6 +1563,9 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, >> struct msr_data msr; >> int ret; >> >> + if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_ALLOW_READ)) >> + return -ENOENT; > > ...and here? > >> +static bool msr_range_overlaps(struct kvm *kvm, struct msr_bitmap_range *range) > > Another bool function with no "is"? :-) > >> +{ >> + struct msr_bitmap_range *ranges = kvm->arch.msr_allowlist_ranges; >> + u32 i, count = kvm->arch.msr_allowlist_ranges_count; >> + bool r = false; >> + >> + for (i = 0; i < count; i++) { >> + u32 start = max(range->base, ranges[i].base); >> + u32 end = min(range->base + range->nmsrs, >> + ranges[i].base + ranges[i].nmsrs); >> + >> + if ((start < end) && (range->flags & ranges[i].flags)) { >> + r = true; >> + break; >> + } >> + } >> + >> + return r; >> +} > > This seems like an awkward constraint. Would it be possible to allow > overlapping ranges as long as the access types don't clash? So, for > example, could I specify an allow list for READ of MSRs 0-0x1ffff and > an allow list for WRITE of MSRs 0-0x1ffff? Actually, I don't see why > you have to prohibit overlapping ranges at all. I tend to agree. Now that the order is obvious through the new API, we no longer need to check for overlaps. > > >> +static int kvm_vm_ioctl_clear_msr_allowlist(struct kvm *kvm) >> +{ >> + int i; > > Nit: In earlier code, you use u32 for this index. (I'm actually a fan > of int, myself.) I usually use int as well because it's easier to type, but doing signed indexes is just so wrong on so many levels :). I'll fix them up to be all u32. Alex Amazon Development Center Germany GmbH Krausenstr. 38 10117 Berlin Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B Sitz: Berlin Ust-ID: DE 289 237 879
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 2ca38649b3d4..9cb36060f61c 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -4697,6 +4697,82 @@ KVM_PV_VM_VERIFY Verify the integrity of the unpacked image. Only if this succeeds, KVM is allowed to start protected VCPUs. +4.126 KVM_X86_ADD_MSR_ALLOWLIST +------------------------------- + +:Capability: KVM_CAP_X86_MSR_ALLOWLIST +:Architectures: x86 +:Type: vm ioctl +:Parameters: struct kvm_msr_allowlist +:Returns: 0 on success, < 0 on error + +:: + + struct kvm_msr_allowlist { + __u32 flags; + __u32 nmsrs; /* number of msrs in bitmap */ + __u32 base; /* base address for the MSRs bitmap */ + __u32 pad; + + __u8 bitmap[0]; /* a set bit allows that the operation set in flags */ + }; + +flags values: + +KVM_MSR_ALLOW_READ + + Filter read accesses to MSRs using the given bitmap. A 0 in the bitmap + indicates that a read should immediately fail, while a 1 indicates that + a read should be handled by the normal KVM MSR emulation logic. + +KVM_MSR_ALLOW_WRITE + + Filter write accesses to MSRs using the given bitmap. A 0 in the bitmap + indicates that a write should immediately fail, while a 1 indicates that + a write should be handled by the normal KVM MSR emulation logic. + +KVM_MSR_ALLOW_READ | KVM_MSR_ALLOW_WRITE + + Filter both read and write accesses to MSRs using the given bitmap. A 0 + in the bitmap indicates that both reads and writes should immediately fail, + while a 1 indicates that reads and writes should be handled by the normal + KVM MSR emulation logic. + +This ioctl allows user space to define a set of bitmaps of MSR ranges to +specify whether a certain MSR access is allowed or not. + +If this ioctl has never been invoked, MSR accesses are not guarded and the +old KVM in-kernel emulation behavior is fully preserved. + +As soon as the first allow list was specified, only allowed MSR accesses +are permitted inside of KVM's MSR code. 
+ +Each allowlist specifies a range of MSRs to potentially allow access on. +The range goes from MSR index [base .. base+nmsrs]. The flags field +indicates whether reads, writes or both reads and writes are permitted +by setting a 1 bit in the bitmap for the corresponding MSR index. + +If an MSR access is not permitted through the allow list, it generates a +#GP inside the guest. When combined with KVM_CAP_X86_USER_SPACE_MSR, that +allows user space to deflect and potentially handle various MSR accesses +into user space. + +4.124 KVM_X86_CLEAR_MSR_ALLOWLIST +--------------------------------- + +:Capability: KVM_CAP_X86_MSR_ALLOWLIST +:Architectures: x86 +:Type: vcpu ioctl +:Parameters: none +:Returns: 0 + +This ioctl resets all internal MSR allow lists. After this call, no allow +list is present and the guest would execute as if no allow lists were set, +so all MSRs are considered allowed and thus handled by the in-kernel MSR +emulation logic. + +No vCPU may be in running state when calling this ioctl. + 5. The kvm_run structure ======================== @@ -6213,3 +6289,18 @@ writes to user space. It can be enabled on a VM level. If enabled, MSR accesses that would usually trigger a #GP by KVM into the guest will instead get bounced to user space through the KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR exit notifications. + +8.25 KVM_CAP_X86_MSR_ALLOWLIST +------------------------------ + +:Architectures: x86 + +This capability indicates that KVM supports emulation of only select MSR +registers. With this capability exposed, KVM exports two new VM ioctls: +KVM_X86_ADD_MSR_ALLOWLIST which user space can call to specify bitmaps of MSR +ranges that KVM should emulate in kernel space and KVM_X86_CLEAR_MSR_ALLOWLIST +which user space can call to remove all MSR allow lists from the VM context. 
+ +In combination with KVM_CAP_X86_USER_SPACE_MSR, this allows user space to +trap and emulate MSRs that are outside of the scope of KVM as well as +limit the attack surface on KVM's MSR emulation code. diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2f2307e71342..4b1ff7cb848f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -901,6 +901,13 @@ struct kvm_hv { struct kvm_hv_syndbg hv_syndbg; }; +struct msr_bitmap_range { + u32 flags; + u32 nmsrs; + u32 base; + unsigned long *bitmap; +}; + enum kvm_irqchip_mode { KVM_IRQCHIP_NONE, KVM_IRQCHIP_KERNEL, /* created with KVM_CREATE_IRQCHIP */ @@ -1005,6 +1012,9 @@ struct kvm_arch { /* Deflect RDMSR and WRMSR to user space when they trigger a #GP */ bool user_space_msr_enabled; + struct msr_bitmap_range msr_allowlist_ranges[10]; + int msr_allowlist_ranges_count; + struct kvm_pmu_event_filter *pmu_event_filter; struct task_struct *nx_lpage_recovery_thread; }; diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 0780f97c1850..c33fb1d72d52 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -192,6 +192,21 @@ struct kvm_msr_list { __u32 indices[0]; }; +#define KVM_MSR_ALLOW_READ (1 << 0) +#define KVM_MSR_ALLOW_WRITE (1 << 1) + +/* Maximum size of the of the bitmap in bytes */ +#define KVM_MSR_ALLOWLIST_MAX_LEN 0x600 + +/* for KVM_X86_ADD_MSR_ALLOWLIST */ +struct kvm_msr_allowlist { + __u32 flags; + __u32 nmsrs; /* number of msrs in bitmap */ + __u32 base; /* base address for the MSRs bitmap */ + __u32 pad; + + __u8 bitmap[0]; /* a set bit allows that the operation set in flags */ +}; struct kvm_cpuid_entry { __u32 function; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e1139124350f..25e58ceb19de 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1472,6 +1472,38 @@ void kvm_enable_efer_bits(u64 mask) } EXPORT_SYMBOL_GPL(kvm_enable_efer_bits); +static bool 
kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type) +{ + struct kvm *kvm = vcpu->kvm; + struct msr_bitmap_range *ranges = kvm->arch.msr_allowlist_ranges; + u32 count = kvm->arch.msr_allowlist_ranges_count; + u32 i; + bool r = false; + + /* MSR allowlist not set up, allow everything */ + if (!count) + return true; + + /* Prevent collision with clear_msr_allowlist */ + mutex_lock(&kvm->lock); + + for (i = 0; i < count; i++) { + u32 start = ranges[i].base; + u32 end = start + ranges[i].nmsrs; + u32 flags = ranges[i].flags; + unsigned long *bitmap = ranges[i].bitmap; + + if ((index >= start) && (index < end) && (flags & type)) { + r = !!test_bit(index - start, bitmap); + break; + } + } + + mutex_unlock(&kvm->lock); + + return r; +} + /* * Write @data into the MSR specified by @index. Select MSR specific fault * checks are bypassed if @host_initiated is %true. @@ -1483,6 +1515,9 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data, { struct msr_data msr; + if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_ALLOW_WRITE)) + return -ENOENT; + switch (index) { case MSR_FS_BASE: case MSR_GS_BASE: @@ -1528,6 +1563,9 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, struct msr_data msr; int ret; + if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_ALLOW_READ)) + return -ENOENT; + msr.index = index; msr.host_initiated = host_initiated; @@ -3550,6 +3588,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_EXCEPTION_PAYLOAD: case KVM_CAP_SET_GUEST_DEBUG: case KVM_CAP_X86_USER_SPACE_MSR: + case KVM_CAP_X86_MSR_ALLOWLIST: r = 1; break; case KVM_CAP_SYNC_REGS: @@ -5075,6 +5114,110 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, return r; } +static bool msr_range_overlaps(struct kvm *kvm, struct msr_bitmap_range *range) +{ + struct msr_bitmap_range *ranges = kvm->arch.msr_allowlist_ranges; + u32 i, count = kvm->arch.msr_allowlist_ranges_count; + bool r = false; + + for (i = 0; i < count; i++) { + 
u32 start = max(range->base, ranges[i].base); + u32 end = min(range->base + range->nmsrs, + ranges[i].base + ranges[i].nmsrs); + + if ((start < end) && (range->flags & ranges[i].flags)) { + r = true; + break; + } + } + + return r; +} + +static int kvm_vm_ioctl_add_msr_allowlist(struct kvm *kvm, void __user *argp) +{ + struct msr_bitmap_range *ranges = kvm->arch.msr_allowlist_ranges; + struct kvm_msr_allowlist __user *user_msr_allowlist = argp; + struct msr_bitmap_range range; + struct kvm_msr_allowlist kernel_msr_allowlist; + unsigned long *bitmap = NULL; + size_t bitmap_size; + int r = 0; + + if (copy_from_user(&kernel_msr_allowlist, user_msr_allowlist, + sizeof(kernel_msr_allowlist))) { + r = -EFAULT; + goto out; + } + + bitmap_size = BITS_TO_LONGS(kernel_msr_allowlist.nmsrs) * sizeof(long); + if (bitmap_size > KVM_MSR_ALLOWLIST_MAX_LEN) { + r = -EINVAL; + goto out; + } + + bitmap = memdup_user(user_msr_allowlist->bitmap, bitmap_size); + if (IS_ERR(bitmap)) { + r = PTR_ERR(bitmap); + goto out; + } + + range = (struct msr_bitmap_range) { + .flags = kernel_msr_allowlist.flags, + .base = kernel_msr_allowlist.base, + .nmsrs = kernel_msr_allowlist.nmsrs, + .bitmap = bitmap, + }; + + if (range.flags & ~(KVM_MSR_ALLOW_READ | KVM_MSR_ALLOW_WRITE)) { + r = -EINVAL; + goto out; + } + + /* + * Protect from concurrent calls to this function that could trigger + * a TOCTOU violation on kvm->arch.msr_allowlist_ranges_count. 
+ */ + mutex_lock(&kvm->lock); + + if (kvm->arch.msr_allowlist_ranges_count >= + ARRAY_SIZE(kvm->arch.msr_allowlist_ranges)) { + r = -E2BIG; + goto out_locked; + } + + if (msr_range_overlaps(kvm, &range)) { + r = -EINVAL; + goto out_locked; + } + + /* Everything ok, add this range identifier to our global pool */ + ranges[kvm->arch.msr_allowlist_ranges_count++] = range; + +out_locked: + mutex_unlock(&kvm->lock); +out: + if (r) + kfree(bitmap); + + return r; +} + +static int kvm_vm_ioctl_clear_msr_allowlist(struct kvm *kvm) +{ + int i; + + mutex_lock(&kvm->lock); + + for (i = 0; i < kvm->arch.msr_allowlist_ranges_count; i++) + kfree(kvm->arch.msr_allowlist_ranges[i].bitmap); + kvm->arch.msr_allowlist_ranges_count = 0; + + mutex_unlock(&kvm->lock); + + return 0; +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -5381,6 +5524,12 @@ long kvm_arch_vm_ioctl(struct file *filp, case KVM_SET_PMU_EVENT_FILTER: r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp); break; + case KVM_X86_ADD_MSR_ALLOWLIST: + r = kvm_vm_ioctl_add_msr_allowlist(kvm, argp); + break; + case KVM_X86_CLEAR_MSR_ALLOWLIST: + r = kvm_vm_ioctl_clear_msr_allowlist(kvm); + break; default: r = -ENOTTY; } @@ -10086,6 +10235,8 @@ void kvm_arch_pre_destroy_vm(struct kvm *kvm) void kvm_arch_destroy_vm(struct kvm *kvm) { + int i; + if (current->mm == kvm->mm) { /* * Free memory regions allocated on behalf of userspace, @@ -10102,6 +10253,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) } if (kvm_x86_ops.vm_destroy) kvm_x86_ops.vm_destroy(kvm); + for (i = 0; i < kvm->arch.msr_allowlist_ranges_count; i++) + kfree(kvm->arch.msr_allowlist_ranges[i].bitmap); kvm_pic_destroy(kvm); kvm_ioapic_destroy(kvm); kvm_free_vcpus(kvm); diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 13fc7de1eb50..4d6bb06e0fb1 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1041,6 +1041,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_HALT_POLL 182 #define 
KVM_CAP_ASYNC_PF_INT 183 #define KVM_CAP_X86_USER_SPACE_MSR 184 +#define KVM_CAP_X86_MSR_ALLOWLIST 185 #ifdef KVM_CAP_IRQ_ROUTING @@ -1542,6 +1543,10 @@ struct kvm_pv_cmd { /* Available with KVM_CAP_S390_PROTECTED */ #define KVM_S390_PV_COMMAND _IOWR(KVMIO, 0xc5, struct kvm_pv_cmd) +/* Available with KVM_CAP_X86_MSR_ALLOWLIST */ +#define KVM_X86_ADD_MSR_ALLOWLIST _IOW(KVMIO, 0xc6, struct kvm_msr_allowlist) +#define KVM_X86_CLEAR_MSR_ALLOWLIST _IO(KVMIO, 0xc7) + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */