Message ID | 20200925143422.21718-6-graf@amazon.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Allow user space to restrict and augment MSR emulation | expand |
On 25/09/20 16:34, Alexander Graf wrote: > We will introduce the concept of MSRs that may not be handled in kernel > space soon. Some MSRs are directly passed through to the guest, effectively > making them handled by KVM from user space's point of view. > > This patch introduces all logic required to ensure that MSRs that > user space wants trapped are not marked as direct access for guests. > > Signed-off-by: Alexander Graf <graf@amazon.com> VMX and SVM agree about the awful naming of MSR functions... There is some confusion between msrs and indexes in msrpm_offsets. I'll revisit this after looking at Sean's pending series that cleans up VMX. Paolo > --- > > v7 -> v8: > > - s/KVM_MSR_ALLOW/KVM_MSR_FILTER/g > --- > arch/x86/kvm/svm/svm.c | 77 +++++++++++++++++++++++++++++++++++++----- > arch/x86/kvm/svm/svm.h | 7 ++++ > 2 files changed, 76 insertions(+), 8 deletions(-) > > diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c > index 41aaee666751..45b0c180f42c 100644 > --- a/arch/x86/kvm/svm/svm.c > +++ b/arch/x86/kvm/svm/svm.c > @@ -91,7 +91,7 @@ static DEFINE_PER_CPU(u64, current_tsc_ratio); > static const struct svm_direct_access_msrs { > u32 index; /* Index of the MSR */ > bool always; /* True if intercept is always on */ > -} direct_access_msrs[] = { > +} direct_access_msrs[MAX_DIRECT_ACCESS_MSRS] = { > { .index = MSR_STAR, .always = true }, > { .index = MSR_IA32_SYSENTER_CS, .always = true }, > #ifdef CONFIG_X86_64 > @@ -553,15 +553,41 @@ static int svm_cpu_init(int cpu) > > } > > -static bool valid_msr_intercept(u32 index) > +static int direct_access_msr_idx(u32 msr) > { > - int i; > + u32 i; > > for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) > - if (direct_access_msrs[i].index == index) > - return true; > + if (direct_access_msrs[i].index == msr) > + return i; > > - return false; > + return -EINVAL; > +} > + > +static void set_shadow_msr_intercept(struct kvm_vcpu *vcpu, u32 msr, int read, > + int write) > +{ > + struct vcpu_svm *svm = to_svm(vcpu); > + int idx = direct_access_msr_idx(msr); > + > + if (idx == -EINVAL) > + return; > + > + /* Set the shadow bitmaps to the desired intercept states */ > + if (read) > + set_bit(idx, svm->shadow_msr_intercept.read); > + else > + clear_bit(idx, svm->shadow_msr_intercept.read); > + > + if (write) > + set_bit(idx, svm->shadow_msr_intercept.write); > + else > + clear_bit(idx, svm->shadow_msr_intercept.write); > +} > + > +static bool valid_msr_intercept(u32 index) > +{ > + return direct_access_msr_idx(index) != -EINVAL; > } > > static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) > @@ -583,8 +609,8 @@ static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) > return !!test_bit(bit_write, &tmp); > } > > -static void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr, > - int read, int write) > +static void set_msr_interception_nosync(struct kvm_vcpu *vcpu, u32 *msrpm, > + u32 msr, int read, int write) > { > u8 bit_read, bit_write; > unsigned long tmp; > @@ -596,6 +622,13 @@ static void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr, > */ > WARN_ON(!valid_msr_intercept(msr)); > > + /* Enforce non allowed MSRs to trap */ > + if (read && !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_READ)) > + read = 0; > + > + if (write && !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_WRITE)) > + write = 0; > + > offset = svm_msrpm_offset(msr); > bit_read = 2 * (msr & 0x0f); > bit_write = 2 * (msr & 0x0f) + 1; > @@ -609,6 +642,13 @@ static void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr, > msrpm[offset] = tmp; > } > > +static void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr, > + int read, int write) > +{ > + set_shadow_msr_intercept(vcpu, msr, read, write); > + set_msr_interception_nosync(vcpu, msrpm, msr, read, write); > +} > + > static u32 *svm_vcpu_alloc_msrpm(void) > { > struct page *pages = alloc_pages(GFP_KERNEL_ACCOUNT, MSRPM_ALLOC_ORDER); > @@ -639,6 +679,25 @@ static void svm_vcpu_free_msrpm(u32 *msrpm) > __free_pages(virt_to_page(msrpm), MSRPM_ALLOC_ORDER); > } > > +static void svm_msr_filter_changed(struct kvm_vcpu *vcpu) > +{ > + struct vcpu_svm *svm = to_svm(vcpu); > + u32 i; > + > + /* > + * Set intercept permissions for all direct access MSRs again. They > + * will automatically get filtered through the MSR filter, so we are > + * back in sync after this. > + */ > + for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) { > + u32 msr = direct_access_msrs[i].index; > + u32 read = test_bit(i, svm->shadow_msr_intercept.read); > + u32 write = test_bit(i, svm->shadow_msr_intercept.write); > + > + set_msr_interception_nosync(vcpu, svm->msrpm, msr, read, write); > + } > +} > + > static void add_msr_offset(u32 offset) > { > int i; > @@ -4212,6 +4271,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { > .need_emulation_on_page_fault = svm_need_emulation_on_page_fault, > > .apic_init_signal_blocked = svm_apic_init_signal_blocked, > + > + .msr_filter_changed = svm_msr_filter_changed, > }; > > static struct kvm_x86_init_ops svm_init_ops __initdata = { > diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h > index 45496775f0db..07bec0d5aad4 100644 > --- a/arch/x86/kvm/svm/svm.h > +++ b/arch/x86/kvm/svm/svm.h > @@ -31,6 +31,7 @@ static const u32 host_save_user_msrs[] = { > > #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs) > > +#define MAX_DIRECT_ACCESS_MSRS 15 > #define MSRPM_OFFSETS 16 > extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; > extern bool npt_enabled; > @@ -157,6 +158,12 @@ struct vcpu_svm { > */ > struct list_head ir_list; > spinlock_t ir_list_lock; > + > + /* Save desired MSR intercept (read: pass-through) state */ > + struct { > + DECLARE_BITMAP(read, MAX_DIRECT_ACCESS_MSRS); > + DECLARE_BITMAP(write, MAX_DIRECT_ACCESS_MSRS); > + } shadow_msr_intercept; > }; > > struct svm_cpu_data { >
On 25/09/20 16:34, Alexander Graf wrote: > We will introduce the concept of MSRs that may not be handled in kernel > space soon. Some MSRs are directly passed through to the guest, effectively > making them handled by KVM from user space's point of view. > > This patch introduces all logic required to ensure that MSRs that > user space wants trapped are not marked as direct access for guests. > > Signed-off-by: Alexander Graf <graf@amazon.com> > > --- > > v7 -> v8: > > - s/KVM_MSR_ALLOW/KVM_MSR_FILTER/g > --- Ok, just some cosmetic fixes on top: diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index bb9f438e9e62..692110f2ac6f 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -553,7 +553,7 @@ static int svm_cpu_init(int cpu) } -static int direct_access_msr_idx(u32 msr) +static int direct_access_msr_slot(u32 msr) { u32 i; @@ -561,33 +561,33 @@ static int direct_access_msr_idx(u32 msr) if (direct_access_msrs[i].index == msr) return i; - return -EINVAL; + return -ENOENT; } static void set_shadow_msr_intercept(struct kvm_vcpu *vcpu, u32 msr, int read, int write) { struct vcpu_svm *svm = to_svm(vcpu); - int idx = direct_access_msr_idx(msr); + int slot = direct_access_msr_slot(msr); - if (idx == -EINVAL) + if (slot == -ENOENT) return; /* Set the shadow bitmaps to the desired intercept states */ if (read) - set_bit(idx, svm->shadow_msr_intercept.read); + set_bit(slot, svm->shadow_msr_intercept.read); else - clear_bit(idx, svm->shadow_msr_intercept.read); + clear_bit(slot, svm->shadow_msr_intercept.read); if (write) - set_bit(idx, svm->shadow_msr_intercept.write); + set_bit(slot, svm->shadow_msr_intercept.write); else - clear_bit(idx, svm->shadow_msr_intercept.write); + clear_bit(slot, svm->shadow_msr_intercept.write); } static bool valid_msr_intercept(u32 index) { - return direct_access_msr_idx(index) != -EINVAL; + return direct_access_msr_slot(index) != -ENOENT; } static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) @@ -609,7 +609,7 @@ static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) return !!test_bit(bit_write, &tmp); } -static void set_msr_interception_nosync(struct kvm_vcpu *vcpu, u32 *msrpm, +static void set_msr_interception_bitmap(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr, int read, int write) { u8 bit_read, bit_write; @@ -646,7 +646,7 @@ static void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr, int read, int write) { set_shadow_msr_intercept(vcpu, msr, read, write); - set_msr_interception_nosync(vcpu, msrpm, msr, read, write); + set_msr_interception_bitmap(vcpu, msrpm, msr, read, write); } static u32 *svm_vcpu_alloc_msrpm(void) @@ -694,7 +694,7 @@ static void svm_msr_filter_changed(struct kvm_vcpu *vcpu) u32 read = test_bit(i, svm->shadow_msr_intercept.read); u32 write = test_bit(i, svm->shadow_msr_intercept.write); - set_msr_interception_nosync(vcpu, svm->msrpm, msr, read, write); + set_msr_interception_bitmap(vcpu, svm->msrpm, msr, read, write); } }
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 41aaee666751..45b0c180f42c 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -91,7 +91,7 @@ static DEFINE_PER_CPU(u64, current_tsc_ratio); static const struct svm_direct_access_msrs { u32 index; /* Index of the MSR */ bool always; /* True if intercept is always on */ -} direct_access_msrs[] = { +} direct_access_msrs[MAX_DIRECT_ACCESS_MSRS] = { { .index = MSR_STAR, .always = true }, { .index = MSR_IA32_SYSENTER_CS, .always = true }, #ifdef CONFIG_X86_64 @@ -553,15 +553,41 @@ static int svm_cpu_init(int cpu) } -static bool valid_msr_intercept(u32 index) +static int direct_access_msr_idx(u32 msr) { - int i; + u32 i; for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) - if (direct_access_msrs[i].index == index) - return true; + if (direct_access_msrs[i].index == msr) + return i; - return false; + return -EINVAL; +} + +static void set_shadow_msr_intercept(struct kvm_vcpu *vcpu, u32 msr, int read, + int write) +{ + struct vcpu_svm *svm = to_svm(vcpu); + int idx = direct_access_msr_idx(msr); + + if (idx == -EINVAL) + return; + + /* Set the shadow bitmaps to the desired intercept states */ + if (read) + set_bit(idx, svm->shadow_msr_intercept.read); + else + clear_bit(idx, svm->shadow_msr_intercept.read); + + if (write) + set_bit(idx, svm->shadow_msr_intercept.write); + else + clear_bit(idx, svm->shadow_msr_intercept.write); +} + +static bool valid_msr_intercept(u32 index) +{ + return direct_access_msr_idx(index) != -EINVAL; } static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) @@ -583,8 +609,8 @@ static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) return !!test_bit(bit_write, &tmp); } -static void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr, - int read, int write) +static void set_msr_interception_nosync(struct kvm_vcpu *vcpu, u32 *msrpm, + u32 msr, int read, int write) { u8 bit_read, bit_write; unsigned long tmp; @@ -596,6 +622,13 @@ static void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr, */ WARN_ON(!valid_msr_intercept(msr)); + /* Enforce non allowed MSRs to trap */ + if (read && !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_READ)) + read = 0; + + if (write && !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_WRITE)) + write = 0; + offset = svm_msrpm_offset(msr); bit_read = 2 * (msr & 0x0f); bit_write = 2 * (msr & 0x0f) + 1; @@ -609,6 +642,13 @@ static void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr, msrpm[offset] = tmp; } +static void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr, + int read, int write) +{ + set_shadow_msr_intercept(vcpu, msr, read, write); + set_msr_interception_nosync(vcpu, msrpm, msr, read, write); +} + static u32 *svm_vcpu_alloc_msrpm(void) { struct page *pages = alloc_pages(GFP_KERNEL_ACCOUNT, MSRPM_ALLOC_ORDER); @@ -639,6 +679,25 @@ static void svm_vcpu_free_msrpm(u32 *msrpm) __free_pages(virt_to_page(msrpm), MSRPM_ALLOC_ORDER); } +static void svm_msr_filter_changed(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + u32 i; + + /* + * Set intercept permissions for all direct access MSRs again. They + * will automatically get filtered through the MSR filter, so we are + * back in sync after this. + */ + for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) { + u32 msr = direct_access_msrs[i].index; + u32 read = test_bit(i, svm->shadow_msr_intercept.read); + u32 write = test_bit(i, svm->shadow_msr_intercept.write); + + set_msr_interception_nosync(vcpu, svm->msrpm, msr, read, write); + } +} + static void add_msr_offset(u32 offset) { int i; @@ -4212,6 +4271,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .need_emulation_on_page_fault = svm_need_emulation_on_page_fault, .apic_init_signal_blocked = svm_apic_init_signal_blocked, + + .msr_filter_changed = svm_msr_filter_changed, }; static struct kvm_x86_init_ops svm_init_ops __initdata = { diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 45496775f0db..07bec0d5aad4 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -31,6 +31,7 @@ static const u32 host_save_user_msrs[] = { #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs) +#define MAX_DIRECT_ACCESS_MSRS 15 #define MSRPM_OFFSETS 16 extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; extern bool npt_enabled; @@ -157,6 +158,12 @@ struct vcpu_svm { */ struct list_head ir_list; spinlock_t ir_list_lock; + + /* Save desired MSR intercept (read: pass-through) state */ + struct { + DECLARE_BITMAP(read, MAX_DIRECT_ACCESS_MSRS); + DECLARE_BITMAP(write, MAX_DIRECT_ACCESS_MSRS); + } shadow_msr_intercept; }; struct svm_cpu_data {
We will introduce the concept of MSRs that may not be handled in kernel space soon. Some MSRs are directly passed through to the guest, effectively making them handled by KVM from user space's point of view. This patch introduces all logic required to ensure that MSRs that user space wants trapped are not marked as direct access for guests. Signed-off-by: Alexander Graf <graf@amazon.com> --- v7 -> v8: - s/KVM_MSR_ALLOW/KVM_MSR_FILTER/g --- arch/x86/kvm/svm/svm.c | 77 +++++++++++++++++++++++++++++++++++++----- arch/x86/kvm/svm/svm.h | 7 ++++ 2 files changed, 76 insertions(+), 8 deletions(-)