[4/6] KVM: x86: Ensure the MSR bitmap never clears userspace tracked MSRs

Message ID 20200804042043.3592620-5-aaronlewis@google.com (mailing list archive)
State New, archived
Series Allow userspace to manage MSRs

Commit Message

Aaron Lewis Aug. 4, 2020, 4:20 a.m. UTC
SDM volume 3: 24.6.9 "MSR-Bitmap Address" and APM volume 2: 15.11 "MSR
intercepts" describe MSR permission bitmaps.  Permission bitmaps are
used to control whether an execution of rdmsr or wrmsr will cause a
vm exit.  Userspace tracked MSRs must cause a vm exit, so the host is
able to forward the MSR access to userspace.  This change adds vmx/svm
support to ensure the permission bitmap is properly set to cause a
vm exit to the host when rdmsr or wrmsr is used on one of the userspace
tracked MSRs.  Also, to avoid repeatedly setting them,
kvm_make_request() is used to coalesce these updates into a single call.

Signed-off-by: Aaron Lewis <aaronlewis@google.com>
Reviewed-by: Oliver Upton <oupton@google.com>
---
 arch/x86/include/asm/kvm_host.h |  3 ++
 arch/x86/kvm/svm/svm.c          | 49 ++++++++++++++++++++++++++-------
 arch/x86/kvm/vmx/vmx.c          | 13 ++++++++-
 arch/x86/kvm/x86.c              | 16 +++++++++++
 4 files changed, 70 insertions(+), 11 deletions(-)
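
For reference, the VMX bitmap layout being manipulated here works roughly as
follows (an illustrative sketch only, not part of the patch;
vmx_bitmap_would_exit() is a made-up helper name):

/*
 * SDM 24.6.9: the 4-KByte MSR bitmap holds four 1-KByte regions --
 * read-low, read-high, write-low, write-high.  "Low" MSRs are
 * 0x00000000-0x00001fff, "high" MSRs are 0xc0000000-0xc0001fff, and a
 * set bit means the corresponding rdmsr/wrmsr causes a VM-exit.
 */
static bool vmx_bitmap_would_exit(const u8 *bitmap, u32 msr, bool write)
{
	u32 base;

	if (msr <= 0x1fff)
		base = write ? 0x800 : 0x000;
	else if (msr >= 0xc0000000 && msr <= 0xc0001fff)
		base = write ? 0xc00 : 0x400;
	else
		return true;	/* MSRs outside both ranges always exit */

	msr &= 0x1fff;
	return bitmap[base + msr / 8] & (1 << (msr % 8));
}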

Comments

Alexander Graf Aug. 7, 2020, 11:03 p.m. UTC | #1
On 04.08.20 06:20, Aaron Lewis wrote:
> 
> SDM volume 3: 24.6.9 "MSR-Bitmap Address" and APM volume 2: 15.11 "MSR
> intercepts" describe MSR permission bitmaps.  Permission bitmaps are
> used to control whether an execution of rdmsr or wrmsr will cause a
> vm exit.  For userspace tracked MSRs it is required they cause a vm
> exit, so the host is able to forward the MSR to userspace.  This change
> adds vmx/svm support to ensure the permission bitmap is properly set to
> cause a vm_exit to the host when rdmsr or wrmsr is used by one of the
> userspace tracked MSRs.  Also, to avoid repeatedly setting them,
> kvm_make_request() is used to coalesce these into a single call.

I might have some fundamental misunderstanding here:

1) You force that the list of trapped MSRs is set before vCPU creation, 
so at the point of vCPU creation, you know already which MSRs should 
trap to user space

2) MSR intercept bitmaps are (AFAIK) all set to "trap by default". That 
means any MSR that we want the guest to get direct access to needs 
explicit opt-in through bitmap modifications.

That means if you simply check whether an MSR is supposed to trap to 
user space in the bitmap set path, you don't need any of the complicated 
logic below, no?
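
Something along these lines in the VMX disable path, say (only a sketch of
the idea, reusing the kvm_msr_user_exit() helper introduced earlier in the
series; the SVM side would do the same in set_msr_interception()):

static __always_inline void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu,
							   u32 msr, int type)
{
	/*
	 * Never open up direct guest access to an MSR userspace asked to
	 * track.  The bitmaps start out as "intercept everything", so
	 * refusing the clear here keeps the VM-exit in place.
	 */
	if (kvm_msr_user_exit(vcpu->kvm, msr))
		return;

	/* ... existing __clear_bit() logic ... */
}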


Alex

Aaron Lewis Aug. 18, 2020, 9:34 p.m. UTC | #2
On Fri, Aug 7, 2020 at 4:03 PM Alexander Graf <graf@amazon.com> wrote:
>
>
>
> On 04.08.20 06:20, Aaron Lewis wrote:
> >
> > SDM volume 3: 24.6.9 "MSR-Bitmap Address" and APM volume 2: 15.11 "MSR
> > intercepts" describe MSR permission bitmaps.  Permission bitmaps are
> > used to control whether an execution of rdmsr or wrmsr will cause a
> > vm exit.  For userspace tracked MSRs it is required they cause a vm
> > exit, so the host is able to forward the MSR to userspace.  This change
> > adds vmx/svm support to ensure the permission bitmap is properly set to
> > cause a vm_exit to the host when rdmsr or wrmsr is used by one of the
> > userspace tracked MSRs.  Also, to avoid repeatedly setting them,
> > kvm_make_request() is used to coalesce these into a single call.
>
> I might have some fundamental misunderstanding here:
>
> 1) You force that the list of trapped MSRs is set before vCPU creation,
> so at the point of vCPU creation, you know already which MSRs should
> trap to user space
>
> 2) MSR intercept bitmaps are (AFAIK) all set to "trap by default". That
> means any MSR that we want the guest to get direct access to needs
> explicit opt-in through bitmap modifications.
>
> That means if you simply check whether an MSR is supposed to trap to
> user space in the bitmap set path, you don't need any of the complicated
> logic below, no?
>
>
> Alex
>

Yes, I think that should work as well.  However, calling it after the
fact like we do does have a nice advantage of allowing us to coalesce
the calls and limit the number of times we need to search the list.
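
The coalescing is just the standard request pattern (sketch): any number of
intercept updates before the next entry reduce to a single walk of the
user_exit_msrs list:

	/* in set_msr_interception() / vmx_disable_intercept_for_msr() */
	kvm_make_request(KVM_REQ_USER_MSR_UPDATE, vcpu);

	/* in vcpu_enter_guest(), at most once per entry */
	if (kvm_check_request(KVM_REQ_USER_MSR_UPDATE, vcpu))
		kvm_set_user_msr_intercepts(vcpu);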

Jim Mattson Aug. 18, 2020, 9:57 p.m. UTC | #3
On Tue, Aug 18, 2020 at 2:34 PM Aaron Lewis <aaronlewis@google.com> wrote:
>
> On Fri, Aug 7, 2020 at 4:03 PM Alexander Graf <graf@amazon.com> wrote:
> >
> >
> >
> > On 04.08.20 06:20, Aaron Lewis wrote:
> > >
> > > SDM volume 3: 24.6.9 "MSR-Bitmap Address" and APM volume 2: 15.11 "MSR
> > > intercepts" describe MSR permission bitmaps.  Permission bitmaps are
> > > used to control whether an execution of rdmsr or wrmsr will cause a
> > > vm exit.  For userspace tracked MSRs it is required they cause a vm
> > > exit, so the host is able to forward the MSR to userspace.  This change
> > > adds vmx/svm support to ensure the permission bitmap is properly set to
> > > cause a vm_exit to the host when rdmsr or wrmsr is used by one of the
> > > userspace tracked MSRs.  Also, to avoid repeatedly setting them,
> > > kvm_make_request() is used to coalesce these into a single call.
> >
> > I might have some fundamental misunderstanding here:
> >
> > 1) You force that the list of trapped MSRs is set before vCPU creation,
> > so at the point of vCPU creation, you know already which MSRs should
> > trap to user space
> >
> > 2) MSR intercept bitmaps are (AFAIK) all set to "trap by default". That
> > means any MSR that we want the guest to get direct access to needs
> > explicit opt-in through bitmap modifications.
> >
> > That means if you simply check whether an MSR is supposed to trap to
> > user space in the bitmap set path, you don't need any of the complicated
> > logic below, no?
> >
> >
> > Alex
> >
>
> Yes, I think that should work as well.  However, calling it after the
> fact like we do does have a nice advantage of allowing us to coalesce
> the calls and limit the number of times we need to search the list.

Also, if we do decide to let the userspace MSR list change after vCPUs
are created (and it's a VM-wide ioctl), it will be nice to just send
the request to all vCPUs to update their MSR permission bitmaps before
the next VM-entry.
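
That is, a later VM-wide ioctl could simply do something like (sketch only,
with a made-up handler name and ignoring locking and copy-from-user
details):

static int kvm_vm_ioctl_set_user_exit_msrs(struct kvm *kvm,
					   struct kvm_msr_list *msr_list)
{
	kvm->arch.user_exit_msrs = msr_list;
	/* refresh every vCPU's permission bitmap before the next VM-entry */
	kvm_make_all_cpus_request(kvm, KVM_REQ_USER_MSR_UPDATE);
	return 0;
}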


Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 510055471dd0..07a85f5f0b8a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -87,6 +87,7 @@ 
 #define KVM_REQ_HV_TLB_FLUSH \
 	KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_APF_READY		KVM_ARCH_REQ(28)
+#define KVM_REQ_USER_MSR_UPDATE KVM_ARCH_REQ(29)
 
 #define CR0_RESERVED_BITS                                               \
 	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
@@ -1271,6 +1272,8 @@  struct kvm_x86_ops {
 	int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
 
 	void (*migrate_timers)(struct kvm_vcpu *vcpu);
+
+	void (*set_user_msr_intercept)(struct kvm_vcpu *vcpu, u32 msr);
 };
 
 struct kvm_x86_nested_ops {
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index eb673b59f7b7..c560d283b2af 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -583,13 +583,27 @@  static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
 	return !!test_bit(bit_write,  &tmp);
 }
 
+static void __set_msr_interception(u32 *msrpm, u32 msr, int read, int write,
+				   u32 offset)
+{
+	u8 bit_read, bit_write;
+	unsigned long tmp;
+
+	bit_read  = 2 * (msr & 0x0f);
+	bit_write = 2 * (msr & 0x0f) + 1;
+	tmp       = msrpm[offset];
+
+	read  ? clear_bit(bit_read,  &tmp) : set_bit(bit_read,  &tmp);
+	write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp);
+
+	msrpm[offset] = tmp;
+}
+
 static void set_msr_interception(struct kvm_vcpu *vcpu, u32 msr, int read,
 				 int write)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	u32 *msrpm = svm->msrpm;
-	u8 bit_read, bit_write;
-	unsigned long tmp;
 	u32 offset;
 
 	/*
@@ -598,17 +612,30 @@  static void set_msr_interception(struct kvm_vcpu *vcpu, u32 msr, int read,
 	 */
 	WARN_ON(!valid_msr_intercept(msr));
 
-	offset    = svm_msrpm_offset(msr);
-	bit_read  = 2 * (msr & 0x0f);
-	bit_write = 2 * (msr & 0x0f) + 1;
-	tmp       = msrpm[offset];
-
+	offset = svm_msrpm_offset(msr);
 	BUG_ON(offset == MSR_INVALID);
 
-	read  ? clear_bit(bit_read,  &tmp) : set_bit(bit_read,  &tmp);
-	write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp);
+	__set_msr_interception(msrpm, msr, read, write, offset);
 
-	msrpm[offset] = tmp;
+	if (read || write)
+		kvm_make_request(KVM_REQ_USER_MSR_UPDATE, vcpu);
+}
+
+static void set_user_msr_interception(struct kvm_vcpu *vcpu, u32 msr, int read,
+				      int write)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	u32 *msrpm = svm->msrpm;
+	u32 offset;
+
+	offset = svm_msrpm_offset(msr);
+	if (offset != MSR_INVALID)
+		__set_msr_interception(msrpm, msr, read, write, offset);
+}
+
+void svm_set_user_msr_intercept(struct kvm_vcpu *vcpu, u32 msr)
+{
+	set_user_msr_interception(vcpu, msr, 0, 0);
 }
 
 static void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm)
@@ -4088,6 +4115,8 @@  static struct kvm_x86_ops svm_x86_ops __initdata = {
 	.need_emulation_on_page_fault = svm_need_emulation_on_page_fault,
 
 	.apic_init_signal_blocked = svm_apic_init_signal_blocked,
+
+	.set_user_msr_intercept = svm_set_user_msr_intercept,
 };
 
 static struct kvm_x86_init_ops svm_init_ops __initdata = {
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 1313e47a5a1e..3d3d9eaeca64 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -3728,6 +3728,10 @@  static __always_inline void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu,
 			__clear_bit(msr, msr_bitmap + 0xc00 / f);
 
 	}
+
+	if (type & MSR_TYPE_R || type & MSR_TYPE_W) {
+		kvm_make_request(KVM_REQ_USER_MSR_UPDATE, vcpu);
+	}
 }
 
 static __always_inline void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu,
@@ -3795,7 +3799,7 @@  static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
 }
 
 static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu,
-					 unsigned long *msr_bitmap, u8 mode)
+					unsigned long *msr_bitmap, u8 mode)
 {
 	int msr;
 
@@ -3819,6 +3823,11 @@  static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu,
 	}
 }
 
+void vmx_set_user_msr_intercept(struct kvm_vcpu *vcpu, u32 msr)
+{
+	vmx_enable_intercept_for_msr(vcpu, msr, MSR_TYPE_RW);
+}
+
 void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -7965,6 +7974,8 @@  static struct kvm_x86_ops vmx_x86_ops __initdata = {
 	.need_emulation_on_page_fault = vmx_need_emulation_on_page_fault,
 	.apic_init_signal_blocked = vmx_apic_init_signal_blocked,
 	.migrate_timers = vmx_migrate_timers,
+
+	.set_user_msr_intercept = vmx_set_user_msr_intercept,
 };
 
 static __init int hardware_setup(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 47619b49818a..45bf59f94d34 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3537,6 +3537,19 @@  bool kvm_msr_user_exit(struct kvm *kvm, u32 index)
 }
 EXPORT_SYMBOL_GPL(kvm_msr_user_exit);
 
+static void kvm_set_user_msr_intercepts(struct kvm_vcpu *vcpu)
+{
+	struct kvm_msr_list *msr_list = vcpu->kvm->arch.user_exit_msrs;
+	u32 i, msr;
+
+	if (msr_list) {
+		for (i = 0; i < msr_list->nmsrs; i++) {
+			msr = msr_list->indices[i];
+			kvm_x86_ops.set_user_msr_intercept(vcpu, msr);
+		}
+	}
+}
+
 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 	int r = 0;
@@ -8553,6 +8566,9 @@  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			kvm_vcpu_update_apicv(vcpu);
 		if (kvm_check_request(KVM_REQ_APF_READY, vcpu))
 			kvm_check_async_pf_completion(vcpu);
+
+		if (kvm_check_request(KVM_REQ_USER_MSR_UPDATE, vcpu))
+			kvm_set_user_msr_intercepts(vcpu);
 	}
 
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {