diff mbox series

[v3,05/12] KVM: x86: Add support for exiting to userspace on rdmsr or wrmsr

Message ID 20200818211533.849501-6-aaronlewis@google.com (mailing list archive)
State New, archived
Headers show
Series Allow userspace to manage MSRs | expand

Commit Message

Aaron Lewis Aug. 18, 2020, 9:15 p.m. UTC
Add support for exiting to userspace on a rdmsr or wrmsr instruction if
the MSR being read from or written to is in the user_exit_msrs list.

Signed-off-by: Aaron Lewis <aaronlewis@google.com>
---

v2 -> v3

  - Refactored commit based on Alexander Graf's changes in the first commit
    in this series.  Changes made were:
      - Updated member 'inject_gp' to 'error' based on struct msr in kvm_run.
      - Move flag 'vcpu->kvm->arch.user_space_msr_enabled' out of
        kvm_msr_user_space() to allow it to work with both methods that bounce
        to userspace (msr list and #GP fallback).  Updated caller functions
        to account for this change.
      - trace_kvm_msr has been moved up and combine with a previous call in
        complete_emulated_msr() based on the suggestion made by Alexander
        Graf <graf@amazon.com>.

---
 arch/x86/kvm/trace.h | 24 ++++++++++++++
 arch/x86/kvm/x86.c   | 76 ++++++++++++++++++++++++++++++++++++++------
 2 files changed, 90 insertions(+), 10 deletions(-)

Comments

Alexander Graf Aug. 19, 2020, 10:25 a.m. UTC | #1
On 18.08.20 23:15, Aaron Lewis wrote:
> 
> Add support for exiting to userspace on a rdmsr or wrmsr instruction if
> the MSR being read from or written to is in the user_exit_msrs list.
> 
> Signed-off-by: Aaron Lewis <aaronlewis@google.com>

Again, this patch should be redundant with the allow list?

Alex



Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879
Jim Mattson Aug. 20, 2020, 6:17 p.m. UTC | #2
On Tue, Aug 18, 2020 at 2:16 PM Aaron Lewis <aaronlewis@google.com> wrote:
>
> Add support for exiting to userspace on a rdmsr or wrmsr instruction if
> the MSR being read from or written to is in the user_exit_msrs list.
>
> Signed-off-by: Aaron Lewis <aaronlewis@google.com>
> ---
>
> v2 -> v3
>
>   - Refactored commit based on Alexander Graf's changes in the first commit
>     in this series.  Changes made were:
>       - Updated member 'inject_gp' to 'error' based on struct msr in kvm_run.
>       - Move flag 'vcpu->kvm->arch.user_space_msr_enabled' out of
>         kvm_msr_user_space() to allow it to work with both methods that bounce
>         to userspace (msr list and #GP fallback).  Updated caller functions
>         to account for this change.
>       - trace_kvm_msr has been moved up and combine with a previous call in
>         complete_emulated_msr() based on the suggestion made by Alexander
>         Graf <graf@amazon.com>.
>
> ---

> @@ -1653,9 +1663,6 @@ static int kvm_msr_user_space(struct kvm_vcpu *vcpu, u32 index,
>                               u32 exit_reason, u64 data,
>                               int (*completion)(struct kvm_vcpu *vcpu))
>  {
> -       if (!vcpu->kvm->arch.user_space_msr_enabled)
> -               return 0;
> -
>         vcpu->run->exit_reason = exit_reason;
>         vcpu->run->msr.error = 0;
>         vcpu->run->msr.pad[0] = 0;
> @@ -1686,10 +1693,18 @@ int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
>         u64 data;
>         int r;
>
> +       if (kvm_msr_user_exit(vcpu->kvm, ecx)) {
> +               kvm_get_msr_user_space(vcpu, ecx);
> +               /* Bounce to user space */
> +               return 0;
> +       }
> +
> +
>         r = kvm_get_msr(vcpu, ecx, &data);
>
>         /* MSR read failed? See if we should ask user space */
> -       if (r && kvm_get_msr_user_space(vcpu, ecx)) {
> +       if (r && vcpu->kvm->arch.user_space_msr_enabled) {
> +               kvm_get_msr_user_space(vcpu, ecx);
>                 /* Bounce to user space */
>                 return 0;
>         }

The before and after bounce to userspace is unfortunate. If we can
consolidate the allow/deny list checks at the top of kvm_get_msr(),
and we can tell why kvm_get_msr() failed (e.g. -EPERM=disallowed,
-ENOENT=unknown MSR, -EINVAL=illegal access), then we can eliminate
the first bounce to userspace above. -EPERM would always go to
userspace. -ENOENT would go to userspace if userspace asked to handle
unknown MSRs. -EINVAL would go to userspace if userspace asked to
handle all #GPs. (Yes; I'd still like to be able to distinguish
between "unknown MSR" and "illegal value." Otherwise, it seems
impossible for userspace to know how to proceed.)

(You may ask, "why would you get -EINVAL on a RDMSR?" This would be
the case if you tried to read a write-only MSR, like IA32_FLUSH_CMD.)

> @@ -1715,10 +1730,17 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
>         u64 data = kvm_read_edx_eax(vcpu);
>         int r;
>
> +       if (kvm_msr_user_exit(vcpu->kvm, ecx)) {
> +               kvm_set_msr_user_space(vcpu, ecx, data);
> +               /* Bounce to user space */
> +               return 0;
> +       }
> +
>         r = kvm_set_msr(vcpu, ecx, data);
>
>         /* MSR write failed? See if we should ask user space */
> -       if (r && kvm_set_msr_user_space(vcpu, ecx, data)) {
> +       if (r && vcpu->kvm->arch.user_space_msr_enabled) {
> +               kvm_set_msr_user_space(vcpu, ecx, data);
>                 /* Bounce to user space */
>                 return 0;
>         }

Same idea as above.

> @@ -3606,6 +3628,25 @@ static int kvm_vm_ioctl_set_exit_msrs(struct kvm *kvm,
>         return 0;
>  }
>
> +bool kvm_msr_user_exit(struct kvm *kvm, u32 index)
> +{
> +       struct kvm_msr_list *exit_msrs;
> +       int i;
> +
> +       exit_msrs = kvm->arch.user_exit_msrs;
> +
> +       if (!exit_msrs)
> +               return false;
> +
> +       for (i = 0; i < exit_msrs->nmsrs; ++i) {
> +               if (exit_msrs->indices[i] == index)
> +                       return true;
> +       }
> +
> +       return false;
> +}
> +EXPORT_SYMBOL_GPL(kvm_msr_user_exit);

I think this should probably be scrapped, along with Alexander's
allow-list check, in favor of a rule-based allow/deny list approach as
I outlined in an earlier message today.
Alexander Graf Aug. 20, 2020, 9:59 p.m. UTC | #3
On 20.08.20 20:17, Jim Mattson wrote:
> 
> On Tue, Aug 18, 2020 at 2:16 PM Aaron Lewis <aaronlewis@google.com> wrote:
>>
>> Add support for exiting to userspace on a rdmsr or wrmsr instruction if
>> the MSR being read from or written to is in the user_exit_msrs list.
>>
>> Signed-off-by: Aaron Lewis <aaronlewis@google.com>
>> ---
>>
>> v2 -> v3
>>
>>    - Refactored commit based on Alexander Graf's changes in the first commit
>>      in this series.  Changes made were:
>>        - Updated member 'inject_gp' to 'error' based on struct msr in kvm_run.
>>        - Move flag 'vcpu->kvm->arch.user_space_msr_enabled' out of
>>          kvm_msr_user_space() to allow it to work with both methods that bounce
>>          to userspace (msr list and #GP fallback).  Updated caller functions
>>          to account for this change.
>>        - trace_kvm_msr has been moved up and combine with a previous call in
>>          complete_emulated_msr() based on the suggestion made by Alexander
>>          Graf <graf@amazon.com>.
>>
>> ---
> 
>> @@ -1653,9 +1663,6 @@ static int kvm_msr_user_space(struct kvm_vcpu *vcpu, u32 index,
>>                                u32 exit_reason, u64 data,
>>                                int (*completion)(struct kvm_vcpu *vcpu))
>>   {
>> -       if (!vcpu->kvm->arch.user_space_msr_enabled)
>> -               return 0;
>> -
>>          vcpu->run->exit_reason = exit_reason;
>>          vcpu->run->msr.error = 0;
>>          vcpu->run->msr.pad[0] = 0;
>> @@ -1686,10 +1693,18 @@ int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
>>          u64 data;
>>          int r;
>>
>> +       if (kvm_msr_user_exit(vcpu->kvm, ecx)) {
>> +               kvm_get_msr_user_space(vcpu, ecx);
>> +               /* Bounce to user space */
>> +               return 0;
>> +       }
>> +
>> +
>>          r = kvm_get_msr(vcpu, ecx, &data);
>>
>>          /* MSR read failed? See if we should ask user space */
>> -       if (r && kvm_get_msr_user_space(vcpu, ecx)) {
>> +       if (r && vcpu->kvm->arch.user_space_msr_enabled) {
>> +               kvm_get_msr_user_space(vcpu, ecx);
>>                  /* Bounce to user space */
>>                  return 0;
>>          }
> 
> The before and after bounce to userspace is unfortunate. If we can
> consolidate the allow/deny list checks at the top of kvm_get_msr(),
> and we can tell why kvm_get_msr() failed (e.g. -EPERM=disallowed,
> -ENOENT=unknown MSR, -EINVAL=illegal access), then we can eliminate
> the first bounce to userspace above. -EPERM would always go to
> userspace. -ENOENT would go to userspace if userspace asked to handle
> unknown MSRs. -EINVAL would go to userspace if userspace asked to
> handle all #GPs. (Yes; I'd still like to be able to distinguish
> between "unknown MSR" and "illegal value." Otherwise, it seems
> impossible for userspace to know how to proceed.)
> 
> (You may ask, "why would you get -EINVAL on a RDMSR?" This would be
> the case if you tried to read a write-only MSR, like IA32_FLUSH_CMD.)

Do we really need to do all of this dance of differentiating in kernel 
space between an exit that's there because user space asked for the exit 
and an MSR access that would just generate a #GP?

At the end of the day, user space *knows* which MSRs it asked to 
receive. It can filter for them super easily.


Alex



Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879
Jim Mattson Aug. 20, 2020, 10:55 p.m. UTC | #4
On Thu, Aug 20, 2020 at 2:59 PM Alexander Graf <graf@amazon.com> wrote:

> Do we really need to do all of this dance of differentiating in kernel
> space between an exit that's there because user space asked for the exit
> and an MSR access that would just generate a #GP?
>
> At the end of the day, user space *knows* which MSRs it asked to
> receive. It can filter for them super easily.

If no one else has an opinion, I can let this go. :-)

However, to make the right decision in kvm_emulate_{rdmsr,wrmsr}
(without the unfortunate before and after checks that Aaron added),
kvm_{get,set}_msr should at least distinguish between "permission
denied" and "raise #GP," so I can provide a deny list without asking
for userspace exits on #GP.
Jim Mattson Aug. 21, 2020, 5:58 p.m. UTC | #5
On Thu, Aug 20, 2020 at 3:55 PM Jim Mattson <jmattson@google.com> wrote:
>
> On Thu, Aug 20, 2020 at 2:59 PM Alexander Graf <graf@amazon.com> wrote:
>
> > Do we really need to do all of this dance of differentiating in kernel
> > space between an exit that's there because user space asked for the exit
> > and an MSR access that would just generate a #GP?
> >
> > At the end of the day, user space *knows* which MSRs it asked to
> > receive. It can filter for them super easily.
>
> If no one else has an opinion, I can let this go. :-)
>
> However, to make the right decision in kvm_emulate_{rdmsr,wrmsr}
> (without the unfortunate before and after checks that Aaron added),
> kvm_{get,set}_msr should at least distinguish between "permission
> denied" and "raise #GP," so I can provide a deny list without asking
> for userspace exits on #GP.

Actually, I think this whole discussion is moot. You no longer need
the first ioctl (ask for a userspace exit on #GP). The allow/deny list
is sufficient. Moreover, the allow/deny list checks can be in
kvm_emulate_{rdmsr,wrmsr} before the call to kvm_{get,set}_msr, so we
needn't be concerned with distinguishable error values either.
Alexander Graf Aug. 24, 2020, 1:35 a.m. UTC | #6
On 21.08.20 19:58, Jim Mattson wrote:
> 
> On Thu, Aug 20, 2020 at 3:55 PM Jim Mattson <jmattson@google.com> wrote:
>>
>> On Thu, Aug 20, 2020 at 2:59 PM Alexander Graf <graf@amazon.com> wrote:
>>
>>> Do we really need to do all of this dance of differentiating in kernel
>>> space between an exit that's there because user space asked for the exit
>>> and an MSR access that would just generate a #GP?
>>>
>>> At the end of the day, user space *knows* which MSRs it asked to
>>> receive. It can filter for them super easily.
>>
>> If no one else has an opinion, I can let this go. :-)
>>
>> However, to make the right decision in kvm_emulate_{rdmsr,wrmsr}
>> (without the unfortunate before and after checks that Aaron added),
>> kvm_{get,set}_msr should at least distinguish between "permission
>> denied" and "raise #GP," so I can provide a deny list without asking
>> for userspace exits on #GP.
> 
> Actually, I think this whole discussion is moot. You no longer need
> the first ioctl (ask for a userspace exit on #GP). The allow/deny list
> is sufficient. Moreover, the allow/deny list checks can be in
> kvm_emulate_{rdmsr,wrmsr} before the call to kvm_{get,set}_msr, so we
> needn't be concerned with distinguishable error values either.
> 

I also care about cases where I allow in-kernel handling, but for 
whatever reason there still would be a #GP injected into the guest. I 
want to record those events and be able to later have data that tell me 
why something went wrong.

So yes, for your use case you do not care about the distinction between 
"deny MSR access" and "report invalid MSR access". However, I do care :).

My stance on this is again that it's trivial to handle a few invalid MSR 
#GPs from user space and just not report anything. It should come at 
almost negligible performance cost, no?

As for your argumentation above, we have a second call chain into 
kvm_{get,set}_msr from the x86 emulator which you'd also need to cover.

One thing we could do I guess is to add a parameter to ENABLE_CAP on 
KVM_CAP_X86_USER_SPACE_MSR so that it only bounces on certain return 
values, such as -ENOENT. I still fail to see cases where that's 
genuinely beneficial though.


Alex



Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879
Jim Mattson Aug. 24, 2020, 5:23 p.m. UTC | #7
On Sun, Aug 23, 2020 at 6:35 PM Alexander Graf <graf@amazon.com> wrote:
>
>
>
> On 21.08.20 19:58, Jim Mattson wrote:
> >
> > On Thu, Aug 20, 2020 at 3:55 PM Jim Mattson <jmattson@google.com> wrote:
> >>
> >> On Thu, Aug 20, 2020 at 2:59 PM Alexander Graf <graf@amazon.com> wrote:
> >>
> >>> Do we really need to do all of this dance of differentiating in kernel
> >>> space between an exit that's there because user space asked for the exit
> >>> and an MSR access that would just generate a #GP?
> >>>
> >>> At the end of the day, user space *knows* which MSRs it asked to
> >>> receive. It can filter for them super easily.
> >>
> >> If no one else has an opinion, I can let this go. :-)
> >>
> >> However, to make the right decision in kvm_emulate_{rdmsr,wrmsr}
> >> (without the unfortunate before and after checks that Aaron added),
> >> kvm_{get,set}_msr should at least distinguish between "permission
> >> denied" and "raise #GP," so I can provide a deny list without asking
> >> for userspace exits on #GP.
> >
> > Actually, I think this whole discussion is moot. You no longer need
> > the first ioctl (ask for a userspace exit on #GP). The allow/deny list
> > is sufficient. Moreover, the allow/deny list checks can be in
> > kvm_emulate_{rdmsr,wrmsr} before the call to kvm_{get,set}_msr, so we
> > needn't be concerned with distinguishable error values either.
> >
>
> I also care about cases where I allow in-kernel handling, but for
> whatever reason there still would be a #GP injected into the guest. I
> want to record those events and be able to later have data that tell me
> why something went wrong.
>
> So yes, for your use case you do not care about the distinction between
> "deny MSR access" and "report invalid MSR access". However, I do care :).

In that case, I'm going to continue to hold a hard line on the
distinction between a #GP for an invalid MSR access and the #GP for an
unknown MSR. If, for instance, you wanted to implement ignore_msrs in
userspace, as you've proposed in the past, this would be extremely
helpful. Without it, userspace gets an exit because (1) the MSR access
isn't in the allow list, (2) the MSR access is invalid, or (3) the MSR
is unknown to kvm. As you've pointed out, it is easy for userspace to
distinguish (1) from the others, since it provided the allow/deny list
in the first place. But how do you distinguish (2) from (3) without
replicating the logic in the kernel?

> My stance on this is again that it's trivial to handle a few invalid MSR
> #GPs from user space and just not report anything. It should come at
> almost negligible performance cost, no?

Yes, the performance cost should be negligible, but what is the point?
We're trying to design a good API here, aren't we?

> As for your argumentation above, we have a second call chain into
> kvm_{get,set}_msr from the x86 emulator which you'd also need to cover.
>
> One thing we could do I guess is to add a parameter to ENABLE_CAP on
> KVM_CAP_X86_USER_SPACE_MSR so that it only bounces on certain return
> values, such as -ENOENT. I still fail to see cases where that's
> genuinely beneficial though.

I'd like to see two completely independent APIs, so that I can just
request a bounce on -EPERM through a deny list.  I think it's useful
to distinguish between -ENOENT and -EINVAL, but I have no issues wih
both causing an exit to userspace, if userspace has requested exits on
MSR #GPs.
Alexander Graf Aug. 24, 2020, 6:09 p.m. UTC | #8
On 24.08.20 19:23, Jim Mattson wrote:
> 
> On Sun, Aug 23, 2020 at 6:35 PM Alexander Graf <graf@amazon.com> wrote:
>>
>>
>>
>> On 21.08.20 19:58, Jim Mattson wrote:
>>>
>>> On Thu, Aug 20, 2020 at 3:55 PM Jim Mattson <jmattson@google.com> wrote:
>>>>
>>>> On Thu, Aug 20, 2020 at 2:59 PM Alexander Graf <graf@amazon.com> wrote:
>>>>
>>>>> Do we really need to do all of this dance of differentiating in kernel
>>>>> space between an exit that's there because user space asked for the exit
>>>>> and an MSR access that would just generate a #GP?
>>>>>
>>>>> At the end of the day, user space *knows* which MSRs it asked to
>>>>> receive. It can filter for them super easily.
>>>>
>>>> If no one else has an opinion, I can let this go. :-)
>>>>
>>>> However, to make the right decision in kvm_emulate_{rdmsr,wrmsr}
>>>> (without the unfortunate before and after checks that Aaron added),
>>>> kvm_{get,set}_msr should at least distinguish between "permission
>>>> denied" and "raise #GP," so I can provide a deny list without asking
>>>> for userspace exits on #GP.
>>>
>>> Actually, I think this whole discussion is moot. You no longer need
>>> the first ioctl (ask for a userspace exit on #GP). The allow/deny list
>>> is sufficient. Moreover, the allow/deny list checks can be in
>>> kvm_emulate_{rdmsr,wrmsr} before the call to kvm_{get,set}_msr, so we
>>> needn't be concerned with distinguishable error values either.
>>>
>>
>> I also care about cases where I allow in-kernel handling, but for
>> whatever reason there still would be a #GP injected into the guest. I
>> want to record those events and be able to later have data that tell me
>> why something went wrong.
>>
>> So yes, for your use case you do not care about the distinction between
>> "deny MSR access" and "report invalid MSR access". However, I do care :).
> 
> In that case, I'm going to continue to hold a hard line on the
> distinction between a #GP for an invalid MSR access and the #GP for an
> unknown MSR. If, for instance, you wanted to implement ignore_msrs in
> userspace, as you've proposed in the past, this would be extremely
> helpful. Without it, userspace gets an exit because (1) the MSR access
> isn't in the allow list, (2) the MSR access is invalid, or (3) the MSR
> is unknown to kvm. As you've pointed out, it is easy for userspace to
> distinguish (1) from the others, since it provided the allow/deny list
> in the first place. But how do you distinguish (2) from (3) without
> replicating the logic in the kernel?
> 
>> My stance on this is again that it's trivial to handle a few invalid MSR
>> #GPs from user space and just not report anything. It should come at
>> almost negligible performance cost, no?
> 
> Yes, the performance cost should be negligible, but what is the point?
> We're trying to design a good API here, aren't we?
> 
>> As for your argumentation above, we have a second call chain into
>> kvm_{get,set}_msr from the x86 emulator which you'd also need to cover.
>>
>> One thing we could do I guess is to add a parameter to ENABLE_CAP on
>> KVM_CAP_X86_USER_SPACE_MSR so that it only bounces on certain return
>> values, such as -ENOENT. I still fail to see cases where that's
>> genuinely beneficial though.
> 
> I'd like to see two completely independent APIs, so that I can just
> request a bounce on -EPERM through a deny list.  I think it's useful

Where would that bounce to? Which user space event does that trigger? 
Yet another one? Wouldn't 4 exit reasons just for MSR traps be a bit 
much? :)

> to distinguish between -ENOENT and -EINVAL, but I have no issues wih
> both causing an exit to userspace, if userspace has requested exits on
> MSR #GPs.

So imagine we took the first argument to ENABLE_CAP as filter:

   (1<<0) REPORT_ENOENT
   (1<<1) REPORT_EINVAL
   (1<<2) REPORT_EPERM
   (1<<31) REPORT_ANY

Then we also add the reason to the kvm_run exit response and user space 
can differentiate easily between the different events.


Alex



Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879
Jim Mattson Aug. 24, 2020, 6:34 p.m. UTC | #9
On Mon, Aug 24, 2020 at 11:09 AM Alexander Graf <graf@amazon.com> wrote:
>
>
>
> On 24.08.20 19:23, Jim Mattson wrote:
> >
> > On Sun, Aug 23, 2020 at 6:35 PM Alexander Graf <graf@amazon.com> wrote:
> >>
> >>
> >>
> >> On 21.08.20 19:58, Jim Mattson wrote:
> >>>
> >>> On Thu, Aug 20, 2020 at 3:55 PM Jim Mattson <jmattson@google.com> wrote:
> >>>>
> >>>> On Thu, Aug 20, 2020 at 2:59 PM Alexander Graf <graf@amazon.com> wrote:
> >>>>
> >>>>> Do we really need to do all of this dance of differentiating in kernel
> >>>>> space between an exit that's there because user space asked for the exit
> >>>>> and an MSR access that would just generate a #GP?
> >>>>>
> >>>>> At the end of the day, user space *knows* which MSRs it asked to
> >>>>> receive. It can filter for them super easily.
> >>>>
> >>>> If no one else has an opinion, I can let this go. :-)
> >>>>
> >>>> However, to make the right decision in kvm_emulate_{rdmsr,wrmsr}
> >>>> (without the unfortunate before and after checks that Aaron added),
> >>>> kvm_{get,set}_msr should at least distinguish between "permission
> >>>> denied" and "raise #GP," so I can provide a deny list without asking
> >>>> for userspace exits on #GP.
> >>>
> >>> Actually, I think this whole discussion is moot. You no longer need
> >>> the first ioctl (ask for a userspace exit on #GP). The allow/deny list
> >>> is sufficient. Moreover, the allow/deny list checks can be in
> >>> kvm_emulate_{rdmsr,wrmsr} before the call to kvm_{get,set}_msr, so we
> >>> needn't be concerned with distinguishable error values either.
> >>>
> >>
> >> I also care about cases where I allow in-kernel handling, but for
> >> whatever reason there still would be a #GP injected into the guest. I
> >> want to record those events and be able to later have data that tell me
> >> why something went wrong.
> >>
> >> So yes, for your use case you do not care about the distinction between
> >> "deny MSR access" and "report invalid MSR access". However, I do care :).
> >
> > In that case, I'm going to continue to hold a hard line on the
> > distinction between a #GP for an invalid MSR access and the #GP for an
> > unknown MSR. If, for instance, you wanted to implement ignore_msrs in
> > userspace, as you've proposed in the past, this would be extremely
> > helpful. Without it, userspace gets an exit because (1) the MSR access
> > isn't in the allow list, (2) the MSR access is invalid, or (3) the MSR
> > is unknown to kvm. As you've pointed out, it is easy for userspace to
> > distinguish (1) from the others, since it provided the allow/deny list
> > in the first place. But how do you distinguish (2) from (3) without
> > replicating the logic in the kernel?
> >
> >> My stance on this is again that it's trivial to handle a few invalid MSR
> >> #GPs from user space and just not report anything. It should come at
> >> almost negligible performance cost, no?
> >
> > Yes, the performance cost should be negligible, but what is the point?
> > We're trying to design a good API here, aren't we?
> >
> >> As for your argumentation above, we have a second call chain into
> >> kvm_{get,set}_msr from the x86 emulator which you'd also need to cover.
> >>
> >> One thing we could do I guess is to add a parameter to ENABLE_CAP on
> >> KVM_CAP_X86_USER_SPACE_MSR so that it only bounces on certain return
> >> values, such as -ENOENT. I still fail to see cases where that's
> >> genuinely beneficial though.
> >
> > I'd like to see two completely independent APIs, so that I can just
> > request a bounce on -EPERM through a deny list.  I think it's useful
>
> Where would that bounce to? Which user space event does that trigger?
> Yet another one? Wouldn't 4 exit reasons just for MSR traps be a bit
> much? :)

All of the exits are either KVM_EXIT_X86_RDMSR or KVM_EXIT_X86_WRMSR.
Or, we could put the direction in the msr struct and just have one
exit reason.

> > to distinguish between -ENOENT and -EINVAL, but I have no issues wih
> > both causing an exit to userspace, if userspace has requested exits on
> > MSR #GPs.
>
> So imagine we took the first argument to ENABLE_CAP as filter:
>
>    (1<<0) REPORT_ENOENT
>    (1<<1) REPORT_EINVAL
>    (1<<2) REPORT_EPERM
>    (1<<31) REPORT_ANY
>
> Then we also add the reason to the kvm_run exit response and user space
> can differentiate easily between the different events.

I think this works well. I still have to call both APIs to satisfy my
use case, but I'm willing to cave on that request. (I just realized
that there is a very good use case for an allow/deny list *without*
exits to userspace: prohibiting kvm from doing cross-vendor MSR
emulation.)
diff mbox series

Patch

diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index b66432b015d2..755610befbb5 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -367,6 +367,30 @@  TRACE_EVENT(kvm_msr,
 #define trace_kvm_msr_read_ex(ecx)         trace_kvm_msr(0, ecx, 0, true)
 #define trace_kvm_msr_write_ex(ecx, data)  trace_kvm_msr(1, ecx, data, true)
 
+TRACE_EVENT(kvm_userspace_msr,
+	TP_PROTO(bool is_write, u8 error, u32 index, u64 data),
+	TP_ARGS(is_write, error, index, data),
+
+	TP_STRUCT__entry(
+		__field(bool,	is_write)
+		__field(u8,	error)
+		__field(u32,	index)
+		__field(u64,	data)
+	),
+
+	TP_fast_assign(
+		__entry->is_write	= is_write;
+		__entry->error	= error;
+		__entry->index		= index;
+		__entry->data		= data;
+	),
+
+	TP_printk("userspace %s %x = 0x%llx, %s",
+		  __entry->is_write ? "wrmsr" : "rdmsr",
+		  __entry->index, __entry->data,
+		  __entry->error ? "error" : "no_error")
+);
+
 /*
  * Tracepoint for guest CR access.
  */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e349d51d5d65..b370b3f4b4f3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -109,6 +109,8 @@  static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
 static void store_regs(struct kvm_vcpu *vcpu);
 static int sync_regs(struct kvm_vcpu *vcpu);
 
+bool kvm_msr_user_exit(struct kvm *kvm, u32 index);
+
 struct kvm_x86_ops kvm_x86_ops __read_mostly;
 EXPORT_SYMBOL_GPL(kvm_x86_ops);
 
@@ -1629,11 +1631,19 @@  EXPORT_SYMBOL_GPL(kvm_set_msr);
 
 static int complete_emulated_msr(struct kvm_vcpu *vcpu, bool is_read)
 {
-	if (vcpu->run->msr.error) {
+	u32 ecx = vcpu->run->msr.index;
+	u64 data = vcpu->run->msr.data;
+	u8 error = vcpu->run->msr.error;
+
+	trace_kvm_userspace_msr(!is_read, error, ecx, data);
+	trace_kvm_msr(!is_read, ecx, data, !!error);
+
+	if (error) {
 		kvm_inject_gp(vcpu, 0);
+		return 1;
 	} else if (is_read) {
-		kvm_rax_write(vcpu, (u32)vcpu->run->msr.data);
-		kvm_rdx_write(vcpu, vcpu->run->msr.data >> 32);
+		kvm_rax_write(vcpu, (u32)data);
+		kvm_rdx_write(vcpu, data >> 32);
 	}
 
 	return kvm_skip_emulated_instruction(vcpu);
@@ -1653,9 +1663,6 @@  static int kvm_msr_user_space(struct kvm_vcpu *vcpu, u32 index,
 			      u32 exit_reason, u64 data,
 			      int (*completion)(struct kvm_vcpu *vcpu))
 {
-	if (!vcpu->kvm->arch.user_space_msr_enabled)
-		return 0;
-
 	vcpu->run->exit_reason = exit_reason;
 	vcpu->run->msr.error = 0;
 	vcpu->run->msr.pad[0] = 0;
@@ -1686,10 +1693,18 @@  int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
 	u64 data;
 	int r;
 
+	if (kvm_msr_user_exit(vcpu->kvm, ecx)) {
+		kvm_get_msr_user_space(vcpu, ecx);
+		/* Bounce to user space */
+		return 0;
+	}
+
+
 	r = kvm_get_msr(vcpu, ecx, &data);
 
 	/* MSR read failed? See if we should ask user space */
-	if (r && kvm_get_msr_user_space(vcpu, ecx)) {
+	if (r && vcpu->kvm->arch.user_space_msr_enabled) {
+		kvm_get_msr_user_space(vcpu, ecx);
 		/* Bounce to user space */
 		return 0;
 	}
@@ -1715,10 +1730,17 @@  int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
 	u64 data = kvm_read_edx_eax(vcpu);
 	int r;
 
+	if (kvm_msr_user_exit(vcpu->kvm, ecx)) {
+		kvm_set_msr_user_space(vcpu, ecx, data);
+		/* Bounce to user space */
+		return 0;
+	}
+
 	r = kvm_set_msr(vcpu, ecx, data);
 
 	/* MSR write failed? See if we should ask user space */
-	if (r && kvm_set_msr_user_space(vcpu, ecx, data)) {
+	if (r && vcpu->kvm->arch.user_space_msr_enabled) {
+		kvm_set_msr_user_space(vcpu, ecx, data);
 		/* Bounce to user space */
 		return 0;
 	}
@@ -3606,6 +3628,25 @@  static int kvm_vm_ioctl_set_exit_msrs(struct kvm *kvm,
 	return 0;
 }
 
+bool kvm_msr_user_exit(struct kvm *kvm, u32 index)
+{
+	struct kvm_msr_list *exit_msrs;
+	int i;
+
+	exit_msrs = kvm->arch.user_exit_msrs;
+
+	if (!exit_msrs)
+		return false;
+
+	for (i = 0; i < exit_msrs->nmsrs; ++i) {
+		if (exit_msrs->indices[i] == index)
+			return true;
+	}
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(kvm_msr_user_exit);
+
 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 	int r = 0;
@@ -6640,9 +6681,16 @@  static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
 	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
 	int r;
 
+	if (kvm_msr_user_exit(vcpu->kvm, msr_index)) {
+		kvm_get_msr_user_space(vcpu, msr_index);
+		/* Bounce to user space */
+		return X86EMUL_IO_NEEDED;
+	}
+
 	r = kvm_get_msr(vcpu, msr_index, pdata);
 
-	if (r && kvm_get_msr_user_space(vcpu, msr_index)) {
+	if (r && vcpu->kvm->arch.user_space_msr_enabled) {
+		kvm_get_msr_user_space(vcpu, msr_index);
 		/* Bounce to user space */
 		return X86EMUL_IO_NEEDED;
 	}
@@ -6656,9 +6704,16 @@  static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
 	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
 	int r;
 
+	if (kvm_msr_user_exit(vcpu->kvm, msr_index)) {
+		kvm_set_msr_user_space(vcpu, msr_index, data);
+		/* Bounce to user space */
+		return X86EMUL_IO_NEEDED;
+	}
+
 	r = kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data);
 
-	if (r && kvm_set_msr_user_space(vcpu, msr_index, data)) {
+	if (r && vcpu->kvm->arch.user_space_msr_enabled) {
+		kvm_set_msr_user_space(vcpu, msr_index, data);
 		/* Bounce to user space */
 		return X86EMUL_IO_NEEDED;
 	}
@@ -11090,3 +11145,4 @@  EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_ga_log);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_apicv_update_request);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_userspace_msr);