diff mbox series

[v3,07/12] KVM: x86: Ensure the MSR bitmap never clears userspace tracked MSRs

Message ID 20200818211533.849501-8-aaronlewis@google.com (mailing list archive)
State New, archived
Headers show
Series Allow userspace to manage MSRs | expand

Commit Message

Aaron Lewis Aug. 18, 2020, 9:15 p.m. UTC
SDM volume 3: 24.6.9 "MSR-Bitmap Address" and APM volume 2: 15.11 "MSR
intercepts" describe MSR permission bitmaps.  Permission bitmaps are
used to control whether an execution of rdmsr or wrmsr will cause a
vm exit.  For userspace tracked MSRs it is required they cause a vm
exit, so the host is able to forward the MSR to userspace.  This change
adds vmx/svm support to ensure the permission bitmap is properly set to
cause a vm exit to the host when rdmsr or wrmsr is used by one of the
userspace tracked MSRs.  Also, to avoid repeatedly setting them,
kvm_make_request() is used to coalesce these into a single call.

Signed-off-by: Aaron Lewis <aaronlewis@google.com>
Reviewed-by: Oliver Upton <oupton@google.com>
---
 arch/x86/include/asm/kvm_host.h |  3 ++
 arch/x86/kvm/svm/svm.c          | 49 ++++++++++++++++++++++++++-------
 arch/x86/kvm/vmx/vmx.c          | 13 ++++++++-
 arch/x86/kvm/x86.c              | 16 +++++++++++
 4 files changed, 70 insertions(+), 11 deletions(-)

Comments

kernel test robot Aug. 19, 2020, 1:12 a.m. UTC | #1
Hi Aaron,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on kvm/linux-next]
[also build test WARNING on v5.9-rc1 next-20200818]
[cannot apply to kvms390/next vhost/linux-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Aaron-Lewis/Allow-userspace-to-manage-MSRs/20200819-051903
base:   https://git.kernel.org/pub/scm/virt/kvm/kvm.git linux-next
config: i386-randconfig-s001-20200818 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-15) 9.3.0
reproduce:
        # apt-get install sparse
        # sparse version: v0.6.2-183-gaa6ede3b-dirty
        # save the attached .config to linux build tree
        make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=i386 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>


sparse warnings: (new ones prefixed by >>)

>> arch/x86/kvm/vmx/vmx.c:3823:6: sparse: sparse: symbol 'vmx_set_user_msr_intercept' was not declared. Should it be static?
   arch/x86/kvm/vmx/vmx.c: note: in included file:
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (110011 becomes 11)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (110011 becomes 11)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (100110 becomes 110)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (100490 becomes 490)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (100310 becomes 310)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (100510 becomes 510)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (100410 becomes 410)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (100490 becomes 490)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (100310 becomes 310)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (100510 becomes 510)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (100410 becomes 410)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (30203 becomes 203)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (30203 becomes 203)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (30283 becomes 283)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (30283 becomes 283)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (1b019b becomes 19b)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (1b021b becomes 21b)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (1b029b becomes 29b)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (1b031b becomes 31b)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (1b041b becomes 41b)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (80c88 becomes c88)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (1a081a becomes 81a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (1a081a becomes 81a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (1a081a becomes 81a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (120912 becomes 912)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (120912 becomes 912)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (120912 becomes 912)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (110311 becomes 311)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (120992 becomes 992)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (120992 becomes 992)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (100610 becomes 610)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (100690 becomes 690)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (100590 becomes 590)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (80408 becomes 408)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (120a92 becomes a92)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (1a099a becomes 99a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (1a091a becomes 91a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (a048a becomes 48a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (120a92 becomes a92)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (1a099a becomes 99a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (1a091a becomes 91a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (a048a becomes 48a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (a010a becomes 10a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (a050a becomes 50a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (1a071a becomes 71a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (1a079a becomes 79a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (1a001a becomes 1a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (1a009a becomes 9a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (1a011a becomes 11a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (1a081a becomes 81a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (1a081a becomes 81a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (1a011a becomes 11a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (180198 becomes 198)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (1a011a becomes 11a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (1a051a becomes 51a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (120392 becomes 392)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (120892 becomes 892)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (1a081a becomes 81a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (1a081a becomes 81a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (1a011a becomes 11a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (1a011a becomes 11a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (100490 becomes 490)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (100490 becomes 490)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (a028a becomes 28a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (a030a becomes 30a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (a038a becomes 38a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (a040a becomes 40a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (a028a becomes 28a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (a030a becomes 30a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (a038a becomes 38a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (a040a becomes 40a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (100090 becomes 90)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (100090 becomes 90)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (180118 becomes 118)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (1a001a becomes 1a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (80688 becomes 688)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (1a009a becomes 9a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (100790 becomes 790)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (100790 becomes 790)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (180198 becomes 198)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (1a011a becomes 11a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (120492 becomes 492)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (1a061a becomes 61a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (120492 becomes 492)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (1a061a becomes 61a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (120412 becomes 412)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (1a059a becomes 59a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (120412 becomes 412)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (1a059a becomes 59a)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (20402 becomes 402)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (1b001b becomes 1b)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (1b009b becomes 9b)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (1b011b becomes 11b)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (30083 becomes 83)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (30183 becomes 183)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (30003 becomes 3)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (30103 becomes 103)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: cast truncates bits from constant value (30303 becomes 303)
   arch/x86/kvm/vmx/evmcs.h:81:30: sparse: sparse: too many warnings
--
>> arch/x86/kvm/svm/svm.c:636:6: sparse: sparse: symbol 'svm_set_user_msr_intercept' was not declared. Should it be static?
   arch/x86/kvm/svm/svm.c:471:17: sparse: sparse: cast truncates bits from constant value (100000000 becomes 0)
   arch/x86/kvm/svm/svm.c:471:17: sparse: sparse: cast truncates bits from constant value (100000000 becomes 0)
   arch/x86/kvm/svm/svm.c:471:17: sparse: sparse: cast truncates bits from constant value (100000000 becomes 0)

Please review and possibly fold the followup patch.

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Alexander Graf Aug. 19, 2020, 3:26 p.m. UTC | #2
On 18.08.20 23:15, Aaron Lewis wrote:
> 
> SDM volume 3: 24.6.9 "MSR-Bitmap Address" and APM volume 2: 15.11 "MS
> intercepts" describe MSR permission bitmaps.  Permission bitmaps are
> used to control whether an execution of rdmsr or wrmsr will cause a
> vm exit.  For userspace tracked MSRs it is required they cause a vm
> exit, so the host is able to forward the MSR to userspace.  This change
> adds vmx/svm support to ensure the permission bitmap is properly set to
> cause a vm_exit to the host when rdmsr or wrmsr is used by one of the
> userspace tracked MSRs.  Also, to avoid repeatedly setting them,
> kvm_make_request() is used to coalesce these into a single call.
> 
> Signed-off-by: Aaron Lewis <aaronlewis@google.com>
> Reviewed-by: Oliver Upton <oupton@google.com>

This is incomplete, as it doesn't cover all of the x2apic registers. 
There are also a few MSRs that IIRC are handled differently from this 
logic, such as EFER.

I'm really curious if this is worth the effort? I would be inclined to 
say that MSRs that KVM has direct access for need special handling one 
way or another.


Alex



Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879
Aaron Lewis Aug. 20, 2020, 12:18 a.m. UTC | #3
On Wed, Aug 19, 2020 at 8:26 AM Alexander Graf <graf@amazon.com> wrote:
>
>
>
> On 18.08.20 23:15, Aaron Lewis wrote:
> >
> > SDM volume 3: 24.6.9 "MSR-Bitmap Address" and APM volume 2: 15.11 "MS
> > intercepts" describe MSR permission bitmaps.  Permission bitmaps are
> > used to control whether an execution of rdmsr or wrmsr will cause a
> > vm exit.  For userspace tracked MSRs it is required they cause a vm
> > exit, so the host is able to forward the MSR to userspace.  This change
> > adds vmx/svm support to ensure the permission bitmap is properly set to
> > cause a vm_exit to the host when rdmsr or wrmsr is used by one of the
> > userspace tracked MSRs.  Also, to avoid repeatedly setting them,
> > kvm_make_request() is used to coalesce these into a single call.
> >
> > Signed-off-by: Aaron Lewis <aaronlewis@google.com>
> > Reviewed-by: Oliver Upton <oupton@google.com>
>
> This is incomplete, as it doesn't cover all of the x2apic registers.
> There are also a few MSRs that IIRC are handled differently from this
> logic, such as EFER.
>
> I'm really curious if this is worth the effort? I would be inclined to
> say that MSRs that KVM has direct access for need special handling one
> way or another.
>

Can you please elaborate on this?  It was my understanding that the
permission bitmap covers the x2apic registers.  Also, I’m not sure how
EFER is handled differently, but I see there is a separate
conversation on that.

This effort does seem worthwhile as it ensures userspace is able to
manage the MSRs it is requesting, and will remain that way in the
future.


>
> Alex
>
>
>
> Amazon Development Center Germany GmbH
> Krausenstr. 38
> 10117 Berlin
> Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
> Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
> Sitz: Berlin
> Ust-ID: DE 289 237 879
>
>
Alexander Graf Aug. 20, 2020, 10:04 p.m. UTC | #4
On 20.08.20 02:18, Aaron Lewis wrote:
> 
> On Wed, Aug 19, 2020 at 8:26 AM Alexander Graf <graf@amazon.com> wrote:
>>
>>
>>
>> On 18.08.20 23:15, Aaron Lewis wrote:
>>>
>>> SDM volume 3: 24.6.9 "MSR-Bitmap Address" and APM volume 2: 15.11 "MS
>>> intercepts" describe MSR permission bitmaps.  Permission bitmaps are
>>> used to control whether an execution of rdmsr or wrmsr will cause a
>>> vm exit.  For userspace tracked MSRs it is required they cause a vm
>>> exit, so the host is able to forward the MSR to userspace.  This change
>>> adds vmx/svm support to ensure the permission bitmap is properly set to
>>> cause a vm_exit to the host when rdmsr or wrmsr is used by one of the
>>> userspace tracked MSRs.  Also, to avoid repeatedly setting them,
>>> kvm_make_request() is used to coalesce these into a single call.
>>>
>>> Signed-off-by: Aaron Lewis <aaronlewis@google.com>
>>> Reviewed-by: Oliver Upton <oupton@google.com>
>>
>> This is incomplete, as it doesn't cover all of the x2apic registers.
>> There are also a few MSRs that IIRC are handled differently from this
>> logic, such as EFER.
>>
>> I'm really curious if this is worth the effort? I would be inclined to
>> say that MSRs that KVM has direct access for need special handling one
>> way or another.
>>
> 
> Can you please elaborate on this?  It was my understanding that the
> permission bitmap covers the x2apic registers.  Also, I’m not sure how

So x2apic MSR passthrough is configured specially:

 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/x86/kvm/vmx/vmx.c#n3796

and I think not handled by this patch?

> EFER is handled differently, but I see there is a separate
> conversation on that.

EFER is a really special beast in VT.

> This effort does seem worthwhile as it ensures userspace is able to
> manage the MSRs it is requesting, and will remain that way in the
> future.

I couldn't see why any of the passthrough MSRs are relevant to user 
space, but I tend to agree that it makes everything more consistent.


Alex



Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879
Jim Mattson Aug. 20, 2020, 10:35 p.m. UTC | #5
On Thu, Aug 20, 2020 at 3:04 PM Alexander Graf <graf@amazon.com> wrote:
>
>
>
> On 20.08.20 02:18, Aaron Lewis wrote:
> >
> > On Wed, Aug 19, 2020 at 8:26 AM Alexander Graf <graf@amazon.com> wrote:
> >>
> >>
> >>
> >> On 18.08.20 23:15, Aaron Lewis wrote:
> >>>
> >>> SDM volume 3: 24.6.9 "MSR-Bitmap Address" and APM volume 2: 15.11 "MS
> >>> intercepts" describe MSR permission bitmaps.  Permission bitmaps are
> >>> used to control whether an execution of rdmsr or wrmsr will cause a
> >>> vm exit.  For userspace tracked MSRs it is required they cause a vm
> >>> exit, so the host is able to forward the MSR to userspace.  This change
> >>> adds vmx/svm support to ensure the permission bitmap is properly set to
> >>> cause a vm_exit to the host when rdmsr or wrmsr is used by one of the
> >>> userspace tracked MSRs.  Also, to avoid repeatedly setting them,
> >>> kvm_make_request() is used to coalesce these into a single call.
> >>>
> >>> Signed-off-by: Aaron Lewis <aaronlewis@google.com>
> >>> Reviewed-by: Oliver Upton <oupton@google.com>
> >>
> >> This is incomplete, as it doesn't cover all of the x2apic registers.
> >> There are also a few MSRs that IIRC are handled differently from this
> >> logic, such as EFER.
> >>
> >> I'm really curious if this is worth the effort? I would be inclined to
> >> say that MSRs that KVM has direct access for need special handling one
> >> way or another.
> >>
> >
> > Can you please elaborate on this?  It was my understanding that the
> > permission bitmap covers the x2apic registers.  Also, I’m not sure how
>
> So x2apic MSR passthrough is configured specially:
>
>
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/x86/kvm/vmx/vmx.c#n3796
>
> and I think not handled by this patch?

By happenstance only, I think, since there is also a call there to
vmx_disable_intercept_for_msr() for the TPR when x2APIC is enabled.
Aaron Lewis Aug. 21, 2020, 2:27 p.m. UTC | #6
On Thu, Aug 20, 2020 at 3:35 PM Jim Mattson <jmattson@google.com> wrote:
>
> On Thu, Aug 20, 2020 at 3:04 PM Alexander Graf <graf@amazon.com> wrote:
> >
> >
> >
> > On 20.08.20 02:18, Aaron Lewis wrote:
> > >
> > > On Wed, Aug 19, 2020 at 8:26 AM Alexander Graf <graf@amazon.com> wrote:
> > >>
> > >>
> > >>
> > >> On 18.08.20 23:15, Aaron Lewis wrote:
> > >>>
> > >>> SDM volume 3: 24.6.9 "MSR-Bitmap Address" and APM volume 2: 15.11 "MS
> > >>> intercepts" describe MSR permission bitmaps.  Permission bitmaps are
> > >>> used to control whether an execution of rdmsr or wrmsr will cause a
> > >>> vm exit.  For userspace tracked MSRs it is required they cause a vm
> > >>> exit, so the host is able to forward the MSR to userspace.  This change
> > >>> adds vmx/svm support to ensure the permission bitmap is properly set to
> > >>> cause a vm_exit to the host when rdmsr or wrmsr is used by one of the
> > >>> userspace tracked MSRs.  Also, to avoid repeatedly setting them,
> > >>> kvm_make_request() is used to coalesce these into a single call.
> > >>>
> > >>> Signed-off-by: Aaron Lewis <aaronlewis@google.com>
> > >>> Reviewed-by: Oliver Upton <oupton@google.com>
> > >>
> > >> This is incomplete, as it doesn't cover all of the x2apic registers.
> > >> There are also a few MSRs that IIRC are handled differently from this
> > >> logic, such as EFER.
> > >>
> > >> I'm really curious if this is worth the effort? I would be inclined to
> > >> say that MSRs that KVM has direct access for need special handling one
> > >> way or another.
> > >>
> > >
> > > Can you please elaborate on this?  It was my understanding that the
> > > permission bitmap covers the x2apic registers.  Also, I’m not sure how
> >
> > So x2apic MSR passthrough is configured specially:
> >
> >
> > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/x86/kvm/vmx/vmx.c#n3796
> >
> > and I think not handled by this patch?
>
> By happenstance only, I think, since there is also a call there to
> vmx_disable_intercept_for_msr() for the TPR when x2APIC is enabled.

If we want to be more explicit about it we could add
kvm_make_request(KVM_REQ_USER_MSR_UPDATE, vcpu) After the bitmap is
modified, but that doesn't seem to be necessary as Jim pointed out as
there are calls to vmx_disable_intercept_for_msr() there which will
set the update request for us.  And we only have to worry about that
if the bitmap is cleared which means MSR_BITMAP_MODE_X2APIC_APICV is
set, and that flag can only be set if MSR_BITMAP_MODE_X2APIC is set.
So, AFAICT that is covered by my changes.
Alexander Graf Aug. 21, 2020, 4:07 p.m. UTC | #7
On 21.08.20 16:27, Aaron Lewis wrote:
> 
> On Thu, Aug 20, 2020 at 3:35 PM Jim Mattson <jmattson@google.com> wrote:
>>
>> On Thu, Aug 20, 2020 at 3:04 PM Alexander Graf <graf@amazon.com> wrote:
>>>
>>>
>>>
>>> On 20.08.20 02:18, Aaron Lewis wrote:
>>>>
>>>> On Wed, Aug 19, 2020 at 8:26 AM Alexander Graf <graf@amazon.com> wrote:
>>>>>
>>>>>
>>>>>
>>>>> On 18.08.20 23:15, Aaron Lewis wrote:
>>>>>>
>>>>>> SDM volume 3: 24.6.9 "MSR-Bitmap Address" and APM volume 2: 15.11 "MS
>>>>>> intercepts" describe MSR permission bitmaps.  Permission bitmaps are
>>>>>> used to control whether an execution of rdmsr or wrmsr will cause a
>>>>>> vm exit.  For userspace tracked MSRs it is required they cause a vm
>>>>>> exit, so the host is able to forward the MSR to userspace.  This change
>>>>>> adds vmx/svm support to ensure the permission bitmap is properly set to
>>>>>> cause a vm_exit to the host when rdmsr or wrmsr is used by one of the
>>>>>> userspace tracked MSRs.  Also, to avoid repeatedly setting them,
>>>>>> kvm_make_request() is used to coalesce these into a single call.
>>>>>>
>>>>>> Signed-off-by: Aaron Lewis <aaronlewis@google.com>
>>>>>> Reviewed-by: Oliver Upton <oupton@google.com>
>>>>>
>>>>> This is incomplete, as it doesn't cover all of the x2apic registers.
>>>>> There are also a few MSRs that IIRC are handled differently from this
>>>>> logic, such as EFER.
>>>>>
>>>>> I'm really curious if this is worth the effort? I would be inclined to
>>>>> say that MSRs that KVM has direct access for need special handling one
>>>>> way or another.
>>>>>
>>>>
>>>> Can you please elaborate on this?  It was my understanding that the
>>>> permission bitmap covers the x2apic registers.  Also, I’m not sure how
>>>
>>> So x2apic MSR passthrough is configured specially:
>>>
>>>
>>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/x86/kvm/vmx/vmx.c#n3796
>>>
>>> and I think not handled by this patch?
>>
>> By happenstance only, I think, since there is also a call there to
>> vmx_disable_intercept_for_msr() for the TPR when x2APIC is enabled.
> 
> If we want to be more explicit about it we could add
> kvm_make_request(KVM_REQ_USER_MSR_UPDATE, vcpu) After the bitmap is
> modified, but that doesn't seem to be necessary as Jim pointed out as
> there are calls to vmx_disable_intercept_for_msr() there which will
> set the update request for us.  And we only have to worry about that
> if the bitmap is cleared which means MSR_BITMAP_MODE_X2APIC_APICV is
> set, and that flag can only be set if MSR_BITMAP_MODE_X2APIC is set.
> So, AFAICT that is covered by my changes.
> 

I don't understand - for most x2APIC MSRs, 
vmx_{en,dis}able_intercept_for_msr() never gets called, no?


Alex



Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879
Aaron Lewis Aug. 21, 2020, 4:43 p.m. UTC | #8
On Fri, Aug 21, 2020 at 9:08 AM Alexander Graf <graf@amazon.com> wrote:
>
>
>
> On 21.08.20 16:27, Aaron Lewis wrote:
> >
> > On Thu, Aug 20, 2020 at 3:35 PM Jim Mattson <jmattson@google.com> wrote:
> >>
> >> On Thu, Aug 20, 2020 at 3:04 PM Alexander Graf <graf@amazon.com> wrote:
> >>>
> >>>
> >>>
> >>> On 20.08.20 02:18, Aaron Lewis wrote:
> >>>>
> >>>> On Wed, Aug 19, 2020 at 8:26 AM Alexander Graf <graf@amazon.com> wrote:
> >>>>>
> >>>>>
> >>>>>
> >>>>> On 18.08.20 23:15, Aaron Lewis wrote:
> >>>>>>
> >>>>>> SDM volume 3: 24.6.9 "MSR-Bitmap Address" and APM volume 2: 15.11 "MS
> >>>>>> intercepts" describe MSR permission bitmaps.  Permission bitmaps are
> >>>>>> used to control whether an execution of rdmsr or wrmsr will cause a
> >>>>>> vm exit.  For userspace tracked MSRs it is required they cause a vm
> >>>>>> exit, so the host is able to forward the MSR to userspace.  This change
> >>>>>> adds vmx/svm support to ensure the permission bitmap is properly set to
> >>>>>> cause a vm_exit to the host when rdmsr or wrmsr is used by one of the
> >>>>>> userspace tracked MSRs.  Also, to avoid repeatedly setting them,
> >>>>>> kvm_make_request() is used to coalesce these into a single call.
> >>>>>>
> >>>>>> Signed-off-by: Aaron Lewis <aaronlewis@google.com>
> >>>>>> Reviewed-by: Oliver Upton <oupton@google.com>
> >>>>>
> >>>>> This is incomplete, as it doesn't cover all of the x2apic registers.
> >>>>> There are also a few MSRs that IIRC are handled differently from this
> >>>>> logic, such as EFER.
> >>>>>
> >>>>> I'm really curious if this is worth the effort? I would be inclined to
> >>>>> say that MSRs that KVM has direct access for need special handling one
> >>>>> way or another.
> >>>>>
> >>>>
> >>>> Can you please elaborate on this?  It was my understanding that the
> >>>> permission bitmap covers the x2apic registers.  Also, I’m not sure how
> >>>
> >>> So x2apic MSR passthrough is configured specially:
> >>>
> >>>
> >>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/x86/kvm/vmx/vmx.c#n3796
> >>>
> >>> and I think not handled by this patch?
> >>
> >> By happenstance only, I think, since there is also a call there to
> >> vmx_disable_intercept_for_msr() for the TPR when x2APIC is enabled.
> >
> > If we want to be more explicit about it we could add
> > kvm_make_request(KVM_REQ_USER_MSR_UPDATE, vcpu) After the bitmap is
> > modified, but that doesn't seem to be necessary as Jim pointed out as
> > there are calls to vmx_disable_intercept_for_msr() there which will
> > set the update request for us.  And we only have to worry about that
> > if the bitmap is cleared which means MSR_BITMAP_MODE_X2APIC_APICV is
> > set, and that flag can only be set if MSR_BITMAP_MODE_X2APIC is set.
> > So, AFAICT that is covered by my changes.
> >
>
> I don't understand - for most x2APIC MSRs,
> vmx_{en,dis}able_intercept_for_msr() never gets called, no?
>

Sorry, to be clear.  We just need it to be called once for us to be
covered.  When it's invoked we set a flag, so the next time vm enter
is called we run over the list of MSRs userspace cares about and
ensure they are all set correctly.  So, if the x2APIC permission
bitmaps are modified directly and cleared, we just need for
vmx_disable_intercept_for_msr() to be called at least once to tell us
to run over the list and set the bits for all MSRs userspace is
tracking.

>
> Alex
>
>
>
> Amazon Development Center Germany GmbH
> Krausenstr. 38
> 10117 Berlin
> Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
> Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
> Sitz: Berlin
> Ust-ID: DE 289 237 879
>
>
kernel test robot Aug. 26, 2020, 3:48 p.m. UTC | #9
Hi Aaron,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on kvm/linux-next]
[also build test WARNING on v5.9-rc2 next-20200826]
[cannot apply to kvms390/next vhost/linux-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Aaron-Lewis/Allow-userspace-to-manage-MSRs/20200819-051903
base:   https://git.kernel.org/pub/scm/virt/kvm/kvm.git linux-next
config: x86_64-randconfig-a001-20200826 (attached as .config)
compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project 7cfcecece0e0430937cf529ce74d3a071a4dedc6)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install x86_64 cross compiling tool for clang build
        # apt-get install binutils-x86-64-linux-gnu
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> arch/x86/kvm/vmx/vmx.c:3823:6: warning: no previous prototype for function 'vmx_set_user_msr_intercept' [-Wmissing-prototypes]
   void vmx_set_user_msr_intercept(struct kvm_vcpu *vcpu, u32 msr)
        ^
   arch/x86/kvm/vmx/vmx.c:3823:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
   void vmx_set_user_msr_intercept(struct kvm_vcpu *vcpu, u32 msr)
   ^
   static 
   1 warning generated.

# https://github.com/0day-ci/linux/commit/e78e2c7f2ae3e9e6be9768f1616b043406ae24dd
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Aaron-Lewis/Allow-userspace-to-manage-MSRs/20200819-051903
git checkout e78e2c7f2ae3e9e6be9768f1616b043406ae24dd
vim +/vmx_set_user_msr_intercept +3823 arch/x86/kvm/vmx/vmx.c

  3822	
> 3823	void vmx_set_user_msr_intercept(struct kvm_vcpu *vcpu, u32 msr)
  3824	{
  3825		vmx_enable_intercept_for_msr(vcpu, msr, MSR_TYPE_RW);
  3826	}
  3827	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
diff mbox series

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6c4c5b972395..65e9dcc19cc2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -87,6 +87,7 @@ 
 #define KVM_REQ_HV_TLB_FLUSH \
 	KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_APF_READY		KVM_ARCH_REQ(28)
+#define KVM_REQ_USER_MSR_UPDATE KVM_ARCH_REQ(29)
 
 #define CR0_RESERVED_BITS                                               \
 	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
@@ -1242,6 +1243,8 @@  struct kvm_x86_ops {
 	int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
 
 	void (*migrate_timers)(struct kvm_vcpu *vcpu);
+
+	void (*set_user_msr_intercept)(struct kvm_vcpu *vcpu, u32 msr);
 };
 
 struct kvm_x86_nested_ops {
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 56e9cf284c2a..c49d121ee102 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -583,13 +583,27 @@  static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
 	return !!test_bit(bit_write,  &tmp);
 }
 
+static void __set_msr_interception(u32 *msrpm, u32 msr, int read, int write,
+				   u32 offset)
+{
+	u8 bit_read, bit_write;
+	unsigned long tmp;
+
+	bit_read  = 2 * (msr & 0x0f);
+	bit_write = 2 * (msr & 0x0f) + 1;
+	tmp       = msrpm[offset];
+
+	read  ? clear_bit(bit_read,  &tmp) : set_bit(bit_read,  &tmp);
+	write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp);
+
+	msrpm[offset] = tmp;
+}
+
 static void set_msr_interception(struct kvm_vcpu *vcpu, u32 msr, int read,
 				 int write)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	u32 *msrpm = svm->msrpm;
-	u8 bit_read, bit_write;
-	unsigned long tmp;
 	u32 offset;
 
 	/*
@@ -598,17 +612,30 @@  static void set_msr_interception(struct kvm_vcpu *vcpu, u32 msr, int read,
 	 */
 	WARN_ON(!valid_msr_intercept(msr));
 
-	offset    = svm_msrpm_offset(msr);
-	bit_read  = 2 * (msr & 0x0f);
-	bit_write = 2 * (msr & 0x0f) + 1;
-	tmp       = msrpm[offset];
-
+	offset = svm_msrpm_offset(msr);
 	BUG_ON(offset == MSR_INVALID);
 
-	read  ? clear_bit(bit_read,  &tmp) : set_bit(bit_read,  &tmp);
-	write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp);
+	__set_msr_interception(msrpm, msr, read, write, offset);
 
-	msrpm[offset] = tmp;
+	if (read || write)
+		kvm_make_request(KVM_REQ_USER_MSR_UPDATE, vcpu);
+}
+
+static void set_user_msr_interception(struct kvm_vcpu *vcpu, u32 msr, int read,
+				      int write)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	u32 *msrpm = svm->msrpm;
+	u32 offset;
+
+	offset = svm_msrpm_offset(msr);
+	if (offset != MSR_INVALID)
+		__set_msr_interception(msrpm, msr, read, write, offset);
+}
+
+void svm_set_user_msr_intercept(struct kvm_vcpu *vcpu, u32 msr)
+{
+	set_user_msr_interception(vcpu, msr, 0, 0);
 }
 
 static void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm)
@@ -4153,6 +4180,8 @@  static struct kvm_x86_ops svm_x86_ops __initdata = {
 	.need_emulation_on_page_fault = svm_need_emulation_on_page_fault,
 
 	.apic_init_signal_blocked = svm_apic_init_signal_blocked,
+
+	.set_user_msr_intercept = svm_set_user_msr_intercept,
 };
 
 static struct kvm_x86_init_ops svm_init_ops __initdata = {
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index de03df72e742..12478ea7aac7 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -3725,6 +3725,10 @@  static __always_inline void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu,
 			__clear_bit(msr, msr_bitmap + 0xc00 / f);
 
 	}
+
+	if (type & MSR_TYPE_R || type & MSR_TYPE_W) {
+		kvm_make_request(KVM_REQ_USER_MSR_UPDATE, vcpu);
+	}
 }
 
 static __always_inline void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu,
@@ -3792,7 +3796,7 @@  static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
 }
 
 static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu,
-					 unsigned long *msr_bitmap, u8 mode)
+					unsigned long *msr_bitmap, u8 mode)
 {
 	int msr;
 
@@ -3816,6 +3820,11 @@  static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu,
 	}
 }
 
+void vmx_set_user_msr_intercept(struct kvm_vcpu *vcpu, u32 msr)
+{
+	vmx_enable_intercept_for_msr(vcpu, msr, MSR_TYPE_RW);
+}
+
 void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -8002,6 +8011,8 @@  static struct kvm_x86_ops vmx_x86_ops __initdata = {
 	.need_emulation_on_page_fault = vmx_need_emulation_on_page_fault,
 	.apic_init_signal_blocked = vmx_apic_init_signal_blocked,
 	.migrate_timers = vmx_migrate_timers,
+
+	.set_user_msr_intercept = vmx_set_user_msr_intercept,
 };
 
 static __init int hardware_setup(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b370b3f4b4f3..44cbcf22ec36 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3647,6 +3647,19 @@  bool kvm_msr_user_exit(struct kvm *kvm, u32 index)
 }
 EXPORT_SYMBOL_GPL(kvm_msr_user_exit);
 
+static void kvm_set_user_msr_intercepts(struct kvm_vcpu *vcpu)
+{
+	struct kvm_msr_list *msr_list = vcpu->kvm->arch.user_exit_msrs;
+	u32 i, msr;
+
+	if (msr_list) {
+		for (i = 0; i < msr_list->nmsrs; i++) {
+			msr = msr_list->indices[i];
+			kvm_x86_ops.set_user_msr_intercept(vcpu, msr);
+		}
+	}
+}
+
 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 	int r = 0;
@@ -8823,6 +8836,9 @@  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			kvm_vcpu_update_apicv(vcpu);
 		if (kvm_check_request(KVM_REQ_APF_READY, vcpu))
 			kvm_check_async_pf_completion(vcpu);
+
+		if (kvm_check_request(KVM_REQ_USER_MSR_UPDATE, vcpu))
+			kvm_set_user_msr_intercepts(vcpu);
 	}
 
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {