diff mbox series

[for-4.20] x86/PV: further harden guest memory accesses against speculative abuse

Message ID a615a96e-95d2-442f-b57d-0bb51142c977@suse.com (mailing list archive)
State New
Headers show
Series [for-4.20] x86/PV: further harden guest memory accesses against speculative abuse | expand

Commit Message

Jan Beulich Jan. 27, 2025, 10:15 a.m. UTC
The original implementation has two issues: For one it doesn't preserve
non-canonical-ness of inputs in the range 0x8000000000000000 through
0x80007fffffffffff. Bogus guest pointers in that range would not cause a
(#GP) fault upon access, when they should.

And then there is an AMD-specific aspect, where only the low 48 bits of
an address are used for speculative execution; the architecturally
mandated #GP for non-canonical addresses would be raised at a later
execution stage. Therefore to prevent Xen controlled data to make it
into any of the caches in a guest controllable manner, we need to
additionally ensure that for non-canonical inputs bit 47 would be clear.

See the code comment for how addressing both is being achieved.

Fixes: 4dc181599142 ("x86/PV: harden guest memory accesses against speculative abuse")
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
RFC: When scratch2 isn't %r8...%r15, the MOV, CMOVNB, and XOR will have
     unnecessary REX prefixes emitted, as users of the macro pass in 64-
     bit registers. Similar to what was done to be able to use SETcc (in
     the earlier alternative code sequence), we could derive %e.. from
     %r.. to shrink code size some; there are a few dozen instances of
     this code, after all. (An alternative, requiring to touch the use
     sites, would be to constrain the scratch registers to rAX...rDI and
     pass in only the last two characters of the names [e.g. "di", i.e.
     also without the leading %]. That would make it straightforward to
     use both %r.. and %e.. at the same time.)
---
v2: Drop the non-RCR alternative.

Comments

Oleksii Kurochko Jan. 27, 2025, 10:46 a.m. UTC | #1
On 1/27/25 11:15 AM, Jan Beulich wrote:
> The original implementation has two issues: For one it doesn't preserve
> non-canonical-ness of inputs in the range 0x8000000000000000 through
> 0x80007fffffffffff. Bogus guest pointers in that range would not cause a
> (#GP) fault upon access, when they should.
>
> And then there is an AMD-specific aspect, where only the low 48 bits of
> an address are used for speculative execution; the architecturally
> mandated #GP for non-canonical addresses would be raised at a later
> execution stage. Therefore to prevent Xen controlled data to make it
> into any of the caches in a guest controllable manner, we need to
> additionally ensure that for non-canonical inputs bit 47 would be clear.
>
> See the code comment for how addressing both is being achieved.
>
> Fixes: 4dc181599142 ("x86/PV: harden guest memory accesses against speculative abuse")
> Signed-off-by: Jan Beulich<jbeulich@suse.com>

With getting proper Acked-by/Reviewed-by:
   R-Acked-by: Oleksii Kurochko<oleksii.kurochko@gmail.com>

Thanks.

~ Oleksii

> ---
> RFC: When scratch2 isn't %r8...%r15, the MOV, CMOVNB, and XOR will have
>       unnecessary REX prefixes emitted, as users of the macro pass in 64-
>       bit registers. Similar to what was done to be able to use SETcc (in
>       the earlier alternative code sequence), we could derive %e.. from
>       %r.. to shrink code size some; there are a few dozen instances of
>       this code, after all. (An alternative, requiring to touch the use
>       sites, would be to constrain the scratch registers to rAX...rDI and
>       pass in only the last two characters of the names [e.g. "di", i.e.
>       also without the leading %]. That would make it straightforward to
>       use both %r.. and %e.. at the same time.)
> ---
> v2: Drop the non-RCR alternative.
>
> --- a/xen/arch/x86/include/asm/asm-defns.h
> +++ b/xen/arch/x86/include/asm/asm-defns.h
> @@ -1,3 +1,5 @@
> +#include <asm/page-bits.h>
> +
>   #ifndef HAVE_AS_CLAC_STAC
>   .macro clac
>       .byte 0x0f, 0x01, 0xca
> @@ -65,17 +67,36 @@
>   .macro guest_access_mask_ptr ptr:req, scratch1:req, scratch2:req
>   #if defined(CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS)
>       /*
> -     * Here we want
> -     *
> -     * ptr &= ~0ull >> (ptr < HYPERVISOR_VIRT_END);
> -     *
> +     * Here we want to adjust \ptr such that
> +     * - if it's within Xen range, it becomes non-canonical,
> +     * - otherwise if it's (non-)canonical on input, it retains that property,
> +     * - if the result is non-canonical, bit 47 is clear (to avoid
> +     *   potentially populating the cache with Xen data),
>        * but guaranteed without any conditional branches (hence in assembly).
> +     *
> +     * To achieve this we determine which bit to forcibly clear: Either bit 47
> +     * (in case the address is below HYPERVISOR_VIRT_END) or bit 63.  Further
> +     * we determine whether for forcably set bit 63: In case we first cleared
> +     * it, we'll merely restore the original address.  In case we ended up
> +     * clearing bit 47 (i.e. the address was either non-canonical or within Xen
> +     * range), setting the bit will yield a guaranteed non-canonical address.
> +     * If we didn't clear a bit, we also won't set one: The address was in the
> +     * low half of address space in that case with bit 47 already clear.  The
> +     * address can thus be left unchanged, whether canonical or not.
>        */
>       mov $(HYPERVISOR_VIRT_END - 1), \scratch1
> -    mov $~0, \scratch2
> +    mov $(VADDR_BITS - 1), \scratch2
>       cmp \ptr, \scratch1
> +    /*
> +     * Not needed: The value we have in \scratch1 will be truncated to 6 bits,
> +     * thus yielding the value we need.
> +    mov $63, \scratch1
> +     */
> +    cmovnb \scratch2, \scratch1
> +    xor \scratch2, \scratch2
> +    btr \scratch1, \ptr
>       rcr $1, \scratch2
> -    and \scratch2, \ptr
> +    or \scratch2, \ptr
>   #elif defined(CONFIG_DEBUG) && defined(CONFIG_PV)
>       xor $~\@, \scratch1
>       xor $~\@, \scratch2
Roger Pau Monné Jan. 27, 2025, 11:31 a.m. UTC | #2
On Mon, Jan 27, 2025 at 11:15:22AM +0100, Jan Beulich wrote:
> The original implementation has two issues: For one it doesn't preserve
> non-canonical-ness of inputs in the range 0x8000000000000000 through
> 0x80007fffffffffff. Bogus guest pointers in that range would not cause a
> (#GP) fault upon access, when they should.
> 
> And then there is an AMD-specific aspect, where only the low 48 bits of
> an address are used for speculative execution; the architecturally
> mandated #GP for non-canonical addresses would be raised at a later
> execution stage. Therefore to prevent Xen controlled data to make it
> into any of the caches in a guest controllable manner, we need to
> additionally ensure that for non-canonical inputs bit 47 would be clear.
> 
> See the code comment for how addressing both is being achieved.
> 
> Fixes: 4dc181599142 ("x86/PV: harden guest memory accesses against speculative abuse")
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> RFC: When scratch2 isn't %r8...%r15, the MOV, CMOVNB, and XOR will have
>      unnecessary REX prefixes emitted, as users of the macro pass in 64-
>      bit registers. Similar to what was done to be able to use SETcc (in
>      the earlier alternative code sequence), we could derive %e.. from
>      %r.. to shrink code size some; there are a few dozen instances of
>      this code, after all. (An alternative, requiring to touch the use
>      sites, would be to constrain the scratch registers to rAX...rDI and
>      pass in only the last two characters of the names [e.g. "di", i.e.
>      also without the leading %]. That would make it straightforward to
>      use both %r.. and %e.. at the same time.)
> ---
> v2: Drop the non-RCR alternative.
> 
> --- a/xen/arch/x86/include/asm/asm-defns.h
> +++ b/xen/arch/x86/include/asm/asm-defns.h
> @@ -1,3 +1,5 @@
> +#include <asm/page-bits.h>
> +
>  #ifndef HAVE_AS_CLAC_STAC
>  .macro clac
>      .byte 0x0f, 0x01, 0xca
> @@ -65,17 +67,36 @@
>  .macro guest_access_mask_ptr ptr:req, scratch1:req, scratch2:req
>  #if defined(CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS)
>      /*
> -     * Here we want
> -     *
> -     * ptr &= ~0ull >> (ptr < HYPERVISOR_VIRT_END);
> -     *
> +     * Here we want to adjust \ptr such that
> +     * - if it's within Xen range, it becomes non-canonical,
> +     * - otherwise if it's (non-)canonical on input, it retains that property,
> +     * - if the result is non-canonical, bit 47 is clear (to avoid
> +     *   potentially populating the cache with Xen data),

It's my understanding from the commit message that this toggling of
bit 47 is only done due to AMD behavior, as speculative execution
there ignores any higher than 47, and hence non-canonical inputs are
no detected when speculatively executing?

It might be worth explicitly mentioning this in the comment.

>       * but guaranteed without any conditional branches (hence in assembly).
> +     *
> +     * To achieve this we determine which bit to forcibly clear: Either bit 47
> +     * (in case the address is below HYPERVISOR_VIRT_END) or bit 63.  Further
> +     * we determine whether for forcably set bit 63: In case we first cleared
> +     * it, we'll merely restore the original address.  In case we ended up
> +     * clearing bit 47 (i.e. the address was either non-canonical or within Xen
> +     * range), setting the bit will yield a guaranteed non-canonical address.
> +     * If we didn't clear a bit, we also won't set one: The address was in the
> +     * low half of address space in that case with bit 47 already clear.  The
> +     * address can thus be left unchanged, whether canonical or not.

Since for AMD we have to do the bit 47 clearing, won't it be enough to
do something like:

ptr &= (ptr < HYPERVISOR_VIRT_END) ? ~(1ULL <<  (VADDR_BITS - 1) : ~0ULL;

So only care to clear bit 47 when ptr is below HYPERVISOR_VIRT_END?
As that would already diverge accesses into the Xen address space
without having to toggle bit 63?

Thanks, Roger.
Jan Beulich Jan. 27, 2025, 12:06 p.m. UTC | #3
On 27.01.2025 12:31, Roger Pau Monné wrote:
> On Mon, Jan 27, 2025 at 11:15:22AM +0100, Jan Beulich wrote:
>> --- a/xen/arch/x86/include/asm/asm-defns.h
>> +++ b/xen/arch/x86/include/asm/asm-defns.h
>> @@ -1,3 +1,5 @@
>> +#include <asm/page-bits.h>
>> +
>>  #ifndef HAVE_AS_CLAC_STAC
>>  .macro clac
>>      .byte 0x0f, 0x01, 0xca
>> @@ -65,17 +67,36 @@
>>  .macro guest_access_mask_ptr ptr:req, scratch1:req, scratch2:req
>>  #if defined(CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS)
>>      /*
>> -     * Here we want
>> -     *
>> -     * ptr &= ~0ull >> (ptr < HYPERVISOR_VIRT_END);
>> -     *
>> +     * Here we want to adjust \ptr such that
>> +     * - if it's within Xen range, it becomes non-canonical,
>> +     * - otherwise if it's (non-)canonical on input, it retains that property,
>> +     * - if the result is non-canonical, bit 47 is clear (to avoid
>> +     *   potentially populating the cache with Xen data),
> 
> It's my understanding from the commit message that this toggling of
> bit 47 is only done due to AMD behavior, as speculative execution
> there ignores any higher than 47, and hence non-canonical inputs are
> no detected when speculatively executing?
> 
> It might be worth explicitly mentioning this in the comment.

Hmm, to be honest I'd rather not (and keep mentioning AMD to the
description). First and foremost because if I mention it here, I
strictly also ought to mention Hygon, I think. In the description I
feel a little easier about not specifically saying so. (But yes, if
you strongly think I should mention vendors here, I'd be okay-ish to
add "on AMD-like hardware" before the closing paren on the 2nd
bullet point.)

Further, with such a consideration, would we perhaps also want to
consider simplifying the transformation when AMD=n in .config? (I
could see us doing so, but not as late in a release cycle as we're
now at. Plus I say so without having thought about whether a
simplification is actually possible.)

>>       * but guaranteed without any conditional branches (hence in assembly).
>> +     *
>> +     * To achieve this we determine which bit to forcibly clear: Either bit 47
>> +     * (in case the address is below HYPERVISOR_VIRT_END) or bit 63.  Further
>> +     * we determine whether for forcably set bit 63: In case we first cleared
>> +     * it, we'll merely restore the original address.  In case we ended up
>> +     * clearing bit 47 (i.e. the address was either non-canonical or within Xen
>> +     * range), setting the bit will yield a guaranteed non-canonical address.
>> +     * If we didn't clear a bit, we also won't set one: The address was in the
>> +     * low half of address space in that case with bit 47 already clear.  The
>> +     * address can thus be left unchanged, whether canonical or not.
> 
> Since for AMD we have to do the bit 47 clearing, won't it be enough to
> do something like:
> 
> ptr &= (ptr < HYPERVISOR_VIRT_END) ? ~(1ULL <<  (VADDR_BITS - 1) : ~0ULL;
> 
> So only care to clear bit 47 when ptr is below HYPERVISOR_VIRT_END?
> As that would already diverge accesses into the Xen address space
> without having to toggle bit 63?

No, this may transform a non-canonical input into a canonical address
(accesses through which then won't #GP as expected), just for a different
address range than where we had the same mistake before.

Jan
Roger Pau Monné Jan. 27, 2025, 2:14 p.m. UTC | #4
On Mon, Jan 27, 2025 at 01:06:51PM +0100, Jan Beulich wrote:
> On 27.01.2025 12:31, Roger Pau Monné wrote:
> > On Mon, Jan 27, 2025 at 11:15:22AM +0100, Jan Beulich wrote:
> >> --- a/xen/arch/x86/include/asm/asm-defns.h
> >> +++ b/xen/arch/x86/include/asm/asm-defns.h
> >> @@ -1,3 +1,5 @@
> >> +#include <asm/page-bits.h>
> >> +
> >>  #ifndef HAVE_AS_CLAC_STAC
> >>  .macro clac
> >>      .byte 0x0f, 0x01, 0xca
> >> @@ -65,17 +67,36 @@
> >>  .macro guest_access_mask_ptr ptr:req, scratch1:req, scratch2:req
> >>  #if defined(CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS)
> >>      /*
> >> -     * Here we want
> >> -     *
> >> -     * ptr &= ~0ull >> (ptr < HYPERVISOR_VIRT_END);
> >> -     *
> >> +     * Here we want to adjust \ptr such that
> >> +     * - if it's within Xen range, it becomes non-canonical,
> >> +     * - otherwise if it's (non-)canonical on input, it retains that property,
> >> +     * - if the result is non-canonical, bit 47 is clear (to avoid
> >> +     *   potentially populating the cache with Xen data),
> > 
> > It's my understanding from the commit message that this toggling of
> > bit 47 is only done due to AMD behavior, as speculative execution
> > there ignores any higher than 47, and hence non-canonical inputs are
> > no detected when speculatively executing?
> > 
> > It might be worth explicitly mentioning this in the comment.
> 
> Hmm, to be honest I'd rather not (and keep mentioning AMD to the
> description). First and foremost because if I mention it here, I
> strictly also ought to mention Hygon, I think. In the description I
> feel a little easier about not specifically saying so. (But yes, if
> you strongly think I should mention vendors here, I'd be okay-ish to
> add "on AMD-like hardware" before the closing paren on the 2nd
> bullet point.)
> 
> Further, with such a consideration, would we perhaps also want to
> consider simplifying the transformation when AMD=n in .config? (I
> could see us doing so, but not as late in a release cycle as we're
> now at. Plus I say so without having thought about whether a
> simplification is actually possible.)
> 
> >>       * but guaranteed without any conditional branches (hence in assembly).
> >> +     *
> >> +     * To achieve this we determine which bit to forcibly clear: Either bit 47
> >> +     * (in case the address is below HYPERVISOR_VIRT_END) or bit 63.  Further
> >> +     * we determine whether for forcably set bit 63: In case we first cleared
> >> +     * it, we'll merely restore the original address.  In case we ended up
> >> +     * clearing bit 47 (i.e. the address was either non-canonical or within Xen
> >> +     * range), setting the bit will yield a guaranteed non-canonical address.
> >> +     * If we didn't clear a bit, we also won't set one: The address was in the
> >> +     * low half of address space in that case with bit 47 already clear.  The
> >> +     * address can thus be left unchanged, whether canonical or not.
> > 
> > Since for AMD we have to do the bit 47 clearing, won't it be enough to
> > do something like:
> > 
> > ptr &= (ptr < HYPERVISOR_VIRT_END) ? ~(1ULL <<  (VADDR_BITS - 1) : ~0ULL;
> > 
> > So only care to clear bit 47 when ptr is below HYPERVISOR_VIRT_END?
> > As that would already diverge accesses into the Xen address space
> > without having to toggle bit 63?
> 
> No, this may transform a non-canonical input into a canonical address
> (accesses through which then won't #GP as expected), just for a different
> address range than where we had the same mistake before.

Indeed, you are right.  With the expanded comment:

Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>

Thanks, Roger.
diff mbox series

Patch

--- a/xen/arch/x86/include/asm/asm-defns.h
+++ b/xen/arch/x86/include/asm/asm-defns.h
@@ -1,3 +1,5 @@ 
+#include <asm/page-bits.h>
+
 #ifndef HAVE_AS_CLAC_STAC
 .macro clac
     .byte 0x0f, 0x01, 0xca
@@ -65,17 +67,36 @@ 
 .macro guest_access_mask_ptr ptr:req, scratch1:req, scratch2:req
 #if defined(CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS)
     /*
-     * Here we want
-     *
-     * ptr &= ~0ull >> (ptr < HYPERVISOR_VIRT_END);
-     *
+     * Here we want to adjust \ptr such that
+     * - if it's within Xen range, it becomes non-canonical,
+     * - otherwise if it's (non-)canonical on input, it retains that property,
+     * - if the result is non-canonical, bit 47 is clear (to avoid
+     *   potentially populating the cache with Xen data),
      * but guaranteed without any conditional branches (hence in assembly).
+     *
+     * To achieve this we determine which bit to forcibly clear: Either bit 47
+     * (in case the address is below HYPERVISOR_VIRT_END) or bit 63.  Further
+     * we determine whether for forcably set bit 63: In case we first cleared
+     * it, we'll merely restore the original address.  In case we ended up
+     * clearing bit 47 (i.e. the address was either non-canonical or within Xen
+     * range), setting the bit will yield a guaranteed non-canonical address.
+     * If we didn't clear a bit, we also won't set one: The address was in the
+     * low half of address space in that case with bit 47 already clear.  The
+     * address can thus be left unchanged, whether canonical or not.
      */
     mov $(HYPERVISOR_VIRT_END - 1), \scratch1
-    mov $~0, \scratch2
+    mov $(VADDR_BITS - 1), \scratch2
     cmp \ptr, \scratch1
+    /*
+     * Not needed: The value we have in \scratch1 will be truncated to 6 bits,
+     * thus yielding the value we need.
+    mov $63, \scratch1
+     */
+    cmovnb \scratch2, \scratch1
+    xor \scratch2, \scratch2
+    btr \scratch1, \ptr
     rcr $1, \scratch2
-    and \scratch2, \ptr
+    or \scratch2, \ptr
 #elif defined(CONFIG_DEBUG) && defined(CONFIG_PV)
     xor $~\@, \scratch1
     xor $~\@, \scratch2