Message ID | 20210506184925.290359-1-jacobhxu@google.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [kvm-unit-tests,v2] x86: Do not assign values to unaligned pointer to 128 bits | expand |
On Thu, May 06, 2021, Jacob Xu wrote: > When compiled with clang, the following statement gets converted into a > movaps instructions. > mem->u[0] = 5; mem->u[1] = 6; mem->u[2] = 7; mem->u[3] = 8; > > Since mem is an unaligned pointer to a union of an sse, we get a GP when > running. > > All we want is to make the values between mem and v different for this > testcase, so let's just memset the pointer at mem, and convert to > uint32_t pointer. Then the compiler will not assume the pointer is > aligned to 128 bits. > > Fixes: e5e76263b5 ("x86: add additional test cases for sse exceptions to > emulator.c") > > Signed-off-by: Jacob Xu <jacobhxu@google.com> > --- > x86/emulator.c | 6 +++--- > 1 file changed, 3 insertions(+), 3 deletions(-) > > diff --git a/x86/emulator.c b/x86/emulator.c > index 9705073..a2c7e5b 100644 > --- a/x86/emulator.c > +++ b/x86/emulator.c > @@ -716,12 +716,12 @@ static __attribute__((target("sse2"))) void test_sse_exceptions(void *cross_mem) > > // test unaligned access for movups, movupd and movaps > v.u[0] = 1; v.u[1] = 2; v.u[2] = 3; v.u[3] = 4; > - mem->u[0] = 5; mem->u[1] = 6; mem->u[2] = 7; mem->u[3] = 8; > + memset((uint32_t *)mem, 0xdecafbad, sizeof(mem)); memset() takes a void *, which it casts to a char *, i.e. it works on one byte at a time. Casting to a uint32_t * won't make it write the full "0xdecafbad"; it will just repeat 0xad over and over. The size needs to be sizeof(*mem), i.e. the size of the object that mem points to, not the size of the pointer's storage. 
> asm("movups %1, %0" : "=m"(*mem) : "x"(v.sse)); > report(sseeq(&v, mem), "movups unaligned"); > > v.u[0] = 1; v.u[1] = 2; v.u[2] = 3; v.u[3] = 4; > - mem->u[0] = 5; mem->u[1] = 6; mem->u[2] = 7; mem->u[3] = 8; > + memset((uint32_t *)mem, 0xdecafbad, sizeof(mem)); > asm("movupd %1, %0" : "=m"(*mem) : "x"(v.sse)); > report(sseeq(&v, mem), "movupd unaligned"); > exceptions = 0; > @@ -734,7 +734,7 @@ static __attribute__((target("sse2"))) void test_sse_exceptions(void *cross_mem) > // setup memory for cross page access > mem = (sse_union *)(&bytes[4096-8]); > v.u[0] = 1; v.u[1] = 2; v.u[2] = 3; v.u[3] = 4; > - mem->u[0] = 5; mem->u[1] = 6; mem->u[2] = 7; mem->u[3] = 8; > + memset((uint32_t *)mem, 0xdecafbad, sizeof(mem)); > > asm("movups %1, %0" : "=m"(*mem) : "x"(v.sse)); > report(sseeq(&v, mem), "movups unaligned crosspage"); > -- > 2.31.1.607.g51e8a6a459-goog >
> memset() takes a void *, which it casts to an char, i.e. it works on one byte at a time. Huh, TIL. Based on this I'd thought that I don't need a cast at all, but doing so actually results in a movaps instruction. I've changed the cast back to (uint8_t *). > The size needs to be sizeof(*mem) Bah, thanks for catching that. I've updated it below. --- x86/emulator.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/x86/emulator.c b/x86/emulator.c index 9705073..ea23ef1 100644 --- a/x86/emulator.c +++ b/x86/emulator.c @@ -716,12 +716,12 @@ static __attribute__((target("sse2"))) void test_sse_exceptions(void *cross_mem) // test unaligned access for movups, movupd and movaps v.u[0] = 1; v.u[1] = 2; v.u[2] = 3; v.u[3] = 4; - mem->u[0] = 5; mem->u[1] = 6; mem->u[2] = 7; mem->u[3] = 8; + memset((uint8_t *)mem, 5, sizeof(*mem)); asm("movups %1, %0" : "=m"(*mem) : "x"(v.sse)); report(sseeq(&v, mem), "movups unaligned"); v.u[0] = 1; v.u[1] = 2; v.u[2] = 3; v.u[3] = 4; - mem->u[0] = 5; mem->u[1] = 6; mem->u[2] = 7; mem->u[3] = 8; + memset((uint8_t *)mem, 5, sizeof(*mem)); asm("movupd %1, %0" : "=m"(*mem) : "x"(v.sse)); report(sseeq(&v, mem), "movupd unaligned"); exceptions = 0; @@ -734,7 +734,7 @@ static __attribute__((target("sse2"))) void test_sse_exceptions(void *cross_mem) // setup memory for cross page access mem = (sse_union *)(&bytes[4096-8]); v.u[0] = 1; v.u[1] = 2; v.u[2] = 3; v.u[3] = 4; - mem->u[0] = 5; mem->u[1] = 6; mem->u[2] = 7; mem->u[3] = 8; + memset((uint8_t *)mem, 5, sizeof(*mem)); asm("movups %1, %0" : "=m"(*mem) : "x"(v.sse)); report(sseeq(&v, mem), "movups unaligned crosspage"); -- On Thu, May 6, 2021 at 11:58 AM Sean Christopherson <seanjc@google.com> wrote: > > On Thu, May 06, 2021, Jacob Xu wrote: > > When compiled with clang, the following statement gets converted into a > > movaps instructions. 
> > mem->u[0] = 5; mem->u[1] = 6; mem->u[2] = 7; mem->u[3] = 8; > > > > Since mem is an unaligned pointer to a union of an sse, we get a GP when > > running. > > > > All we want is to make the values between mem and v different for this > > testcase, so let's just memset the pointer at mem, and convert to > > uint32_t pointer. Then the compiler will not assume the pointer is > > aligned to 128 bits. > > > > Fixes: e5e76263b5 ("x86: add additional test cases for sse exceptions to > > emulator.c") > > > > Signed-off-by: Jacob Xu <jacobhxu@google.com> > > --- > > x86/emulator.c | 6 +++--- > > 1 file changed, 3 insertions(+), 3 deletions(-) > > > > diff --git a/x86/emulator.c b/x86/emulator.c > > index 9705073..a2c7e5b 100644 > > --- a/x86/emulator.c > > +++ b/x86/emulator.c > > @@ -716,12 +716,12 @@ static __attribute__((target("sse2"))) void test_sse_exceptions(void *cross_mem) > > > > // test unaligned access for movups, movupd and movaps > > v.u[0] = 1; v.u[1] = 2; v.u[2] = 3; v.u[3] = 4; > > - mem->u[0] = 5; mem->u[1] = 6; mem->u[2] = 7; mem->u[3] = 8; > > + memset((uint32_t *)mem, 0xdecafbad, sizeof(mem)); > > memset() takes a void *, which it casts to an char, i.e. it works on one byte at > a time. Casting to a uint32_t won't make it write the full "0xdecafbad", it will > just repease 0xad over and over. > > The size needs to be sizeof(*mem), i.e. the size of the object that mem points to, > not the size of the pointer's storage. 
> > > asm("movups %1, %0" : "=m"(*mem) : "x"(v.sse)); > > report(sseeq(&v, mem), "movups unaligned"); > > > > v.u[0] = 1; v.u[1] = 2; v.u[2] = 3; v.u[3] = 4; > > - mem->u[0] = 5; mem->u[1] = 6; mem->u[2] = 7; mem->u[3] = 8; > > + memset((uint32_t *)mem, 0xdecafbad, sizeof(mem)); > > asm("movupd %1, %0" : "=m"(*mem) : "x"(v.sse)); > > report(sseeq(&v, mem), "movupd unaligned"); > > exceptions = 0; > > @@ -734,7 +734,7 @@ static __attribute__((target("sse2"))) void test_sse_exceptions(void *cross_mem) > > // setup memory for cross page access > > mem = (sse_union *)(&bytes[4096-8]); > > v.u[0] = 1; v.u[1] = 2; v.u[2] = 3; v.u[3] = 4; > > - mem->u[0] = 5; mem->u[1] = 6; mem->u[2] = 7; mem->u[3] = 8; > > + memset((uint32_t *)mem, 0xdecafbad, sizeof(mem)); > > > > asm("movups %1, %0" : "=m"(*mem) : "x"(v.sse)); > > report(sseeq(&v, mem), "movups unaligned crosspage"); > > -- > > 2.31.1.607.g51e8a6a459-goog > >
On Thu, May 06, 2021, Jacob Xu wrote: > > memset() takes a void *, which it casts to an char, i.e. it works on one byte at > a time. > Huh, TIL. Based on this I'd thought that I don't need a cast at all, > but doing so actually results in a movaps instruction. Ewwww. That's likely because emulator.c does: #define memset __builtin_memset and the compiler is clever enough to know that __attribute__((vector_size(16))) means the variable is (supposed to be) aligned. > I've changed the cast back to (uint8_t *). I assume removing the above #define and grabbing memset() from string.c fixes the movaps generation? If so, that has my vote, as opposed to fudging around the compiler by casting to uint8_t *. As evidenced by this issue, using the compiler's memset() in kvm-unit-tests seems inherently dangerous since the tests are often doing intentionally stupid things.
On Thu, May 6, 2021 at 12:14 PM Jacob Xu <jacobhxu@google.com> wrote: > > > memset() takes a void *, which it casts to a char *, i.e. it works on one byte at > a time. > Huh, TIL. Based on this I'd thought that I don't need a cast at all, > but doing so actually results in a movaps instruction. > I've changed the cast back to (uint8_t *). I'm pretty sure you're just getting lucky. If 'mem' is not 16-byte aligned, the behavior of the code is undefined. The compiler does not have to discard what it can infer about the alignment just because you cast 'mem' to a type with weaker alignment constraints. Why does 'mem' need to have type 'sse_union *'? Why can't it just be declared as 'uint8_t *'? Just add a "memory" clobber to the inline asm statements that use 'mem' as an SSE operand. Of course, passing it as an argument to sseeq() also implies 16-byte alignment. Perhaps sseeq should take uint32_t pointers as arguments rather than sse_union pointers. I'm not convinced that the sse_union buys us anything other than trouble.
> The compiler does not > have to discard what it can infer about the alignment just because you > cast 'mem' to a type with weaker alignment constraints. > > Why does 'mem' need to have type 'sse_union *'? Why can't it just be > declared as 'uint8_t *'? Huh, I see. I'll just delete sse_union then and use uint32_t instead. > Ewwww. That's likely because emulator.c does: > #define memset __builtin_memset > As evidenced by this issue, using the compiler's memset() in kvm-unit-tests seems > inherently dangerous since the tests are often doing intentionally stupid things. I'll make a separate patch to remove this from emulator.c On Thu, May 6, 2021 at 1:11 PM Jim Mattson <jmattson@google.com> wrote: > > On Thu, May 6, 2021 at 12:14 PM Jacob Xu <jacobhxu@google.com> wrote: > > > > > memset() takes a void *, which it casts to an char, i.e. it works on one byte at > > a time. > > Huh, TIL. Based on this I'd thought that I don't need a cast at all, > > but doing so actually results in a movaps instruction. > > I've changed the cast back to (uint8_t *). > > I'm pretty sure you're just getting lucky. If 'mem' is not 16-byte > aligned, the behavior of the code is undefined. The compiler does not > have to discard what it can infer about the alignment just because you > cast 'mem' to a type with weaker alignment constraints. > > Why does 'mem' need to have type 'sse_union *'? Why can't it just be > declared as 'uint8_t *'? Just add a "memory" clobbers to the inline > asm statements that use 'mem' as an SSE operand. > > Of course, passing it as an argument to sseeq() also implies 16-byte > alignment. Perhaps sseeq should take uint32_t pointers as arguments > rather than sse_union pointers. I'm not convinced that the sse_union > buys us anything other than trouble.
diff --git a/x86/emulator.c b/x86/emulator.c index 9705073..a2c7e5b 100644 --- a/x86/emulator.c +++ b/x86/emulator.c @@ -716,12 +716,12 @@ static __attribute__((target("sse2"))) void test_sse_exceptions(void *cross_mem) // test unaligned access for movups, movupd and movaps v.u[0] = 1; v.u[1] = 2; v.u[2] = 3; v.u[3] = 4; - mem->u[0] = 5; mem->u[1] = 6; mem->u[2] = 7; mem->u[3] = 8; + memset((uint32_t *)mem, 0xdecafbad, sizeof(mem)); asm("movups %1, %0" : "=m"(*mem) : "x"(v.sse)); report(sseeq(&v, mem), "movups unaligned"); v.u[0] = 1; v.u[1] = 2; v.u[2] = 3; v.u[3] = 4; - mem->u[0] = 5; mem->u[1] = 6; mem->u[2] = 7; mem->u[3] = 8; + memset((uint32_t *)mem, 0xdecafbad, sizeof(mem)); asm("movupd %1, %0" : "=m"(*mem) : "x"(v.sse)); report(sseeq(&v, mem), "movupd unaligned"); exceptions = 0; @@ -734,7 +734,7 @@ static __attribute__((target("sse2"))) void test_sse_exceptions(void *cross_mem) // setup memory for cross page access mem = (sse_union *)(&bytes[4096-8]); v.u[0] = 1; v.u[1] = 2; v.u[2] = 3; v.u[3] = 4; - mem->u[0] = 5; mem->u[1] = 6; mem->u[2] = 7; mem->u[3] = 8; + memset((uint32_t *)mem, 0xdecafbad, sizeof(mem)); asm("movups %1, %0" : "=m"(*mem) : "x"(v.sse)); report(sseeq(&v, mem), "movups unaligned crosspage");
When compiled with clang, the following statements get converted into a movaps instruction: mem->u[0] = 5; mem->u[1] = 6; mem->u[2] = 7; mem->u[3] = 8; Since mem is an unaligned pointer to an sse_union, we get a #GP when running. All we want is to make the values at mem and in v differ for this testcase, so let's just memset the memory at mem, casting the pointer to uint32_t *. Then the compiler will not assume the pointer is aligned to 128 bits. Fixes: e5e76263b5 ("x86: add additional test cases for sse exceptions to emulator.c") Signed-off-by: Jacob Xu <jacobhxu@google.com> --- x86/emulator.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)