Message ID | 20240619154530.163232-34-iii@linux.ibm.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | kmsan: Enable on s390 | expand |
On Wed, Jun 19, 2024 at 5:45 PM Ilya Leoshkevich <iii@linux.ibm.com> wrote: > > put_user() uses inline assembly with precise constraints, so Clang is > in principle capable of instrumenting it automatically. Unfortunately, > one of the constraints contains a dereferenced user pointer, and Clang > does not currently distinguish user and kernel pointers. Therefore > KMSAN attempts to access shadow for user pointers, which is not a right > thing to do. By the way, how does this problem manifest? I was expecting KMSAN to generate dummy shadow accesses in this case, and reading/writing 1-8 bytes from dummy shadow shouldn't be a problem. (On the other hand, not inlining the get_user/put_user functions is probably still faster than retrieving the dummy shadow, so I'm fine either way) > > An obvious fix to add __no_sanitize_memory to __put_user_fn() does not > work, since it's __always_inline. And __always_inline cannot be removed > due to the __put_user_bad() trick. > > A different obvious fix of using the "a" instead of the "+Q" constraint > degrades the code quality, which is very important here, since it's a > hot path. > > Instead, repurpose the __put_user_asm() macro to define > __put_user_{char,short,int,long}_noinstr() functions and mark them with > __no_sanitize_memory. For the non-KMSAN builds make them > __always_inline in order to keep the generated code quality. Also > define __put_user_{char,short,int,long}() functions, which call the > aforementioned ones and which *are* instrumented, because they call > KMSAN hooks, which may be implemented as macros. > > The same applies to get_user() as well. > > Acked-by: Heiko Carstens <hca@linux.ibm.com> > Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com> > --- > arch/s390/include/asm/uaccess.h | 111 +++++++++++++++++++++++--------- > 1 file changed, 79 insertions(+), 32 deletions(-) > > diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h > index 81ae8a98e7ec..70f0edc00c2a 100644 > --- a/arch/s390/include/asm/uaccess.h > +++ b/arch/s390/include/asm/uaccess.h > @@ -78,13 +78,24 @@ union oac { > > int __noreturn __put_user_bad(void); > > -#define __put_user_asm(to, from, size) \ > -({ \ > +#ifdef CONFIG_KMSAN > +#define get_put_user_noinstr_attributes \ > + noinline __maybe_unused __no_sanitize_memory > +#else > +#define get_put_user_noinstr_attributes __always_inline > +#endif > + > +#define DEFINE_PUT_USER(type) \ > +static get_put_user_noinstr_attributes int \ > +__put_user_##type##_noinstr(unsigned type __user *to, \ > + unsigned type *from, \ > + unsigned long size) \ > +{ \ > union oac __oac_spec = { \ > .oac1.as = PSW_BITS_AS_SECONDARY, \ > .oac1.a = 1, \ > }; \ > - int __rc; \ > + int rc; \ > \ > asm volatile( \ > " lr 0,%[spec]\n" \ > @@ -93,12 +104,28 @@ int __noreturn __put_user_bad(void); > "2:\n" \ > EX_TABLE_UA_STORE(0b, 2b, %[rc]) \ > EX_TABLE_UA_STORE(1b, 2b, %[rc]) \ > - : [rc] "=&d" (__rc), [_to] "+Q" (*(to)) \ > + : [rc] "=&d" (rc), [_to] "+Q" (*(to)) \ > : [_size] "d" (size), [_from] "Q" (*(from)), \ > [spec] "d" (__oac_spec.val) \ > : "cc", "0"); \ > - __rc; \ > -}) > + return rc; \ > +} \ > + \ > +static __always_inline int \ > +__put_user_##type(unsigned type __user *to, unsigned type *from, \ > + unsigned long size) \ > +{ \ > + int rc; \ > + \ > + rc = __put_user_##type##_noinstr(to, from, size); \ > + instrument_put_user(*from, to, size); \ > + return rc; \ > +} > + > +DEFINE_PUT_USER(char); > +DEFINE_PUT_USER(short); > +DEFINE_PUT_USER(int); > +DEFINE_PUT_USER(long); > > static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) > { > @@ -106,24 +133,24 @@ static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned lon > > switch (size) { > case 1: > - rc = __put_user_asm((unsigned char __user *)ptr, > - (unsigned char *)x, > - size); > + rc = __put_user_char((unsigned char __user *)ptr, > + (unsigned char *)x, > + size); > break; > case 2: > - rc = __put_user_asm((unsigned short __user *)ptr, > - (unsigned short *)x, > - size); > + rc = __put_user_short((unsigned short __user *)ptr, > + (unsigned short *)x, > + size); > break; > case 4: > - rc = __put_user_asm((unsigned int __user *)ptr, > + rc = __put_user_int((unsigned int __user *)ptr, > (unsigned int *)x, > size); > break; > case 8: > - rc = __put_user_asm((unsigned long __user *)ptr, > - (unsigned long *)x, > - size); > + rc = __put_user_long((unsigned long __user *)ptr, > + (unsigned long *)x, > + size); > break; > default: > __put_user_bad(); > @@ -134,13 +161,17 @@ static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned lon > > int __noreturn __get_user_bad(void); > > -#define __get_user_asm(to, from, size) \ > -({ \ > +#define DEFINE_GET_USER(type) \ > +static get_put_user_noinstr_attributes int \ > +__get_user_##type##_noinstr(unsigned type *to, \ > + unsigned type __user *from, \ > + unsigned long size) \ > +{ \ > union oac __oac_spec = { \ > .oac2.as = PSW_BITS_AS_SECONDARY, \ > .oac2.a = 1, \ > }; \ > - int __rc; \ > + int rc; \ > \ > asm volatile( \ > " lr 0,%[spec]\n" \ > @@ -149,13 +180,29 @@ int __noreturn __get_user_bad(void); > "2:\n" \ > EX_TABLE_UA_LOAD_MEM(0b, 2b, %[rc], %[_to], %[_ksize]) \ > EX_TABLE_UA_LOAD_MEM(1b, 2b, %[rc], %[_to], %[_ksize]) \ > - : [rc] "=&d" (__rc), "=Q" (*(to)) \ > + : [rc] "=&d" (rc), "=Q" (*(to)) \ > : [_size] "d" (size), [_from] "Q" (*(from)), \ > [spec] "d" (__oac_spec.val), [_to] "a" (to), \ > [_ksize] "K" (size) \ > : "cc", "0"); \ > - __rc; \ > -}) > + return rc; \ > +} \ > + \ > +static __always_inline int \ > +__get_user_##type(unsigned type *to, unsigned type __user *from, \ > + unsigned long size) \ > +{ \ > + int rc; \ > + \ > + rc = __get_user_##type##_noinstr(to, from, size); \ > + instrument_get_user(*to); \ > + return rc; \ > +} > + > +DEFINE_GET_USER(char); > +DEFINE_GET_USER(short); > +DEFINE_GET_USER(int); > +DEFINE_GET_USER(long); > > static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) > { > @@ -163,24 +210,24 @@ static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsign > > switch (size) { > case 1: > - rc = __get_user_asm((unsigned char *)x, > - (unsigned char __user *)ptr, > - size); > + rc = __get_user_char((unsigned char *)x, > + (unsigned char __user *)ptr, > + size); > break; > case 2: > - rc = __get_user_asm((unsigned short *)x, > - (unsigned short __user *)ptr, > - size); > + rc = __get_user_short((unsigned short *)x, > + (unsigned short __user *)ptr, > + size); > break; > case 4: > - rc = __get_user_asm((unsigned int *)x, > + rc = __get_user_int((unsigned int *)x, > (unsigned int __user *)ptr, > size); > break; > case 8: > - rc = __get_user_asm((unsigned long *)x, > - (unsigned long __user *)ptr, > - size); > + rc = __get_user_long((unsigned long *)x, > + (unsigned long __user *)ptr, > + size); > break; > default: > __get_user_bad(); > -- > 2.45.1 > > -- > You received this message because you are subscribed to the Google Groups "kasan-dev" group. > To unsubscribe from this group and stop receiving emails from it, send an email to kasan-dev+unsubscribe@googlegroups.com. > To view this discussion on the web visit https://groups.google.com/d/msgid/kasan-dev/20240619154530.163232-34-iii%40linux.ibm.com.
On Thu, Jun 20, 2024 at 1:19 PM Ilya Leoshkevich <iii@linux.ibm.com> wrote: > > On Thu, 2024-06-20 at 10:36 +0200, Alexander Potapenko wrote: > > On Wed, Jun 19, 2024 at 5:45 PM Ilya Leoshkevich <iii@linux.ibm.com> > > wrote: > > > > > > put_user() uses inline assembly with precise constraints, so Clang > > > is > > > in principle capable of instrumenting it automatically. > > > Unfortunately, > > > one of the constraints contains a dereferenced user pointer, and > > > Clang > > > does not currently distinguish user and kernel pointers. Therefore > > > KMSAN attempts to access shadow for user pointers, which is not a > > > right > > > thing to do. > > > > By the way, how does this problem manifest? > > I was expecting KMSAN to generate dummy shadow accesses in this case, > > and reading/writing 1-8 bytes from dummy shadow shouldn't be a > > problem. > > > > (On the other hand, not inlining the get_user/put_user functions is > > probably still faster than retrieving the dummy shadow, so I'm fine > > either way) > > We have two problems here: not only clang can't distinguish user and > kernel pointers, the KMSAN runtime - which is supposed to clean that > up - can't do that either due to overlapping kernel and user address > spaces on s390. So the instrumentation ultimately tries to access the > real shadow. > > I forgot what the consequences of that were exactly, so I reverted the > patch and now I get: > > Unable to handle kernel pointer dereference in virtual kernel address > space > Failing address: 000003fed25fa000 TEID: 000003fed25fa403 > Fault in home space mode while using kernel ASCE. > AS:0000000005a70007 R3:00000000824d8007 S:0000000000000020 > Oops: 0010 ilc:2 [#1] SMP > Modules linked in: > CPU: 3 PID: 1 Comm: init Tainted: G B N 6.10.0-rc4- > g8aadb00f495e #11 > Hardware name: IBM 3931 A01 704 (KVM/Linux) > Krnl PSW : 0704c00180000000 000003ffe288975a (memset+0x3a/0xa0) > R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 > Krnl GPRS: 0000000000000000 000003fed25fa180 000003fed25fa180 > 000003ffe28897a6 > 0000000000000007 000003ffe0000000 0000000000000000 > 000002ee06e68190 > 000002ee06f19000 000003fed25fa180 000003ffd25fa180 > 000003ffd25fa180 > 0000000000000008 0000000000000000 000003ffe17262e0 > 0000037ee000f730 > Krnl Code: 000003ffe288974c: 41101100 la %r1,256(%r1) > 000003ffe2889750: a737fffb brctg > %r3,000003ffe2889746 > #000003ffe2889754: c03000000029 larl > %r3,000003ffe28897a6 > >000003ffe288975a: 44403000 ex %r4,0(%r3) > 000003ffe288975e: 07fe bcr 15,%r14 > 000003ffe2889760: a74f0001 cghi %r4,1 > 000003ffe2889764: b9040012 lgr %r1,%r2 > 000003ffe2889768: a784001c brc > 8,000003ffe28897a0 > Call Trace: > [<000003ffe288975a>] memset+0x3a/0xa0 > ([<000003ffe17262bc>] kmsan_internal_set_shadow_origin+0x21c/0x3a0) > [<000003ffe1725fb6>] kmsan_internal_unpoison_memory+0x26/0x30 > [<000003ffe1c1c646>] create_elf_tables+0x13c6/0x2620 > [<000003ffe1c0ebaa>] load_elf_binary+0x50da/0x68f0 > [<000003ffe18c41fc>] bprm_execve+0x201c/0x2f40 > [<000003ffe18bff9a>] kernel_execve+0x2cda/0x2d00 > [<000003ffe49b745a>] kernel_init+0x9ba/0x1630 > [<000003ffe000cd5c>] __ret_from_fork+0xbc/0x180 > [<000003ffe4a1907a>] ret_from_fork+0xa/0x30 > Last Breaking-Event-Address: > [<000003ffe2889742>] memset+0x22/0xa0 > Kernel panic - not syncing: Fatal exception: panic_on_oops > > So is_bad_asm_addr() returned false for a userspace address. > Why? Because it happened to collide with the kernel modules area: > precisely the effect of overlapping. > > VMALLOC_START: 0x37ee0000000 > VMALLOC_END: 0x3a960000000 > MODULES_VADDR: 0x3ff60000000 > Address: 0x3ffd157a580 > MODULES_END: 0x3ffe0000000 I see, thanks for the clarification! > Now the question is, why do we crash when accessing shadow for modules? > I'll need to investigate, this does not look normal. But even if that > worked, we clearly wouldn't want userspace accesses to pollute module > shadow, so I think we need this patch in its current form. Ok, it indeed makes sense. Reviewed-by: Alexander Potapenko <glider@google.com>
On Thu, 2024-06-20 at 13:19 +0200, Ilya Leoshkevich wrote: > On Thu, 2024-06-20 at 10:36 +0200, Alexander Potapenko wrote: > > On Wed, Jun 19, 2024 at 5:45 PM Ilya Leoshkevich > > <iii@linux.ibm.com> > > wrote: > > > > > > put_user() uses inline assembly with precise constraints, so > > > Clang > > > is > > > in principle capable of instrumenting it automatically. > > > Unfortunately, > > > one of the constraints contains a dereferenced user pointer, and > > > Clang > > > does not currently distinguish user and kernel pointers. > > > Therefore > > > KMSAN attempts to access shadow for user pointers, which is not a > > > right > > > thing to do. > > > > By the way, how does this problem manifest? > > I was expecting KMSAN to generate dummy shadow accesses in this > > case, > > and reading/writing 1-8 bytes from dummy shadow shouldn't be a > > problem. > > > > (On the other hand, not inlining the get_user/put_user functions is > > probably still faster than retrieving the dummy shadow, so I'm fine > > either way) > > We have two problems here: not only clang can't distinguish user and > kernel pointers, the KMSAN runtime - which is supposed to clean that > up - can't do that either due to overlapping kernel and user address > spaces on s390. So the instrumentation ultimately tries to access the > real shadow. > > I forgot what the consequences of that were exactly, so I reverted > the > patch and now I get: > > Unable to handle kernel pointer dereference in virtual kernel address > space > Failing address: 000003fed25fa000 TEID: 000003fed25fa403 > Fault in home space mode while using kernel ASCE. > AS:0000000005a70007 R3:00000000824d8007 S:0000000000000020 > Oops: 0010 ilc:2 [#1] SMP > Modules linked in: > CPU: 3 PID: 1 Comm: init Tainted: G B N 6.10.0-rc4- > g8aadb00f495e #11 > Hardware name: IBM 3931 A01 704 (KVM/Linux) > Krnl PSW : 0704c00180000000 000003ffe288975a (memset+0x3a/0xa0) > R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 > EA:3 > Krnl GPRS: 0000000000000000 000003fed25fa180 000003fed25fa180 > 000003ffe28897a6 > 0000000000000007 000003ffe0000000 0000000000000000 > 000002ee06e68190 > 000002ee06f19000 000003fed25fa180 000003ffd25fa180 > 000003ffd25fa180 > 0000000000000008 0000000000000000 000003ffe17262e0 > 0000037ee000f730 > Krnl Code: 000003ffe288974c: 41101100 la %r1,256(%r1) > 000003ffe2889750: a737fffb brctg > %r3,000003ffe2889746 > #000003ffe2889754: c03000000029 larl > %r3,000003ffe28897a6 > >000003ffe288975a: 44403000 ex %r4,0(%r3) > 000003ffe288975e: 07fe bcr 15,%r14 > 000003ffe2889760: a74f0001 cghi %r4,1 > 000003ffe2889764: b9040012 lgr %r1,%r2 > 000003ffe2889768: a784001c brc > 8,000003ffe28897a0 > Call Trace: > [<000003ffe288975a>] memset+0x3a/0xa0 > ([<000003ffe17262bc>] kmsan_internal_set_shadow_origin+0x21c/0x3a0) > [<000003ffe1725fb6>] kmsan_internal_unpoison_memory+0x26/0x30 > [<000003ffe1c1c646>] create_elf_tables+0x13c6/0x2620 > [<000003ffe1c0ebaa>] load_elf_binary+0x50da/0x68f0 > [<000003ffe18c41fc>] bprm_execve+0x201c/0x2f40 > [<000003ffe18bff9a>] kernel_execve+0x2cda/0x2d00 > [<000003ffe49b745a>] kernel_init+0x9ba/0x1630 > [<000003ffe000cd5c>] __ret_from_fork+0xbc/0x180 > [<000003ffe4a1907a>] ret_from_fork+0xa/0x30 > Last Breaking-Event-Address: > [<000003ffe2889742>] memset+0x22/0xa0 > Kernel panic - not syncing: Fatal exception: panic_on_oops > > So is_bad_asm_addr() returned false for a userspace address. > Why? Because it happened to collide with the kernel modules area: > precisely the effect of overlapping. > > VMALLOC_START: 0x37ee0000000 > VMALLOC_END: 0x3a960000000 > MODULES_VADDR: 0x3ff60000000 > Address: 0x3ffd157a580 > MODULES_END: 0x3ffe0000000 > > Now the question is, why do we crash when accessing shadow for > modules? So, Alexander G. and I have figured it out. KMSAN maps vmalloc/modules metadata lazily - when the corresponding memory is allocated. Here we have a completely random address that did not come from a prior vmalloc()/execmem_alloc(), so the corresponding metadata pages are missing. We could probably detect this situation and perform the lazy initialization in this case as well, but I don't know if it's worth the effort. > I'll need to investigate, this does not look normal. But even if that > worked, we clearly wouldn't want userspace accesses to pollute module > shadow, so I think we need this patch in its current form. > > [...]
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 81ae8a98e7ec..70f0edc00c2a 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -78,13 +78,24 @@ union oac { int __noreturn __put_user_bad(void); -#define __put_user_asm(to, from, size) \ -({ \ +#ifdef CONFIG_KMSAN +#define get_put_user_noinstr_attributes \ + noinline __maybe_unused __no_sanitize_memory +#else +#define get_put_user_noinstr_attributes __always_inline +#endif + +#define DEFINE_PUT_USER(type) \ +static get_put_user_noinstr_attributes int \ +__put_user_##type##_noinstr(unsigned type __user *to, \ + unsigned type *from, \ + unsigned long size) \ +{ \ union oac __oac_spec = { \ .oac1.as = PSW_BITS_AS_SECONDARY, \ .oac1.a = 1, \ }; \ - int __rc; \ + int rc; \ \ asm volatile( \ " lr 0,%[spec]\n" \ @@ -93,12 +104,28 @@ int __noreturn __put_user_bad(void); "2:\n" \ EX_TABLE_UA_STORE(0b, 2b, %[rc]) \ EX_TABLE_UA_STORE(1b, 2b, %[rc]) \ - : [rc] "=&d" (__rc), [_to] "+Q" (*(to)) \ + : [rc] "=&d" (rc), [_to] "+Q" (*(to)) \ : [_size] "d" (size), [_from] "Q" (*(from)), \ [spec] "d" (__oac_spec.val) \ : "cc", "0"); \ - __rc; \ -}) + return rc; \ +} \ + \ +static __always_inline int \ +__put_user_##type(unsigned type __user *to, unsigned type *from, \ + unsigned long size) \ +{ \ + int rc; \ + \ + rc = __put_user_##type##_noinstr(to, from, size); \ + instrument_put_user(*from, to, size); \ + return rc; \ +} + +DEFINE_PUT_USER(char); +DEFINE_PUT_USER(short); +DEFINE_PUT_USER(int); +DEFINE_PUT_USER(long); static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) { @@ -106,24 +133,24 @@ static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned lon switch (size) { case 1: - rc = __put_user_asm((unsigned char __user *)ptr, - (unsigned char *)x, - size); + rc = __put_user_char((unsigned char __user *)ptr, + (unsigned char *)x, + size); break; case 2: - rc = __put_user_asm((unsigned short __user *)ptr, - (unsigned short *)x, - size); + rc = __put_user_short((unsigned short __user *)ptr, + (unsigned short *)x, + size); break; case 4: - rc = __put_user_asm((unsigned int __user *)ptr, + rc = __put_user_int((unsigned int __user *)ptr, (unsigned int *)x, size); break; case 8: - rc = __put_user_asm((unsigned long __user *)ptr, - (unsigned long *)x, - size); + rc = __put_user_long((unsigned long __user *)ptr, + (unsigned long *)x, + size); break; default: __put_user_bad(); @@ -134,13 +161,17 @@ static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned lon int __noreturn __get_user_bad(void); -#define __get_user_asm(to, from, size) \ -({ \ +#define DEFINE_GET_USER(type) \ +static get_put_user_noinstr_attributes int \ +__get_user_##type##_noinstr(unsigned type *to, \ + unsigned type __user *from, \ + unsigned long size) \ +{ \ union oac __oac_spec = { \ .oac2.as = PSW_BITS_AS_SECONDARY, \ .oac2.a = 1, \ }; \ - int __rc; \ + int rc; \ \ asm volatile( \ " lr 0,%[spec]\n" \ @@ -149,13 +180,29 @@ int __noreturn __get_user_bad(void); "2:\n" \ EX_TABLE_UA_LOAD_MEM(0b, 2b, %[rc], %[_to], %[_ksize]) \ EX_TABLE_UA_LOAD_MEM(1b, 2b, %[rc], %[_to], %[_ksize]) \ - : [rc] "=&d" (__rc), "=Q" (*(to)) \ + : [rc] "=&d" (rc), "=Q" (*(to)) \ : [_size] "d" (size), [_from] "Q" (*(from)), \ [spec] "d" (__oac_spec.val), [_to] "a" (to), \ [_ksize] "K" (size) \ : "cc", "0"); \ - __rc; \ -}) + return rc; \ +} \ + \ +static __always_inline int \ +__get_user_##type(unsigned type *to, unsigned type __user *from, \ + unsigned long size) \ +{ \ + int rc; \ + \ + rc = __get_user_##type##_noinstr(to, from, size); \ + instrument_get_user(*to); \ + return rc; \ +} + +DEFINE_GET_USER(char); +DEFINE_GET_USER(short); +DEFINE_GET_USER(int); +DEFINE_GET_USER(long); static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) { @@ -163,24 +210,24 @@ static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsign switch (size) { case 1: - rc = __get_user_asm((unsigned char *)x, - (unsigned char __user *)ptr, - size); + rc = __get_user_char((unsigned char *)x, + (unsigned char __user *)ptr, + size); break; case 2: - rc = __get_user_asm((unsigned short *)x, - (unsigned short __user *)ptr, - size); + rc = __get_user_short((unsigned short *)x, + (unsigned short __user *)ptr, + size); break; case 4: - rc = __get_user_asm((unsigned int *)x, + rc = __get_user_int((unsigned int *)x, (unsigned int __user *)ptr, size); break; case 8: - rc = __get_user_asm((unsigned long *)x, - (unsigned long __user *)ptr, - size); + rc = __get_user_long((unsigned long *)x, + (unsigned long __user *)ptr, + size); break; default: __get_user_bad();