Message ID | 20220826150807.723137-5-glider@google.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Add KernelMemorySanitizer infrastructure | expand |
On Fri, 26 Aug 2022 17:07:27 +0200 Alexander Potapenko <glider@google.com> wrote: > Use hooks from instrumented.h to notify bug detection tools about > usercopy events in variations of get_user() and put_user(). And this one blows up x86_64 allmodconfig builds. > --- a/arch/x86/include/asm/uaccess.h > +++ b/arch/x86/include/asm/uaccess.h > @@ -5,6 +5,7 @@ > * User space memory access functions > */ > #include <linux/compiler.h> > +#include <linux/instrumented.h> > #include <linux/kasan-checks.h> > #include <linux/string.h> > #include <asm/asm.h> instrumented.h looks like a higher-level thing than uaccess.h, so this inclusion is an inappropriate layering. Or maybe not. In file included from ./include/linux/kernel.h:22, from ./arch/x86/include/asm/percpu.h:27, from ./arch/x86/include/asm/nospec-branch.h:14, from ./arch/x86/include/asm/paravirt_types.h:40, from ./arch/x86/include/asm/ptrace.h:97, from ./arch/x86/include/asm/math_emu.h:5, from ./arch/x86/include/asm/processor.h:13, from ./arch/x86/include/asm/timex.h:5, from ./include/linux/timex.h:67, from ./include/linux/time32.h:13, from ./include/linux/time.h:60, from ./include/linux/stat.h:19, from ./include/linux/module.h:13, from init/do_mounts.c:2: ./include/linux/page-flags.h: In function 'page_fixed_fake_head': ./include/linux/page-flags.h:226:36: error: invalid use of undefined type 'const struct page' 226 | test_bit(PG_head, &page->flags)) { | ^~ [25000 lines snipped] And kmsan-add-kmsan-runtime-core.patch introduces additional build errors with x86_64 allmodconfig. This is all with CONFIG_KMSAN=n I'll disable the patch series. Please do much more compilation testing - multiple architectures, allnoconfig, allmodconfig, allyesconfig, defconfig, randconfig, etc. Good luck, it looks ugly :(
On Sat, Aug 27, 2022 at 6:17 AM Andrew Morton <akpm@linux-foundation.org> wrote: > > On Fri, 26 Aug 2022 17:07:27 +0200 Alexander Potapenko <glider@google.com> wrote: > > > Use hooks from instrumented.h to notify bug detection tools about > > usercopy events in variations of get_user() and put_user(). > > And this one blows up x86_64 allmodconfig builds. How do I reproduce this? I tried running `make mrproper; make allmodconfig; make -j64` (or allyesconfig, allnoconfig) on both KMSAN tree (https://github.com/google/kmsan/commit/ac3859c02d7f40f59992737d63afcacda0a972ec, which is Linux v6.0-rc2 plus the 44 KMSAN patches) and linux-mm/mm-stable @ec6624452e36158d0813758d837f7a2263a4109d with KMSAN patches applied on top of it. All builds were successful. I then tried to cherry-pick just the first 4 commits to mm-stable and see if allmodconfig works - it resulted in numerous "implicit declaration of function ‘instrument_get_user’" errors (quite silly of me), but nothing looking like the errors you posted. I'll try to build-test every patch in the series after fixing the missing declarations, but so far I don't see other problems. Could you share the mmotm commit id which resulted in the failures? > > --- a/arch/x86/include/asm/uaccess.h > > +++ b/arch/x86/include/asm/uaccess.h > > @@ -5,6 +5,7 @@ > > * User space memory access functions > > */ > > #include <linux/compiler.h> > > +#include <linux/instrumented.h> > > #include <linux/kasan-checks.h> > > #include <linux/string.h> > > #include <asm/asm.h> > > instrumented.h looks like a higher-level thing than uaccess.h, so this > inclusion is an inappropriate layering. Or maybe not. > > In file included from ./include/linux/kernel.h:22, > from ./arch/x86/include/asm/percpu.h:27, > from ./arch/x86/include/asm/nospec-branch.h:14, > from ./arch/x86/include/asm/paravirt_types.h:40, > from ./arch/x86/include/asm/ptrace.h:97, > from ./arch/x86/include/asm/math_emu.h:5, > from ./arch/x86/include/asm/processor.h:13, > from ./arch/x86/include/asm/timex.h:5, > from ./include/linux/timex.h:67, > from ./include/linux/time32.h:13, > from ./include/linux/time.h:60, > from ./include/linux/stat.h:19, > from ./include/linux/module.h:13, > from init/do_mounts.c:2: > ./include/linux/page-flags.h: In function 'page_fixed_fake_head': > ./include/linux/page-flags.h:226:36: error: invalid use of undefined type 'const struct page' > 226 | test_bit(PG_head, &page->flags)) { > | ^~ > > [25000 lines snipped] > > > And kmsan-add-kmsan-runtime-core.patch introduces additional build > errors with x86_64 allmodconfig. > > This is all with CONFIG_KMSAN=n > > I'll disable the patch series. Please do much more compilation testing > - multiple architectures, allnoconfig, allmodconfig, allyesconfig, > defconfig, randconfig, etc. Good luck, it looks ugly :( > -- Alexander Potapenko Software Engineer Google Germany GmbH Erika-Mann-Straße, 33 80636 München Geschäftsführer: Paul Manicle, Liana Sebastian Registergericht und -nummer: Hamburg, HRB 86891 Sitz der Gesellschaft: Hamburg
On Mon, 29 Aug 2022 16:57:31 +0200 Alexander Potapenko <glider@google.com> wrote: > On Sat, Aug 27, 2022 at 6:17 AM Andrew Morton <akpm@linux-foundation.org> wrote: > > > > On Fri, 26 Aug 2022 17:07:27 +0200 Alexander Potapenko <glider@google.com> wrote: > > > > > Use hooks from instrumented.h to notify bug detection tools about > > > usercopy events in variations of get_user() and put_user(). > > > > And this one blows up x86_64 allmodconfig builds. > > How do I reproduce this? > I tried running `make mrproper; make allmodconfig; make -j64` (or > allyesconfig, allnoconfig) on both KMSAN tree > (https://github.com/google/kmsan/commit/ac3859c02d7f40f59992737d63afcacda0a972ec, > which is Linux v6.0-rc2 plus the 44 KMSAN patches) and > linux-mm/mm-stable @ec6624452e36158d0813758d837f7a2263a4109d with > KMSAN patches applied on top of it. > All builds were successful. > > I then tried to cherry-pick just the first 4 commits to mm-stable and > see if allmodconfig works - it resulted in numerous "implicit > declaration of function ‘instrument_get_user’" errors (quite silly of > me), but nothing looking like the errors you posted. > I'll try to build-test every patch in the series after fixing the > missing declarations, but so far I don't see other problems. > > Could you share the mmotm commit id which resulted in the failures? I just pushed out a tree which exhibits this with gcc-12.1.1 and with gcc-11.1.0. Tag is mm-everything-2022-08-29-19-17. The problem is introduced by d0d9a44d2210 ("kmsan: add KMSAN runtime core") make mrproper make allmodconfig make init/do_mounts.o In file included from ./include/linux/kernel.h:22, from ./arch/x86/include/asm/percpu.h:27, from ./arch/x86/include/asm/nospec-branch.h:14, from ./arch/x86/include/asm/paravirt_types.h:40, from ./arch/x86/include/asm/ptrace.h:97, from ./arch/x86/include/asm/math_emu.h:5, from ./arch/x86/include/asm/processor.h:13, from ./arch/x86/include/asm/timex.h:5, from ./include/linux/timex.h:67, from ./include/linux/time32.h:13, from ./include/linux/time.h:60, from ./include/linux/stat.h:19, from ./include/linux/module.h:13, from init/do_mounts.c:2: ./include/linux/page-flags.h: In function ‘page_fixed_fake_head’: ./include/linux/page-flags.h:226:36: error: invalid use of undefined type ‘const struct page’ 226 | test_bit(PG_head, &page->flags)) { | ^~ ./include/linux/bitops.h:50:44: note: in definition of macro ‘bitop’ 50 | __builtin_constant_p((uintptr_t)(addr) != (uintptr_t)NULL) && \ | ^~~~ ./include/linux/page-flags.h:226:13: note: in expansion of macro ‘test_bit’ 226 | test_bit(PG_head, &page->flags)) { | ^~~~~~~~ ...
On Mon, Aug 29, 2022 at 9:24 PM Andrew Morton <akpm@linux-foundation.org> wrote: > > On Mon, 29 Aug 2022 16:57:31 +0200 Alexander Potapenko <glider@google.com> wrote: > > > On Sat, Aug 27, 2022 at 6:17 AM Andrew Morton <akpm@linux-foundation.org> wrote: > > > > > > On Fri, 26 Aug 2022 17:07:27 +0200 Alexander Potapenko <glider@google.com> wrote: > > > > > > > Use hooks from instrumented.h to notify bug detection tools about > > > > usercopy events in variations of get_user() and put_user(). > > > > > > And this one blows up x86_64 allmodconfig builds. > > > > How do I reproduce this? > > I tried running `make mrproper; make allmodconfig; make -j64` (or > > allyesconfig, allnoconfig) on both KMSAN tree > > (https://github.com/google/kmsan/commit/ac3859c02d7f40f59992737d63afcacda0a972ec, > > which is Linux v6.0-rc2 plus the 44 KMSAN patches) and > > linux-mm/mm-stable @ec6624452e36158d0813758d837f7a2263a4109d with > > KMSAN patches applied on top of it. > > All builds were successful. > > > > I then tried to cherry-pick just the first 4 commits to mm-stable and > > see if allmodconfig works - it resulted in numerous "implicit > > declaration of function ‘instrument_get_user’" errors (quite silly of > > me), but nothing looking like the errors you posted. > > I'll try to build-test every patch in the series after fixing the > > missing declarations, but so far I don't see other problems. > > > > Could you share the mmotm commit id which resulted in the failures? > > I just pushed out a tree which exhibits this with gcc-12.1.1 and with > gcc-11.1.0. Tag is mm-everything-2022-08-29-19-17. > > The problem is introduced by d0d9a44d2210 ("kmsan: add KMSAN runtime core") > > make mrproper > make allmodconfig > make init/do_mounts.o > > In file included from ./include/linux/kernel.h:22, > from ./arch/x86/include/asm/percpu.h:27, > from ./arch/x86/include/asm/nospec-branch.h:14, > from ./arch/x86/include/asm/paravirt_types.h:40, > from ./arch/x86/include/asm/ptrace.h:97, > from ./arch/x86/include/asm/math_emu.h:5, > from ./arch/x86/include/asm/processor.h:13, > from ./arch/x86/include/asm/timex.h:5, > from ./include/linux/timex.h:67, > from ./include/linux/time32.h:13, > from ./include/linux/time.h:60, > from ./include/linux/stat.h:19, > from ./include/linux/module.h:13, > from init/do_mounts.c:2: > ./include/linux/page-flags.h: In function ‘page_fixed_fake_head’: > ./include/linux/page-flags.h:226:36: error: invalid use of undefined type ‘const struct page’ > 226 | test_bit(PG_head, &page->flags)) { > | ^~ > ./include/linux/bitops.h:50:44: note: in definition of macro ‘bitop’ > 50 | __builtin_constant_p((uintptr_t)(addr) != (uintptr_t)NULL) && \ > | ^~~~ > ./include/linux/page-flags.h:226:13: note: in expansion of macro ‘test_bit’ > 226 | test_bit(PG_head, &page->flags)) { > | ^~~~~~~~ > ... Gotcha, this is a circular dependency: mm_types.h -> sched.h -> kmsan.h -> gfp.h -> mmzone.h -> page-flags.h -> mm_types.h, where the inclusion of sched.h into mm_types.h was only introduced in "mm: multi-gen LRU: support page table walks" - that's why the problem was missing in other trees. In fact sched.h only needs the definitions of `struct kmsan_context_state` and `struct kmsan_ctx` from kmsan.h, so I am splitting them off into kmsan_types.h to break this circle. Doing so also helped catch a couple of missing/incorrect inclusions of KMSAN headers in subsystems. I'll fix those and do more testing.
Le 26/08/2022 à 17:07, Alexander Potapenko a écrit : > Use hooks from instrumented.h to notify bug detection tools about > usercopy events in variations of get_user() and put_user(). > > Signed-off-by: Alexander Potapenko <glider@google.com> > --- > v5: > -- handle put_user(), make sure to not evaluate pointer/value twice > > Link: https://linux-review.googlesource.com/id/Ia9f12bfe5832623250e20f1859fdf5cc485a2fce > --- > arch/x86/include/asm/uaccess.h | 22 +++++++++++++++------- > 1 file changed, 15 insertions(+), 7 deletions(-) > > diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h > index 913e593a3b45f..c1b8982899eca 100644 > --- a/arch/x86/include/asm/uaccess.h > +++ b/arch/x86/include/asm/uaccess.h > @@ -5,6 +5,7 @@ > * User space memory access functions > */ > #include <linux/compiler.h> > +#include <linux/instrumented.h> > #include <linux/kasan-checks.h> > #include <linux/string.h> > #include <asm/asm.h> > @@ -103,6 +104,7 @@ extern int __get_user_bad(void); > : "=a" (__ret_gu), "=r" (__val_gu), \ > ASM_CALL_CONSTRAINT \ > : "0" (ptr), "i" (sizeof(*(ptr)))); \ > + instrument_get_user(__val_gu); \ Where is that instrument_get_user() defined ? I can't find it neither in v6.0-rc3 nor in linux-next. > (x) = (__force __typeof__(*(ptr))) __val_gu; \ > __builtin_expect(__ret_gu, 0); \ > }) Christophe
On Tue, Aug 30, 2022 at 5:06 PM Christophe Leroy <christophe.leroy@csgroup.eu> wrote: > > > > Le 26/08/2022 à 17:07, Alexander Potapenko a écrit : > > Use hooks from instrumented.h to notify bug detection tools about > > usercopy events in variations of get_user() and put_user(). > > > > Signed-off-by: Alexander Potapenko <glider@google.com> > > --- > > v5: > > -- handle put_user(), make sure to not evaluate pointer/value twice > > > > Link: https://linux-review.googlesource.com/id/Ia9f12bfe5832623250e20f1859fdf5cc485a2fce > > --- > > arch/x86/include/asm/uaccess.h | 22 +++++++++++++++------- > > 1 file changed, 15 insertions(+), 7 deletions(-) > > > > diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h > > index 913e593a3b45f..c1b8982899eca 100644 > > --- a/arch/x86/include/asm/uaccess.h > > +++ b/arch/x86/include/asm/uaccess.h > > @@ -5,6 +5,7 @@ > > * User space memory access functions > > */ > > #include <linux/compiler.h> > > +#include <linux/instrumented.h> > > #include <linux/kasan-checks.h> > > #include <linux/string.h> > > #include <asm/asm.h> > > @@ -103,6 +104,7 @@ extern int __get_user_bad(void); > > : "=a" (__ret_gu), "=r" (__val_gu), \ > > ASM_CALL_CONSTRAINT \ > > : "0" (ptr), "i" (sizeof(*(ptr)))); \ > > + instrument_get_user(__val_gu); \ > > Where is that instrument_get_user() defined ? I can't find it neither in > v6.0-rc3 nor in linux-next. > > > (x) = (__force __typeof__(*(ptr))) __val_gu; \ > > __builtin_expect(__ret_gu, 0); \ > > }) > > Christophe Yeah, as mentioned above, I should've put an empty declaration of it in include/linux/instrumented.h, but failed to. I'll fix this in v6. The "real" implementation of instrument_get_user() will appear in "instrumented.h: add KMSAN support"
On Tue, 30 Aug 2022 16:23:44 +0200 Alexander Potapenko <glider@google.com> wrote: > > from init/do_mounts.c:2: > > ./include/linux/page-flags.h: In function ‘page_fixed_fake_head’: > > ./include/linux/page-flags.h:226:36: error: invalid use of undefined type ‘const struct page’ > > 226 | test_bit(PG_head, &page->flags)) { > > | ^~ > > ./include/linux/bitops.h:50:44: note: in definition of macro ‘bitop’ > > 50 | __builtin_constant_p((uintptr_t)(addr) != (uintptr_t)NULL) && \ > > | ^~~~ > > ./include/linux/page-flags.h:226:13: note: in expansion of macro ‘test_bit’ > > 226 | test_bit(PG_head, &page->flags)) { > > | ^~~~~~~~ > > ... > > Gotcha, this is a circular dependency: mm_types.h -> sched.h -> > kmsan.h -> gfp.h -> mmzone.h -> page-flags.h -> mm_types.h, where the > inclusion of sched.h into mm_types.h was only introduced in "mm: > multi-gen LRU: support page table walks" - that's why the problem was > missing in other trees. Ah, thanks for digging that out. Yu, that inclusion is regrettable. I don't think mm_types.h is an appropriate site for implementing lru_gen_use_mm() anyway. Adding a new header is always the right fix for these things. I'd suggest adding a new mglru.h (or whatever) and putting most/all of the mglru material in there. Also, the addition to kernel/sched/core.c wasn't clearly changelogged, is uncommented and I doubt if the sched developers know about it, let alone reviewed it. Please give them a heads-up. The addition looks fairly benign, but core context_switch() is the sort of thing which people get rather defensive about and putting mm-specific stuff in there might be challenged. Some quantitative justification of this optimization would be appropriate. > In fact sched.h only needs the definitions of `struct > kmsan_context_state` and `struct kmsan_ctx` from kmsan.h, so I am > splitting them off into kmsan_types.h to break this circle. > Doing so also helped catch a couple of missing/incorrect inclusions of > KMSAN headers in subsystems.
On Tue, Aug 30, 2022 at 4:05 PM Andrew Morton <akpm@linux-foundation.org> wrote: > > On Tue, 30 Aug 2022 16:23:44 +0200 Alexander Potapenko <glider@google.com> wrote: > > > > from init/do_mounts.c:2: > > > ./include/linux/page-flags.h: In function ‘page_fixed_fake_head’: > > > ./include/linux/page-flags.h:226:36: error: invalid use of undefined type ‘const struct page’ > > > 226 | test_bit(PG_head, &page->flags)) { > > > | ^~ > > > ./include/linux/bitops.h:50:44: note: in definition of macro ‘bitop’ > > > 50 | __builtin_constant_p((uintptr_t)(addr) != (uintptr_t)NULL) && \ > > > | ^~~~ > > > ./include/linux/page-flags.h:226:13: note: in expansion of macro ‘test_bit’ > > > 226 | test_bit(PG_head, &page->flags)) { > > > | ^~~~~~~~ > > > ... > > > > Gotcha, this is a circular dependency: mm_types.h -> sched.h -> > > kmsan.h -> gfp.h -> mmzone.h -> page-flags.h -> mm_types.h, where the > > inclusion of sched.h into mm_types.h was only introduced in "mm: > > multi-gen LRU: support page table walks" - that's why the problem was > > missing in other trees. > > Ah, thanks for digging that out. > > Yu, that inclusion is regrettable. Sorry for the trouble -- it's also superfluous because we don't call lru_gen_use_mm() when switching to the kernel. I've queued the following for now. --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -3,7 +3,6 @@ #define _LINUX_MM_TYPES_H #include <linux/mm_types_task.h> -#include <linux/sched.h> #include <linux/auxvec.h> #include <linux/kref.h> @@ -742,8 +741,7 @@ static inline void lru_gen_init_mm(struct mm_struct *mm) static inline void lru_gen_use_mm(struct mm_struct *mm) { - if (!(current->flags & PF_KTHREAD)) - WRITE_ONCE(mm->lru_gen.bitmap, -1); + WRITE_ONCE(mm->lru_gen.bitmap, -1); } #else /* !CONFIG_LRU_GEN */
On Tue, 30 Aug 2022 16:25:24 -0600 Yu Zhao <yuzhao@google.com> wrote: > On Tue, Aug 30, 2022 at 4:05 PM Andrew Morton <akpm@linux-foundation.org> wrote: > > > > On Tue, 30 Aug 2022 16:23:44 +0200 Alexander Potapenko <glider@google.com> wrote: > > > > > > from init/do_mounts.c:2: > > > > ./include/linux/page-flags.h: In function ‘page_fixed_fake_head’: > > > > ./include/linux/page-flags.h:226:36: error: invalid use of undefined type ‘const struct page’ > > > > 226 | test_bit(PG_head, &page->flags)) { > > > > | ^~ > > > > ./include/linux/bitops.h:50:44: note: in definition of macro ‘bitop’ > > > > 50 | __builtin_constant_p((uintptr_t)(addr) != (uintptr_t)NULL) && \ > > > > | ^~~~ > > > > ./include/linux/page-flags.h:226:13: note: in expansion of macro ‘test_bit’ > > > > 226 | test_bit(PG_head, &page->flags)) { > > > > | ^~~~~~~~ > > > > ... > > > > > > Gotcha, this is a circular dependency: mm_types.h -> sched.h -> > > > kmsan.h -> gfp.h -> mmzone.h -> page-flags.h -> mm_types.h, where the > > > inclusion of sched.h into mm_types.h was only introduced in "mm: > > > multi-gen LRU: support page table walks" - that's why the problem was > > > missing in other trees. > > > > Ah, thanks for digging that out. > > > > Yu, that inclusion is regrettable. > > Sorry for the trouble -- it's also superfluous because we don't call > lru_gen_use_mm() when switching to the kernel. > > I've queued the following for now. Well, the rest of us want it too. > --- a/include/linux/mm_types.h > +++ b/include/linux/mm_types.h > @@ -3,7 +3,6 @@ > #define _LINUX_MM_TYPES_H > > #include <linux/mm_types_task.h> > -#include <linux/sched.h> > > #include <linux/auxvec.h> > #include <linux/kref.h> > @@ -742,8 +741,7 @@ static inline void lru_gen_init_mm(struct mm_struct *mm) > > static inline void lru_gen_use_mm(struct mm_struct *mm) > { > - if (!(current->flags & PF_KTHREAD)) > - WRITE_ONCE(mm->lru_gen.bitmap, -1); > + WRITE_ONCE(mm->lru_gen.bitmap, -1); > } Doesn't apply. I did: --- a/include/linux/mm_types.h~mm-multi-gen-lru-support-page-table-walks-fix +++ a/include/linux/mm_types.h @@ -3,7 +3,6 @@ #define _LINUX_MM_TYPES_H #include <linux/mm_types_task.h> -#include <linux/sched.h> #include <linux/auxvec.h> #include <linux/kref.h> @@ -742,11 +741,7 @@ static inline void lru_gen_init_mm(struc static inline void lru_gen_use_mm(struct mm_struct *mm) { - /* unlikely but not a bug when racing with lru_gen_migrate_mm() */ - VM_WARN_ON_ONCE(list_empty(&mm->lru_gen.list)); - - if (!(current->flags & PF_KTHREAD)) - WRITE_ONCE(mm->lru_gen.bitmap, -1); + WRITE_ONCE(mm->lru_gen.bitmap, -1); } #else /* !CONFIG_LRU_GEN */
On Tue, Aug 30, 2022 at 5:00 PM Andrew Morton <akpm@linux-foundation.org> wrote: > > On Tue, 30 Aug 2022 16:25:24 -0600 Yu Zhao <yuzhao@google.com> wrote: > > > On Tue, Aug 30, 2022 at 4:05 PM Andrew Morton <akpm@linux-foundation.org> wrote: > > > > > > On Tue, 30 Aug 2022 16:23:44 +0200 Alexander Potapenko <glider@google.com> wrote: > > > > > > > > from init/do_mounts.c:2: > > > > > ./include/linux/page-flags.h: In function ‘page_fixed_fake_head’: > > > > > ./include/linux/page-flags.h:226:36: error: invalid use of undefined type ‘const struct page’ > > > > > 226 | test_bit(PG_head, &page->flags)) { > > > > > | ^~ > > > > > ./include/linux/bitops.h:50:44: note: in definition of macro ‘bitop’ > > > > > 50 | __builtin_constant_p((uintptr_t)(addr) != (uintptr_t)NULL) && \ > > > > > | ^~~~ > > > > > ./include/linux/page-flags.h:226:13: note: in expansion of macro ‘test_bit’ > > > > > 226 | test_bit(PG_head, &page->flags)) { > > > > > | ^~~~~~~~ > > > > > ... > > > > > > > > Gotcha, this is a circular dependency: mm_types.h -> sched.h -> > > > > kmsan.h -> gfp.h -> mmzone.h -> page-flags.h -> mm_types.h, where the > > > > inclusion of sched.h into mm_types.h was only introduced in "mm: > > > > multi-gen LRU: support page table walks" - that's why the problem was > > > > missing in other trees. > > > > > > Ah, thanks for digging that out. > > > > > > Yu, that inclusion is regrettable. > > > > Sorry for the trouble -- it's also superfluous because we don't call > > lru_gen_use_mm() when switching to the kernel. > > > > I've queued the following for now. > > Well, the rest of us want it too. > > > --- a/include/linux/mm_types.h > > +++ b/include/linux/mm_types.h > > @@ -3,7 +3,6 @@ > > #define _LINUX_MM_TYPES_H > > > > #include <linux/mm_types_task.h> > > -#include <linux/sched.h> > > > > #include <linux/auxvec.h> > > #include <linux/kref.h> > > @@ -742,8 +741,7 @@ static inline void lru_gen_init_mm(struct mm_struct *mm) > > > > static inline void lru_gen_use_mm(struct mm_struct *mm) > > { > > - if (!(current->flags & PF_KTHREAD)) > > - WRITE_ONCE(mm->lru_gen.bitmap, -1); > > + WRITE_ONCE(mm->lru_gen.bitmap, -1); > > } > > Doesn't apply. I did: > > --- a/include/linux/mm_types.h~mm-multi-gen-lru-support-page-table-walks-fix > +++ a/include/linux/mm_types.h > @@ -3,7 +3,6 @@ > #define _LINUX_MM_TYPES_H > > #include <linux/mm_types_task.h> > -#include <linux/sched.h> > > #include <linux/auxvec.h> > #include <linux/kref.h> > @@ -742,11 +741,7 @@ static inline void lru_gen_init_mm(struc > > static inline void lru_gen_use_mm(struct mm_struct *mm) > { > - /* unlikely but not a bug when racing with lru_gen_migrate_mm() */ > - VM_WARN_ON_ONCE(list_empty(&mm->lru_gen.list)); Yes. I got a report that somebody tripped over this "unlikely" condition (and ascertained it's not a bug). So I deleted this part as well. Will refresh the series around rc5. Thanks.
On Wed, Aug 31, 2022 at 1:07 AM Yu Zhao <yuzhao@google.com> wrote: > > On Tue, Aug 30, 2022 at 5:00 PM Andrew Morton <akpm@linux-foundation.org> wrote: > > > > On Tue, 30 Aug 2022 16:25:24 -0600 Yu Zhao <yuzhao@google.com> wrote: > > > > > On Tue, Aug 30, 2022 at 4:05 PM Andrew Morton <akpm@linux-foundation.org> wrote: > > > > > > > > On Tue, 30 Aug 2022 16:23:44 +0200 Alexander Potapenko <glider@google.com> wrote: > > > > > > > > > > from init/do_mounts.c:2: > > > > > > ./include/linux/page-flags.h: In function ‘page_fixed_fake_head’: > > > > > > ./include/linux/page-flags.h:226:36: error: invalid use of undefined type ‘const struct page’ > > > > > > 226 | test_bit(PG_head, &page->flags)) { > > > > > > | ^~ > > > > > > ./include/linux/bitops.h:50:44: note: in definition of macro ‘bitop’ > > > > > > 50 | __builtin_constant_p((uintptr_t)(addr) != (uintptr_t)NULL) && \ > > > > > > | ^~~~ > > > > > > ./include/linux/page-flags.h:226:13: note: in expansion of macro ‘test_bit’ > > > > > > 226 | test_bit(PG_head, &page->flags)) { > > > > > > | ^~~~~~~~ > > > > > > ... > > > > > > > > > > Gotcha, this is a circular dependency: mm_types.h -> sched.h -> > > > > > kmsan.h -> gfp.h -> mmzone.h -> page-flags.h -> mm_types.h, where the > > > > > inclusion of sched.h into mm_types.h was only introduced in "mm: > > > > > multi-gen LRU: support page table walks" - that's why the problem was > > > > > missing in other trees. > > > > > > > > Ah, thanks for digging that out. > > > > > > > > Yu, that inclusion is regrettable. > > > > > > Sorry for the trouble -- it's also superfluous because we don't call > > > lru_gen_use_mm() when switching to the kernel. > > > > > > I've queued the following for now. > > > > Well, the rest of us want it too. > > > > > --- a/include/linux/mm_types.h > > > +++ b/include/linux/mm_types.h > > > @@ -3,7 +3,6 @@ > > > #define _LINUX_MM_TYPES_H > > > > > > #include <linux/mm_types_task.h> > > > -#include <linux/sched.h> > > > > > > #include <linux/auxvec.h> > > > #include <linux/kref.h> > > > @@ -742,8 +741,7 @@ static inline void lru_gen_init_mm(struct mm_struct *mm) > > > > > > static inline void lru_gen_use_mm(struct mm_struct *mm) > > > { > > > - if (!(current->flags & PF_KTHREAD)) > > > - WRITE_ONCE(mm->lru_gen.bitmap, -1); > > > + WRITE_ONCE(mm->lru_gen.bitmap, -1); > > > } > > > > Doesn't apply. I did: > > > > --- a/include/linux/mm_types.h~mm-multi-gen-lru-support-page-table-walks-fix > > +++ a/include/linux/mm_types.h > > @@ -3,7 +3,6 @@ > > #define _LINUX_MM_TYPES_H > > > > #include <linux/mm_types_task.h> > > -#include <linux/sched.h> > > > > #include <linux/auxvec.h> > > #include <linux/kref.h> > > @@ -742,11 +741,7 @@ static inline void lru_gen_init_mm(struc > > > > static inline void lru_gen_use_mm(struct mm_struct *mm) > > { > > - /* unlikely but not a bug when racing with lru_gen_migrate_mm() */ > > - VM_WARN_ON_ONCE(list_empty(&mm->lru_gen.list)); > > Yes. I got a report that somebody tripped over this "unlikely" > condition (and ascertained it's not a bug). So I deleted this part as > well. > > Will refresh the series around rc5. Thanks. Guess I'd still proceed with splitting up kmsan.h just to be on the safe side.
On Tue, Aug 30, 2022 at 4:05 PM Andrew Morton <akpm@linux-foundation.org> wrote: ... > Yu, that inclusion is regrettable. I don't think mm_types.h is an > appropriate site for implementing lru_gen_use_mm() anyway. Adding a > new header is always the right fix for these things. I'd suggest > adding a new mglru.h (or whatever) and putting most/all of the mglru > material in there. > > Also, the addition to kernel/sched/core.c wasn't clearly changelogged, > is uncommented and I doubt if the sched developers know about it, let > alone reviewed it. Please give them a heads-up. Adding Ingo, Peter, Juri and Vincent. I added lru_gen_use_mm() (one store operation) to context_switch() in kernel/sched/core.c, and I would appreciate it if you could take a look and let me know if you have any concerns: https://lore.kernel.org/r/20220815071332.627393-9-yuzhao@google.com/ I'll resend the series in a week or so, and cc you when that happens. > The addition looks fairly benign, but core context_switch() is the > sort of thing which people get rather defensive about and putting > mm-specific stuff in there might be challenged. Some quantitative > justification of this optimization would be appropriate. The commit message (from the above link) touches on the theory only: This patch uses the following optimizations when walking page tables: 1. It tracks the usage of mm_struct's between context switches so that page table walkers can skip processes that have been sleeping since the last iteration. Let me expand on this. TLDR: lru_gen_use_mm() introduces an extra store operation whenever switching to a new mm_struct, which sets a flag for page reclaim to clear. For systems that are NOT under memory pressure: 1. This is a new overhead. 2. I don't think it's measurable, hence can't be the last straw. 3. Assume it can be measured, the belief is that underutilized systems should be sacrificed (to some degree) for the greater good. For systems that are under memory pressure: 1. When this flag is set on a mm_struct, page reclaim knows that this mm_struct has been used since the last time it cleared this flag. So it's worth checking out this mm_struct (to clear the accessed bit). 2. The similar idea has been used on Android and ChromeOS: when an app or a tab goes to the background, these systems (conditionally) call MADV_COLD. The majority of GUI applications don't implement this idea. MGLRU opts to do it for the benefit of them. How it benefits server applications is unknown (uninteresting). 3. This optimization benefits arm64 v8.2+ more than x86, since x86 supports the accessed bit in non-leaf entries and therefore the search space can be reduced based on that. On a 4GB ARM system with 40 Chrome tabs opened and 5 tabs in active use, this optimization improves page table walk performance by about 5%. The overall benefit is small but measurable under heavy memory pressure. 4. The idea can be reused by other MM components, e.g., khugepaged. Thanks.
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 913e593a3b45f..c1b8982899eca 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -5,6 +5,7 @@ * User space memory access functions */ #include <linux/compiler.h> +#include <linux/instrumented.h> #include <linux/kasan-checks.h> #include <linux/string.h> #include <asm/asm.h> @@ -103,6 +104,7 @@ extern int __get_user_bad(void); : "=a" (__ret_gu), "=r" (__val_gu), \ ASM_CALL_CONSTRAINT \ : "0" (ptr), "i" (sizeof(*(ptr)))); \ + instrument_get_user(__val_gu); \ (x) = (__force __typeof__(*(ptr))) __val_gu; \ __builtin_expect(__ret_gu, 0); \ }) @@ -192,9 +194,11 @@ extern void __put_user_nocheck_8(void); int __ret_pu; \ void __user *__ptr_pu; \ register __typeof__(*(ptr)) __val_pu asm("%"_ASM_AX); \ - __chk_user_ptr(ptr); \ - __ptr_pu = (ptr); \ - __val_pu = (x); \ + __typeof__(*(ptr)) __x = (x); /* eval x once */ \ + __typeof__(ptr) __ptr = (ptr); /* eval ptr once */ \ + __chk_user_ptr(__ptr); \ + __ptr_pu = __ptr; \ + __val_pu = __x; \ asm volatile("call __" #fn "_%P[size]" \ : "=c" (__ret_pu), \ ASM_CALL_CONSTRAINT \ @@ -202,6 +206,7 @@ extern void __put_user_nocheck_8(void); "r" (__val_pu), \ [size] "i" (sizeof(*(ptr))) \ :"ebx"); \ + instrument_put_user(__x, __ptr, sizeof(*(ptr))); \ __builtin_expect(__ret_pu, 0); \ }) @@ -248,23 +253,25 @@ extern void __put_user_nocheck_8(void); #define __put_user_size(x, ptr, size, label) \ do { \ + __typeof__(*(ptr)) __x = (x); /* eval x once */ \ __chk_user_ptr(ptr); \ switch (size) { \ case 1: \ - __put_user_goto(x, ptr, "b", "iq", label); \ + __put_user_goto(__x, ptr, "b", "iq", label); \ break; \ case 2: \ - __put_user_goto(x, ptr, "w", "ir", label); \ + __put_user_goto(__x, ptr, "w", "ir", label); \ break; \ case 4: \ - __put_user_goto(x, ptr, "l", "ir", label); \ + __put_user_goto(__x, ptr, "l", "ir", label); \ break; \ case 8: \ - __put_user_goto_u64(x, ptr, label); \ + __put_user_goto_u64(__x, ptr, label); \ break; \ default: \ __put_user_bad(); \ } \ + instrument_put_user(__x, ptr, size); \ } while (0) #ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT @@ -305,6 +312,7 @@ do { \ default: \ (x) = __get_user_bad(); \ } \ + instrument_get_user(x); \ } while (0) #define __get_user_asm(x, addr, itype, ltype, label) \
Use hooks from instrumented.h to notify bug detection tools about usercopy events in variations of get_user() and put_user(). Signed-off-by: Alexander Potapenko <glider@google.com> --- v5: -- handle put_user(), make sure to not evaluate pointer/value twice Link: https://linux-review.googlesource.com/id/Ia9f12bfe5832623250e20f1859fdf5cc485a2fce --- arch/x86/include/asm/uaccess.h | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-)