Message ID | 20221123064510.16225-1-jgross@suse.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | [v2] mm: introduce arch_has_hw_nonleaf_pmd_young() | expand |
On 23.11.22 07:45, Juergen Gross wrote: > When running as a Xen PV guest, commit eed9a328aa1a ("mm: x86: add > CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG") can cause a protection violation > in pmdp_test_and_clear_young(): > > BUG: unable to handle page fault for address: ffff8880083374d0 > #PF: supervisor write access in kernel mode > #PF: error_code(0x0003) - permissions violation > PGD 3026067 P4D 3026067 PUD 3027067 PMD 7fee5067 PTE 8010000008337065 > Oops: 0003 [#1] PREEMPT SMP NOPTI > CPU: 7 PID: 158 Comm: kswapd0 Not tainted 6.1.0-rc5-20221118-doflr+ #1 > RIP: e030:pmdp_test_and_clear_young+0x25/0x40 > > This happens because the Xen hypervisor can't emulate direct writes to > page table entries other than PTEs. > > This can easily be fixed by introducing arch_has_hw_nonleaf_pmd_young() > similar to arch_has_hw_pte_young() and testing that instead of > CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG. > > Fixes: eed9a328aa1a ("mm: x86: add CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG") > Reported-by: Sander Eikelenboom <linux@eikelenboom.it> > Signed-off-by: Juergen Gross <jgross@suse.com> > Acked-by: Yu Zhao <yuzhao@google.com> > Tested-by: Sander Eikelenboom <linux@eikelenboom.it> > --- > V2: > - correct function name in commit message to match patch > --- > arch/x86/include/asm/pgtable.h | 8 ++++++++ > include/linux/pgtable.h | 11 +++++++++++ > mm/vmscan.c | 10 +++++----- > 3 files changed, 24 insertions(+), 5 deletions(-) > > diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h > index 5059799bebe3..c567a6ed17ce 100644 > --- a/arch/x86/include/asm/pgtable.h > +++ b/arch/x86/include/asm/pgtable.h > @@ -1438,6 +1438,14 @@ static inline bool arch_has_hw_pte_young(void) > return true; > } > > +#ifdef CONFIG_XEN_PV > +#define arch_has_hw_nonleaf_pmd_young arch_has_hw_nonleaf_pmd_young > +static inline bool arch_has_hw_nonleaf_pmd_young(void) > +{ > + return !cpu_feature_enabled(X86_FEATURE_XENPV); > +} > +#endif > + > #ifdef CONFIG_PAGE_TABLE_CHECK > static inline bool 
pte_user_accessible_page(pte_t pte) > { > diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h > index a108b60a6962..58fc7e2d9575 100644 > --- a/include/linux/pgtable.h > +++ b/include/linux/pgtable.h > @@ -260,6 +260,17 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma, > #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ > #endif > > +#ifndef arch_has_hw_nonleaf_pmd_young > +/* > + * Return whether the accessed bit in non-leaf PMD entries is supported on the > + * local CPU. > + */ > +static inline bool arch_has_hw_nonleaf_pmd_young(void) > +{ > + return IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG); > +} > +#endif > + > #ifndef arch_has_hw_pte_young > /* > * Return whether the accessed bit is supported on the local CPU. > diff --git a/mm/vmscan.c b/mm/vmscan.c > index 04d8b88e5216..a04ac3b18326 100644 > --- a/mm/vmscan.c > +++ b/mm/vmscan.c > @@ -3975,7 +3975,7 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area > goto next; > > if (!pmd_trans_huge(pmd[i])) { > - if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && > + if (arch_has_hw_nonleaf_pmd_young() && > get_cap(LRU_GEN_NONLEAF_YOUNG)) > pmdp_test_and_clear_young(vma, addr, pmd + i); > goto next; > @@ -4073,14 +4073,14 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end, > #endif > walk->mm_stats[MM_NONLEAF_TOTAL]++; > > -#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG > - if (get_cap(LRU_GEN_NONLEAF_YOUNG)) { > + if (arch_has_hw_nonleaf_pmd_young() && > + get_cap(LRU_GEN_NONLEAF_YOUNG)) { > if (!pmd_young(val)) > continue; > > walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos); > } > -#endif > + > if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i)) > continue; > > @@ -5354,7 +5354,7 @@ static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, c > if (arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK)) > caps |= BIT(LRU_GEN_MM_WALK); > > - if 
(IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && get_cap(LRU_GEN_NONLEAF_YOUNG)) > + if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG)) > caps |= BIT(LRU_GEN_NONLEAF_YOUNG); > > return snprintf(buf, PAGE_SIZE, "0x%04x\n", caps); Acked-by: David Hildenbrand <david@redhat.com> # core changes
Hi Jürgen, On Wed, Nov 23, 2022 at 7:53 AM Juergen Gross <jgross@suse.com> wrote: > When running as a Xen PV guest, commit eed9a328aa1a ("mm: x86: add > CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG") can cause a protection violation > in pmdp_test_and_clear_young(): > > BUG: unable to handle page fault for address: ffff8880083374d0 > #PF: supervisor write access in kernel mode > #PF: error_code(0x0003) - permissions violation > PGD 3026067 P4D 3026067 PUD 3027067 PMD 7fee5067 PTE 8010000008337065 > Oops: 0003 [#1] PREEMPT SMP NOPTI > CPU: 7 PID: 158 Comm: kswapd0 Not tainted 6.1.0-rc5-20221118-doflr+ #1 > RIP: e030:pmdp_test_and_clear_young+0x25/0x40 > > This happens because the Xen hypervisor can't emulate direct writes to > page table entries other than PTEs. > > This can easily be fixed by introducing arch_has_hw_nonleaf_pmd_young() > similar to arch_has_hw_pte_young() and testing that instead of > CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG. > > Fixes: eed9a328aa1a ("mm: x86: add CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG") > Reported-by: Sander Eikelenboom <linux@eikelenboom.it> > Signed-off-by: Juergen Gross <jgross@suse.com> > Acked-by: Yu Zhao <yuzhao@google.com> > Tested-by: Sander Eikelenboom <linux@eikelenboom.it> > --- > V2: > - correct function name in commit message to match patch Thanks for your patch, which is now commit 3f85e711d5af4fb4 ("mm: introduce arch_has_hw_nonleaf_pmd_young()") in next-20221124. noreply@ellerman.id.au reported a build failure for m68k/allmodconfig, which I have bisected to this commit. 
> --- a/mm/vmscan.c > +++ b/mm/vmscan.c > @@ -4073,14 +4073,14 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end, > #endif > walk->mm_stats[MM_NONLEAF_TOTAL]++; > > -#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG > - if (get_cap(LRU_GEN_NONLEAF_YOUNG)) { > + if (arch_has_hw_nonleaf_pmd_young() && > + get_cap(LRU_GEN_NONLEAF_YOUNG)) { > if (!pmd_young(val)) mm/vmscan.c:4102:30: error: implicit declaration of function 'pmd_young'; did you mean 'pte_young'? [-Werror=implicit-function-declaration] pmd_young() seems to be defined only on a handful of architectures. > continue; > > walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos); > } > -#endif > + > if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i)) > continue; > Gr{oetje,eeting}s, Geert -- Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org In personal conversations with technical people, I call myself a hacker. But when I'm talking to journalists I just say "programmer" or something like that. -- Linus Torvalds
Hi, On 24.11.22 15:08, Geert Uytterhoeven wrote: > Hi Jürgen, > > On Wed, Nov 23, 2022 at 7:53 AM Juergen Gross <jgross@suse.com> wrote: >> When running as a Xen PV guest, commit eed9a328aa1a ("mm: x86: add >> CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG") can cause a protection violation >> in pmdp_test_and_clear_young(): >> >> BUG: unable to handle page fault for address: ffff8880083374d0 >> #PF: supervisor write access in kernel mode >> #PF: error_code(0x0003) - permissions violation >> PGD 3026067 P4D 3026067 PUD 3027067 PMD 7fee5067 PTE 8010000008337065 >> Oops: 0003 [#1] PREEMPT SMP NOPTI >> CPU: 7 PID: 158 Comm: kswapd0 Not tainted 6.1.0-rc5-20221118-doflr+ #1 >> RIP: e030:pmdp_test_and_clear_young+0x25/0x40 >> >> This happens because the Xen hypervisor can't emulate direct writes to >> page table entries other than PTEs. >> >> This can easily be fixed by introducing arch_has_hw_nonleaf_pmd_young() >> similar to arch_has_hw_pte_young() and testing that instead of >> CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG. >> >> Fixes: eed9a328aa1a ("mm: x86: add CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG") >> Reported-by: Sander Eikelenboom <linux@eikelenboom.it> >> Signed-off-by: Juergen Gross <jgross@suse.com> >> Acked-by: Yu Zhao <yuzhao@google.com> >> Tested-by: Sander Eikelenboom <linux@eikelenboom.it> >> --- >> V2: >> - correct function name in commit message to match patch > > Thanks for your patch, which is now commit 3f85e711d5af4fb4 ("mm: > introduce arch_has_hw_nonleaf_pmd_young()") in next-20221124. > > noreply@ellerman.id.au reported a build failure for m68k/allmodconfig, > which I have bisected to this commit. 
> >> --- a/mm/vmscan.c >> +++ b/mm/vmscan.c > >> @@ -4073,14 +4073,14 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end, >> #endif >> walk->mm_stats[MM_NONLEAF_TOTAL]++; >> >> -#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG >> - if (get_cap(LRU_GEN_NONLEAF_YOUNG)) { >> + if (arch_has_hw_nonleaf_pmd_young() && >> + get_cap(LRU_GEN_NONLEAF_YOUNG)) { >> if (!pmd_young(val)) > > mm/vmscan.c:4102:30: error: implicit declaration of function > 'pmd_young'; did you mean 'pte_young'? > [-Werror=implicit-function-declaration] > > pmd_young() seems to be defined only on a handful of architectures. What would be the preferred fix for that? I could offer: - use V1 of the patch - add the #ifdefs again to this patch (which would be kind of weird) - use the attached patch Juergen
On Thu, Nov 24, 2022 at 7:30 AM Juergen Gross <jgross@suse.com> wrote: > > Hi, > > On 24.11.22 15:08, Geert Uytterhoeven wrote: > > Hi Jürgen, > > > > On Wed, Nov 23, 2022 at 7:53 AM Juergen Gross <jgross@suse.com> wrote: > >> When running as a Xen PV guest, commit eed9a328aa1a ("mm: x86: add > >> CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG") can cause a protection violation > >> in pmdp_test_and_clear_young(): > >> > >> BUG: unable to handle page fault for address: ffff8880083374d0 > >> #PF: supervisor write access in kernel mode > >> #PF: error_code(0x0003) - permissions violation > >> PGD 3026067 P4D 3026067 PUD 3027067 PMD 7fee5067 PTE 8010000008337065 > >> Oops: 0003 [#1] PREEMPT SMP NOPTI > >> CPU: 7 PID: 158 Comm: kswapd0 Not tainted 6.1.0-rc5-20221118-doflr+ #1 > >> RIP: e030:pmdp_test_and_clear_young+0x25/0x40 > >> > >> This happens because the Xen hypervisor can't emulate direct writes to > >> page table entries other than PTEs. > >> > >> This can easily be fixed by introducing arch_has_hw_nonleaf_pmd_young() > >> similar to arch_has_hw_pte_young() and testing that instead of > >> CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG. > >> > >> Fixes: eed9a328aa1a ("mm: x86: add CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG") > >> Reported-by: Sander Eikelenboom <linux@eikelenboom.it> > >> Signed-off-by: Juergen Gross <jgross@suse.com> > >> Acked-by: Yu Zhao <yuzhao@google.com> > >> Tested-by: Sander Eikelenboom <linux@eikelenboom.it> > >> --- > >> V2: > >> - correct function name in commit message to match patch > > > > Thanks for your patch, which is now commit 3f85e711d5af4fb4 ("mm: > > introduce arch_has_hw_nonleaf_pmd_young()") in next-20221124. > > > > noreply@ellerman.id.au reported a build failure for m68k/allmodconfig, > > which I have bisected to this commit. 
> > > >> --- a/mm/vmscan.c > >> +++ b/mm/vmscan.c > > > >> @@ -4073,14 +4073,14 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end, > >> #endif > >> walk->mm_stats[MM_NONLEAF_TOTAL]++; > >> > >> -#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG > >> - if (get_cap(LRU_GEN_NONLEAF_YOUNG)) { > >> + if (arch_has_hw_nonleaf_pmd_young() && > >> + get_cap(LRU_GEN_NONLEAF_YOUNG)) { > >> if (!pmd_young(val)) > > > > mm/vmscan.c:4102:30: error: implicit declaration of function > > 'pmd_young'; did you mean 'pte_young'? > > [-Werror=implicit-function-declaration] > > > > pmd_young() seems to be defined only on a handful of architectures. > > What would be the preferred fix for that? > > I could offer: > > - use V1 of the patch > - add the #ifdefs again to this patch (which would be kind of weird) > - use the attached patch Your patch looks good to me: Acked-by: Yu Zhao <yuzhao@google.com> Thanks.
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 5059799bebe3..c567a6ed17ce 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1438,6 +1438,14 @@ static inline bool arch_has_hw_pte_young(void) return true; } +#ifdef CONFIG_XEN_PV +#define arch_has_hw_nonleaf_pmd_young arch_has_hw_nonleaf_pmd_young +static inline bool arch_has_hw_nonleaf_pmd_young(void) +{ + return !cpu_feature_enabled(X86_FEATURE_XENPV); +} +#endif + #ifdef CONFIG_PAGE_TABLE_CHECK static inline bool pte_user_accessible_page(pte_t pte) { diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index a108b60a6962..58fc7e2d9575 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -260,6 +260,17 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma, #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif +#ifndef arch_has_hw_nonleaf_pmd_young +/* + * Return whether the accessed bit in non-leaf PMD entries is supported on the + * local CPU. + */ +static inline bool arch_has_hw_nonleaf_pmd_young(void) +{ + return IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG); +} +#endif + #ifndef arch_has_hw_pte_young /* * Return whether the accessed bit is supported on the local CPU. 
diff --git a/mm/vmscan.c b/mm/vmscan.c index 04d8b88e5216..a04ac3b18326 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -3975,7 +3975,7 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area goto next; if (!pmd_trans_huge(pmd[i])) { - if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && + if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG)) pmdp_test_and_clear_young(vma, addr, pmd + i); goto next; @@ -4073,14 +4073,14 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end, #endif walk->mm_stats[MM_NONLEAF_TOTAL]++; -#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG - if (get_cap(LRU_GEN_NONLEAF_YOUNG)) { + if (arch_has_hw_nonleaf_pmd_young() && + get_cap(LRU_GEN_NONLEAF_YOUNG)) { if (!pmd_young(val)) continue; walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos); } -#endif + if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i)) continue; @@ -5354,7 +5354,7 @@ static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, c if (arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK)) caps |= BIT(LRU_GEN_MM_WALK); - if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && get_cap(LRU_GEN_NONLEAF_YOUNG)) + if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG)) caps |= BIT(LRU_GEN_NONLEAF_YOUNG); return snprintf(buf, PAGE_SIZE, "0x%04x\n", caps);