[mm-unstable,2/4] mm/khugepaged: consistently order cc->is_khugepaged and pte_* checks

Message ID 20220720140603.1958773-3-zokeefe@google.com (mailing list archive)
State New
Series mm: fixes for userspace hugepage collapse, v7

Commit Message

Zach O'Keefe July 20, 2022, 2:06 p.m. UTC
cc->is_khugepaged is used to predicate the khugepaged-only behavior
of enforcing khugepaged heuristics limited by the sysfs knobs
khugepaged_max_ptes_[none|swap|shared].

In branches where khugepaged_max_ptes_* is checked, consistently check
cc->is_khugepaged first.  Also, local counters (for comparison against
the khugepaged_max_ptes_* limits) were previously incremented inside
the comparison expression.  Some of these counters (unmapped) are
additionally used outside of khugepaged_max_ptes_* enforcement, and
all counters are reported in tracepoints.  Move the accounting of
these counters ahead of the branching statements so they are always
updated, avoiding future errors due to C's short-circuit evaluation.

Fixes: 9fab4752a181 ("mm/khugepaged: add flag to predicate khugepaged-only behavior")
Link: https://lore.kernel.org/linux-mm/Ys2qJm6FaOQcxkha@google.com/
Signed-off-by: Zach O'Keefe <zokeefe@google.com>
---
 mm/khugepaged.c | 49 +++++++++++++++++++++++++++++--------------------
 1 file changed, 29 insertions(+), 20 deletions(-)
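To make the short-circuit hazard concrete, here is a minimal
standalone C sketch (hypothetical names and limits; this is not the
kernel code itself).  Naively reordering the operands so the cheap
!is_khugepaged test comes first silently stops the counter from ever
being incremented; hoisting the increment out of the condition, as
the patch does, keeps the count correct regardless of operand order:

#include <stdio.h>
#include <stdbool.h>

#define MAX_PTES_NONE 2	/* stand-in for khugepaged_max_ptes_none */

int main(void)
{
	bool is_khugepaged = false;	/* e.g. a MADV_COLLAPSE caller */
	int none_or_zero = 0;

	for (int i = 0; i < 5; i++) {
		/*
		 * Buggy pattern: "||" short-circuits on the flag check,
		 * so ++none_or_zero never executes and any later
		 * consumer (tracepoint, limit check) sees 0.
		 */
		if (!is_khugepaged || ++none_or_zero <= MAX_PTES_NONE)
			continue;
	}
	printf("increment inside condition:  none_or_zero = %d\n",
	       none_or_zero);	/* prints 0 */

	none_or_zero = 0;
	for (int i = 0; i < 5; i++) {
		/* Fixed pattern: account first, then branch. */
		++none_or_zero;
		if (!is_khugepaged || none_or_zero <= MAX_PTES_NONE)
			continue;
	}
	printf("increment before branching: none_or_zero = %d\n",
	       none_or_zero);	/* prints 5 */
	return 0;
}

Once the accounting is unconditional, checking cc->is_khugepaged
first is safe and costs nothing: operand order then only determines
which test runs first, not the counter values reported in
tracepoints.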

Comments

Yang Shi July 20, 2022, 5:27 p.m. UTC | #1
On Wed, Jul 20, 2022 at 7:06 AM Zach O'Keefe <zokeefe@google.com> wrote:
>
> cc->is_khugepaged is used to predicate the khugepaged-only behavior
> of enforcing khugepaged heuristics limited by the sysfs knobs
> khugepaged_max_ptes_[none|swap|shared].
>
> In branches where khugepaged_max_ptes_* is checked, consistently check
> cc->is_khugepaged first.  Also, local counters (for comparison against
> the khugepaged_max_ptes_* limits) were previously incremented inside
> the comparison expression.  Some of these counters (unmapped) are
> additionally used outside of khugepaged_max_ptes_* enforcement, and
> all counters are reported in tracepoints.  Move the accounting of
> these counters ahead of the branching statements so they are always
> updated, avoiding future errors due to C's short-circuit evaluation.

Yeah, it is safer to not depend on the order of branch statements to
inc the counter.

Reviewed-by: Yang Shi <shy828301@gmail.com>

> [...]
Zach O'Keefe July 20, 2022, 7:09 p.m. UTC | #2
On Jul 20 10:27, Yang Shi wrote:
> On Wed, Jul 20, 2022 at 7:06 AM Zach O'Keefe <zokeefe@google.com> wrote:
> > [...]
> 
> Yeah, it is safer to not depend on the order of branch statements to
> inc the counter.
> 

Only cost me a couple hours when I got bit by this after naively moving checks
around :) Hopefully I can save the next person.

Also, thanks for the reviews, Yang!

> Reviewed-by: Yang Shi <shy828301@gmail.com>
David Rientjes July 21, 2022, 12:42 a.m. UTC | #3
On Wed, 20 Jul 2022, Zach O'Keefe wrote:

> cc->is_khugepaged is used to predicate the khugepaged-only behavior
> of enforcing khugepaged heuristics limited by the sysfs knobs
> khugepaged_max_ptes_[none|swap|shared].
> 
> [...]
> 
> Signed-off-by: Zach O'Keefe <zokeefe@google.com>

Acked-by: David Rientjes <rientjes@google.com>

Patch

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index ecd28bfeab60..290422577172 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -574,9 +574,10 @@  static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 		pte_t pteval = *_pte;
 		if (pte_none(pteval) || (pte_present(pteval) &&
 				is_zero_pfn(pte_pfn(pteval)))) {
+			++none_or_zero;
 			if (!userfaultfd_armed(vma) &&
-			    (++none_or_zero <= khugepaged_max_ptes_none ||
-			     !cc->is_khugepaged)) {
+			    (!cc->is_khugepaged ||
+			     none_or_zero <= khugepaged_max_ptes_none)) {
 				continue;
 			} else {
 				result = SCAN_EXCEED_NONE_PTE;
@@ -596,11 +597,14 @@  static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 
 		VM_BUG_ON_PAGE(!PageAnon(page), page);
 
-		if (cc->is_khugepaged && page_mapcount(page) > 1 &&
-		    ++shared > khugepaged_max_ptes_shared) {
-			result = SCAN_EXCEED_SHARED_PTE;
-			count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
-			goto out;
+		if (page_mapcount(page) > 1) {
+			++shared;
+			if (cc->is_khugepaged &&
+			    shared > khugepaged_max_ptes_shared) {
+				result = SCAN_EXCEED_SHARED_PTE;
+				count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
+				goto out;
+			}
 		}
 
 		if (PageCompound(page)) {
@@ -1170,8 +1174,9 @@  static int hpage_collapse_scan_pmd(struct mm_struct *mm,
 	     _pte++, _address += PAGE_SIZE) {
 		pte_t pteval = *_pte;
 		if (is_swap_pte(pteval)) {
-			if (++unmapped <= khugepaged_max_ptes_swap ||
-			    !cc->is_khugepaged) {
+			++unmapped;
+			if (!cc->is_khugepaged ||
+			    unmapped <= khugepaged_max_ptes_swap) {
 				/*
 				 * Always be strict with uffd-wp
 				 * enabled swap entries.  Please see
@@ -1189,9 +1194,10 @@  static int hpage_collapse_scan_pmd(struct mm_struct *mm,
 			}
 		}
 		if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
+			++none_or_zero;
 			if (!userfaultfd_armed(vma) &&
-			    (++none_or_zero <= khugepaged_max_ptes_none ||
-			     !cc->is_khugepaged)) {
+			    (!cc->is_khugepaged ||
+			     none_or_zero <= khugepaged_max_ptes_none)) {
 				continue;
 			} else {
 				result = SCAN_EXCEED_NONE_PTE;
@@ -1221,12 +1227,14 @@  static int hpage_collapse_scan_pmd(struct mm_struct *mm,
 			goto out_unmap;
 		}
 
-		if (cc->is_khugepaged &&
-		    page_mapcount(page) > 1 &&
-		    ++shared > khugepaged_max_ptes_shared) {
-			result = SCAN_EXCEED_SHARED_PTE;
-			count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
-			goto out_unmap;
+		if (page_mapcount(page) > 1) {
+			++shared;
+			if (cc->is_khugepaged &&
+			    shared > khugepaged_max_ptes_shared) {
+				result = SCAN_EXCEED_SHARED_PTE;
+				count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
+				goto out_unmap;
+			}
 		}
 
 		page = compound_head(page);
@@ -1961,8 +1969,9 @@  static int khugepaged_scan_file(struct mm_struct *mm, struct file *file,
 			continue;
 
 		if (xa_is_value(page)) {
+			++swap;
 			if (cc->is_khugepaged &&
-			    ++swap > khugepaged_max_ptes_swap) {
+			    swap > khugepaged_max_ptes_swap) {
 				result = SCAN_EXCEED_SWAP_PTE;
 				count_vm_event(THP_SCAN_EXCEED_SWAP_PTE);
 				break;
@@ -2013,8 +2022,8 @@  static int khugepaged_scan_file(struct mm_struct *mm, struct file *file,
 	rcu_read_unlock();
 
 	if (result == SCAN_SUCCEED) {
-		if (present < HPAGE_PMD_NR - khugepaged_max_ptes_none &&
-		    cc->is_khugepaged) {
+		if (cc->is_khugepaged &&
+		    present < HPAGE_PMD_NR - khugepaged_max_ptes_none) {
 			result = SCAN_EXCEED_NONE_PTE;
 			count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
 		} else {