[v11,7/9] hugetlb: support file_region coalescing again

Message ID 20200203232248.104733-7-almasrymina@google.com (mailing list archive)
State New, archived
Series [v11,1/9] hugetlb_cgroup: Add hugetlb_cgroup reservation counter

Commit Message

Mina Almasry Feb. 3, 2020, 11:22 p.m. UTC
An earlier patch in this series disabled file_region coalescing in order
to hang the hugetlb_cgroup uncharge info on the file_region entries.

This patch re-adds support for coalescing of file_region entries.
Essentially, every time we add an entry, we check whether the
hugetlb_cgroup uncharge info is the same as that of any adjacent entry.
If it is, instead of adding a new entry we simply extend the
appropriate existing entry.

This is an important performance optimization, as private mappings add
their entries page by page; without coalescing, a large mapping would
accumulate a long list of file_region entries in its resv_map and incur
a significant performance cost.
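
In rough pseudo-C, the coalescing check looks like this (a minimal
sketch only; prev, next, same_uncharge_info() and insert_new_entry()
are illustrative names, not the identifiers used in the patch below):

	/* Coalesce [from, to) with a neighbouring entry when the
	 * hugetlb_cgroup uncharge info matches; otherwise insert a
	 * new entry.
	 */
	if (prev && prev->to == from && same_uncharge_info(prev))
		prev->to = to;			/* extend previous entry */
	else if (next && next->from == to && same_uncharge_info(next))
		next->from = from;		/* extend next entry */
	else
		insert_new_entry(from, to);	/* cannot coalesce */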

Signed-off-by: Mina Almasry <almasrymina@google.com>

---
 mm/hugetlb.c | 62 +++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 52 insertions(+), 10 deletions(-)

--
2.25.0.341.g760bfbb309-goog

Comments

Mike Kravetz Feb. 7, 2020, 12:17 a.m. UTC | #1
On 2/3/20 3:22 PM, Mina Almasry wrote:
> An earlier patch in this series disabled file_region coalescing in order
> to hang the hugetlb_cgroup uncharge info on the file_region entries.
> 
> This patch re-adds support for coalescing of file_region entries.
> Essentially, every time we add an entry, we check whether the
> hugetlb_cgroup uncharge info is the same as that of any adjacent entry.
> If it is, instead of adding a new entry we simply extend the
> appropriate existing entry.
> 
> This is an important performance optimization, as private mappings add
> their entries page by page; without coalescing, a large mapping would
> accumulate a long list of file_region entries in its resv_map and incur
> a significant performance cost.
> 
> Signed-off-by: Mina Almasry <almasrymina@google.com>
> 
> ---
>  mm/hugetlb.c | 62 +++++++++++++++++++++++++++++++++++++++++++---------
>  1 file changed, 52 insertions(+), 10 deletions(-)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index ec0b55ea1506e..058dd9c8269cf 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -272,6 +272,22 @@ static void record_hugetlb_cgroup_uncharge_info(struct hugetlb_cgroup *h_cg,
>  #endif
>  }
> 
> +static bool has_same_uncharge_info(struct file_region *rg,
> +				   struct hugetlb_cgroup *h_cg,
> +				   struct hstate *h)
> +{
> +#ifdef CONFIG_CGROUP_HUGETLB
> +	return rg &&
> +	       rg->reservation_counter ==
> +		       &h_cg->rsvd_hugepage[hstate_index(h)] &&
> +	       rg->pages_per_hpage == pages_per_huge_page(h) &&
> +	       rg->css == &h_cg->css;
> +
> +#else
> +	return true;
> +#endif
> +}
> +
>  /* Must be called with resv->lock held. Calling this with count_only == true
>   * will count the number of pages to be added but will not modify the linked
>   * list. If regions_needed != NULL and count_only == true, then regions_needed
> @@ -286,7 +302,7 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
>  	long add = 0;
>  	struct list_head *head = &resv->regions;
>  	long last_accounted_offset = f;
> -	struct file_region *rg = NULL, *trg = NULL, *nrg = NULL;
> +	struct file_region *rg = NULL, *trg = NULL, *nrg = NULL, *prg = NULL;
> 
>  	if (regions_needed)
>  		*regions_needed = 0;
> @@ -318,16 +334,34 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,

I seem to be missing something.  For context, here is the beginning of that
loop:

	/* In this loop, we essentially handle an entry for the range
	 * [last_accounted_offset, rg->from), at every iteration, with some
	 * bounds checking.
	 */
	list_for_each_entry_safe(rg, trg, head, link) {
		/* Skip irrelevant regions that start before our range. */
		if (rg->from < f) {
			/* If this region ends after the last accounted offset,
			 * then we need to update last_accounted_offset.
			 */
			if (rg->to > last_accounted_offset)
				last_accounted_offset = rg->to;
			continue;
		}

		/* When we find a region that starts beyond our range, we've
		 * finished.
		 */
		if (rg->from > t)
			break;

Suppose the resv_map contains one entry [0,2) and we are going to add
[2,4).  Will we not 'continue' after the first entry and then exit the
loop without setting prg?  So, there is no chance for coalescing?
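
Walking through the quoted code for that case (f = 2, t = 4, one
existing region [0,2)):

	rg = [0,2): rg->from (0) < f (2), so we 'continue';
	    last_accounted_offset stays at f (2) and the 'prg = rg' at
	    the bottom of the loop body is never reached.
	The loop ends with prg == NULL.
	last_accounted_offset (2) < t (4), but the 'prg && prg->to ==
	    last_accounted_offset' check fails, so a new region [2,4) is
	    allocated instead of [0,2) being extended to [0,4).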
Mina Almasry Feb. 7, 2020, 6:44 p.m. UTC | #2
On Thu, Feb 6, 2020 at 4:17 PM Mike Kravetz <mike.kravetz@oracle.com> wrote:
>
> On 2/3/20 3:22 PM, Mina Almasry wrote:
> > An earlier patch in this series disabled file_region coalescing in order
> > to hang the hugetlb_cgroup uncharge info on the file_region entries.
> >
> > This patch re-adds support for coalescing of file_region entries.
> > Essentially, every time we add an entry, we check whether the
> > hugetlb_cgroup uncharge info is the same as that of any adjacent entry.
> > If it is, instead of adding a new entry we simply extend the
> > appropriate existing entry.
> >
> > This is an important performance optimization, as private mappings add
> > their entries page by page; without coalescing, a large mapping would
> > accumulate a long list of file_region entries in its resv_map and incur
> > a significant performance cost.
> >
> > Signed-off-by: Mina Almasry <almasrymina@google.com>
> >
> > ---
> >  mm/hugetlb.c | 62 +++++++++++++++++++++++++++++++++++++++++++---------
> >  1 file changed, 52 insertions(+), 10 deletions(-)
> >
> > diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> > index ec0b55ea1506e..058dd9c8269cf 100644
> > --- a/mm/hugetlb.c
> > +++ b/mm/hugetlb.c
> > @@ -272,6 +272,22 @@ static void record_hugetlb_cgroup_uncharge_info(struct hugetlb_cgroup *h_cg,
> >  #endif
> >  }
> >
> > +static bool has_same_uncharge_info(struct file_region *rg,
> > +                                struct hugetlb_cgroup *h_cg,
> > +                                struct hstate *h)
> > +{
> > +#ifdef CONFIG_CGROUP_HUGETLB
> > +     return rg &&
> > +            rg->reservation_counter ==
> > +                    &h_cg->rsvd_hugepage[hstate_index(h)] &&
> > +            rg->pages_per_hpage == pages_per_huge_page(h) &&
> > +            rg->css == &h_cg->css;
> > +
> > +#else
> > +     return true;
> > +#endif
> > +}
> > +
> >  /* Must be called with resv->lock held. Calling this with count_only == true
> >   * will count the number of pages to be added but will not modify the linked
> >   * list. If regions_needed != NULL and count_only == true, then regions_needed
> > @@ -286,7 +302,7 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
> >       long add = 0;
> >       struct list_head *head = &resv->regions;
> >       long last_accounted_offset = f;
> > -     struct file_region *rg = NULL, *trg = NULL, *nrg = NULL;
> > +     struct file_region *rg = NULL, *trg = NULL, *nrg = NULL, *prg = NULL;
> >
> >       if (regions_needed)
> >               *regions_needed = 0;
> > @@ -318,16 +334,34 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
>
> I seem to be missing something.  For context, here is the beginning of that
> loop:
>
>         /* In this loop, we essentially handle an entry for the range
>          * [last_accounted_offset, rg->from), at every iteration, with some
>          * bounds checking.
>          */
>         list_for_each_entry_safe(rg, trg, head, link) {
>                 /* Skip irrelevant regions that start before our range. */
>                 if (rg->from < f) {
>                         /* If this region ends after the last accounted offset,
>                          * then we need to update last_accounted_offset.
>                          */
>                         if (rg->to > last_accounted_offset)
>                                 last_accounted_offset = rg->to;
>                         continue;
>                 }
>
>                 /* When we find a region that starts beyond our range, we've
>                  * finished.
>                  */
>                 if (rg->from > t)
>                         break;
>
> Suppose the resv_map contains one entry [0,2) and we are going to add
> [2,4).  Will we not 'continue' after the first entry and then exit the
> loop without setting prg?  So, there is no chance for coalescing?
>

I think you're right; prg needs to be set on every path through the
loop, including the 'continue' and 'break' paths. I think the logic
should work with that added, but I need to find a good way to test it.
I thought I had good test coverage here, but apparently not. I'll fix
this in the next iteration.
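
One way that could look, sketched against the quoted loop (a sketch
only; the next iteration may structure this differently):

	list_for_each_entry_safe(rg, trg, head, link) {
		/* Skip irrelevant regions that start before our range. */
		if (rg->from < f) {
			if (rg->to > last_accounted_offset)
				last_accounted_offset = rg->to;
			/* Remember the skipped region so later
			 * iterations and the trailing block can still
			 * coalesce with it.
			 */
			prg = rg;
			continue;
		}
		...
	}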


> --
> Mike Kravetz
>
> >               if (rg->from > last_accounted_offset) {
> >                       add += rg->from - last_accounted_offset;
> >                       if (!count_only) {
> > -                             nrg = get_file_region_entry_from_cache(
> > -                                     resv, last_accounted_offset, rg->from);
> > -                             record_hugetlb_cgroup_uncharge_info(h_cg, nrg,
> > -                                                                 h);
> > -                             list_add(&nrg->link, rg->link.prev);
> > +                             /* Check if the last region can be extended. */
> > +                             if (prg && prg->to == last_accounted_offset &&
> > +                                 has_same_uncharge_info(prg, h_cg, h)) {
> > +                                     prg->to = rg->from;
> > +                             /* Check if the next region can be extended. */
> > +                             } else if (has_same_uncharge_info(rg, h_cg,
> > +                                                               h)) {
> > +                                     rg->from = last_accounted_offset;
> > +                             /* If neither of the regions can be extended,
> > +                              * add a region.
> > +                              */
> > +                             } else {
> > +                                     nrg = get_file_region_entry_from_cache(
> > +                                             resv, last_accounted_offset,
> > +                                             rg->from);
> > +                                     record_hugetlb_cgroup_uncharge_info(
> > +                                             h_cg, nrg, h);
> > +                                     list_add(&nrg->link, rg->link.prev);
> > +                             }
> >                       } else if (regions_needed)
> >                               *regions_needed += 1;
> >               }
> >
> >               last_accounted_offset = rg->to;
> > +             /* Record rg as the 'previous file region' in case we need it
> > +              * for the next iteration.
> > +              */
> > +             prg = rg;
> >       }
> >
> >       /* Handle the case where our range extends beyond
> > @@ -336,10 +370,18 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
> >       if (last_accounted_offset < t) {
> >               add += t - last_accounted_offset;
> >               if (!count_only) {
> > -                     nrg = get_file_region_entry_from_cache(
> > -                             resv, last_accounted_offset, t);
> > -                     record_hugetlb_cgroup_uncharge_info(h_cg, nrg, h);
> > -                     list_add(&nrg->link, rg->link.prev);
> > +                     /* Check if the last region can be extended. */
> > +                     if (prg && prg->to == last_accounted_offset &&
> > +                         has_same_uncharge_info(prg, h_cg, h)) {
> > +                             prg->to = t;
> > +                     } else {
> > +                             /* If not, just create a new region. */
> > +                             nrg = get_file_region_entry_from_cache(
> > +                                     resv, last_accounted_offset, t);
> > +                             record_hugetlb_cgroup_uncharge_info(h_cg, nrg,
> > +                                                                 h);
> > +                             list_add(&nrg->link, rg->link.prev);
> > +                     }
> >               } else if (regions_needed)
> >                       *regions_needed += 1;
> >       }
> > --
> > 2.25.0.341.g760bfbb309-goog
> >

Patch

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ec0b55ea1506e..058dd9c8269cf 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -272,6 +272,22 @@ static void record_hugetlb_cgroup_uncharge_info(struct hugetlb_cgroup *h_cg,
 #endif
 }

+static bool has_same_uncharge_info(struct file_region *rg,
+				   struct hugetlb_cgroup *h_cg,
+				   struct hstate *h)
+{
+#ifdef CONFIG_CGROUP_HUGETLB
+	return rg &&
+	       rg->reservation_counter ==
+		       &h_cg->rsvd_hugepage[hstate_index(h)] &&
+	       rg->pages_per_hpage == pages_per_huge_page(h) &&
+	       rg->css == &h_cg->css;
+
+#else
+	return true;
+#endif
+}
+
 /* Must be called with resv->lock held. Calling this with count_only == true
  * will count the number of pages to be added but will not modify the linked
  * list. If regions_needed != NULL and count_only == true, then regions_needed
@@ -286,7 +302,7 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
 	long add = 0;
 	struct list_head *head = &resv->regions;
 	long last_accounted_offset = f;
-	struct file_region *rg = NULL, *trg = NULL, *nrg = NULL;
+	struct file_region *rg = NULL, *trg = NULL, *nrg = NULL, *prg = NULL;

 	if (regions_needed)
 		*regions_needed = 0;
@@ -318,16 +334,34 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
 		if (rg->from > last_accounted_offset) {
 			add += rg->from - last_accounted_offset;
 			if (!count_only) {
-				nrg = get_file_region_entry_from_cache(
-					resv, last_accounted_offset, rg->from);
-				record_hugetlb_cgroup_uncharge_info(h_cg, nrg,
-								    h);
-				list_add(&nrg->link, rg->link.prev);
+				/* Check if the last region can be extended. */
+				if (prg && prg->to == last_accounted_offset &&
+				    has_same_uncharge_info(prg, h_cg, h)) {
+					prg->to = rg->from;
+				/* Check if the next region can be extended. */
+				} else if (has_same_uncharge_info(rg, h_cg,
+								  h)) {
+					rg->from = last_accounted_offset;
+				/* If neither of the regions can be extended,
+				 * add a region.
+				 */
+				} else {
+					nrg = get_file_region_entry_from_cache(
+						resv, last_accounted_offset,
+						rg->from);
+					record_hugetlb_cgroup_uncharge_info(
+						h_cg, nrg, h);
+					list_add(&nrg->link, rg->link.prev);
+				}
 			} else if (regions_needed)
 				*regions_needed += 1;
 		}

 		last_accounted_offset = rg->to;
+		/* Record rg as the 'previous file region' in case we need it
+		 * for the next iteration.
+		 */
+		prg = rg;
 	}

 	/* Handle the case where our range extends beyond
@@ -336,10 +370,18 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
 	if (last_accounted_offset < t) {
 		add += t - last_accounted_offset;
 		if (!count_only) {
-			nrg = get_file_region_entry_from_cache(
-				resv, last_accounted_offset, t);
-			record_hugetlb_cgroup_uncharge_info(h_cg, nrg, h);
-			list_add(&nrg->link, rg->link.prev);
+			/* Check if the last region can be extended. */
+			if (prg && prg->to == last_accounted_offset &&
+			    has_same_uncharge_info(prg, h_cg, h)) {
+				prg->to = t;
+			} else {
+				/* If not, just create a new region. */
+				nrg = get_file_region_entry_from_cache(
+					resv, last_accounted_offset, t);
+				record_hugetlb_cgroup_uncharge_info(h_cg, nrg,
+								    h);
+				list_add(&nrg->link, rg->link.prev);
+			}
 		} else if (regions_needed)
 			*regions_needed += 1;
 	}