| Message ID | 1626077374-81682-5-git-send-email-feng.tang@intel.com (mailing list archive) |
|---|---|
| State | New |
| Series | Introduce multi-preference mempolicy |
On 7/12/21 1:09 AM, Feng Tang wrote:
> From: Ben Widawsky <ben.widawsky@intel.com>
>
> Implement the missing huge page allocation functionality while obeying
> the preferred node semantics. This is similar to the implementation
> for general page allocation, as it uses a fallback mechanism to try
> multiple preferred nodes first, and then all other nodes.
>
> [Thanks to 0day bot for catching the missing #ifdef CONFIG_NUMA issue]
>
> Link: https://lore.kernel.org/r/20200630212517.308045-12-ben.widawsky@intel.com
> Suggested-by: Michal Hocko <mhocko@suse.com>
> Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
> Co-developed-by: Feng Tang <feng.tang@intel.com>
> Signed-off-by: Feng Tang <feng.tang@intel.com>
> ---
>  mm/hugetlb.c   | 25 +++++++++++++++++++++++++
>  mm/mempolicy.c |  3 ++-
>  2 files changed, 27 insertions(+), 1 deletion(-)
>
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 924553aa8f78..3e84508c1b8c 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1164,7 +1164,18 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
>
>  	gfp_mask = htlb_alloc_mask(h);
>  	nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
> +#ifdef CONFIG_NUMA
> +	if (mpol->mode == MPOL_PREFERRED_MANY) {
> +		page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
> +		if (page)
> +			goto check_reserve;
> +		/* Fallback to all nodes */
> +		nodemask = NULL;
> +	}
> +#endif
>  	page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
> +
> +check_reserve:
>  	if (page && !avoid_reserve && vma_has_reserves(vma, chg)) {
>  		SetHPageRestoreReserve(page);
>  		h->resv_huge_pages--;
> @@ -2095,6 +2106,20 @@ struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h,
>  	nodemask_t *nodemask;
>
>  	nid = huge_node(vma, addr, gfp_mask, &mpol, &nodemask);
> +#ifdef CONFIG_NUMA
> +	if (mpol->mode == MPOL_PREFERRED_MANY) {
> +		gfp_t gfp = (gfp_mask | __GFP_NOWARN) & ~__GFP_DIRECT_RECLAIM;

I believe __GFP_NOWARN will be added later in alloc_buddy_huge_page, so
no need to add here?

> +
> +		page = alloc_surplus_huge_page(h, gfp, nid, nodemask);
> +		if (page) {
> +			mpol_cond_put(mpol);
> +			return page;
> +		}
> +
> +		/* Fallback to all nodes */
> +		nodemask = NULL;
> +	}
> +#endif
>  	page = alloc_surplus_huge_page(h, gfp_mask, nid, nodemask);
>  	mpol_cond_put(mpol);
>
> diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> index 9dce67fc9bb6..93f8789758a7 100644
> --- a/mm/mempolicy.c
> +++ b/mm/mempolicy.c
> @@ -2054,7 +2054,8 @@ int huge_node(struct vm_area_struct *vma, unsigned long addr, gfp_t gfp_flags,
>  					huge_page_shift(hstate_vma(vma)));
>  	} else {
>  		nid = policy_node(gfp_flags, *mpol, numa_node_id());
> -		if ((*mpol)->mode == MPOL_BIND)
> +		if ((*mpol)->mode == MPOL_BIND ||
> +		    (*mpol)->mode == MPOL_PREFERRED_MANY)
>  			*nodemask = &(*mpol)->nodes;
>  	}
>  	return nid;
>

Other than the one nit above,

Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Mike,

On Wed, Jul 21, 2021 at 01:49:15PM -0700, Mike Kravetz wrote:
> On 7/12/21 1:09 AM, Feng Tang wrote:
> > From: Ben Widawsky <ben.widawsky@intel.com>
> >
> > Implement the missing huge page allocation functionality while obeying
> > the preferred node semantics. This is similar to the implementation
> > for general page allocation, as it uses a fallback mechanism to try
> > multiple preferred nodes first, and then all other nodes.
> >
> > [Thanks to 0day bot for catching the missing #ifdef CONFIG_NUMA issue]
> >
> > Link: https://lore.kernel.org/r/20200630212517.308045-12-ben.widawsky@intel.com
> > Suggested-by: Michal Hocko <mhocko@suse.com>
> > Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
> > Co-developed-by: Feng Tang <feng.tang@intel.com>
> > Signed-off-by: Feng Tang <feng.tang@intel.com>
> > ---
> >  mm/hugetlb.c   | 25 +++++++++++++++++++++++++
> >  mm/mempolicy.c |  3 ++-
> >  2 files changed, 27 insertions(+), 1 deletion(-)
> >
> > diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> > index 924553aa8f78..3e84508c1b8c 100644
> > --- a/mm/hugetlb.c
> > +++ b/mm/hugetlb.c
> > @@ -1164,7 +1164,18 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
> >
> >  	gfp_mask = htlb_alloc_mask(h);
> >  	nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
> > +#ifdef CONFIG_NUMA
> > +	if (mpol->mode == MPOL_PREFERRED_MANY) {
> > +		page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
> > +		if (page)
> > +			goto check_reserve;
> > +		/* Fallback to all nodes */
> > +		nodemask = NULL;
> > +	}
> > +#endif
> >  	page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
> > +
> > +check_reserve:
> >  	if (page && !avoid_reserve && vma_has_reserves(vma, chg)) {
> >  		SetHPageRestoreReserve(page);
> >  		h->resv_huge_pages--;
> > @@ -2095,6 +2106,20 @@ struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h,
> >  	nodemask_t *nodemask;
> >
> >  	nid = huge_node(vma, addr, gfp_mask, &mpol, &nodemask);
> > +#ifdef CONFIG_NUMA
> > +	if (mpol->mode == MPOL_PREFERRED_MANY) {
> > +		gfp_t gfp = (gfp_mask | __GFP_NOWARN) & ~__GFP_DIRECT_RECLAIM;
>
> I believe __GFP_NOWARN will be added later in alloc_buddy_huge_page, so
> no need to add here?

Thanks for the suggestion, will remove it.

> > +
> > +		page = alloc_surplus_huge_page(h, gfp, nid, nodemask);
> > +		if (page) {
> > +			mpol_cond_put(mpol);
> > +			return page;
> > +		}
> > +
> > +		/* Fallback to all nodes */
> > +		nodemask = NULL;
> > +	}
> > +#endif
> >  	page = alloc_surplus_huge_page(h, gfp_mask, nid, nodemask);
> >  	mpol_cond_put(mpol);
> >
> > diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> > index 9dce67fc9bb6..93f8789758a7 100644
> > --- a/mm/mempolicy.c
> > +++ b/mm/mempolicy.c
> > @@ -2054,7 +2054,8 @@ int huge_node(struct vm_area_struct *vma, unsigned long addr, gfp_t gfp_flags,
> >  					huge_page_shift(hstate_vma(vma)));
> >  	} else {
> >  		nid = policy_node(gfp_flags, *mpol, numa_node_id());
> > -		if ((*mpol)->mode == MPOL_BIND)
> > +		if ((*mpol)->mode == MPOL_BIND ||
> > +		    (*mpol)->mode == MPOL_PREFERRED_MANY)
> >  			*nodemask = &(*mpol)->nodes;
> >  	}
> >  	return nid;
> >
>
> Other than the one nit above,
>
> Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>

Thanks!

Andrew, I have to ask for your help again to fold this to the 4/6 patch, thanks!
- Feng

---------------------------8<--------------------------------------------

From de1cd29d8da96856a6d754a30a4c7585d87b8348 Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Thu, 22 Jul 2021 16:00:49 +0800
Subject: [PATCH] mm/hugetlb: remove the unneeded __GFP_NOWARN flag setting

As the alloc_buddy_huge_page() will set it anyway.

Suggested-by: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Feng Tang <feng.tang@intel.com>
---
 mm/hugetlb.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 528947d..a96e283 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2162,9 +2162,9 @@ struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h,
 	nid = huge_node(vma, addr, gfp_mask, &mpol, &nodemask);
 #ifdef CONFIG_NUMA
 	if (mpol->mode == MPOL_PREFERRED_MANY) {
-		gfp_t gfp = (gfp_mask | __GFP_NOWARN) & ~__GFP_DIRECT_RECLAIM;
-
-		page = alloc_surplus_huge_page(h, gfp, nid, nodemask, false);
+		page = alloc_surplus_huge_page(h,
+				gfp_mask & ~__GFP_DIRECT_RECLAIM,
+				nid, nodemask, false);
 		if (page) {
 			mpol_cond_put(mpol);
 			return page;
On Wed 21-07-21 13:49:15, Mike Kravetz wrote:
> On 7/12/21 1:09 AM, Feng Tang wrote:
[...]
> > +#ifdef CONFIG_NUMA
> > +	if (mpol->mode == MPOL_PREFERRED_MANY) {
> > +		gfp_t gfp = (gfp_mask | __GFP_NOWARN) & ~__GFP_DIRECT_RECLAIM;
>
> I believe __GFP_NOWARN will be added later in alloc_buddy_huge_page, so
> no need to add here?

The mask is manipulated here anyway and the __GFP_NOWARN is really
telling that there is no need to report the failure for _this_
allocation request. alloc_surplus_huge_page might alter that in whatever
way in the future. So I would keep NOWARN here for the code clarity
rather than rely on some implicit assumption down the path.
On 7/22/21 2:42 AM, Michal Hocko wrote:
> On Wed 21-07-21 13:49:15, Mike Kravetz wrote:
>> On 7/12/21 1:09 AM, Feng Tang wrote:
> [...]
>>> +#ifdef CONFIG_NUMA
>>> +	if (mpol->mode == MPOL_PREFERRED_MANY) {
>>> +		gfp_t gfp = (gfp_mask | __GFP_NOWARN) & ~__GFP_DIRECT_RECLAIM;
>>
>> I believe __GFP_NOWARN will be added later in alloc_buddy_huge_page, so
>> no need to add here?
>
> The mask is manipulated here anyway and the __GFP_NOWARN is really
> telling that there is no need to report the failure for _this_
> allocation request. alloc_surplus_huge_page might alter that in whatever
> way in the future. So I would keep NOWARN here for the code clarity
> rather than rely on some implicit assumption down the path.

Makes sense. Better to leave the __GFP_NOWARN here for clarity.
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 924553aa8f78..3e84508c1b8c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1164,7 +1164,18 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
 
 	gfp_mask = htlb_alloc_mask(h);
 	nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
+#ifdef CONFIG_NUMA
+	if (mpol->mode == MPOL_PREFERRED_MANY) {
+		page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
+		if (page)
+			goto check_reserve;
+		/* Fallback to all nodes */
+		nodemask = NULL;
+	}
+#endif
 	page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
+
+check_reserve:
 	if (page && !avoid_reserve && vma_has_reserves(vma, chg)) {
 		SetHPageRestoreReserve(page);
 		h->resv_huge_pages--;
@@ -2095,6 +2106,20 @@ struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h,
 	nodemask_t *nodemask;
 
 	nid = huge_node(vma, addr, gfp_mask, &mpol, &nodemask);
+#ifdef CONFIG_NUMA
+	if (mpol->mode == MPOL_PREFERRED_MANY) {
+		gfp_t gfp = (gfp_mask | __GFP_NOWARN) & ~__GFP_DIRECT_RECLAIM;
+
+		page = alloc_surplus_huge_page(h, gfp, nid, nodemask);
+		if (page) {
+			mpol_cond_put(mpol);
+			return page;
+		}
+
+		/* Fallback to all nodes */
+		nodemask = NULL;
+	}
+#endif
 	page = alloc_surplus_huge_page(h, gfp_mask, nid, nodemask);
 	mpol_cond_put(mpol);
 
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 9dce67fc9bb6..93f8789758a7 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2054,7 +2054,8 @@ int huge_node(struct vm_area_struct *vma, unsigned long addr, gfp_t gfp_flags,
 					huge_page_shift(hstate_vma(vma)));
 	} else {
 		nid = policy_node(gfp_flags, *mpol, numa_node_id());
-		if ((*mpol)->mode == MPOL_BIND)
+		if ((*mpol)->mode == MPOL_BIND ||
+		    (*mpol)->mode == MPOL_PREFERRED_MANY)
 			*nodemask = &(*mpol)->nodes;
 	}
 	return nid;
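
For context, the hugetlb paths touched above are reached when a task has set the new policy and then faults in huge pages. The following is a minimal userspace sketch, not part of this patch: it assumes MPOL_PREFERRED_MANY will be exposed through the uapi header (the value 5 below is a guess for headers that do not yet define it), and it uses libnuma's set_mempolicy() wrapper from numaif.h.

/*
 * Illustrative sketch only -- not part of this series.
 * Build with: gcc demo.c -lnuma
 * Requires huge pages reserved in the pool beforehand,
 * e.g. echo 16 > /proc/sys/vm/nr_hugepages
 */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <numaif.h>			/* set_mempolicy(), MPOL_* */

#ifndef MPOL_PREFERRED_MANY
#define MPOL_PREFERRED_MANY	5	/* assumed value once the series lands */
#endif

int main(void)
{
	/* Prefer nodes 0 and 2; the kernel may fall back to other nodes. */
	unsigned long nodemask = (1UL << 0) | (1UL << 2);
	size_t len = 2UL << 20;		/* one 2MB huge page */
	void *p;

	if (set_mempolicy(MPOL_PREFERRED_MANY, &nodemask,
			  sizeof(nodemask) * 8) < 0) {
		perror("set_mempolicy");
		return 1;
	}

	/* Hugetlb mapping; allocation goes through dequeue_huge_page_vma() */
	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		 MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap(MAP_HUGETLB)");
		return 1;
	}
	memset(p, 0, len);		/* fault the huge page in */
	munmap(p, len);
	return 0;
}

With this policy, the dequeue/alloc_surplus calls above first try the preferred nodes with the node mask, then retry with a NULL mask (all nodes), which is the fallback behavior described in the commit message.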