Message ID | 20210923175347.10727-5-mike.kravetz@oracle.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | hugetlb: add demote/split page functionality | expand |
On 23.09.21 19:53, Mike Kravetz wrote: > Demote page functionality will split a huge page into a number of huge > pages of a smaller size. For example, on x86 a 1GB huge page can be > demoted into 512 2M huge pages. Demotion is done 'in place' by simply > splitting the huge page. > > Added '*_for_demote' wrappers for remove_hugetlb_page, > destroy_compound_gigantic_page and prep_compound_gigantic_page for use > by demote code. > > Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com> > --- > mm/hugetlb.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++--- > 1 file changed, 75 insertions(+), 4 deletions(-) > > diff --git a/mm/hugetlb.c b/mm/hugetlb.c > index 2317d411243d..ab7bd0434057 100644 > --- a/mm/hugetlb.c > +++ b/mm/hugetlb.c > @@ -1260,7 +1260,7 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed) > ((node = hstate_next_node_to_free(hs, mask)) || 1); \ > nr_nodes--) > > -#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE > +/* used to demote non-gigantic_huge pages as well */ > static void __destroy_compound_gigantic_page(struct page *page, > unsigned int order, bool demote) > { > @@ -1283,6 +1283,13 @@ static void __destroy_compound_gigantic_page(struct page *page, > __ClearPageHead(page); > } > > +static void destroy_compound_gigantic_page_for_demote(struct page *page, > + unsigned int order) > +{ > + __destroy_compound_gigantic_page(page, order, true); > +} > + > +#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE > static void destroy_compound_gigantic_page(struct page *page, > unsigned int order) > { > @@ -1428,6 +1435,12 @@ static void remove_hugetlb_page(struct hstate *h, struct page *page, > __remove_hugetlb_page(h, page, adjust_surplus, false); > } > > +static void remove_hugetlb_page_for_demote(struct hstate *h, struct page *page, > + bool adjust_surplus) > +{ > + __remove_hugetlb_page(h, page, adjust_surplus, true); > +} > + > static void add_hugetlb_page(struct hstate *h, struct page *page, > bool adjust_surplus) > { > @@ -1777,6 +1790,12 @@ static bool prep_compound_gigantic_page(struct page *page, unsigned int order) > return __prep_compound_gigantic_page(page, order, false); > } > > +static bool prep_compound_gigantic_page_for_demote(struct page *page, > + unsigned int order) > +{ > + return __prep_compound_gigantic_page(page, order, true); > +} > + > /* > * PageHuge() only returns true for hugetlbfs pages, but not for normal or > * transparent huge pages. See the PageTransHuge() documentation for more > @@ -3298,9 +3317,55 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid, > return 0; > } > > +static int demote_free_huge_page(struct hstate *h, struct page *page) > +{ > + int i, nid = page_to_nid(page); > + struct hstate *target_hstate; > + bool cma_page = HPageCma(page); > + > + target_hstate = size_to_hstate(PAGE_SIZE << h->demote_order); > + > + remove_hugetlb_page_for_demote(h, page, false); > + spin_unlock_irq(&hugetlb_lock); > + > + if (alloc_huge_page_vmemmap(h, page)) { > + /* Allocation of vmemmmap failed, we can not demote page */ > + spin_lock_irq(&hugetlb_lock); > + set_page_refcounted(page); > + add_hugetlb_page(h, page, false); I dislike using 0/1 as return values as it will just hide the actual issue. This here would be -ENOMEM, right?
On 9/24/21 2:44 AM, David Hildenbrand wrote: > On 23.09.21 19:53, Mike Kravetz wrote: >> + >> + if (alloc_huge_page_vmemmap(h, page)) { >> + /* Allocation of vmemmmap failed, we can not demote page */ >> + spin_lock_irq(&hugetlb_lock); >> + set_page_refcounted(page); >> + add_hugetlb_page(h, page, false); > > I dislike using 0/1 as return values as it will just hide the actual issue. > > This here would be -ENOMEM, right? > I will pass along the return value from alloc_huge_page_vmemmap(). You are right, -ENOMEM is the only non-zero value. Thanks,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 2317d411243d..ab7bd0434057 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1260,7 +1260,7 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed) ((node = hstate_next_node_to_free(hs, mask)) || 1); \ nr_nodes--) -#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE +/* used to demote non-gigantic_huge pages as well */ static void __destroy_compound_gigantic_page(struct page *page, unsigned int order, bool demote) { @@ -1283,6 +1283,13 @@ static void __destroy_compound_gigantic_page(struct page *page, __ClearPageHead(page); } +static void destroy_compound_gigantic_page_for_demote(struct page *page, + unsigned int order) +{ + __destroy_compound_gigantic_page(page, order, true); +} + +#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE static void destroy_compound_gigantic_page(struct page *page, unsigned int order) { @@ -1428,6 +1435,12 @@ static void remove_hugetlb_page(struct hstate *h, struct page *page, __remove_hugetlb_page(h, page, adjust_surplus, false); } +static void remove_hugetlb_page_for_demote(struct hstate *h, struct page *page, + bool adjust_surplus) +{ + __remove_hugetlb_page(h, page, adjust_surplus, true); +} + static void add_hugetlb_page(struct hstate *h, struct page *page, bool adjust_surplus) { @@ -1777,6 +1790,12 @@ static bool prep_compound_gigantic_page(struct page *page, unsigned int order) return __prep_compound_gigantic_page(page, order, false); } +static bool prep_compound_gigantic_page_for_demote(struct page *page, + unsigned int order) +{ + return __prep_compound_gigantic_page(page, order, true); +} + /* * PageHuge() only returns true for hugetlbfs pages, but not for normal or * transparent huge pages. See the PageTransHuge() documentation for more @@ -3298,9 +3317,55 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid, return 0; } +static int demote_free_huge_page(struct hstate *h, struct page *page) +{ + int i, nid = page_to_nid(page); + struct hstate *target_hstate; + bool cma_page = HPageCma(page); + + target_hstate = size_to_hstate(PAGE_SIZE << h->demote_order); + + remove_hugetlb_page_for_demote(h, page, false); + spin_unlock_irq(&hugetlb_lock); + + if (alloc_huge_page_vmemmap(h, page)) { + /* Allocation of vmemmmap failed, we can not demote page */ + spin_lock_irq(&hugetlb_lock); + set_page_refcounted(page); + add_hugetlb_page(h, page, false); + return 1; + } + + /* + * Use destroy_compound_gigantic_page_for_demote for all huge page + * sizes as it will not ref count pages. + */ + destroy_compound_gigantic_page_for_demote(page, huge_page_order(h)); + + for (i = 0; i < pages_per_huge_page(h); + i += pages_per_huge_page(target_hstate)) { + if (hstate_is_gigantic(target_hstate)) + prep_compound_gigantic_page_for_demote(page + i, + target_hstate->order); + else + prep_compound_page(page + i, target_hstate->order); + set_page_private(page + i, 0); + set_page_refcounted(page + i); + prep_new_huge_page(target_hstate, page + i, nid); + if (cma_page) + SetHPageCma(page + i); + put_page(page + i); + } + + spin_lock_irq(&hugetlb_lock); + return 0; +} + static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed) __must_hold(&hugetlb_lock) { + int nr_nodes, node; + struct page *page; int rc = 0; lockdep_assert_held(&hugetlb_lock); @@ -3309,9 +3374,15 @@ static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed) if (!h->demote_order) return rc; - /* - * TODO - demote fucntionality will be added in subsequent patch - */ + for_each_node_mask_to_free(h, nr_nodes, node, nodes_allowed) { + if (!list_empty(&h->hugepage_freelists[node])) { + page = list_entry(h->hugepage_freelists[node].next, + struct page, lru); + rc = !demote_free_huge_page(h, page); + break; + } + } + return rc; }
Demote page functionality will split a huge page into a number of huge pages of a smaller size. For example, on x86 a 1GB huge page can be demoted into 512 2M huge pages. Demotion is done 'in place' by simply splitting the huge page. Added '*_for_demote' wrappers for remove_hugetlb_page, destroy_compound_gigantic_page and prep_compound_gigantic_page for use by demote code. Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com> --- mm/hugetlb.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 75 insertions(+), 4 deletions(-)