Message ID | 20210317111251.17808-5-osalvador@suse.de (mailing list archive)
---        | ---
State      | New, archived
Series     | Make alloc_contig_range handle Hugetlb pages
On Wed 17-03-21 12:12:50, Oscar Salvador wrote:
> alloc_contig_range() will fail if it finds a HugeTLB page within the range,
> without a chance to handle them. Since HugeTLB pages can be migrated as any
> LRU or Movable page, it does not make sense to bail out without trying.
> Enable the interface to recognize in-use HugeTLB pages so we can migrate
> them, and have much better chances to succeed the call.
>
> Signed-off-by: Oscar Salvador <osalvador@suse.de>
> Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>

Acked-by: Michal Hocko <mhocko@suse.com>

I am still not entirely happy about this:

> @@ -2347,7 +2351,19 @@ int isolate_or_dissolve_huge_page(struct page *page)
>  	if (hstate_is_gigantic(h))
>  		return -ENOMEM;
>
> -	return alloc_and_dissolve_huge_page(h, head);
> +retry:
> +	if (page_count(head) && isolate_huge_page(head, list)) {
> +		ret = 0;
> +	} else if (!page_count(head)) {
> +		ret = alloc_and_dissolve_huge_page(h, head);
> +
> +		if (ret == -EBUSY && try_again) {
> +			try_again = false;
> +			goto retry;
> +		}
> +	}
> +
> +	return ret;
>  }

it would be imho better to retry inside alloc_and_dissolve_huge_page
because it already has its retry logic implemented. But not something I
will insist on.
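For context, a minimal sketch (not code from the thread) of the callee-side retry being suggested here: alloc_and_dissolve_huge_page() already re-examines the old page's state under hugetlb_lock, so a transient state can loop back to that check instead of reporting -EBUSY to the caller. Allocation of the new page and the actual replacement are elided; only the retry structure is shown.

static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page)
{
	int ret = 0;

	/* new_page would be allocated up front, as in the posted patch */
retry:
	spin_lock(&hugetlb_lock);
	if (!PageHuge(old_page)) {
		/* Dissolved from under us, nothing left to do. */
	} else if (page_count(old_page)) {
		/* In use: the caller can attempt isolation instead. */
		ret = -EBUSY;
	} else if (!HPageFreed(old_page)) {
		/*
		 * Transient state: the page has been freed but is not yet
		 * on the free lists. Back off and retry right here rather
		 * than making the caller loop on -EBUSY.
		 */
		spin_unlock(&hugetlb_lock);
		cond_resched();
		goto retry;
	} else {
		/* Free and settled: replace old_page with new_page here. */
	}
	spin_unlock(&hugetlb_lock);
	return ret;
}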
On Wed, Mar 17, 2021 at 03:26:50PM +0100, Michal Hocko wrote:
> it would be imho better to retry inside alloc_and_dissolve_huge_page
> because it already has its retry logic implemented.
>
> But not something I will insist on.

Ok, what about this (I did not even compile it yet, but it gives a rough
idea):

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index bcff86ca616f..a37b4ce86e58 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -583,7 +583,7 @@ struct huge_bootmem_page {
 	struct hstate *hstate;
 };
 
-int isolate_or_dissolve_huge_page(struct page *page);
+int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
 struct page *alloc_huge_page(struct vm_area_struct *vma,
 				unsigned long addr, int avoid_reserve);
 struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
@@ -866,7 +866,8 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
 #else	/* CONFIG_HUGETLB_PAGE */
 struct hstate {};
 
-static inline int isolate_or_dissolve_huge_page(struct page *page)
+static inline int isolate_or_dissolve_huge_page(struct page *page,
+						struct list_head *list)
 {
 	return -ENOMEM;
 }
diff --git a/mm/compaction.c b/mm/compaction.c
index 9f253fc3b4f9..6e47855fd154 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -910,7 +910,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 		}
 
 		if (PageHuge(page) && cc->alloc_contig) {
-			ret = isolate_or_dissolve_huge_page(page);
+			ret = isolate_or_dissolve_huge_page(page, &cc->migratepages);
 
 			/*
 			 * Fail isolation in case isolate_or_dissolve_huge_page
@@ -927,6 +927,15 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 				goto isolate_fail;
 			}
 
+			if (PageHuge(page)) {
+				/*
+				 * Hugepage was successfully isolated and placed
+				 * on the cc->migratepages list.
+				 */
+				low_pfn += compound_nr(page) - 1;
+				goto isolate_success_no_list;
+			}
+
 			/*
 			 * Ok, the hugepage was dissolved. Now these pages are
 			 * Buddy and cannot be re-allocated because they are
@@ -1068,6 +1077,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 
 isolate_success:
 		list_add(&page->lru, &cc->migratepages);
+isolate_success_no_list:
 		cc->nr_migratepages += compound_nr(page);
 		nr_isolated += compound_nr(page);
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3194c1bd9e32..87227224c03b 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2257,7 +2257,8 @@ static void restore_reserve_on_error(struct hstate *h,
  * Returns 0 on success, otherwise negated error.
  */
-static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page)
+static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page,
+					struct list_head *list)
 {
 	gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
 	int nid = page_to_nid(old_page);
@@ -2287,10 +2288,12 @@ static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page)
 		goto unlock;
 	} else if (page_count(old_page)) {
 		/*
-		 * Someone has grabbed the page, fail for now.
+		 * Someone has grabbed the page, try to isolate it here.
+		 * Fail with -EBUSY if not possible.
 		 */
-		ret = -EBUSY;
 		update_and_free_page(h, new_page);
+		if (!isolate_huge_page(old_page, list)
+			ret = -EBUSY;
 		goto unlock;
 	} else if (!HPageFreed(old_page)) {
 		/*
@@ -2319,10 +2322,11 @@ static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page)
 	return ret;
 }
 
-int isolate_or_dissolve_huge_page(struct page *page)
+int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list)
 {
 	struct hstate *h;
 	struct page *head;
+	int ret = -EBUSY;
 
 	/*
 	 * The page might have been dissolved from under our feet, so make sure
@@ -2347,7 +2351,12 @@ int isolate_or_dissolve_huge_page(struct page *page)
 	if (hstate_is_gigantic(h))
 		return -ENOMEM;
 
-	return alloc_and_dissolve_huge_page(h, head);
+	if (page_count(head) && isolate_huge_page(head, list))
+		ret = 0;
+	else if (!page_count(head))
+		ret = alloc_and_dissolve_huge_page(h, head, list);
+
+	return ret;
 }
 
 struct page *alloc_huge_page(struct vm_area_struct *vma,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 562e87cbd7a1..42aaef30633e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1507,8 +1507,9 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
 	LIST_HEAD(clean_pages);
 
 	list_for_each_entry_safe(page, next, page_list, lru) {
-		if (page_is_file_lru(page) && !PageDirty(page) &&
-		    !__PageMovable(page) && !PageUnevictable(page)) {
+		if (!PageHuge(page) && page_is_file_lru(page) &&
+		    !PageDirty(page) && !__PageMovable(page) &&
+		    !PageUnevictable(page)) {
 			ClearPageActive(page);
 			list_move(&page->lru, &clean_pages);
 		}

-- 
2.16.3
On Thu 18-03-21 09:54:01, Oscar Salvador wrote:
[...]
> @@ -2287,10 +2288,12 @@ static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page)
>  		goto unlock;
>  	} else if (page_count(old_page)) {
>  		/*
> -		 * Someone has grabbed the page, fail for now.
> +		 * Someone has grabbed the page, try to isolate it here.
> +		 * Fail with -EBUSY if not possible.
>  		 */
> -		ret = -EBUSY;
>  		update_and_free_page(h, new_page);
> +		if (!isolate_huge_page(old_page, list)
> +			ret = -EBUSY;
>  		goto unlock;
>  	} else if (!HPageFreed(old_page)) {

I do not think you want to call isolate_huge_page with hugetlb_lock
held. You would need to drop the lock before calling isolate_huge_page.
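The underlying problem, for readers following along (an illustrative reconstruction, not text from the thread): isolate_huge_page() acquires hugetlb_lock itself, so invoking it with the lock already held would self-deadlock. The v5.12-era implementation looked roughly like this:

bool isolate_huge_page(struct page *page, struct list_head *list)
{
	bool ret = true;

	spin_lock(&hugetlb_lock);	/* deadlocks if the caller already holds it */
	if (!PageHeadHuge(page) || !HPageMigratable(page) ||
	    !get_page_unless_zero(page)) {
		ret = false;
		goto unlock;
	}
	ClearHPageMigratable(page);
	list_move_tail(&page->lru, list);
unlock:
	spin_unlock(&hugetlb_lock);
	return ret;
}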
On Thu, Mar 18, 2021 at 10:29:57AM +0100, Michal Hocko wrote:
> On Thu 18-03-21 09:54:01, Oscar Salvador wrote:
> [...]
> > @@ -2287,10 +2288,12 @@ static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page)
> >  		goto unlock;
> >  	} else if (page_count(old_page)) {
> >  		/*
> > -		 * Someone has grabbed the page, fail for now.
> > +		 * Someone has grabbed the page, try to isolate it here.
> > +		 * Fail with -EBUSY if not possible.
> >  		 */
> > -		ret = -EBUSY;
> >  		update_and_free_page(h, new_page);
> > +		if (!isolate_huge_page(old_page, list)
> > +			ret = -EBUSY;
> >  		goto unlock;
> >  	} else if (!HPageFreed(old_page)) {
>
> I do not think you want to call isolate_huge_page with hugetlb_lock
> held. You would need to drop the lock before calling isolate_huge_page.

Yeah, that was an oversight. As I said, I did not compile it (let alone
test it), otherwise the system would have screamed, I guess.

I was more interested in knowing how it looked wrt. the retry concerns:

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index bcff86ca616f..a37b4ce86e58 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -583,7 +583,7 @@ struct huge_bootmem_page {
 	struct hstate *hstate;
 };
 
-int isolate_or_dissolve_huge_page(struct page *page);
+int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
 struct page *alloc_huge_page(struct vm_area_struct *vma,
 				unsigned long addr, int avoid_reserve);
 struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
@@ -866,7 +866,8 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
 #else	/* CONFIG_HUGETLB_PAGE */
 struct hstate {};
 
-static inline int isolate_or_dissolve_huge_page(struct page *page)
+static inline int isolate_or_dissolve_huge_page(struct page *page,
+						struct list_head *list)
 {
 	return -ENOMEM;
 }
diff --git a/mm/compaction.c b/mm/compaction.c
index 9f253fc3b4f9..6e47855fd154 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -910,7 +910,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 		}
 
 		if (PageHuge(page) && cc->alloc_contig) {
-			ret = isolate_or_dissolve_huge_page(page);
+			ret = isolate_or_dissolve_huge_page(page, &cc->migratepages);
 
 			/*
 			 * Fail isolation in case isolate_or_dissolve_huge_page
@@ -927,6 +927,15 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 				goto isolate_fail;
 			}
 
+			if (PageHuge(page)) {
+				/*
+				 * Hugepage was successfully isolated and placed
+				 * on the cc->migratepages list.
+				 */
+				low_pfn += compound_nr(page) - 1;
+				goto isolate_success_no_list;
+			}
+
 			/*
 			 * Ok, the hugepage was dissolved. Now these pages are
 			 * Buddy and cannot be re-allocated because they are
@@ -1068,6 +1077,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 
 isolate_success:
 		list_add(&page->lru, &cc->migratepages);
+isolate_success_no_list:
 		cc->nr_migratepages += compound_nr(page);
 		nr_isolated += compound_nr(page);
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3194c1bd9e32..f55fa6acc6f9 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2257,7 +2257,8 @@ static void restore_reserve_on_error(struct hstate *h,
  * Returns 0 on success, otherwise negated error.
  */
-static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page)
+static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page,
+					struct list_head *list)
 {
 	gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
 	int nid = page_to_nid(old_page);
@@ -2287,10 +2288,14 @@ static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page)
 		goto unlock;
 	} else if (page_count(old_page)) {
 		/*
-		 * Someone has grabbed the page, fail for now.
+		 * Someone has grabbed the page, try to isolate it here.
+		 * Fail with -EBUSY if not possible.
 		 */
-		ret = -EBUSY;
 		update_and_free_page(h, new_page);
+		spin_unlock(&hugetlb_lock);
+		if (!isolate_huge_page(old_page, list)
+			ret = -EBUSY;
+		spin_lock(&hugetlb_lock);
 		goto unlock;
 	} else if (!HPageFreed(old_page)) {
 		/*
@@ -2319,10 +2324,11 @@ static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page)
 	return ret;
 }
 
-int isolate_or_dissolve_huge_page(struct page *page)
+int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list)
 {
 	struct hstate *h;
 	struct page *head;
+	int ret = -EBUSY;
 
 	/*
 	 * The page might have been dissolved from under our feet, so make sure
@@ -2347,7 +2353,12 @@ int isolate_or_dissolve_huge_page(struct page *page)
 	if (hstate_is_gigantic(h))
 		return -ENOMEM;
 
-	return alloc_and_dissolve_huge_page(h, head);
+	if (page_count(head) && isolate_huge_page(head, list))
+		ret = 0;
+	else if (!page_count(head))
+		ret = alloc_and_dissolve_huge_page(h, head, list);
+
+	return ret;
 }
 
 struct page *alloc_huge_page(struct vm_area_struct *vma,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 562e87cbd7a1..42aaef30633e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1507,8 +1507,9 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
 	LIST_HEAD(clean_pages);
 
 	list_for_each_entry_safe(page, next, page_list, lru) {
-		if (page_is_file_lru(page) && !PageDirty(page) &&
-		    !__PageMovable(page) && !PageUnevictable(page)) {
+		if (!PageHuge(page) && page_is_file_lru(page) &&
+		    !PageDirty(page) && !__PageMovable(page) &&
+		    !PageUnevictable(page)) {
 			ClearPageActive(page);
 			list_move(&page->lru, &clean_pages);
 		}

The spin_lock after the isolate_huge_page() in
alloc_and_dissolve_huge_page() could probably be spared by placing a
goto out directly before the return. But this is just a POC.
On Thu 18-03-21 10:59:10, Oscar Salvador wrote:
> On Thu, Mar 18, 2021 at 10:29:57AM +0100, Michal Hocko wrote:
> > On Thu 18-03-21 09:54:01, Oscar Salvador wrote:
> > [...]
> > > @@ -2287,10 +2288,12 @@ static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page)
> > >  		goto unlock;
> > >  	} else if (page_count(old_page)) {
> > >  		/*
> > > -		 * Someone has grabbed the page, fail for now.
> > > +		 * Someone has grabbed the page, try to isolate it here.
> > > +		 * Fail with -EBUSY if not possible.
> > >  		 */
> > > -		ret = -EBUSY;
> > >  		update_and_free_page(h, new_page);
> > > +		if (!isolate_huge_page(old_page, list)
> > > +			ret = -EBUSY;
> > >  		goto unlock;
> > >  	} else if (!HPageFreed(old_page)) {
> >
> > I do not think you want to call isolate_huge_page with hugetlb_lock
> > held. You would need to drop the lock before calling isolate_huge_page.
>
> Yeah, that was an oversight. As I said, I did not compile it (let alone
> test it), otherwise the system would have screamed, I guess.
>
> I was more interested in knowing how it looked wrt. the retry concerns:

Yes, this looks much more to my taste. If we need to retry then it could
just goto retry there. The caller doesn't really have to care.

> @@ -2287,10 +2288,14 @@ static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page)
>  		goto unlock;
>  	} else if (page_count(old_page)) {
>  		/*
> -		 * Someone has grabbed the page, fail for now.
> +		 * Someone has grabbed the page, try to isolate it here.
> +		 * Fail with -EBUSY if not possible.
>  		 */
> -		ret = -EBUSY;
>  		update_and_free_page(h, new_page);
> +		spin_unlock(&hugetlb_lock);
> +		if (!isolate_huge_page(old_page, list)
> +			ret = -EBUSY;
> +		spin_lock(&hugetlb_lock);
>  		goto unlock;

simply return ret; here
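Applying that suggestion, the branch would look roughly like the sketch below (illustrative only; the missing closing parenthesis from the POC is also fixed here): once the lock has been dropped for isolate_huge_page(), nothing remains to be done under it, so the branch can return directly instead of re-acquiring the lock just to reach the unlock label.

	} else if (page_count(old_page)) {
		/*
		 * Someone has grabbed the page, try to isolate it here.
		 * Fail with -EBUSY if not possible.
		 */
		update_and_free_page(h, new_page);
		spin_unlock(&hugetlb_lock);
		if (!isolate_huge_page(old_page, list))
			ret = -EBUSY;
		return ret;	/* nothing left to do under the lock */
	} else if (!HPageFreed(old_page)) {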
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index bcff86ca616f..a37b4ce86e58 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -583,7 +583,7 @@ struct huge_bootmem_page {
 	struct hstate *hstate;
 };
 
-int isolate_or_dissolve_huge_page(struct page *page);
+int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
 struct page *alloc_huge_page(struct vm_area_struct *vma,
 				unsigned long addr, int avoid_reserve);
 struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
@@ -866,7 +866,8 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
 #else	/* CONFIG_HUGETLB_PAGE */
 struct hstate {};
 
-static inline int isolate_or_dissolve_huge_page(struct page *page)
+static inline int isolate_or_dissolve_huge_page(struct page *page,
+						struct list_head *list)
 {
 	return -ENOMEM;
 }
diff --git a/mm/compaction.c b/mm/compaction.c
index 9f253fc3b4f9..6e47855fd154 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -910,7 +910,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 		}
 
 		if (PageHuge(page) && cc->alloc_contig) {
-			ret = isolate_or_dissolve_huge_page(page);
+			ret = isolate_or_dissolve_huge_page(page, &cc->migratepages);
 
 			/*
 			 * Fail isolation in case isolate_or_dissolve_huge_page
@@ -927,6 +927,15 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 				goto isolate_fail;
 			}
 
+			if (PageHuge(page)) {
+				/*
+				 * Hugepage was successfully isolated and placed
+				 * on the cc->migratepages list.
+				 */
+				low_pfn += compound_nr(page) - 1;
+				goto isolate_success_no_list;
+			}
+
 			/*
 			 * Ok, the hugepage was dissolved. Now these pages are
 			 * Buddy and cannot be re-allocated because they are
@@ -1068,6 +1077,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 
 isolate_success:
 		list_add(&page->lru, &cc->migratepages);
+isolate_success_no_list:
 		cc->nr_migratepages += compound_nr(page);
 		nr_isolated += compound_nr(page);
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3194c1bd9e32..11e86434d8bd 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2287,7 +2287,9 @@ static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page)
 		goto unlock;
 	} else if (page_count(old_page)) {
 		/*
-		 * Someone has grabbed the page, fail for now.
+		 * Someone has grabbed the page, return -EBUSY so we give
+		 * isolate_or_dissolve_huge_page a chance to handle an in-use
+		 * page.
 		 */
 		ret = -EBUSY;
 		update_and_free_page(h, new_page);
@@ -2319,10 +2321,12 @@ static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page)
 	return ret;
 }
 
-int isolate_or_dissolve_huge_page(struct page *page)
+int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list)
 {
 	struct hstate *h;
 	struct page *head;
+	bool try_again = true;
+	int ret = -EBUSY;
 
 	/*
 	 * The page might have been dissolved from under our feet, so make sure
@@ -2347,7 +2351,19 @@ int isolate_or_dissolve_huge_page(struct page *page)
 	if (hstate_is_gigantic(h))
 		return -ENOMEM;
 
-	return alloc_and_dissolve_huge_page(h, head);
+retry:
+	if (page_count(head) && isolate_huge_page(head, list)) {
+		ret = 0;
+	} else if (!page_count(head)) {
+		ret = alloc_and_dissolve_huge_page(h, head);
+
+		if (ret == -EBUSY && try_again) {
+			try_again = false;
+			goto retry;
+		}
+	}
+
+	return ret;
 }
 
 struct page *alloc_huge_page(struct vm_area_struct *vma,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 562e87cbd7a1..42aaef30633e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1507,8 +1507,9 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
 	LIST_HEAD(clean_pages);
 
 	list_for_each_entry_safe(page, next, page_list, lru) {
-		if (page_is_file_lru(page) && !PageDirty(page) &&
-		    !__PageMovable(page) && !PageUnevictable(page)) {
+		if (!PageHuge(page) && page_is_file_lru(page) &&
+		    !PageDirty(page) && !__PageMovable(page) &&
+		    !PageUnevictable(page)) {
 			ClearPageActive(page);
 			list_move(&page->lru, &clean_pages);
 		}
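To close the loop on how the final version above is consumed, here is a condensed, hypothetical caller based on the compaction hunks (not a verbatim excerpt): an isolated hugepage is already on the passed-in list, so the caller must not add it again, while a dissolved one has become buddy pages and scanning simply continues.

	/* inside isolate_migratepages_block(), with cc->alloc_contig set */
	if (PageHuge(page) && cc->alloc_contig) {
		ret = isolate_or_dissolve_huge_page(page, &cc->migratepages);
		if (ret)			/* -EBUSY or -ENOMEM */
			goto isolate_fail;
		if (PageHuge(page)) {
			/* Isolated: already on cc->migratepages, only account it. */
			low_pfn += compound_nr(page) - 1;
			goto isolate_success_no_list;
		}
		/* Otherwise the hugepage was dissolved into buddy pages; keep scanning. */
	}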