| Message ID | 20191127102800.51526-1-wangkefeng.wang@huawei.com (mailing list archive) |
|---|---|
| State | New, archived |
| Series | [RFC] mm, page_alloc: avoid page_to_pfn() in move_freepages() |

[...]

>  static int move_freepages(struct zone *zone,
> -                       struct page *start_page, struct page *end_page,
> +                       unsigned long start_pfn, unsigned long end_pfn,
>                         int migratetype, int *num_movable)
>  {
>         struct page *page;
> +       unsigned long pfn;
>         unsigned int order;
>         int pages_moved = 0;
>
> -       for (page = start_page; page <= end_page;) {
> -               if (!pfn_valid_within(page_to_pfn(page))) {
> -                       page++;
> +       for (pfn = start_pfn; pfn <= end_pfn;) {
> +               if (!pfn_valid_within(pfn)) {
> +                       pfn++;
>                         continue;
>                 }
>
> +               page = pfn_to_page(pfn);
>                 if (!PageBuddy(page)) {
>                         /*
>                          * We assume that pages that could be isolated for
> @@ -2268,8 +2270,7 @@ static int move_freepages(struct zone *zone,
>                         if (num_movable &&
>                                         (PageLRU(page) || __PageMovable(page)))
>                                 (*num_movable)++;
> -
> -                       page++;
> +                       pfn++;
>                         continue;
>                 }
>
> @@ -2280,6 +2281,7 @@ static int move_freepages(struct zone *zone,
>                 order = page_order(page);
>                 move_to_free_area(page, &zone->free_area[order], migratetype);
>                 page += 1 << order;

You can drop this now as well, no?

> +               pfn += 1 << order;
>                 pages_moved += 1 << order;
>         }
>
> @@ -2289,25 +2291,22 @@ static int move_freepages(struct zone *zone,
>  int move_freepages_block(struct zone *zone, struct page *page,
>                                 int migratetype, int *num_movable)
>  {
> -       unsigned long start_pfn, end_pfn;
> -       struct page *start_page, *end_page;
> +       unsigned long start_pfn, end_pfn, pfn;
>
>         if (num_movable)
>                 *num_movable = 0;
>
> -       start_pfn = page_to_pfn(page);
> +       pfn = start_pfn = page_to_pfn(page);

pfn = page_to_pfn(page);

and ...

>         start_pfn = start_pfn & ~(pageblock_nr_pages-1);

...

start_pfn = pfn & ~(pageblock_nr_pages - 1);

instead?

> -       start_page = pfn_to_page(start_pfn);
> -       end_page = start_page + pageblock_nr_pages - 1;
>         end_pfn = start_pfn + pageblock_nr_pages - 1;
>
>         /* Do not cross zone boundaries */
>         if (!zone_spans_pfn(zone, start_pfn))
> -               start_page = page;
> +               start_pfn = pfn;
>         if (!zone_spans_pfn(zone, end_pfn))
>                 return 0;
>
> -       return move_freepages(zone, start_page, end_page, migratetype,
> +       return move_freepages(zone, start_pfn, end_pfn, migratetype,
>                               num_movable);
>  }
>

On 2019/11/27 18:47, David Hildenbrand wrote:
> [...]
>
>> [...]
>> @@ -2280,6 +2281,7 @@ static int move_freepages(struct zone *zone,
>>                 order = page_order(page);
>>                 move_to_free_area(page, &zone->free_area[order], migratetype);
>>                 page += 1 << order;
>
> You can drop this now as well, no?

should do it

>
>> +               pfn += 1 << order;
>>                 pages_moved += 1 << order;
>>         }
>>
>> @@ -2289,25 +2291,22 @@ static int move_freepages(struct zone *zone,
>>  int move_freepages_block(struct zone *zone, struct page *page,
>>                                 int migratetype, int *num_movable)
>>  {
>> -       unsigned long start_pfn, end_pfn;
>> -       struct page *start_page, *end_page;
>> +       unsigned long start_pfn, end_pfn, pfn;
>>
>>         if (num_movable)
>>                 *num_movable = 0;
>>
>> -       start_pfn = page_to_pfn(page);
>> +       pfn = start_pfn = page_to_pfn(page);
>
> pfn = page_to_pfn(page);
>
> and ...
>
>>         start_pfn = start_pfn & ~(pageblock_nr_pages-1);
>
> ...
>
> start_pfn = pfn & ~(pageblock_nr_pages - 1);
>
> instead?

will change, thanks for your comments.

>
>> [...]

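Putting David's two suggestions together, the next spin would presumably end up looking roughly like the sketch below: move_freepages() drops the now-redundant `page += 1 << order;` (only the pfn is advanced, and the page pointer is recomputed via pfn_to_page() at the top of each iteration), and move_freepages_block() computes the pfn once and masks it down to the pageblock start. This is only a sketch of what such a follow-up might contain, not the patch that was actually posted:

```c
/*
 * Sketch of move_freepages_block() with both review comments applied
 * (hypothetical follow-up, not the posted version): the pfn is computed
 * once, rounded down to the pageblock boundary, and no start_page/end_page
 * pointers are carried around any more.
 */
int move_freepages_block(struct zone *zone, struct page *page,
                         int migratetype, int *num_movable)
{
        unsigned long start_pfn, end_pfn, pfn;

        if (num_movable)
                *num_movable = 0;

        pfn = page_to_pfn(page);
        start_pfn = pfn & ~(pageblock_nr_pages - 1);
        end_pfn = start_pfn + pageblock_nr_pages - 1;

        /* Do not cross zone boundaries */
        if (!zone_spans_pfn(zone, start_pfn))
                start_pfn = pfn;
        if (!zone_spans_pfn(zone, end_pfn))
                return 0;

        return move_freepages(zone, start_pfn, end_pfn, migratetype,
                              num_movable);
}
```
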
On Wed 27-11-19 18:28:00, Kefeng Wang wrote:
> The start_pfn and end_pfn are already available in move_freepages_block();
> pfn_valid_within() should validate the pfn before touching the page, or we
> might access an uninitialized page on CONFIG_HOLES_IN_ZONE configs.
>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Michal Hocko <mhocko@suse.com>
> Cc: Vlastimil Babka <vbabka@suse.cz>
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> ---
>
> Here is an oops seen on 4.4 (arm64 with CONFIG_HOLES_IN_ZONE enabled):

Is this reproducible with the current upstream kernel? There were large
changes in this area since 4.4.

Btw. the oops below should be part of the changelog.

> Unable to handle kernel NULL pointer dereference at virtual address 00000000
> pgd = ffffff8008f7e000
> [00000000] *pgd=0000000017ffe003, *pud=0000000017ffe003, *pmd=0000000000000000
> Internal error: Oops: 96000007 [#1] SMP
> CPU: 0 PID: 0 Comm: swapper/0 Tainted: G W O 4.4.185 #1
>
> PC is at move_freepages+0x80/0x10c
> LR is at move_freepages_block+0xd4/0xf4
> pc : [<ffffff80083332e8>] lr : [<ffffff8008333448>] pstate: 80000085
> [...]
> [<ffffff80083332e8>] move_freepages+0x80/0x10c
> [<ffffff8008333448>] move_freepages_block+0xd4/0xf4
> [<ffffff8008335414>] __rmqueue+0x2bc/0x44c
> [<ffffff800833580c>] get_page_from_freelist+0x268/0x600
> [<ffffff8008335e84>] __alloc_pages_nodemask+0x184/0x88c
> [<ffffff800837fae8>] new_slab+0xd0/0x494
> [<ffffff8008381834>] ___slab_alloc.constprop.29+0x1c8/0x2e8
> [<ffffff80083819a8>] __slab_alloc.constprop.28+0x54/0x84
> [<ffffff8008381e68>] kmem_cache_alloc+0x64/0x198
> [<ffffff80085b04e0>] __build_skb+0x44/0xa4
> [<ffffff80085b06e4>] __netdev_alloc_skb+0xe4/0x134
>
>  mm/page_alloc.c | 25 ++++++++++++-------------
>  1 file changed, 12 insertions(+), 13 deletions(-)
>
> [...]
> --
> 2.20.1

On 2019/11/27 19:47, Michal Hocko wrote:
> On Wed 27-11-19 18:28:00, Kefeng Wang wrote:
>> The start_pfn and end_pfn are already available in move_freepages_block();
>> pfn_valid_within() should validate the pfn before touching the page, or we
>> might access an uninitialized page on CONFIG_HOLES_IN_ZONE configs.
>>
>> Cc: Andrew Morton <akpm@linux-foundation.org>
>> Cc: Michal Hocko <mhocko@suse.com>
>> Cc: Vlastimil Babka <vbabka@suse.cz>
>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>> ---
>>
>> Here is an oops seen on 4.4 (arm64 with CONFIG_HOLES_IN_ZONE enabled):
>
> Is this reproducible with the current upstream kernel? There were large
> changes in this area since 4.4.

Our in-house tester hit this oops twice, but it can't be reproduced for now,
even on the 4.4 kernel; still trying...

But page_to_pfn() shouldn't be used in move_freepages() anyway, right? ;)

>
> Btw. the oops below should be part of the changelog.

Ok, will resend.

>
>> Unable to handle kernel NULL pointer dereference at virtual address 00000000
>> [...]
>> PC is at move_freepages+0x80/0x10c
>> LR is at move_freepages_block+0xd4/0xf4
>> [...]

On Wed 27-11-19 21:13:00, Kefeng Wang wrote:
> On 2019/11/27 19:47, Michal Hocko wrote:
>> On Wed 27-11-19 18:28:00, Kefeng Wang wrote:
>>> The start_pfn and end_pfn are already available in move_freepages_block();
>>> pfn_valid_within() should validate the pfn before touching the page, or we
>>> might access an uninitialized page on CONFIG_HOLES_IN_ZONE configs.
>>>
>>> [...]
>>>
>>> Here is an oops seen on 4.4 (arm64 with CONFIG_HOLES_IN_ZONE enabled):
>>
>> Is this reproducible with the current upstream kernel? There were large
>> changes in this area since 4.4.
>
> Our in-house tester hit this oops twice, but it can't be reproduced for now,
> even on the 4.4 kernel; still trying...
>
> But page_to_pfn() shouldn't be used in move_freepages() anyway, right? ;)

Well, I do agree that going back and forth between page and pfn is ugly,
so this makes sense to me as a cleanup. But you are trying to fix a bug,
and that bug should be explained. A NULL pointer dereference sounds like
the memmap is not allocated for that particular pfn, which is a bit
unexpected even with holes, at least on x86; maybe arm64 allows it.
The changelog should be clear about all of this rather than potentially
papering over a deeper problem. Please also make sure to involve the
arm64 people.

> On Nov 27, 2019, at 9:13 AM, Michal Hocko <mhocko@suse.com> wrote:
>
> On Wed 27-11-19 21:13:00, Kefeng Wang wrote:
>> [...]
>>
>> Our in-house tester hit this oops twice, but it can't be reproduced for now,
>> even on the 4.4 kernel; still trying...
>>
>> But page_to_pfn() shouldn't be used in move_freepages() anyway, right? ;)
>
> Well, I do agree that going back and forth between page and pfn is ugly,
> so this makes sense to me as a cleanup. But you are trying to fix a bug,
> and that bug should be explained. A NULL pointer dereference sounds like
> the memmap is not allocated for that particular pfn, which is a bit
> unexpected even with holes, at least on x86; maybe arm64 allows it.
> The changelog should be clear about all of this rather than potentially
> papering over a deeper problem. Please also make sure to involve the
> arm64 people.

Indeed. Too many times people are only able to reproduce an issue on an old
kernel but still insist on "forward-fixing" mainline as well, which only
brings instability there.

On 2019/11/27 22:13, Michal Hocko wrote:
> On Wed 27-11-19 21:13:00, Kefeng Wang wrote:
>> [...]
>>
>> Our in-house tester hit this oops twice, but it can't be reproduced for now,
>> even on the 4.4 kernel; still trying...
>>
>> But page_to_pfn() shouldn't be used in move_freepages() anyway, right? ;)
>
> Well, I do agree that going back and forth between page and pfn is ugly,
> so this makes sense to me as a cleanup. But you are trying to fix a bug,
> and that bug should be explained. A NULL pointer dereference sounds like
> the memmap is not allocated for that particular pfn, which is a bit
> unexpected even with holes, at least on x86; maybe arm64 allows it.
> The changelog should be clear about all of this rather than potentially
> papering over a deeper problem. Please also make sure to involve the
> arm64 people.

I'm still trying to reproduce it on 4.4 and 5.4. Adding Catalin, Will and
Mark: could you give some advice on this? Thanks.

https://lore.kernel.org/linux-mm/54064878-ea85-247a-3382-b96ddf97c667@huawei.com/T/#m87c545730a0a00c45e042937593c59f6552d1246

Note: we backported the NUMA patches into 4.4, so CONFIG_HOLES_IN_ZONE is
enabled.

# CONFIG_NUMA is not set
CONFIG_HOLES_IN_ZONE=y

CONFIG_SPARSEMEM_MANUAL=y
CONFIG_SPARSEMEM=y
CONFIG_HAVE_MEMORY_PRESENT=y
CONFIG_SPARSEMEM_EXTREME=y
CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER=y
# CONFIG_SPARSEMEM_VMEMMAP is not set

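One detail worth noting about that config: with classic SPARSEMEM (CONFIG_SPARSEMEM_VMEMMAP not set), page_to_pfn() has to read page->flags to find the owning mem_section before it can compute the pfn, so converting a struct page that sits in a freed or never-initialized memmap hole back to a pfn can itself fault, before pfn_valid_within() is ever consulted. That is consistent with the point above that page_to_pfn() should not be called until the pfn has been validated. Roughly, and only as a paraphrase of include/asm-generic/memory_model.h (exact details vary between kernel versions):

```c
/*
 * Classic SPARSEMEM (no VMEMMAP), paraphrased for illustration:
 * the section number is encoded in page->flags, so page_to_pfn()
 * dereferences the struct page itself. If that struct page lives in a
 * memmap hole, this access is already unsafe, which is why the patch
 * iterates over pfns and only calls pfn_to_page() after
 * pfn_valid_within() has succeeded.
 */
#define __page_to_pfn(pg)                                               \
({      const struct page *__pg = (pg);                                 \
        int __sec = page_to_section(__pg);      /* reads __pg->flags */ \
        (unsigned long)(__pg - __section_mem_map_addr(__nr_to_section(__sec))); \
})
```
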
> On Nov 27, 2019, at 9:39 AM, Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>
> On 2019/11/27 22:13, Michal Hocko wrote:
>> [...]
>>
>> Well, I do agree that going back and forth between page and pfn is ugly,
>> so this makes sense to me as a cleanup. But you are trying to fix a bug,
>> and that bug should be explained. A NULL pointer dereference sounds like
>> the memmap is not allocated for that particular pfn, which is a bit
>> unexpected even with holes, at least on x86; maybe arm64 allows it.
>> The changelog should be clear about all of this rather than potentially
>> papering over a deeper problem. Please also make sure to involve the
>> arm64 people.
>
> I'm still trying to reproduce it on 4.4 and 5.4. Adding Catalin, Will and
> Mark: could you give some advice on this? Thanks.
>
> https://lore.kernel.org/linux-mm/54064878-ea85-247a-3382-b96ddf97c667@huawei.com/T/#m87c545730a0a00c45e042937593c59f6552d1246
>
> Note: we backported the NUMA patches into 4.4, so CONFIG_HOLES_IN_ZONE is
> enabled.
>
> # CONFIG_NUMA is not set
> CONFIG_HOLES_IN_ZONE=y
>
> CONFIG_SPARSEMEM_MANUAL=y
> CONFIG_SPARSEMEM=y
> CONFIG_HAVE_MEMORY_PRESENT=y
> CONFIG_SPARSEMEM_EXTREME=y
> CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
> CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER=y
> # CONFIG_SPARSEMEM_VMEMMAP is not set

See commit b13bc35193d9 ("mm/hotplug: invalid PFNs from pfn_to_online_page()"),
which lists a lot of code churn in that area; that might indicate there are
many commits missing from your kernel.

On 27.11.19 15:13, Michal Hocko wrote:
> On Wed 27-11-19 21:13:00, Kefeng Wang wrote:
>> [...]
>>
>> But page_to_pfn() shouldn't be used in move_freepages() anyway, right? ;)
>
> Well, I do agree that going back and forth between page and pfn is ugly,
> so this makes sense to me as a cleanup. But you are trying to fix a bug,
> and that bug should be explained. A NULL pointer dereference sounds like
> the memmap is not allocated for that particular pfn, which is a bit
> unexpected even with holes, at least on x86; maybe arm64 allows it.

AFAIK ARM (and arm64) allows that.

It's basically CONFIG_HAVE_ARCH_PFN_VALID (and CONFIG_HOLES_IN_ZONE, if I am
not wrong):

commit eb33575cf67d3f35fa2510210ef92631266e2465
Author: Mel Gorman <mel@csn.ul.ie>
Date:   Wed May 13 17:34:48 2009 +0100

    [ARM] Double check memmap is actually valid with a memmap has unexpected holes V2

    pfn_valid() is meant to be able to tell if a given PFN has valid memmap
    associated with it or not. In FLATMEM, it is expected that holes always
    have valid memmap as long as there is valid PFNs either side of the hole.
    In SPARSEMEM, it is assumed that a valid section has a memmap for the
    entire section.

    However, ARM and maybe other embedded architectures in the future free
    memmap backing holes to save memory on the assumption the memmap is
    never used. The page_zone linkages are then broken even though
    pfn_valid() returns true. A walker of the full memmap must then do this
    additional check to ensure the memmap they are looking at is sane by
    making sure the zone and PFN linkages are still valid. This is expensive,
    but walkers of the full memmap are extremely rare.

    [...]

And:

commit 7b7bf499f79de3f6c85a340c8453a78789523f85
Author: Will Deacon <will.deacon@arm.com>
Date:   Thu May 19 13:21:14 2011 +0100

    ARM: 6913/1: sparsemem: allow pfn_valid to be overridden when using SPARSEMEM

Side note: I find overriding pfn_valid() extremely ugly ...
... and CONFIG_HOLES_IN_ZONE as well.

> The changelog should be clear about all of this rather than potentially
> papering over a deeper problem. Please also make sure to involve the
> arm64 people.

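For reference, pfn_valid_within() only does real work on CONFIG_HOLES_IN_ZONE kernels; everywhere else it compiles to a constant, so the per-pfn check this patch relies on is only active on configs like the one quoted earlier in the thread. A rough paraphrase of the definition from include/linux/mmzone.h of this era (not a verbatim copy):

```c
/*
 * Paraphrased from include/linux/mmzone.h (4.x/5.x era): only
 * architectures that can have memmap holes inside a MAX_ORDER block
 * (CONFIG_HOLES_IN_ZONE) pay for a real pfn_valid() call here; on
 * arm/arm64 with CONFIG_HAVE_ARCH_PFN_VALID that check is in turn
 * backed by a memblock lookup, which is what makes it meaningful for
 * the freed-memmap holes described in the commits above.
 */
#ifdef CONFIG_HOLES_IN_ZONE
#define pfn_valid_within(pfn)   pfn_valid(pfn)
#else
#define pfn_valid_within(pfn)   (1)
#endif
```
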
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f391c0c4ed1d..59f2c2b860fe 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2246,19 +2246,21 @@ static inline struct page *__rmqueue_cma_fallback(struct zone *zone,
  * boundary. If alignment is required, use move_freepages_block()
  */
 static int move_freepages(struct zone *zone,
-                       struct page *start_page, struct page *end_page,
+                       unsigned long start_pfn, unsigned long end_pfn,
                        int migratetype, int *num_movable)
 {
        struct page *page;
+       unsigned long pfn;
        unsigned int order;
        int pages_moved = 0;
 
-       for (page = start_page; page <= end_page;) {
-               if (!pfn_valid_within(page_to_pfn(page))) {
-                       page++;
+       for (pfn = start_pfn; pfn <= end_pfn;) {
+               if (!pfn_valid_within(pfn)) {
+                       pfn++;
                        continue;
                }
 
+               page = pfn_to_page(pfn);
                if (!PageBuddy(page)) {
                        /*
                         * We assume that pages that could be isolated for
@@ -2268,8 +2270,7 @@ static int move_freepages(struct zone *zone,
                        if (num_movable &&
                                        (PageLRU(page) || __PageMovable(page)))
                                (*num_movable)++;
-
-                       page++;
+                       pfn++;
                        continue;
                }
 
@@ -2280,6 +2281,7 @@ static int move_freepages(struct zone *zone,
                order = page_order(page);
                move_to_free_area(page, &zone->free_area[order], migratetype);
                page += 1 << order;
+               pfn += 1 << order;
                pages_moved += 1 << order;
        }
 
@@ -2289,25 +2291,22 @@ static int move_freepages(struct zone *zone,
 int move_freepages_block(struct zone *zone, struct page *page,
                                int migratetype, int *num_movable)
 {
-       unsigned long start_pfn, end_pfn;
-       struct page *start_page, *end_page;
+       unsigned long start_pfn, end_pfn, pfn;
 
        if (num_movable)
                *num_movable = 0;
 
-       start_pfn = page_to_pfn(page);
+       pfn = start_pfn = page_to_pfn(page);
        start_pfn = start_pfn & ~(pageblock_nr_pages-1);
-       start_page = pfn_to_page(start_pfn);
-       end_page = start_page + pageblock_nr_pages - 1;
        end_pfn = start_pfn + pageblock_nr_pages - 1;
 
        /* Do not cross zone boundaries */
        if (!zone_spans_pfn(zone, start_pfn))
-               start_page = page;
+               start_pfn = pfn;
        if (!zone_spans_pfn(zone, end_pfn))
                return 0;
 
-       return move_freepages(zone, start_page, end_page, migratetype,
+       return move_freepages(zone, start_pfn, end_pfn, migratetype,
                              num_movable);
 }

The start_pfn and end_pfn are already available in move_freepages_block();
pfn_valid_within() should validate the pfn before touching the page, or we
might access an uninitialized page on CONFIG_HOLES_IN_ZONE configs.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---

Here is an oops seen on 4.4 (arm64 with CONFIG_HOLES_IN_ZONE enabled):

Unable to handle kernel NULL pointer dereference at virtual address 00000000
pgd = ffffff8008f7e000
[00000000] *pgd=0000000017ffe003, *pud=0000000017ffe003, *pmd=0000000000000000
Internal error: Oops: 96000007 [#1] SMP
CPU: 0 PID: 0 Comm: swapper/0 Tainted: G W O 4.4.185 #1

PC is at move_freepages+0x80/0x10c
LR is at move_freepages_block+0xd4/0xf4
pc : [<ffffff80083332e8>] lr : [<ffffff8008333448>] pstate: 80000085
[...]
[<ffffff80083332e8>] move_freepages+0x80/0x10c
[<ffffff8008333448>] move_freepages_block+0xd4/0xf4
[<ffffff8008335414>] __rmqueue+0x2bc/0x44c
[<ffffff800833580c>] get_page_from_freelist+0x268/0x600
[<ffffff8008335e84>] __alloc_pages_nodemask+0x184/0x88c
[<ffffff800837fae8>] new_slab+0xd0/0x494
[<ffffff8008381834>] ___slab_alloc.constprop.29+0x1c8/0x2e8
[<ffffff80083819a8>] __slab_alloc.constprop.28+0x54/0x84
[<ffffff8008381e68>] kmem_cache_alloc+0x64/0x198
[<ffffff80085b04e0>] __build_skb+0x44/0xa4
[<ffffff80085b06e4>] __netdev_alloc_skb+0xe4/0x134

 mm/page_alloc.c | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)