
[v2] mm/sparse.c: Use kvmalloc_node/kvfree to alloc/free memmap for the classic sparse

Message ID 20200312130822.6589-1-bhe@redhat.com (mailing list archive)
State New, archived
Series [v2] mm/sparse.c: Use kvmalloc_node/kvfree to alloc/free memmap for the classic sparse

Commit Message

Baoquan He March 12, 2020, 1:08 p.m. UTC
This change makes populate_section_memmap()/depopulate_section_memmap()
much simpler.

Suggested-by: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Baoquan He <bhe@redhat.com>
---
v1->v2:
  The old version only used __get_free_pages() to replace alloc_pages()
  in populate_section_memmap().
  http://lkml.kernel.org/r/20200307084229.28251-8-bhe@redhat.com

 mm/sparse.c | 27 +++------------------------
 1 file changed, 3 insertions(+), 24 deletions(-)

Comments

Matthew Wilcox March 12, 2020, 1:34 p.m. UTC | #1
On Thu, Mar 12, 2020 at 09:08:22PM +0800, Baoquan He wrote:
> This change makes populate_section_memmap()/depopulate_section_memmap
> much simpler.
> 
> Suggested-by: Michal Hocko <mhocko@kernel.org>
> Signed-off-by: Baoquan He <bhe@redhat.com>
> ---
> v1->v2:
>   The old version only used __get_free_pages() to replace alloc_pages()
>   in populate_section_memmap().
>   http://lkml.kernel.org/r/20200307084229.28251-8-bhe@redhat.com
> 
>  mm/sparse.c | 27 +++------------------------
>  1 file changed, 3 insertions(+), 24 deletions(-)
> 
> diff --git a/mm/sparse.c b/mm/sparse.c
> index bf6c00a28045..362018e82e22 100644
> --- a/mm/sparse.c
> +++ b/mm/sparse.c
> @@ -734,35 +734,14 @@ static void free_map_bootmem(struct page *memmap)
>  struct page * __meminit populate_section_memmap(unsigned long pfn,
>  		unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
>  {
> -	struct page *page, *ret;
> -	unsigned long memmap_size = sizeof(struct page) * PAGES_PER_SECTION;
> -
> -	page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
> -	if (page)
> -		goto got_map_page;
> -
> -	ret = vmalloc(memmap_size);
> -	if (ret)
> -		goto got_map_ptr;
> -
> -	return NULL;
> -got_map_page:
> -	ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
> -got_map_ptr:
> -
> -	return ret;
> +	return kvmalloc_node(sizeof(struct page) * PAGES_PER_SECTION,
> +			     GFP_KERNEL|__GFP_NOWARN, nid);

Use of NOWARN here is inappropriate, because there's no fallback.
Also, I'd use array_size(sizeof(struct page), PAGES_PER_SECTION).
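
For illustration, the variant suggested here would look roughly as follows; this is only a sketch of the review suggestion (array_size() comes from <linux/overflow.h>), not the final patch:

	struct page * __meminit populate_section_memmap(unsigned long pfn,
			unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
	{
		/* Overflow-safe size calculation; __GFP_NOWARN dropped as
		 * suggested, so an allocation failure is not silent. */
		return kvmalloc_node(array_size(sizeof(struct page), PAGES_PER_SECTION),
				     GFP_KERNEL, nid);
	}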
Baoquan He March 12, 2020, 1:54 p.m. UTC | #2
On 03/12/20 at 06:34am, Matthew Wilcox wrote:
> On Thu, Mar 12, 2020 at 09:08:22PM +0800, Baoquan He wrote:
> > This change makes populate_section_memmap()/depopulate_section_memmap
> > much simpler.
> > 
> > Suggested-by: Michal Hocko <mhocko@kernel.org>
> > Signed-off-by: Baoquan He <bhe@redhat.com>
> > ---
> > v1->v2:
> >   The old version only used __get_free_pages() to replace alloc_pages()
> >   in populate_section_memmap().
> >   http://lkml.kernel.org/r/20200307084229.28251-8-bhe@redhat.com
> > 
> >  mm/sparse.c | 27 +++------------------------
> >  1 file changed, 3 insertions(+), 24 deletions(-)
> > 
> > diff --git a/mm/sparse.c b/mm/sparse.c
> > index bf6c00a28045..362018e82e22 100644
> > --- a/mm/sparse.c
> > +++ b/mm/sparse.c
> > @@ -734,35 +734,14 @@ static void free_map_bootmem(struct page *memmap)
> >  struct page * __meminit populate_section_memmap(unsigned long pfn,
> >  		unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
> >  {
> > -	struct page *page, *ret;
> > -	unsigned long memmap_size = sizeof(struct page) * PAGES_PER_SECTION;
> > -
> > -	page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
> > -	if (page)
> > -		goto got_map_page;
> > -
> > -	ret = vmalloc(memmap_size);
> > -	if (ret)
> > -		goto got_map_ptr;
> > -
> > -	return NULL;
> > -got_map_page:
> > -	ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
> > -got_map_ptr:
> > -
> > -	return ret;
> > +	return kvmalloc_node(sizeof(struct page) * PAGES_PER_SECTION,
> > +			     GFP_KERNEL|__GFP_NOWARN, nid);
> 
> Use of NOWARN here is inappropriate, because there's no fallback.

kvmalloc_node() already adds __GFP_NOWARN internally when it tries to
allocate contiguous pages, so I will remove it from the caller.

> Also, I'd use array_size(sizeof(struct page), PAGES_PER_SECTION).

That's fine with me, even though we know there is no risk of overflow
here. I will use array_size(). Thanks.
Wei Yang March 12, 2020, 2:18 p.m. UTC | #3
On Thu, Mar 12, 2020 at 06:34:16AM -0700, Matthew Wilcox wrote:
>On Thu, Mar 12, 2020 at 09:08:22PM +0800, Baoquan He wrote:
>> This change makes populate_section_memmap()/depopulate_section_memmap
>> much simpler.
>> 
>> Suggested-by: Michal Hocko <mhocko@kernel.org>
>> Signed-off-by: Baoquan He <bhe@redhat.com>
>> ---
>> v1->v2:
>>   The old version only used __get_free_pages() to replace alloc_pages()
>>   in populate_section_memmap().
>>   http://lkml.kernel.org/r/20200307084229.28251-8-bhe@redhat.com
>> 
>>  mm/sparse.c | 27 +++------------------------
>>  1 file changed, 3 insertions(+), 24 deletions(-)
>> 
>> diff --git a/mm/sparse.c b/mm/sparse.c
>> index bf6c00a28045..362018e82e22 100644
>> --- a/mm/sparse.c
>> +++ b/mm/sparse.c
>> @@ -734,35 +734,14 @@ static void free_map_bootmem(struct page *memmap)
>>  struct page * __meminit populate_section_memmap(unsigned long pfn,
>>  		unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
>>  {
>> -	struct page *page, *ret;
>> -	unsigned long memmap_size = sizeof(struct page) * PAGES_PER_SECTION;
>> -
>> -	page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
>> -	if (page)
>> -		goto got_map_page;
>> -
>> -	ret = vmalloc(memmap_size);
>> -	if (ret)
>> -		goto got_map_ptr;
>> -
>> -	return NULL;
>> -got_map_page:
>> -	ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
>> -got_map_ptr:
>> -
>> -	return ret;
>> +	return kvmalloc_node(sizeof(struct page) * PAGES_PER_SECTION,
>> +			     GFP_KERNEL|__GFP_NOWARN, nid);
>
>Use of NOWARN here is inappropriate, because there's no fallback.

Hmm... this replacement is a little tricky.

If you look into kvmalloc_node(), it only does the vmalloc fallback when the
size is bigger than PAGE_SIZE. This means the change here may not be
equivalent to the old behavior if memmap_size is less than PAGE_SIZE.

For example, with:
  PAGE_SIZE = 64K 
  SECTION_SIZE = 128M

this would lead to memmap_size = 2K, which is less than PAGE_SIZE.

I'm not sure whether this combination can happen?

>Also, I'd use array_size(sizeof(struct page), PAGES_PER_SECTION).
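
For readers unfamiliar with kvmalloc_node(), the fallback behaviour in question can be pictured with this simplified sketch (illustration only, not the actual mm/util.c implementation; GFP-flag handling is abbreviated):

	static void *kvmalloc_node_sketch(size_t size, gfp_t flags, int node)
	{
		void *ret;

		/* Larger-than-page requests suppress warnings/retries so a
		 * failed kmalloc can quietly fall back to vmalloc. */
		if (size > PAGE_SIZE)
			flags |= __GFP_NOWARN | __GFP_NORETRY;

		ret = kmalloc_node(size, flags, node);

		/* Sub-page requests never fall back: vmalloc cannot do better
		 * than kmalloc for them, so the result (possibly NULL) is
		 * returned as is. */
		if (ret || size <= PAGE_SIZE)
			return ret;

		return vmalloc(size);	/* the real code uses a node-aware vmalloc variant */
	}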
Matthew Wilcox March 12, 2020, 2:25 p.m. UTC | #4
On Thu, Mar 12, 2020 at 02:18:26PM +0000, Wei Yang wrote:
> On Thu, Mar 12, 2020 at 06:34:16AM -0700, Matthew Wilcox wrote:
> >On Thu, Mar 12, 2020 at 09:08:22PM +0800, Baoquan He wrote:
> >> This change makes populate_section_memmap()/depopulate_section_memmap
> >> much simpler.
> >> 
> >> Suggested-by: Michal Hocko <mhocko@kernel.org>
> >> Signed-off-by: Baoquan He <bhe@redhat.com>
> >> ---
> >> v1->v2:
> >>   The old version only used __get_free_pages() to replace alloc_pages()
> >>   in populate_section_memmap().
> >>   http://lkml.kernel.org/r/20200307084229.28251-8-bhe@redhat.com
> >> 
> >>  mm/sparse.c | 27 +++------------------------
> >>  1 file changed, 3 insertions(+), 24 deletions(-)
> >> 
> >> diff --git a/mm/sparse.c b/mm/sparse.c
> >> index bf6c00a28045..362018e82e22 100644
> >> --- a/mm/sparse.c
> >> +++ b/mm/sparse.c
> >> @@ -734,35 +734,14 @@ static void free_map_bootmem(struct page *memmap)
> >>  struct page * __meminit populate_section_memmap(unsigned long pfn,
> >>  		unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
> >>  {
> >> -	struct page *page, *ret;
> >> -	unsigned long memmap_size = sizeof(struct page) * PAGES_PER_SECTION;
> >> -
> >> -	page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
> >> -	if (page)
> >> -		goto got_map_page;
> >> -
> >> -	ret = vmalloc(memmap_size);
> >> -	if (ret)
> >> -		goto got_map_ptr;
> >> -
> >> -	return NULL;
> >> -got_map_page:
> >> -	ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
> >> -got_map_ptr:
> >> -
> >> -	return ret;
> >> +	return kvmalloc_node(sizeof(struct page) * PAGES_PER_SECTION,
> >> +			     GFP_KERNEL|__GFP_NOWARN, nid);
> >
> >Use of NOWARN here is inappropriate, because there's no fallback.
> 
> Hmm... this replacement is a little tricky.
> 
> When you look into kvmalloc_node(), it will do the fallback if the size is
> bigger than PAGE_SIZE. This means the change here may not be equivalent as
> before if memmap_size is less than PAGE_SIZE.
> 
> For example if :
>   PAGE_SIZE = 64K 
>   SECTION_SIZE = 128M
> 
> would lead to memmap_size = 2K, which is less than PAGE_SIZE.

Yes, I thought about that.  I decided it wasn't a problem, as long as
the struct page remains aligned, and we now have a guarantee that allocations
above 512 bytes in size are aligned.  With a 64 byte struct page, as long
as we're allocating at least 8 pages, we know it'll be naturally aligned.

Your calculation doesn't take into account the size of struct page.
128M / 64k is indeed 2k, but you forgot to multiply by 64, which takes
us to 128kB.
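
Spelling out that arithmetic (assuming a 64-byte struct page, i.e. no debug options enlarging it):

	PAGES_PER_SECTION  = 128M / 64K  = 2048
	memmap_size        = 2048 * 64 B = 128 KB  (two 64 KB pages)
	memmap for 8 pages = 8 * 64 B    = 512 B   (the alignment threshold above)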
Wei Yang March 12, 2020, 10:50 p.m. UTC | #5
On Thu, Mar 12, 2020 at 07:25:35AM -0700, Matthew Wilcox wrote:
>On Thu, Mar 12, 2020 at 02:18:26PM +0000, Wei Yang wrote:
>> On Thu, Mar 12, 2020 at 06:34:16AM -0700, Matthew Wilcox wrote:
>> >On Thu, Mar 12, 2020 at 09:08:22PM +0800, Baoquan He wrote:
>> >> This change makes populate_section_memmap()/depopulate_section_memmap
>> >> much simpler.
>> >> 
>> >> Suggested-by: Michal Hocko <mhocko@kernel.org>
>> >> Signed-off-by: Baoquan He <bhe@redhat.com>
>> >> ---
>> >> v1->v2:
>> >>   The old version only used __get_free_pages() to replace alloc_pages()
>> >>   in populate_section_memmap().
>> >>   http://lkml.kernel.org/r/20200307084229.28251-8-bhe@redhat.com
>> >> 
>> >>  mm/sparse.c | 27 +++------------------------
>> >>  1 file changed, 3 insertions(+), 24 deletions(-)
>> >> 
>> >> diff --git a/mm/sparse.c b/mm/sparse.c
>> >> index bf6c00a28045..362018e82e22 100644
>> >> --- a/mm/sparse.c
>> >> +++ b/mm/sparse.c
>> >> @@ -734,35 +734,14 @@ static void free_map_bootmem(struct page *memmap)
>> >>  struct page * __meminit populate_section_memmap(unsigned long pfn,
>> >>  		unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
>> >>  {
>> >> -	struct page *page, *ret;
>> >> -	unsigned long memmap_size = sizeof(struct page) * PAGES_PER_SECTION;
>> >> -
>> >> -	page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
>> >> -	if (page)
>> >> -		goto got_map_page;
>> >> -
>> >> -	ret = vmalloc(memmap_size);
>> >> -	if (ret)
>> >> -		goto got_map_ptr;
>> >> -
>> >> -	return NULL;
>> >> -got_map_page:
>> >> -	ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
>> >> -got_map_ptr:
>> >> -
>> >> -	return ret;
>> >> +	return kvmalloc_node(sizeof(struct page) * PAGES_PER_SECTION,
>> >> +			     GFP_KERNEL|__GFP_NOWARN, nid);
>> >
>> >Use of NOWARN here is inappropriate, because there's no fallback.
>> 
>> Hmm... this replacement is a little tricky.
>> 
>> When you look into kvmalloc_node(), it will do the fallback if the size is
>> bigger than PAGE_SIZE. This means the change here may not be equivalent as
>> before if memmap_size is less than PAGE_SIZE.
>> 
>> For example if :
>>   PAGE_SIZE = 64K 
>>   SECTION_SIZE = 128M
>> 
>> would lead to memmap_size = 2K, which is less than PAGE_SIZE.
>
>Yes, I thought about that.  I decided it wasn't a problem, as long as
>the struct page remains aligned, and we now have a guarantee that allocations
>above 512 bytes in size are aligned.  With a 64 byte struct page, as long

Where does this 512-byte condition come from?

>as we're allocating at least 8 pages, we know it'll be naturally aligned.
>
>Your calculation doesn't take into account the size of struct page.
>128M / 64k is indeed 2k, but you forgot to multiply by 64, which takes
>us to 128kB.

You are right. But could there be another combination, now or in the future?

For example, there are definitions of

#define SECTION_SIZE_BITS       26
#define SECTION_SIZE_BITS       24

Are we sure it won't break something?
Matthew Wilcox March 13, 2020, midnight UTC | #6
On Thu, Mar 12, 2020 at 10:50:55PM +0000, Wei Yang wrote:
> On Thu, Mar 12, 2020 at 07:25:35AM -0700, Matthew Wilcox wrote:
> >Yes, I thought about that.  I decided it wasn't a problem, as long as
> >the struct page remains aligned, and we now have a guarantee that allocations
> >above 512 bytes in size are aligned.  With a 64 byte struct page, as long
> 
> Where is this 512 bytes condition comes from?

Filesystems need to do I/Os from kmalloc addresses and those I/Os need to
be 512 byte aligned.

> >as we're allocating at least 8 pages, we know it'll be naturally aligned.
> >
> >Your calculation doesn't take into account the size of struct page.
> >128M / 64k is indeed 2k, but you forgot to multiply by 64, which takes
> >us to 128kB.
> 
> You are right. While would there be other combination? Or in the future?
> 
> For example, there are definitions of
> 
> #define SECTION_SIZE_BITS       26
> #define SECTION_SIZE_BITS       24
> 
> Are we sure it won't break some thing?

As I said, once it's at least 512 bytes, it'll be 512 byte aligned.  And I
can't see us having sections smaller than 8 pages, can you?
David Hildenbrand March 13, 2020, 2:35 p.m. UTC | #7
On 13.03.20 01:00, Matthew Wilcox wrote:
> On Thu, Mar 12, 2020 at 10:50:55PM +0000, Wei Yang wrote:
>> On Thu, Mar 12, 2020 at 07:25:35AM -0700, Matthew Wilcox wrote:
>>> Yes, I thought about that.  I decided it wasn't a problem, as long as
>>> the struct page remains aligned, and we now have a guarantee that allocations
>>> above 512 bytes in size are aligned.  With a 64 byte struct page, as long
>>
>> Where is this 512 bytes condition comes from?
> 
> Filesystems need to do I/Os from kmalloc addresses and those I/Os need to
> be 512 byte aligned.
> 
>>> as we're allocating at least 8 pages, we know it'll be naturally aligned.
>>>
>>> Your calculation doesn't take into account the size of struct page.
>>> 128M / 64k is indeed 2k, but you forgot to multiply by 64, which takes
>>> us to 128kB.
>>
>> You are right. While would there be other combination? Or in the future?
>>
>> For example, there are definitions of
>>
>> #define SECTION_SIZE_BITS       26
>> #define SECTION_SIZE_BITS       24
>>
>> Are we sure it won't break some thing?
> 
> As I said, once it's at least 512 bytes, it'll be 512 byte aligned.  And I
> can't see us having sections smaller than 8 pages, can you?
> 

The smallest I am aware of is indeed the special case of 16MB sections on PPC.

(If my math is correct, a 16MB section on PPC with 64k pages needs 16k
of memmap, which would be less than a 64k page, and thus, quite some
memory is wasted - but maybe my friday-afternoon-math is just wrong)
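
Checking that math with the same assumptions (64-byte struct page):

	PAGES_PER_SECTION = 16M / 64K  = 256
	memmap_size       = 256 * 64 B = 16 KB

which is indeed well below a 64 KB page, so this allocation would be served by kmalloc rather than the vmalloc fallback.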
Vlastimil Babka March 13, 2020, 2:46 p.m. UTC | #8
On 3/13/20 1:00 AM, Matthew Wilcox wrote:
> On Thu, Mar 12, 2020 at 10:50:55PM +0000, Wei Yang wrote:
>> On Thu, Mar 12, 2020 at 07:25:35AM -0700, Matthew Wilcox wrote:
>> >Yes, I thought about that.  I decided it wasn't a problem, as long as
>> >the struct page remains aligned, and we now have a guarantee that allocations
>> >above 512 bytes in size are aligned.  With a 64 byte struct page, as long
>> 
>> Where is this 512 bytes condition comes from?
> 
> Filesystems need to do I/Os from kmalloc addresses and those I/Os need to
> be 512 byte aligned.

To clarify, the guarantee exists for every power-of-two size. The I/O use case
was part of the motivation for the guarantee, but there is no 512-byte limit.
That also means there is no guarantee for a non-power-of-two size above (or
below) 512 bytes. Currently this only matters for sizes that fall into the
96-byte or 192-byte caches. With SLOB it can be any size.

So what I'm saying is that the allocations should make sure they are a power of
two, and then they will be aligned. The struct page size of 64 bytes depends on
some debugging options being disabled, right?

>> >as we're allocating at least 8 pages, we know it'll be naturally aligned.
>> >
>> >Your calculation doesn't take into account the size of struct page.
>> >128M / 64k is indeed 2k, but you forgot to multiply by 64, which takes
>> >us to 128kB.
>> 
>> You are right. While would there be other combination? Or in the future?
>> 
>> For example, there are definitions of
>> 
>> #define SECTION_SIZE_BITS       26
>> #define SECTION_SIZE_BITS       24
>> 
>> Are we sure it won't break some thing?
> 
> As I said, once it's at least 512 bytes, it'll be 512 byte aligned.  And I
> can't see us having sections smaller than 8 pages, can you?
>
Michal Hocko March 13, 2020, 2:57 p.m. UTC | #9
On Thu 12-03-20 14:18:26, Wei Yang wrote:
> On Thu, Mar 12, 2020 at 06:34:16AM -0700, Matthew Wilcox wrote:
> >On Thu, Mar 12, 2020 at 09:08:22PM +0800, Baoquan He wrote:
> >> This change makes populate_section_memmap()/depopulate_section_memmap
> >> much simpler.
> >> 
> >> Suggested-by: Michal Hocko <mhocko@kernel.org>
> >> Signed-off-by: Baoquan He <bhe@redhat.com>
> >> ---
> >> v1->v2:
> >>   The old version only used __get_free_pages() to replace alloc_pages()
> >>   in populate_section_memmap().
> >>   http://lkml.kernel.org/r/20200307084229.28251-8-bhe@redhat.com
> >> 
> >>  mm/sparse.c | 27 +++------------------------
> >>  1 file changed, 3 insertions(+), 24 deletions(-)
> >> 
> >> diff --git a/mm/sparse.c b/mm/sparse.c
> >> index bf6c00a28045..362018e82e22 100644
> >> --- a/mm/sparse.c
> >> +++ b/mm/sparse.c
> >> @@ -734,35 +734,14 @@ static void free_map_bootmem(struct page *memmap)
> >>  struct page * __meminit populate_section_memmap(unsigned long pfn,
> >>  		unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
> >>  {
> >> -	struct page *page, *ret;
> >> -	unsigned long memmap_size = sizeof(struct page) * PAGES_PER_SECTION;
> >> -
> >> -	page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
> >> -	if (page)
> >> -		goto got_map_page;
> >> -
> >> -	ret = vmalloc(memmap_size);
> >> -	if (ret)
> >> -		goto got_map_ptr;
> >> -
> >> -	return NULL;
> >> -got_map_page:
> >> -	ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
> >> -got_map_ptr:
> >> -
> >> -	return ret;
> >> +	return kvmalloc_node(sizeof(struct page) * PAGES_PER_SECTION,
> >> +			     GFP_KERNEL|__GFP_NOWARN, nid);
> >
> >Use of NOWARN here is inappropriate, because there's no fallback.
> 
> Hmm... this replacement is a little tricky.
> 
> When you look into kvmalloc_node(), it will do the fallback if the size is
> bigger than PAGE_SIZE. This means the change here may not be equivalent as
> before if memmap_size is less than PAGE_SIZE.

I do not understand your concern, to be honest. Even if a sub-page memmap
size were possible (I haven't checked), I fail to see why kmalloc would
fail to allocate while vmalloc would have a bigger chance to succeed.
Wei Yang March 13, 2020, 9:54 p.m. UTC | #10
On Fri, Mar 13, 2020 at 03:57:33PM +0100, Michal Hocko wrote:
>On Thu 12-03-20 14:18:26, Wei Yang wrote:
>> On Thu, Mar 12, 2020 at 06:34:16AM -0700, Matthew Wilcox wrote:
>> >On Thu, Mar 12, 2020 at 09:08:22PM +0800, Baoquan He wrote:
>> >> This change makes populate_section_memmap()/depopulate_section_memmap
>> >> much simpler.
>> >> 
>> >> Suggested-by: Michal Hocko <mhocko@kernel.org>
>> >> Signed-off-by: Baoquan He <bhe@redhat.com>
>> >> ---
>> >> v1->v2:
>> >>   The old version only used __get_free_pages() to replace alloc_pages()
>> >>   in populate_section_memmap().
>> >>   http://lkml.kernel.org/r/20200307084229.28251-8-bhe@redhat.com
>> >> 
>> >>  mm/sparse.c | 27 +++------------------------
>> >>  1 file changed, 3 insertions(+), 24 deletions(-)
>> >> 
>> >> diff --git a/mm/sparse.c b/mm/sparse.c
>> >> index bf6c00a28045..362018e82e22 100644
>> >> --- a/mm/sparse.c
>> >> +++ b/mm/sparse.c
>> >> @@ -734,35 +734,14 @@ static void free_map_bootmem(struct page *memmap)
>> >>  struct page * __meminit populate_section_memmap(unsigned long pfn,
>> >>  		unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
>> >>  {
>> >> -	struct page *page, *ret;
>> >> -	unsigned long memmap_size = sizeof(struct page) * PAGES_PER_SECTION;
>> >> -
>> >> -	page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
>> >> -	if (page)
>> >> -		goto got_map_page;
>> >> -
>> >> -	ret = vmalloc(memmap_size);
>> >> -	if (ret)
>> >> -		goto got_map_ptr;
>> >> -
>> >> -	return NULL;
>> >> -got_map_page:
>> >> -	ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
>> >> -got_map_ptr:
>> >> -
>> >> -	return ret;
>> >> +	return kvmalloc_node(sizeof(struct page) * PAGES_PER_SECTION,
>> >> +			     GFP_KERNEL|__GFP_NOWARN, nid);
>> >
>> >Use of NOWARN here is inappropriate, because there's no fallback.
>> 
>> Hmm... this replacement is a little tricky.
>> 
>> When you look into kvmalloc_node(), it will do the fallback if the size is
>> bigger than PAGE_SIZE. This means the change here may not be equivalent as
>> before if memmap_size is less than PAGE_SIZE.
>
>I do not understand your concern to be honest. Even if a sub page memmap
>size was possible (I haven't checked), I fail to see why kmalloc would
>fail to allocate while vmalloc would have a bigger chance to succeed.
>

You are right. My concern was just at the kvmalloc_node() level.

After looking deeper into the implementation, __vmalloc_area_node() still
allocates memory in PAGE_SIZE units and fills in the kernel page table.

That's why kvmalloc_node() doesn't bother falling back to vmalloc when the
size is not bigger than PAGE_SIZE.

Learned one more point. Thanks.

>-- 
>Michal Hocko
>SUSE Labs

Patch

diff --git a/mm/sparse.c b/mm/sparse.c
index bf6c00a28045..362018e82e22 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -734,35 +734,14 @@  static void free_map_bootmem(struct page *memmap)
 struct page * __meminit populate_section_memmap(unsigned long pfn,
 		unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
 {
-	struct page *page, *ret;
-	unsigned long memmap_size = sizeof(struct page) * PAGES_PER_SECTION;
-
-	page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
-	if (page)
-		goto got_map_page;
-
-	ret = vmalloc(memmap_size);
-	if (ret)
-		goto got_map_ptr;
-
-	return NULL;
-got_map_page:
-	ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
-got_map_ptr:
-
-	return ret;
+	return kvmalloc_node(sizeof(struct page) * PAGES_PER_SECTION,
+			     GFP_KERNEL|__GFP_NOWARN, nid);
 }
 
 static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
 		struct vmem_altmap *altmap)
 {
-	struct page *memmap = pfn_to_page(pfn);
-
-	if (is_vmalloc_addr(memmap))
-		vfree(memmap);
-	else
-		free_pages((unsigned long)memmap,
-			   get_order(sizeof(struct page) * PAGES_PER_SECTION));
+	kvfree(pfn_to_page(pfn));
 }
 
 static void free_map_bootmem(struct page *memmap)