
[v5,1/2] mm: Allow the page cache to allocate large pages

Message ID 20190902092341.26712-2-william.kucharski@oracle.com (mailing list archive)
State New, archived
Series mm,thp: Add filemap_huge_fault() for THP

Commit Message

William Kucharski Sept. 2, 2019, 9:23 a.m. UTC
Add an 'order' argument to __page_cache_alloc() and
do_read_cache_page(). Ensure the allocated pages are compound pages.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: William Kucharski <william.kucharski@oracle.com>
Reported-by: kbuild test robot <lkp@intel.com>
---
 fs/afs/dir.c            |  2 +-
 fs/btrfs/compression.c  |  2 +-
 fs/cachefiles/rdwr.c    |  4 ++--
 fs/ceph/addr.c          |  2 +-
 fs/ceph/file.c          |  2 +-
 include/linux/pagemap.h | 10 ++++++----
 mm/filemap.c            | 20 +++++++++++---------
 mm/readahead.c          |  2 +-
 net/ceph/pagelist.c     |  4 ++--
 net/ceph/pagevec.c      |  2 +-
 10 files changed, 27 insertions(+), 23 deletions(-)
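
As an illustration of the new interface (a hypothetical call site, not part
of this patch), a caller wanting an order-2 compound page in the page cache
passes the order directly and lets the helper add __GFP_COMP:

	struct page *page;

	/* Sketch only: the helper sets __GFP_COMP because order > 0. */
	page = __page_cache_alloc(mapping_gfp_mask(mapping), 2);
	if (!page)
		return -ENOMEM;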

Comments

Michal Hocko Sept. 3, 2019, 11:57 a.m. UTC | #1
On Mon 02-09-19 03:23:40, William Kucharski wrote:
> Add an 'order' argument to __page_cache_alloc() and
> do_read_cache_page(). Ensure the allocated pages are compound pages.

Why do we need to touch all the existing callers and change them to use
order 0 when none is actually converted to a different order? This just
seems to add a lot of code churn without a good reason. If anything I
would simply add __page_cache_alloc_order and make __page_cache_alloc
call it with order 0 argument.

Also is it so much to ask callers to provide __GFP_COMP explicitly?
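
A minimal sketch of that alternative (illustrative only, it is in neither
patch): keep __page_cache_alloc() as an order-0 wrapper and add a new
helper (the !NUMA inline shown; the NUMA variant in mm/filemap.c would
mirror it):

	static inline struct page *__page_cache_alloc_order(gfp_t gfp,
							    unsigned int order)
	{
		if (order > 0)
			gfp |= __GFP_COMP;
		return alloc_pages(gfp, order);
	}

	/* Existing callers keep the current one-argument form. */
	static inline struct page *__page_cache_alloc(gfp_t gfp)
	{
		return __page_cache_alloc_order(gfp, 0);
	}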

> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> Signed-off-by: William Kucharski <william.kucharski@oracle.com>
> Reported-by: kbuild test robot <lkp@intel.com>
> ---
>  fs/afs/dir.c            |  2 +-
>  fs/btrfs/compression.c  |  2 +-
>  fs/cachefiles/rdwr.c    |  4 ++--
>  fs/ceph/addr.c          |  2 +-
>  fs/ceph/file.c          |  2 +-
>  include/linux/pagemap.h | 10 ++++++----
>  mm/filemap.c            | 20 +++++++++++---------
>  mm/readahead.c          |  2 +-
>  net/ceph/pagelist.c     |  4 ++--
>  net/ceph/pagevec.c      |  2 +-
>  10 files changed, 27 insertions(+), 23 deletions(-)
> 
> diff --git a/fs/afs/dir.c b/fs/afs/dir.c
> index 139b4e3cc946..ca8f8e77e012 100644
> --- a/fs/afs/dir.c
> +++ b/fs/afs/dir.c
> @@ -274,7 +274,7 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
>  				afs_stat_v(dvnode, n_inval);
>  
>  			ret = -ENOMEM;
> -			req->pages[i] = __page_cache_alloc(gfp);
> +			req->pages[i] = __page_cache_alloc(gfp, 0);
>  			if (!req->pages[i])
>  				goto error;
>  			ret = add_to_page_cache_lru(req->pages[i],
> diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
> index 60c47b417a4b..5280e7477b7e 100644
> --- a/fs/btrfs/compression.c
> +++ b/fs/btrfs/compression.c
> @@ -466,7 +466,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
>  		}
>  
>  		page = __page_cache_alloc(mapping_gfp_constraint(mapping,
> -								 ~__GFP_FS));
> +								 ~__GFP_FS), 0);
>  		if (!page)
>  			break;
>  
> diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
> index 44a3ce1e4ce4..11d30212745f 100644
> --- a/fs/cachefiles/rdwr.c
> +++ b/fs/cachefiles/rdwr.c
> @@ -259,7 +259,7 @@ static int cachefiles_read_backing_file_one(struct cachefiles_object *object,
>  			goto backing_page_already_present;
>  
>  		if (!newpage) {
> -			newpage = __page_cache_alloc(cachefiles_gfp);
> +			newpage = __page_cache_alloc(cachefiles_gfp, 0);
>  			if (!newpage)
>  				goto nomem_monitor;
>  		}
> @@ -495,7 +495,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,
>  				goto backing_page_already_present;
>  
>  			if (!newpage) {
> -				newpage = __page_cache_alloc(cachefiles_gfp);
> +				newpage = __page_cache_alloc(cachefiles_gfp, 0);
>  				if (!newpage)
>  					goto nomem;
>  			}
> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
> index b3c8b886bf64..7c1c3857fbb9 100644
> --- a/fs/ceph/addr.c
> +++ b/fs/ceph/addr.c
> @@ -1708,7 +1708,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
>  		if (len > PAGE_SIZE)
>  			len = PAGE_SIZE;
>  	} else {
> -		page = __page_cache_alloc(GFP_NOFS);
> +		page = __page_cache_alloc(GFP_NOFS, 0);
>  		if (!page) {
>  			err = -ENOMEM;
>  			goto out;
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index 685a03cc4b77..ae58d7c31aa4 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -1305,7 +1305,7 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
>  		struct page *page = NULL;
>  		loff_t i_size;
>  		if (retry_op == READ_INLINE) {
> -			page = __page_cache_alloc(GFP_KERNEL);
> +			page = __page_cache_alloc(GFP_KERNEL, 0);
>  			if (!page)
>  				return -ENOMEM;
>  		}
> diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> index c7552459a15f..92e026d9a6b7 100644
> --- a/include/linux/pagemap.h
> +++ b/include/linux/pagemap.h
> @@ -208,17 +208,19 @@ static inline int page_cache_add_speculative(struct page *page, int count)
>  }
>  
>  #ifdef CONFIG_NUMA
> -extern struct page *__page_cache_alloc(gfp_t gfp);
> +extern struct page *__page_cache_alloc(gfp_t gfp, unsigned int order);
>  #else
> -static inline struct page *__page_cache_alloc(gfp_t gfp)
> +static inline struct page *__page_cache_alloc(gfp_t gfp, unsigned int order)
>  {
> -	return alloc_pages(gfp, 0);
> +	if (order > 0)
> +		gfp |= __GFP_COMP;
> +	return alloc_pages(gfp, order);
>  }
>  #endif
>  
>  static inline struct page *page_cache_alloc(struct address_space *x)
>  {
> -	return __page_cache_alloc(mapping_gfp_mask(x));
> +	return __page_cache_alloc(mapping_gfp_mask(x), 0);
>  }
>  
>  static inline gfp_t readahead_gfp_mask(struct address_space *x)
> diff --git a/mm/filemap.c b/mm/filemap.c
> index d0cf700bf201..38b46fc00855 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -954,22 +954,25 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
>  EXPORT_SYMBOL_GPL(add_to_page_cache_lru);
>  
>  #ifdef CONFIG_NUMA
> -struct page *__page_cache_alloc(gfp_t gfp)
> +struct page *__page_cache_alloc(gfp_t gfp, unsigned int order)
>  {
>  	int n;
>  	struct page *page;
>  
> +	if (order > 0)
> +		gfp |= __GFP_COMP;
> +
>  	if (cpuset_do_page_mem_spread()) {
>  		unsigned int cpuset_mems_cookie;
>  		do {
>  			cpuset_mems_cookie = read_mems_allowed_begin();
>  			n = cpuset_mem_spread_node();
> -			page = __alloc_pages_node(n, gfp, 0);
> +			page = __alloc_pages_node(n, gfp, order);
>  		} while (!page && read_mems_allowed_retry(cpuset_mems_cookie));
>  
>  		return page;
>  	}
> -	return alloc_pages(gfp, 0);
> +	return alloc_pages(gfp, order);
>  }
>  EXPORT_SYMBOL(__page_cache_alloc);
>  #endif
> @@ -1665,7 +1668,7 @@ struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
>  		if (fgp_flags & FGP_NOFS)
>  			gfp_mask &= ~__GFP_FS;
>  
> -		page = __page_cache_alloc(gfp_mask);
> +		page = __page_cache_alloc(gfp_mask, 0);
>  		if (!page)
>  			return NULL;
>  
> @@ -2802,15 +2805,14 @@ static struct page *wait_on_page_read(struct page *page)
>  static struct page *do_read_cache_page(struct address_space *mapping,
>  				pgoff_t index,
>  				int (*filler)(void *, struct page *),
> -				void *data,
> -				gfp_t gfp)
> +				void *data, unsigned int order, gfp_t gfp)
>  {
>  	struct page *page;
>  	int err;
>  repeat:
>  	page = find_get_page(mapping, index);
>  	if (!page) {
> -		page = __page_cache_alloc(gfp);
> +		page = __page_cache_alloc(gfp, order);
>  		if (!page)
>  			return ERR_PTR(-ENOMEM);
>  		err = add_to_page_cache_lru(page, mapping, index, gfp);
> @@ -2917,7 +2919,7 @@ struct page *read_cache_page(struct address_space *mapping,
>  				int (*filler)(void *, struct page *),
>  				void *data)
>  {
> -	return do_read_cache_page(mapping, index, filler, data,
> +	return do_read_cache_page(mapping, index, filler, data, 0,
>  			mapping_gfp_mask(mapping));
>  }
>  EXPORT_SYMBOL(read_cache_page);
> @@ -2939,7 +2941,7 @@ struct page *read_cache_page_gfp(struct address_space *mapping,
>  				pgoff_t index,
>  				gfp_t gfp)
>  {
> -	return do_read_cache_page(mapping, index, NULL, NULL, gfp);
> +	return do_read_cache_page(mapping, index, NULL, NULL, 0, gfp);
>  }
>  EXPORT_SYMBOL(read_cache_page_gfp);
>  
> diff --git a/mm/readahead.c b/mm/readahead.c
> index 2fe72cd29b47..954760a612ea 100644
> --- a/mm/readahead.c
> +++ b/mm/readahead.c
> @@ -193,7 +193,7 @@ unsigned int __do_page_cache_readahead(struct address_space *mapping,
>  			continue;
>  		}
>  
> -		page = __page_cache_alloc(gfp_mask);
> +		page = __page_cache_alloc(gfp_mask, 0);
>  		if (!page)
>  			break;
>  		page->index = page_offset;
> diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c
> index 65e34f78b05d..0c3face908dc 100644
> --- a/net/ceph/pagelist.c
> +++ b/net/ceph/pagelist.c
> @@ -56,7 +56,7 @@ static int ceph_pagelist_addpage(struct ceph_pagelist *pl)
>  	struct page *page;
>  
>  	if (!pl->num_pages_free) {
> -		page = __page_cache_alloc(GFP_NOFS);
> +		page = __page_cache_alloc(GFP_NOFS, 0);
>  	} else {
>  		page = list_first_entry(&pl->free_list, struct page, lru);
>  		list_del(&page->lru);
> @@ -107,7 +107,7 @@ int ceph_pagelist_reserve(struct ceph_pagelist *pl, size_t space)
>  	space = (space + PAGE_SIZE - 1) >> PAGE_SHIFT;   /* conv to num pages */
>  
>  	while (space > pl->num_pages_free) {
> -		struct page *page = __page_cache_alloc(GFP_NOFS);
> +		struct page *page = __page_cache_alloc(GFP_NOFS, 0);
>  		if (!page)
>  			return -ENOMEM;
>  		list_add_tail(&page->lru, &pl->free_list);
> diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
> index 64305e7056a1..1d07e639216d 100644
> --- a/net/ceph/pagevec.c
> +++ b/net/ceph/pagevec.c
> @@ -45,7 +45,7 @@ struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags)
>  	if (!pages)
>  		return ERR_PTR(-ENOMEM);
>  	for (i = 0; i < num_pages; i++) {
> -		pages[i] = __page_cache_alloc(flags);
> +		pages[i] = __page_cache_alloc(flags, 0);
>  		if (pages[i] == NULL) {
>  			ceph_release_page_vector(pages, i);
>  			return ERR_PTR(-ENOMEM);
> -- 
> 2.21.0
Matthew Wilcox (Oracle) Sept. 3, 2019, 12:11 p.m. UTC | #2
On Tue, Sep 03, 2019 at 01:57:48PM +0200, Michal Hocko wrote:
> On Mon 02-09-19 03:23:40, William Kucharski wrote:
> > Add an 'order' argument to __page_cache_alloc() and
> > do_read_cache_page(). Ensure the allocated pages are compound pages.
> 
> Why do we need to touch all the existing callers and change them to use
> order 0 when none is actually converted to a different order? This just
> seems to add a lot of code churn without a good reason. If anything I
> would simply add __page_cache_alloc_order and make __page_cache_alloc
> call it with order 0 argument.

Patch 2/2 uses a non-zero order.  I agree it's a lot of churn without
good reason; that's why I tried to add GFP_ORDER flags a few months ago.
Unfortunately, you didn't like that approach either.

> Also is it so much to ask callers to provide __GFP_COMP explicitly?

Yes, it's an unreasonable burden on the callers.  Those that pass 0 will
have the test optimised away by the compiler (for the non-NUMA case).
For the NUMA case, passing zero is going to be only a couple of extra
instructions to not set the GFP_COMP flag.
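
For example, with the !NUMA inline quoted below, a constant-order call like

	page = __page_cache_alloc(gfp_mask, 0);

has the "if (order > 0)" test folded away and should compile to the same
code as the old alloc_pages(gfp_mask, 0) (expected codegen, not measured
here).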

> >  #ifdef CONFIG_NUMA
> > -extern struct page *__page_cache_alloc(gfp_t gfp);
> > +extern struct page *__page_cache_alloc(gfp_t gfp, unsigned int order);
> >  #else
> > -static inline struct page *__page_cache_alloc(gfp_t gfp)
> > +static inline struct page *__page_cache_alloc(gfp_t gfp, unsigned int order)
> >  {
> > -	return alloc_pages(gfp, 0);
> > +	if (order > 0)
> > +		gfp |= __GFP_COMP;
> > +	return alloc_pages(gfp, order);
> >  }
> >  #endif
Michal Hocko Sept. 3, 2019, 12:19 p.m. UTC | #3
On Tue 03-09-19 05:11:55, Matthew Wilcox wrote:
> On Tue, Sep 03, 2019 at 01:57:48PM +0200, Michal Hocko wrote:
> > On Mon 02-09-19 03:23:40, William Kucharski wrote:
> > > Add an 'order' argument to __page_cache_alloc() and
> > > do_read_cache_page(). Ensure the allocated pages are compound pages.
> > 
> > Why do we need to touch all the existing callers and change them to use
> > order 0 when none is actually converted to a different order? This just
> > seems to add a lot of code churn without a good reason. If anything I
> > would simply add __page_cache_alloc_order and make __page_cache_alloc
> > call it with order 0 argument.
> 
> Patch 2/2 uses a non-zero order.

It is a new caller and it can use a new function right?

> I agree it's a lot of churn without
> good reason; that's why I tried to add GFP_ORDER flags a few months ago.
> Unfortunately, you didn't like that approach either.

Is there any future plan that all/most __page_cache_alloc will get a
non-zero order argument?

> > Also is it so much to ask callers to provide __GFP_COMP explicitly?
> 
> Yes, it's an unreasonable burden on the callers.

Care to explain why? __GFP_COMP tends to be used in the kernel quite
extensively.

> Those that pass 0 will
> have the test optimised away by the compiler (for the non-NUMA case).
> For the NUMA case, passing zero is going to be only a couple of extra
> instructions to not set the GFP_COMP flag.
> 
> > >  #ifdef CONFIG_NUMA
> > > -extern struct page *__page_cache_alloc(gfp_t gfp);
> > > +extern struct page *__page_cache_alloc(gfp_t gfp, unsigned int order);
> > >  #else
> > > -static inline struct page *__page_cache_alloc(gfp_t gfp)
> > > +static inline struct page *__page_cache_alloc(gfp_t gfp, unsigned int order)
> > >  {
> > > -	return alloc_pages(gfp, 0);
> > > +	if (order > 0)
> > > +		gfp |= __GFP_COMP;
> > > +	return alloc_pages(gfp, order);
> > >  }
> > >  #endif
Matthew Wilcox (Oracle) Sept. 3, 2019, 4:28 p.m. UTC | #4
On Tue, Sep 03, 2019 at 02:19:52PM +0200, Michal Hocko wrote:
> On Tue 03-09-19 05:11:55, Matthew Wilcox wrote:
> > On Tue, Sep 03, 2019 at 01:57:48PM +0200, Michal Hocko wrote:
> > > On Mon 02-09-19 03:23:40, William Kucharski wrote:
> > > > Add an 'order' argument to __page_cache_alloc() and
> > > > do_read_cache_page(). Ensure the allocated pages are compound pages.
> > > 
> > > Why do we need to touch all the existing callers and change them to use
> > > order 0 when none is actually converted to a different order? This just
> > > seems to add a lot of code churn without a good reason. If anything I
> > > would simply add __page_cache_alloc_order and make __page_cache_alloc
> > > call it with order 0 argument.
> > 
> > Patch 2/2 uses a non-zero order.
> 
> It is a new caller and it can use a new function right?
> 
> > I agree it's a lot of churn without
> > good reason; that's why I tried to add GFP_ORDER flags a few months ago.
> > Unfortunately, you didn't like that approach either.
> 
> Is there any future plan that all/most __page_cache_alloc will get a
> non-zero order argument?

I'm not sure about "most".  It will certainly become more common, as
far as I can tell.

> > > Also is it so much to ask callers to provide __GFP_COMP explicitly?
> > 
> > Yes, it's an unreasonable burden on the callers.
> 
> Care to explain why? __GFP_COMP tends to be used in the kernel quite
> extensively.

Most of the places which call this function get their gfp_t from
mapping->gfp_mask.  If we only want to allocate a single page, we
must not set __GFP_COMP.  If we want to allocate a large page, we must
set __GFP_COMP.  Rather than require individual filesystems to concern
themselves with this wart of the GFP interface, we can solve it in the
page cache.
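
Concretely, the difference is between every filesystem open-coding
something like this (a hypothetical caller-side version, not from either
patch):

	gfp_t gfp = mapping_gfp_mask(mapping);

	if (order > 0)
		gfp |= __GFP_COMP;
	page = __page_cache_alloc(gfp, order);

and simply writing

	page = __page_cache_alloc(mapping_gfp_mask(mapping), order);

with the flag handled once inside the page cache.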
Michal Hocko Sept. 3, 2019, 7:18 p.m. UTC | #5
On Tue 03-09-19 09:28:31, Matthew Wilcox wrote:
> On Tue, Sep 03, 2019 at 02:19:52PM +0200, Michal Hocko wrote:
> > On Tue 03-09-19 05:11:55, Matthew Wilcox wrote:
> > > On Tue, Sep 03, 2019 at 01:57:48PM +0200, Michal Hocko wrote:
> > > > On Mon 02-09-19 03:23:40, William Kucharski wrote:
> > > > > Add an 'order' argument to __page_cache_alloc() and
> > > > > do_read_cache_page(). Ensure the allocated pages are compound pages.
> > > > 
> > > > Why do we need to touch all the existing callers and change them to use
> > > > order 0 when none is actually converted to a different order? This just
> > > > seems to add a lot of code churn without a good reason. If anything I
> > > > would simply add __page_cache_alloc_order and make __page_cache_alloc
> > > > call it with order 0 argument.
> > > 
> > > Patch 2/2 uses a non-zero order.
> > 
> > It is a new caller and it can use a new function right?
> > 
> > > I agree it's a lot of churn without
> > > good reason; that's why I tried to add GFP_ORDER flags a few months ago.
> > > Unfortunately, you didn't like that approach either.
> > 
> > Is there any future plan that all/most __page_cache_alloc will get a
> > non-zero order argument?
> 
> I'm not sure about "most".  It will certainly become more common, as
> far as I can tell.

I would personally still go with the __page_cache_alloc_order way, but it
is up to you and other fs people to decide what suits best. I was just
surprised to see a lot of code churn when it was not really used in the
second patch. That's why I brought it up.

> > > > Also is it so much to ask callers to provide __GFP_COMP explicitly?
> > > 
> > > Yes, it's an unreasonable burden on the callers.
> > 
> > Care to explain why? __GFP_COMP tends to be used in the kernel quite
> > extensively.
> 
> Most of the places which call this function get their gfp_t from
> mapping->gfp_mask.  If we only want to allocate a single page, we
> must not set __GFP_COMP.  If we want to allocate a large page, we must
> set __GFP_COMP.  Rather than require individual filesystems to concern
> themselves with this wart of the GFP interface, we can solve it in the
> page cache.

Fair enough.
William Kucharski Sept. 4, 2019, 3:30 a.m. UTC | #6
> On Sep 3, 2019, at 5:57 AM, Michal Hocko <mhocko@kernel.org> wrote:
> 
> On Mon 02-09-19 03:23:40, William Kucharski wrote:
>> Add an 'order' argument to __page_cache_alloc() and
>> do_read_cache_page(). Ensure the allocated pages are compound pages.
> 
> Why do we need to touch all the existing callers and change them to use
> order 0 when none is actually converted to a different order? This just
> seems to add a lot of code churn without a good reason. If anything I
> would simply add __page_cache_alloc_order and make __page_cache_alloc
> call it with order 0 argument.

All the EXISTING code in patch [1/2] is changed to call it with an order
of 0, as you would expect.

However, new code in part [2/2] of the patch calls it with an order of
HPAGE_PMD_ORDER, as it seems cleaner to have those routines operate on
a page, regardless of the order of the page desired.
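
For instance, the sort of call patch [2/2] makes is roughly (an
illustrative sketch only, not the actual code from that patch):

	struct page *page;

	page = __page_cache_alloc(mapping_gfp_mask(mapping), HPAGE_PMD_ORDER);
	if (!page)
		return VM_FAULT_FALLBACK;	/* fall back to PAGE_SIZE faults */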

I certainly can change this as you request, but once again the question
is whether "page" should MEAN "page" regardless of the order desired,
or whether the assumption will always be "page" means base PAGESIZE.

Either approach works, but what is the semantic we want going forward?

Thanks again!
Michal Hocko Sept. 4, 2019, 8:28 a.m. UTC | #7
On Tue 03-09-19 21:30:30, William Kucharski wrote:
> 
> 
> > On Sep 3, 2019, at 5:57 AM, Michal Hocko <mhocko@kernel.org> wrote:
> > 
> > On Mon 02-09-19 03:23:40, William Kucharski wrote:
> >> Add an 'order' argument to __page_cache_alloc() and
> >> do_read_cache_page(). Ensure the allocated pages are compound pages.
> > 
> > Why do we need to touch all the existing callers and change them to use
> > order 0 when none is actually converted to a different order? This just
> > seems to add a lot of code churn without a good reason. If anything I
> > would simply add __page_cache_alloc_order and make __page_cache_alloc
> > call it with order 0 argument.
> 
> All the EXISTING code in patch [1/2] is changed to call it with an order
> of 0, as you would expect.
> 
> However, new code in part [2/2] of the patch calls it with an order of
> HPAGE_PMD_ORDER, as it seems cleaner to have those routines operate on
> a page, regardless of the order of the page desired.
> 
> I certainly can change this as you request, but once again the question
> is whether "page" should MEAN "page" regardless of the order desired,
> or whether the assumption will always be "page" means base PAGESIZE.
> 
> Either approach works, but what is the semantic we want going forward?

I do not have anything against handling the page as compound, if that is
the question. All I was interested in was whether adding a new helper to
_allocate_ the compound page wouldn't be easier than touching all the
existing __page_cache_alloc users.

Patch

diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 139b4e3cc946..ca8f8e77e012 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -274,7 +274,7 @@  static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
 				afs_stat_v(dvnode, n_inval);
 
 			ret = -ENOMEM;
-			req->pages[i] = __page_cache_alloc(gfp);
+			req->pages[i] = __page_cache_alloc(gfp, 0);
 			if (!req->pages[i])
 				goto error;
 			ret = add_to_page_cache_lru(req->pages[i],
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 60c47b417a4b..5280e7477b7e 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -466,7 +466,7 @@  static noinline int add_ra_bio_pages(struct inode *inode,
 		}
 
 		page = __page_cache_alloc(mapping_gfp_constraint(mapping,
-								 ~__GFP_FS));
+								 ~__GFP_FS), 0);
 		if (!page)
 			break;
 
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 44a3ce1e4ce4..11d30212745f 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -259,7 +259,7 @@  static int cachefiles_read_backing_file_one(struct cachefiles_object *object,
 			goto backing_page_already_present;
 
 		if (!newpage) {
-			newpage = __page_cache_alloc(cachefiles_gfp);
+			newpage = __page_cache_alloc(cachefiles_gfp, 0);
 			if (!newpage)
 				goto nomem_monitor;
 		}
@@ -495,7 +495,7 @@  static int cachefiles_read_backing_file(struct cachefiles_object *object,
 				goto backing_page_already_present;
 
 			if (!newpage) {
-				newpage = __page_cache_alloc(cachefiles_gfp);
+				newpage = __page_cache_alloc(cachefiles_gfp, 0);
 				if (!newpage)
 					goto nomem;
 			}
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index b3c8b886bf64..7c1c3857fbb9 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1708,7 +1708,7 @@  int ceph_uninline_data(struct file *filp, struct page *locked_page)
 		if (len > PAGE_SIZE)
 			len = PAGE_SIZE;
 	} else {
-		page = __page_cache_alloc(GFP_NOFS);
+		page = __page_cache_alloc(GFP_NOFS, 0);
 		if (!page) {
 			err = -ENOMEM;
 			goto out;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 685a03cc4b77..ae58d7c31aa4 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1305,7 +1305,7 @@  static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
 		struct page *page = NULL;
 		loff_t i_size;
 		if (retry_op == READ_INLINE) {
-			page = __page_cache_alloc(GFP_KERNEL);
+			page = __page_cache_alloc(GFP_KERNEL, 0);
 			if (!page)
 				return -ENOMEM;
 		}
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index c7552459a15f..92e026d9a6b7 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -208,17 +208,19 @@  static inline int page_cache_add_speculative(struct page *page, int count)
 }
 
 #ifdef CONFIG_NUMA
-extern struct page *__page_cache_alloc(gfp_t gfp);
+extern struct page *__page_cache_alloc(gfp_t gfp, unsigned int order);
 #else
-static inline struct page *__page_cache_alloc(gfp_t gfp)
+static inline struct page *__page_cache_alloc(gfp_t gfp, unsigned int order)
 {
-	return alloc_pages(gfp, 0);
+	if (order > 0)
+		gfp |= __GFP_COMP;
+	return alloc_pages(gfp, order);
 }
 #endif
 
 static inline struct page *page_cache_alloc(struct address_space *x)
 {
-	return __page_cache_alloc(mapping_gfp_mask(x));
+	return __page_cache_alloc(mapping_gfp_mask(x), 0);
 }
 
 static inline gfp_t readahead_gfp_mask(struct address_space *x)
diff --git a/mm/filemap.c b/mm/filemap.c
index d0cf700bf201..38b46fc00855 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -954,22 +954,25 @@  int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
 EXPORT_SYMBOL_GPL(add_to_page_cache_lru);
 
 #ifdef CONFIG_NUMA
-struct page *__page_cache_alloc(gfp_t gfp)
+struct page *__page_cache_alloc(gfp_t gfp, unsigned int order)
 {
 	int n;
 	struct page *page;
 
+	if (order > 0)
+		gfp |= __GFP_COMP;
+
 	if (cpuset_do_page_mem_spread()) {
 		unsigned int cpuset_mems_cookie;
 		do {
 			cpuset_mems_cookie = read_mems_allowed_begin();
 			n = cpuset_mem_spread_node();
-			page = __alloc_pages_node(n, gfp, 0);
+			page = __alloc_pages_node(n, gfp, order);
 		} while (!page && read_mems_allowed_retry(cpuset_mems_cookie));
 
 		return page;
 	}
-	return alloc_pages(gfp, 0);
+	return alloc_pages(gfp, order);
 }
 EXPORT_SYMBOL(__page_cache_alloc);
 #endif
@@ -1665,7 +1668,7 @@  struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
 		if (fgp_flags & FGP_NOFS)
 			gfp_mask &= ~__GFP_FS;
 
-		page = __page_cache_alloc(gfp_mask);
+		page = __page_cache_alloc(gfp_mask, 0);
 		if (!page)
 			return NULL;
 
@@ -2802,15 +2805,14 @@  static struct page *wait_on_page_read(struct page *page)
 static struct page *do_read_cache_page(struct address_space *mapping,
 				pgoff_t index,
 				int (*filler)(void *, struct page *),
-				void *data,
-				gfp_t gfp)
+				void *data, unsigned int order, gfp_t gfp)
 {
 	struct page *page;
 	int err;
 repeat:
 	page = find_get_page(mapping, index);
 	if (!page) {
-		page = __page_cache_alloc(gfp);
+		page = __page_cache_alloc(gfp, order);
 		if (!page)
 			return ERR_PTR(-ENOMEM);
 		err = add_to_page_cache_lru(page, mapping, index, gfp);
@@ -2917,7 +2919,7 @@  struct page *read_cache_page(struct address_space *mapping,
 				int (*filler)(void *, struct page *),
 				void *data)
 {
-	return do_read_cache_page(mapping, index, filler, data,
+	return do_read_cache_page(mapping, index, filler, data, 0,
 			mapping_gfp_mask(mapping));
 }
 EXPORT_SYMBOL(read_cache_page);
@@ -2939,7 +2941,7 @@  struct page *read_cache_page_gfp(struct address_space *mapping,
 				pgoff_t index,
 				gfp_t gfp)
 {
-	return do_read_cache_page(mapping, index, NULL, NULL, gfp);
+	return do_read_cache_page(mapping, index, NULL, NULL, 0, gfp);
 }
 EXPORT_SYMBOL(read_cache_page_gfp);
 
diff --git a/mm/readahead.c b/mm/readahead.c
index 2fe72cd29b47..954760a612ea 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -193,7 +193,7 @@  unsigned int __do_page_cache_readahead(struct address_space *mapping,
 			continue;
 		}
 
-		page = __page_cache_alloc(gfp_mask);
+		page = __page_cache_alloc(gfp_mask, 0);
 		if (!page)
 			break;
 		page->index = page_offset;
diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c
index 65e34f78b05d..0c3face908dc 100644
--- a/net/ceph/pagelist.c
+++ b/net/ceph/pagelist.c
@@ -56,7 +56,7 @@  static int ceph_pagelist_addpage(struct ceph_pagelist *pl)
 	struct page *page;
 
 	if (!pl->num_pages_free) {
-		page = __page_cache_alloc(GFP_NOFS);
+		page = __page_cache_alloc(GFP_NOFS, 0);
 	} else {
 		page = list_first_entry(&pl->free_list, struct page, lru);
 		list_del(&page->lru);
@@ -107,7 +107,7 @@  int ceph_pagelist_reserve(struct ceph_pagelist *pl, size_t space)
 	space = (space + PAGE_SIZE - 1) >> PAGE_SHIFT;   /* conv to num pages */
 
 	while (space > pl->num_pages_free) {
-		struct page *page = __page_cache_alloc(GFP_NOFS);
+		struct page *page = __page_cache_alloc(GFP_NOFS, 0);
 		if (!page)
 			return -ENOMEM;
 		list_add_tail(&page->lru, &pl->free_list);
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 64305e7056a1..1d07e639216d 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -45,7 +45,7 @@  struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags)
 	if (!pages)
 		return ERR_PTR(-ENOMEM);
 	for (i = 0; i < num_pages; i++) {
-		pages[i] = __page_cache_alloc(flags);
+		pages[i] = __page_cache_alloc(flags, 0);
 		if (pages[i] == NULL) {
 			ceph_release_page_vector(pages, i);
 			return ERR_PTR(-ENOMEM);