diff mbox series

[v5.1] fs: Allow fine-grained control of folio sizes

Message ID 20240527210125.1905586-1-willy@infradead.org (mailing list archive)
State New, archived
Headers show
Series [v5.1] fs: Allow fine-grained control of folio sizes | expand

Commit Message

Matthew Wilcox (Oracle) May 27, 2024, 9:01 p.m. UTC
We need filesystems to be able to communicate acceptable folio sizes
to the pagecache for a variety of uses (e.g. large block sizes).
Support a range of folio sizes between order-0 and order-31.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Co-developed-by: Pankaj Raghav <p.raghav@samsung.com>
Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
---
For this version, I fixed the TODO that the maximum folio size was not
being honoured.  I made some other changes too like adding const, moving
the location of the constants, checking CONFIG_TRANSPARENT_HUGEPAGE, and
dropping some of the functions which aren't needed until later patches.
(They can be added in the commits that need them).  Also rebased against
current Linus tree, so MAX_PAGECACHE_ORDER no longer needs to be moved).

 include/linux/pagemap.h | 81 +++++++++++++++++++++++++++++++++++------
 mm/filemap.c            |  6 +--
 mm/readahead.c          |  4 +-
 3 files changed, 73 insertions(+), 18 deletions(-)

Comments

Pankaj Raghav (Samsung) May 27, 2024, 10:09 p.m. UTC | #1
On Mon, May 27, 2024 at 10:01:23PM +0100, Matthew Wilcox (Oracle) wrote:
> We need filesystems to be able to communicate acceptable folio sizes
> to the pagecache for a variety of uses (e.g. large block sizes).
> Support a range of folio sizes between order-0 and order-31.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> Co-developed-by: Pankaj Raghav <p.raghav@samsung.com>
> Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
> Reviewed-by: Darrick J. Wong <djwong@kernel.org>
> Reviewed-by: Hannes Reinecke <hare@suse.de>
> ---
> For this version, I fixed the TODO that the maximum folio size was not
> being honoured.  I made some other changes too like adding const, moving
> the location of the constants, checking CONFIG_TRANSPARENT_HUGEPAGE, and
> dropping some of the functions which aren't needed until later patches.
> (They can be added in the commits that need them).  Also rebased against
> current Linus tree, so MAX_PAGECACHE_ORDER no longer needs to be moved).

Thanks for this! So I am currently running my xfstests on the new series
I am planning to send in a day or two based on next-20240523.

I assume this patch is intended to be folded in to the next LBS series?

> 
> diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> index 1ed9274a0deb..c6aaceed0de6 100644
> --- a/include/linux/pagemap.h
> +++ b/include/linux/pagemap.h
> @@ -204,13 +204,18 @@ enum mapping_flags {
>  	AS_EXITING	= 4, 	/* final truncate in progress */
>  	/* writeback related tags are not used */
>  	AS_NO_WRITEBACK_TAGS = 5,
> -	AS_LARGE_FOLIO_SUPPORT = 6,
> -	AS_RELEASE_ALWAYS,	/* Call ->release_folio(), even if no private data */
> -	AS_STABLE_WRITES,	/* must wait for writeback before modifying
> +	AS_RELEASE_ALWAYS = 6,	/* Call ->release_folio(), even if no private data */
> +	AS_STABLE_WRITES = 7,	/* must wait for writeback before modifying
>  				   folio contents */
> -	AS_UNMOVABLE,		/* The mapping cannot be moved, ever */
> +	AS_UNMOVABLE = 8,	/* The mapping cannot be moved, ever */
> +	AS_FOLIO_ORDER_MIN = 16,
> +	AS_FOLIO_ORDER_MAX = 21, /* Bits 16-25 are used for FOLIO_ORDER */
>  };
>  
> +#define AS_FOLIO_ORDER_MIN_MASK 0x001f0000
> +#define AS_FOLIO_ORDER_MAX_MASK 0x03e00000

As you changed the mapping flag offset, these masks also needs to be
changed accordingly.

I moved(pun intended) the AS_UNMOVABLE and kept the
AS_FOLIO_ORDER_(MIN|MAX) value the same.

	AS_FOLIO_ORDER_MIN = 8,
	AS_FOLIO_ORDER_MAX = 13, /* Bit 8-17 are used for FOLIO_ORDER */
	AS_UNMOVABLE = 18,	 /* The mapping cannot be moved, ever */
	AS_INACCESSIBLE,	/* Do not attempt direct R/W access to the mapping */

> +#define AS_FOLIO_ORDER_MASK (AS_FOLIO_ORDER_MIN_MASK | AS_FOLIO_ORDER_MAX_MASK)
> +
>  
--
Pankaj
Matthew Wilcox (Oracle) May 27, 2024, 10:39 p.m. UTC | #2
On Mon, May 27, 2024 at 10:09:26PM +0000, Pankaj Raghav (Samsung) wrote:
> > For this version, I fixed the TODO that the maximum folio size was not
> > being honoured.  I made some other changes too like adding const, moving
> > the location of the constants, checking CONFIG_TRANSPARENT_HUGEPAGE, and
> > dropping some of the functions which aren't needed until later patches.
> > (They can be added in the commits that need them).  Also rebased against
> > current Linus tree, so MAX_PAGECACHE_ORDER no longer needs to be moved).
> 
> Thanks for this! So I am currently running my xfstests on the new series
> I am planning to send in a day or two based on next-20240523.
> 
> I assume this patch is intended to be folded in to the next LBS series?

Right, that was why I numbered it as 5.1 so as to not preempt your v6.

> > diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> > index 1ed9274a0deb..c6aaceed0de6 100644
> > --- a/include/linux/pagemap.h
> > +++ b/include/linux/pagemap.h
> > @@ -204,13 +204,18 @@ enum mapping_flags {
> >  	AS_EXITING	= 4, 	/* final truncate in progress */
> >  	/* writeback related tags are not used */
> >  	AS_NO_WRITEBACK_TAGS = 5,
> > -	AS_LARGE_FOLIO_SUPPORT = 6,
> > -	AS_RELEASE_ALWAYS,	/* Call ->release_folio(), even if no private data */
> > -	AS_STABLE_WRITES,	/* must wait for writeback before modifying
> > +	AS_RELEASE_ALWAYS = 6,	/* Call ->release_folio(), even if no private data */
> > +	AS_STABLE_WRITES = 7,	/* must wait for writeback before modifying
> >  				   folio contents */
> > -	AS_UNMOVABLE,		/* The mapping cannot be moved, ever */
> > +	AS_UNMOVABLE = 8,	/* The mapping cannot be moved, ever */
> > +	AS_FOLIO_ORDER_MIN = 16,
> > +	AS_FOLIO_ORDER_MAX = 21, /* Bits 16-25 are used for FOLIO_ORDER */
> >  };
> >  
> > +#define AS_FOLIO_ORDER_MIN_MASK 0x001f0000
> > +#define AS_FOLIO_ORDER_MAX_MASK 0x03e00000
> 
> As you changed the mapping flag offset, these masks also needs to be
> changed accordingly.

That's why I did change them?
Matthew Wilcox (Oracle) May 27, 2024, 10:43 p.m. UTC | #3
On Mon, May 27, 2024 at 11:39:47PM +0100, Matthew Wilcox wrote:
> > > +	AS_FOLIO_ORDER_MIN = 16,
> > > +	AS_FOLIO_ORDER_MAX = 21, /* Bits 16-25 are used for FOLIO_ORDER */
> > >  };
> > >  
> > > +#define AS_FOLIO_ORDER_MIN_MASK 0x001f0000
> > > +#define AS_FOLIO_ORDER_MAX_MASK 0x03e00000
> > 
> > As you changed the mapping flag offset, these masks also needs to be
> > changed accordingly.
> 
> That's why I did change them?

How about:

-#define AS_FOLIO_ORDER_MIN_MASK 0x001f0000
-#define AS_FOLIO_ORDER_MAX_MASK 0x03e00000
+#define AS_FOLIO_ORDER_MIN_MASK (31 << AS_FOLIO_ORDER_MIN)
+#define AS_FOLIO_ORDER_MAX_MASK (31 << AS_FOLIO_ORDER_MAX)
Dave Chinner May 27, 2024, 11 p.m. UTC | #4
On Mon, May 27, 2024 at 11:43:43PM +0100, Matthew Wilcox wrote:
> On Mon, May 27, 2024 at 11:39:47PM +0100, Matthew Wilcox wrote:
> > > > +	AS_FOLIO_ORDER_MIN = 16,
> > > > +	AS_FOLIO_ORDER_MAX = 21, /* Bits 16-25 are used for FOLIO_ORDER */
> > > >  };
> > > >  
> > > > +#define AS_FOLIO_ORDER_MIN_MASK 0x001f0000
> > > > +#define AS_FOLIO_ORDER_MAX_MASK 0x03e00000
> > > 
> > > As you changed the mapping flag offset, these masks also needs to be
> > > changed accordingly.
> > 
> > That's why I did change them?
> 
> How about:
> 
> -#define AS_FOLIO_ORDER_MIN_MASK 0x001f0000
> -#define AS_FOLIO_ORDER_MAX_MASK 0x03e00000
> +#define AS_FOLIO_ORDER_MIN_MASK (31 << AS_FOLIO_ORDER_MIN)
> +#define AS_FOLIO_ORDER_MAX_MASK (31 << AS_FOLIO_ORDER_MAX)

Lots of magic numbers based on the order having only having 5 bits
of resolution. Removing that magic looks like this:

	AS_FOLIO_ORDER_BITS = 5,
	AS_FOLIO_ORDER_MIN = 16,
	AS_FOLIO_ORDER_MAX = AS_FOLIO_ORDER_MIN + AS_FOLIO_ORDER_BITS,
};

#define AS_FOLIO_ORDER_MASK	((1u << AS_FOLIO_ORDER_BITS) - 1)
#define AS_FOLIO_ORDER_MIN_MASK	(AS_FOLIO_ORDER_MASK << AS_FOLIO_ORDER_MIN)
#define AS_FOLIO_ORDER_MAX_MASK	(AS_FOLIO_ORDER_MASK << AS_FOLIO_ORDER_MAX)

This way if we want to increase the order mask, we only need to
change AS_FOLIO_ORDER_BITS and everything else automatically
recalculates.

Doing this means We could also easily use the high bits of the flag
word for the folio orders, rather than putting them in the middle of
the flag space...

-Dave.
Pankaj Raghav (Samsung) May 28, 2024, 9:12 a.m. UTC | #5
On Tue, May 28, 2024 at 09:00:00AM +1000, Dave Chinner wrote:
> On Mon, May 27, 2024 at 11:43:43PM +0100, Matthew Wilcox wrote:
> > On Mon, May 27, 2024 at 11:39:47PM +0100, Matthew Wilcox wrote:
> > > > > +	AS_FOLIO_ORDER_MIN = 16,
> > > > > +	AS_FOLIO_ORDER_MAX = 21, /* Bits 16-25 are used for FOLIO_ORDER */
> > > > >  };
> > > > >  
> > > > > +#define AS_FOLIO_ORDER_MIN_MASK 0x001f0000
> > > > > +#define AS_FOLIO_ORDER_MAX_MASK 0x03e00000
> > > > 
> > > > As you changed the mapping flag offset, these masks also needs to be
> > > > changed accordingly.
> > > 
> > > That's why I did change them?
> > 
> > How about:
> > 
> > -#define AS_FOLIO_ORDER_MIN_MASK 0x001f0000
> > -#define AS_FOLIO_ORDER_MAX_MASK 0x03e00000
> > +#define AS_FOLIO_ORDER_MIN_MASK (31 << AS_FOLIO_ORDER_MIN)
> > +#define AS_FOLIO_ORDER_MAX_MASK (31 << AS_FOLIO_ORDER_MAX)
> 
> Lots of magic numbers based on the order having only having 5 bits
> of resolution. Removing that magic looks like this:
> 
> 	AS_FOLIO_ORDER_BITS = 5,

I think this needs to be defined outside of the enum as 5 is already
taken by AS_NO_WRITEBACK_TAGS? But I like the idea of making it generic
like this.

Something like this?

#define AS_FOLIO_ORDER_BITS 5
/*
 * Bits in mapping->flags.
 */
enum mapping_flags {
	AS_EIO		= 0,	/* IO error on async write */
	AS_ENOSPC	= 1,	/* ENOSPC on async write */
	AS_MM_ALL_LOCKS	= 2,	/* under mm_take_all_locks() */
	AS_UNEVICTABLE	= 3,	/* e.g., ramdisk, SHM_LOCK */
	AS_EXITING	= 4, 	/* final truncate in progress */
	/* writeback related tags are not used */
	AS_NO_WRITEBACK_TAGS = 5,
	AS_RELEASE_ALWAYS = 6,	/* Call ->release_folio(), even if no private data */
	AS_STABLE_WRITES = 7,	/* must wait for writeback before modifying
				   folio contents */
	AS_UNMOVABLE = 8,	 /* The mapping cannot be moved, ever */
	/* Bit 16-21 are used for FOLIO_ORDER */
	AS_FOLIO_ORDER_MIN = 16,
	AS_FOLIO_ORDER_MAX = AS_FOLIO_ORDER_MIN + AS_FOLIO_ORDER_BITS, 
};

@willy: I can fold this change that Chinner is proposing if you are fine
with this.

> 	AS_FOLIO_ORDER_MIN = 16,
> 	AS_FOLIO_ORDER_MAX = AS_FOLIO_ORDER_MIN + AS_FOLIO_ORDER_BITS,
> };
> 
> #define AS_FOLIO_ORDER_MASK	((1u << AS_FOLIO_ORDER_BITS) - 1)
> #define AS_FOLIO_ORDER_MIN_MASK	(AS_FOLIO_ORDER_MASK << AS_FOLIO_ORDER_MIN)
> #define AS_FOLIO_ORDER_MAX_MASK	(AS_FOLIO_ORDER_MASK << AS_FOLIO_ORDER_MAX)
> 
> This way if we want to increase the order mask, we only need to
> change AS_FOLIO_ORDER_BITS and everything else automatically
> recalculates.
> 
> Doing this means We could also easily use the high bits of the flag
> word for the folio orders, rather than putting them in the middle of
> the flag space...
> 
> -Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
Dave Chinner May 28, 2024, 9:45 a.m. UTC | #6
On Tue, May 28, 2024 at 09:12:02AM +0000, Pankaj Raghav (Samsung) wrote:
> On Tue, May 28, 2024 at 09:00:00AM +1000, Dave Chinner wrote:
> > On Mon, May 27, 2024 at 11:43:43PM +0100, Matthew Wilcox wrote:
> > > On Mon, May 27, 2024 at 11:39:47PM +0100, Matthew Wilcox wrote:
> > > > > > +	AS_FOLIO_ORDER_MIN = 16,
> > > > > > +	AS_FOLIO_ORDER_MAX = 21, /* Bits 16-25 are used for FOLIO_ORDER */
> > > > > >  };
> > > > > >  
> > > > > > +#define AS_FOLIO_ORDER_MIN_MASK 0x001f0000
> > > > > > +#define AS_FOLIO_ORDER_MAX_MASK 0x03e00000
> > > > > 
> > > > > As you changed the mapping flag offset, these masks also needs to be
> > > > > changed accordingly.
> > > > 
> > > > That's why I did change them?
> > > 
> > > How about:
> > > 
> > > -#define AS_FOLIO_ORDER_MIN_MASK 0x001f0000
> > > -#define AS_FOLIO_ORDER_MAX_MASK 0x03e00000
> > > +#define AS_FOLIO_ORDER_MIN_MASK (31 << AS_FOLIO_ORDER_MIN)
> > > +#define AS_FOLIO_ORDER_MAX_MASK (31 << AS_FOLIO_ORDER_MAX)
> > 
> > Lots of magic numbers based on the order having only having 5 bits
> > of resolution. Removing that magic looks like this:
> > 
> > 	AS_FOLIO_ORDER_BITS = 5,
> 
> I think this needs to be defined outside of the enum as 5 is already
> taken by AS_NO_WRITEBACK_TAGS? But I like the idea of making it generic
> like this.

Duplicate values in assigned enums are legal and fairly common.
This:

enum {
        FOO = 1,
        BAR = 2,
        BAZ = 1,
};

int main(int argc, char *argv[])
{
        printf("foo %d, bar %d, baz %d\n", FOO, BAR, BAZ);
}

compiles without warnings or errors and gives the output:

foo 1, bar 2, baz 1

-Dave.
Pankaj Raghav (Samsung) May 28, 2024, 10:13 a.m. UTC | #7
On Mon, May 27, 2024 at 11:39:47PM +0100, Matthew Wilcox wrote:
> On Mon, May 27, 2024 at 10:09:26PM +0000, Pankaj Raghav (Samsung) wrote:
> > > For this version, I fixed the TODO that the maximum folio size was not
> > > being honoured.  I made some other changes too like adding const, moving
> > > the location of the constants, checking CONFIG_TRANSPARENT_HUGEPAGE, and
> > > dropping some of the functions which aren't needed until later patches.
> > > (They can be added in the commits that need them).  Also rebased against
> > > current Linus tree, so MAX_PAGECACHE_ORDER no longer needs to be moved).
> > 
> > Thanks for this! So I am currently running my xfstests on the new series
> > I am planning to send in a day or two based on next-20240523.
> > 
> > I assume this patch is intended to be folded in to the next LBS series?
> 
> Right, that was why I numbered it as 5.1 so as to not preempt your v6.
> 
> > > diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> > > index 1ed9274a0deb..c6aaceed0de6 100644
> > > --- a/include/linux/pagemap.h
> > > +++ b/include/linux/pagemap.h
> > > @@ -204,13 +204,18 @@ enum mapping_flags {
> > >  	AS_EXITING	= 4, 	/* final truncate in progress */
> > >  	/* writeback related tags are not used */
> > >  	AS_NO_WRITEBACK_TAGS = 5,
> > > -	AS_LARGE_FOLIO_SUPPORT = 6,
> > > -	AS_RELEASE_ALWAYS,	/* Call ->release_folio(), even if no private data */
> > > -	AS_STABLE_WRITES,	/* must wait for writeback before modifying
> > > +	AS_RELEASE_ALWAYS = 6,	/* Call ->release_folio(), even if no private data */
> > > +	AS_STABLE_WRITES = 7,	/* must wait for writeback before modifying
> > >  				   folio contents */
> > > -	AS_UNMOVABLE,		/* The mapping cannot be moved, ever */
> > > +	AS_UNMOVABLE = 8,	/* The mapping cannot be moved, ever */
> > > +	AS_FOLIO_ORDER_MIN = 16,
> > > +	AS_FOLIO_ORDER_MAX = 21, /* Bits 16-25 are used for FOLIO_ORDER */
> > >  };
> > >  
> > > +#define AS_FOLIO_ORDER_MIN_MASK 0x001f0000
> > > +#define AS_FOLIO_ORDER_MAX_MASK 0x03e00000
> > 
> > As you changed the mapping flag offset, these masks also needs to be
> > changed accordingly.
> 
> That's why I did change them?

Oops, I missed the zeroes at the end.


Btw, I noticed you have removed mapping_align_start_index(). I will add
it back in.
Hannes Reinecke May 28, 2024, 10:20 a.m. UTC | #8
On 5/27/24 23:01, Matthew Wilcox (Oracle) wrote:
> We need filesystems to be able to communicate acceptable folio sizes
> to the pagecache for a variety of uses (e.g. large block sizes).
> Support a range of folio sizes between order-0 and order-31.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> Co-developed-by: Pankaj Raghav <p.raghav@samsung.com>
> Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
> Reviewed-by: Darrick J. Wong <djwong@kernel.org>
> Reviewed-by: Hannes Reinecke <hare@suse.de>
> ---
> For this version, I fixed the TODO that the maximum folio size was not
> being honoured.  I made some other changes too like adding const, moving
> the location of the constants, checking CONFIG_TRANSPARENT_HUGEPAGE, and
> dropping some of the functions which aren't needed until later patches.
> (They can be added in the commits that need them).  Also rebased against
> current Linus tree, so MAX_PAGECACHE_ORDER no longer needs to be moved).
> 
>   include/linux/pagemap.h | 81 +++++++++++++++++++++++++++++++++++------
>   mm/filemap.c            |  6 +--
>   mm/readahead.c          |  4 +-
>   3 files changed, 73 insertions(+), 18 deletions(-)
> 
> diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> index 1ed9274a0deb..c6aaceed0de6 100644
> --- a/include/linux/pagemap.h
> +++ b/include/linux/pagemap.h
> @@ -204,13 +204,18 @@ enum mapping_flags {
>   	AS_EXITING	= 4, 	/* final truncate in progress */
>   	/* writeback related tags are not used */
>   	AS_NO_WRITEBACK_TAGS = 5,
> -	AS_LARGE_FOLIO_SUPPORT = 6,
> -	AS_RELEASE_ALWAYS,	/* Call ->release_folio(), even if no private data */
> -	AS_STABLE_WRITES,	/* must wait for writeback before modifying
> +	AS_RELEASE_ALWAYS = 6,	/* Call ->release_folio(), even if no private data */
> +	AS_STABLE_WRITES = 7,	/* must wait for writeback before modifying
>   				   folio contents */
> -	AS_UNMOVABLE,		/* The mapping cannot be moved, ever */
> +	AS_UNMOVABLE = 8,	/* The mapping cannot be moved, ever */
> +	AS_FOLIO_ORDER_MIN = 16,
> +	AS_FOLIO_ORDER_MAX = 21, /* Bits 16-25 are used for FOLIO_ORDER */
>   };
>   
> +#define AS_FOLIO_ORDER_MIN_MASK 0x001f0000
> +#define AS_FOLIO_ORDER_MAX_MASK 0x03e00000
> +#define AS_FOLIO_ORDER_MASK (AS_FOLIO_ORDER_MIN_MASK | AS_FOLIO_ORDER_MAX_MASK)
> +
>   /**
>    * mapping_set_error - record a writeback error in the address_space
>    * @mapping: the mapping in which an error should be set
> @@ -359,9 +364,48 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
>   #define MAX_PAGECACHE_ORDER	8
>   #endif
>   
> +/*
> + * mapping_set_folio_order_range() - Set the orders supported by a file.
> + * @mapping: The address space of the file.
> + * @min: Minimum folio order (between 0-MAX_PAGECACHE_ORDER inclusive).
> + * @max: Maximum folio order (between @min-MAX_PAGECACHE_ORDER inclusive).
> + *
> + * The filesystem should call this function in its inode constructor to
> + * indicate which base size (min) and maximum size (max) of folio the VFS
> + * can use to cache the contents of the file.  This should only be used
> + * if the filesystem needs special handling of folio sizes (ie there is
> + * something the core cannot know).
> + * Do not tune it based on, eg, i_size.
> + *
> + * Context: This should not be called while the inode is active as it
> + * is non-atomic.
> + */
> +static inline void mapping_set_folio_order_range(struct address_space *mapping,
> +		unsigned int min, unsigned int max)
> +{
> +	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
> +		return;
> +
Errm. Sure? When transparent hugepages are _enabled_ we don't support 
this feature?
Confused.

Cheers,

Hannes
Pankaj Raghav (Samsung) May 28, 2024, 11:37 a.m. UTC | #9
> > + * is non-atomic.
> > + */
> > +static inline void mapping_set_folio_order_range(struct address_space *mapping,
> > +		unsigned int min, unsigned int max)
> > +{
> > +	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
> > +		return;
> > +
> Errm. Sure? When transparent hugepages are _enabled_ we don't support this
> feature?
> Confused.

I think large folio support depends on THP, at least for now. I remember
willy mentioning that in a thread. The future plan is to get rid of
this dependency.

> 
> Cheers,
> 
> Hannes
> -- 
> Dr. Hannes Reinecke                  Kernel Storage Architect
> hare@suse.de                                +49 911 74053 688
> SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
> HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich
>
Pankaj Raghav (Samsung) May 28, 2024, 11:40 a.m. UTC | #10
> > + */
> > +static inline void mapping_set_folio_order_range(struct address_space *mapping,
> > +		unsigned int min, unsigned int max)
> > +{
> > +	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
> > +		return;
> > +
> Errm. Sure? When transparent hugepages are _enabled_ we don't support this
> feature?
> Confused.
> 
Replied too quickly. I think this is a mistake. Thanks. I will fold this
change.

> Cheers,
> 
> Hannes
> -- 
> Dr. Hannes Reinecke                  Kernel Storage Architect
> hare@suse.de                                +49 911 74053 688
> SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
> HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich
>
Matthew Wilcox (Oracle) May 28, 2024, 12:13 p.m. UTC | #11
On Tue, May 28, 2024 at 10:13:32AM +0000, Pankaj Raghav (Samsung) wrote:
> Btw, I noticed you have removed mapping_align_start_index(). I will add
> it back in.

Add it in the patch which uses it, not this patch.
diff mbox series

Patch

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 1ed9274a0deb..c6aaceed0de6 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -204,13 +204,18 @@  enum mapping_flags {
 	AS_EXITING	= 4, 	/* final truncate in progress */
 	/* writeback related tags are not used */
 	AS_NO_WRITEBACK_TAGS = 5,
-	AS_LARGE_FOLIO_SUPPORT = 6,
-	AS_RELEASE_ALWAYS,	/* Call ->release_folio(), even if no private data */
-	AS_STABLE_WRITES,	/* must wait for writeback before modifying
+	AS_RELEASE_ALWAYS = 6,	/* Call ->release_folio(), even if no private data */
+	AS_STABLE_WRITES = 7,	/* must wait for writeback before modifying
 				   folio contents */
-	AS_UNMOVABLE,		/* The mapping cannot be moved, ever */
+	AS_UNMOVABLE = 8,	/* The mapping cannot be moved, ever */
+	AS_FOLIO_ORDER_MIN = 16,
+	AS_FOLIO_ORDER_MAX = 21, /* Bits 16-25 are used for FOLIO_ORDER */
 };
 
+#define AS_FOLIO_ORDER_MIN_MASK 0x001f0000
+#define AS_FOLIO_ORDER_MAX_MASK 0x03e00000
+#define AS_FOLIO_ORDER_MASK (AS_FOLIO_ORDER_MIN_MASK | AS_FOLIO_ORDER_MAX_MASK)
+
 /**
  * mapping_set_error - record a writeback error in the address_space
  * @mapping: the mapping in which an error should be set
@@ -359,9 +364,48 @@  static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
 #define MAX_PAGECACHE_ORDER	8
 #endif
 
+/*
+ * mapping_set_folio_order_range() - Set the orders supported by a file.
+ * @mapping: The address space of the file.
+ * @min: Minimum folio order (between 0-MAX_PAGECACHE_ORDER inclusive).
+ * @max: Maximum folio order (between @min-MAX_PAGECACHE_ORDER inclusive).
+ *
+ * The filesystem should call this function in its inode constructor to
+ * indicate which base size (min) and maximum size (max) of folio the VFS
+ * can use to cache the contents of the file.  This should only be used
+ * if the filesystem needs special handling of folio sizes (ie there is
+ * something the core cannot know).
+ * Do not tune it based on, eg, i_size.
+ *
+ * Context: This should not be called while the inode is active as it
+ * is non-atomic.
+ */
+static inline void mapping_set_folio_order_range(struct address_space *mapping,
+		unsigned int min, unsigned int max)
+{
+	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+		return;
+
+	if (min > MAX_PAGECACHE_ORDER)
+		min = MAX_PAGECACHE_ORDER;
+	if (max > MAX_PAGECACHE_ORDER)
+		max = MAX_PAGECACHE_ORDER;
+	if (max < min)
+		max = min;
+
+	mapping->flags = (mapping->flags & ~AS_FOLIO_ORDER_MASK) |
+		(min << AS_FOLIO_ORDER_MIN) | (max << AS_FOLIO_ORDER_MAX);
+}
+
+static inline void mapping_set_folio_min_order(struct address_space *mapping,
+		unsigned int min)
+{
+	mapping_set_folio_order_range(mapping, min, MAX_PAGECACHE_ORDER);
+}
+
 /**
  * mapping_set_large_folios() - Indicate the file supports large folios.
- * @mapping: The file.
+ * @mapping: The address space of the file.
  *
  * The filesystem should call this function in its inode constructor to
  * indicate that the VFS can use large folios to cache the contents of
@@ -372,7 +416,23 @@  static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
  */
 static inline void mapping_set_large_folios(struct address_space *mapping)
 {
-	__set_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
+	mapping_set_folio_order_range(mapping, 0, MAX_PAGECACHE_ORDER);
+}
+
+static inline
+unsigned int mapping_max_folio_order(const struct address_space *mapping)
+{
+	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+		return 0;
+	return (mapping->flags & AS_FOLIO_ORDER_MAX_MASK) >> AS_FOLIO_ORDER_MAX;
+}
+
+static inline
+unsigned int mapping_min_folio_order(const struct address_space *mapping)
+{
+	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+		return 0;
+	return (mapping->flags & AS_FOLIO_ORDER_MIN_MASK) >> AS_FOLIO_ORDER_MIN;
 }
 
 /*
@@ -381,16 +441,13 @@  static inline void mapping_set_large_folios(struct address_space *mapping)
  */
 static inline bool mapping_large_folio_support(struct address_space *mapping)
 {
-	return IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
-		test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
+	return mapping_max_folio_order(mapping) > 0;
 }
 
 /* Return the maximum folio size for this pagecache mapping, in bytes. */
-static inline size_t mapping_max_folio_size(struct address_space *mapping)
+static inline size_t mapping_max_folio_size(const struct address_space *mapping)
 {
-	if (mapping_large_folio_support(mapping))
-		return PAGE_SIZE << MAX_PAGECACHE_ORDER;
-	return PAGE_SIZE;
+	return PAGE_SIZE << mapping_max_folio_order(mapping);
 }
 
 static inline int filemap_nr_thps(struct address_space *mapping)
diff --git a/mm/filemap.c b/mm/filemap.c
index 382c3d06bfb1..0557020f130e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1933,10 +1933,8 @@  struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
 		if (WARN_ON_ONCE(!(fgp_flags & (FGP_LOCK | FGP_FOR_MMAP))))
 			fgp_flags |= FGP_LOCK;
 
-		if (!mapping_large_folio_support(mapping))
-			order = 0;
-		if (order > MAX_PAGECACHE_ORDER)
-			order = MAX_PAGECACHE_ORDER;
+		if (order > mapping_max_folio_order(mapping))
+			order = mapping_max_folio_order(mapping);
 		/* If we're not aligned, allocate a smaller folio */
 		if (index & ((1UL << order) - 1))
 			order = __ffs(index);
diff --git a/mm/readahead.c b/mm/readahead.c
index c1b23989d9ca..66058ae02f2e 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -503,9 +503,9 @@  void page_cache_ra_order(struct readahead_control *ractl,
 
 	limit = min(limit, index + ra->size - 1);
 
-	if (new_order < MAX_PAGECACHE_ORDER) {
+	if (new_order < mapping_max_folio_order(mapping)) {
 		new_order += 2;
-		new_order = min_t(unsigned int, MAX_PAGECACHE_ORDER, new_order);
+		new_order = min(mapping_max_folio_order(mapping), new_order);
 		new_order = min_t(unsigned int, new_order, ilog2(ra->size));
 	}