diff mbox series

[v2,4/5] mm: shmem: add a kernel command line to change the default huge policy for tmpfs

Message ID 64091a3d5a8c5edb0461fae203cfcf6f302a19ce.1731397290.git.baolin.wang@linux.alibaba.com (mailing list archive)
State New
Headers show
Series Support large folios for tmpfs | expand

Commit Message

Baolin Wang Nov. 12, 2024, 7:45 a.m. UTC
Now that tmpfs can allocate large folios of any size, while the default
huge policy is still 'never', adding a new command line parameter to change
the default huge policy will be helpful for using large folios with tmpfs.
This is similar to the 'transparent_hugepage_shmem' cmdline for shmem.

Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
---
 .../admin-guide/kernel-parameters.txt         |  7 ++++++
 Documentation/admin-guide/mm/transhuge.rst    |  6 +++++
 mm/shmem.c                                    | 23 ++++++++++++++++++-
 3 files changed, 35 insertions(+), 1 deletion(-)

Comments

Daniel Gomez Nov. 15, 2024, 2:02 p.m. UTC | #1
On Tue Nov 12, 2024 at 8:45 AM CET, Baolin Wang wrote:
> Now the tmpfs can allow to allocate any sized large folios, and the default
> huge policy is still 'never'. Thus adding a new command line to change
> the default huge policy will be helpful to use the large folios for tmpfs,
> which is similar to the 'transparent_hugepage_shmem' cmdline for shmem.


I think it would be good to include a summary of why tmpfs does not
enable large folios by default as other filesystems do. David has been
pretty good at repeating the reasons over and over and it would be very
valuable to have them included here.

>
> Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
> ---
>  .../admin-guide/kernel-parameters.txt         |  7 ++++++
>  Documentation/admin-guide/mm/transhuge.rst    |  6 +++++
>  mm/shmem.c                                    | 23 ++++++++++++++++++-
>  3 files changed, 35 insertions(+), 1 deletion(-)
>
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index b48d744d99b0..007e6cfada3e 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -6943,6 +6943,13 @@
>  			See Documentation/admin-guide/mm/transhuge.rst
>  			for more details.
>  
> +	transparent_hugepage_tmpfs= [KNL]
> +			Format: [always|within_size|advise|never]
> +			Can be used to control the default hugepage allocation policy
> +			for the tmpfs mount.
> +			See Documentation/admin-guide/mm/transhuge.rst
> +			for more details.
> +
>  	trusted.source=	[KEYS]
>  			Format: <string>
>  			This parameter identifies the trust source as a backend
> diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
> index 5034915f4e8e..9ae775eaacbe 100644
> --- a/Documentation/admin-guide/mm/transhuge.rst
> +++ b/Documentation/admin-guide/mm/transhuge.rst
> @@ -332,6 +332,12 @@ allocation policy for the internal shmem mount by using the kernel parameter
>  seven valid policies for shmem (``always``, ``within_size``, ``advise``,
>  ``never``, ``deny``, and ``force``).
>  
> +Similarly to ``transparent_hugepage_shmem``, you can control the default
> +hugepage allocation policy for the tmpfs mount by using the kernel parameter
> +``transparent_hugepage_tmpfs=<policy>``, where ``<policy>`` is one of the
> +four valid policies for tmpfs (``always``, ``within_size``, ``advise``,
> +``never``). The tmpfs mount default policy is ``never``.
> +
>  In the same manner as ``thp_anon`` controls each supported anonymous THP
>  size, ``thp_shmem`` controls each supported shmem THP size. ``thp_shmem``
>  has the same format as ``thp_anon``, but also supports the policy
> diff --git a/mm/shmem.c b/mm/shmem.c
> index a3203cf8860f..021760e91cea 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -548,6 +548,7 @@ static bool shmem_confirm_swap(struct address_space *mapping,
>  /* ifdef here to avoid bloating shmem.o when not necessary */
>  
>  static int shmem_huge __read_mostly = SHMEM_HUGE_NEVER;
> +static int tmpfs_huge __read_mostly = SHMEM_HUGE_NEVER;
>  
>  /**
>   * shmem_mapping_size_orders - Get allowable folio orders for the given file size.
> @@ -4780,7 +4781,12 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
>  	sbinfo->gid = ctx->gid;
>  	sbinfo->full_inums = ctx->full_inums;
>  	sbinfo->mode = ctx->mode;
> -	sbinfo->huge = ctx->huge;
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> +	if (ctx->seen & SHMEM_SEEN_HUGE)
> +		sbinfo->huge = ctx->huge;
> +	else
> +		sbinfo->huge = tmpfs_huge;
> +#endif
>  	sbinfo->mpol = ctx->mpol;
>  	ctx->mpol = NULL;
>  
> @@ -5259,6 +5265,21 @@ static int __init setup_transparent_hugepage_shmem(char *str)
>  }
>  __setup("transparent_hugepage_shmem=", setup_transparent_hugepage_shmem);
>  
> +static int __init setup_transparent_hugepage_tmpfs(char *str)
> +{
> +	int huge;
> +
> +	huge = shmem_parse_huge(str);
> +	if (huge < 0) {
> +		pr_warn("transparent_hugepage_tmpfs= cannot parse, ignored\n");
> +		return huge;
> +	}
> +
> +	tmpfs_huge = huge;
> +	return 1;
> +}
> +__setup("transparent_hugepage_tmpfs=", setup_transparent_hugepage_tmpfs);
> +
>  static char str_dup[PAGE_SIZE] __initdata;
>  static int __init setup_thp_shmem(char *str)
>  {
David Hildenbrand Nov. 15, 2024, 2:54 p.m. UTC | #2
On 15.11.24 15:02, Daniel Gomez wrote:
> On Tue Nov 12, 2024 at 8:45 AM CET, Baolin Wang wrote:
>> Now the tmpfs can allow to allocate any sized large folios, and the default
>> huge policy is still 'never'. Thus adding a new command line to change
>> the default huge policy will be helpful to use the large folios for tmpfs,
>> which is similar to the 'transparent_hugepage_shmem' cmdline for shmem.
> 
> 
> I think it would be good to include a summary of why tmpfs is not
> enabling large folios by default as the other fs. David has been
> pretty good at repeating the reasons over and over and it would be very
> valuable to have them included here.

Yes. We also discussed in v4 the idea of having a Kconfig option to just 
change the default policy to "always". We could mention that here as well.
Baolin Wang Nov. 16, 2024, 3 a.m. UTC | #3
On 2024/11/15 22:54, David Hildenbrand wrote:
> On 15.11.24 15:02, Daniel Gomez wrote:
>> On Tue Nov 12, 2024 at 8:45 AM CET, Baolin Wang wrote:
>>> Now the tmpfs can allow to allocate any sized large folios, and the 
>>> default
>>> huge policy is still 'never'. Thus adding a new command line to change
>>> the default huge policy will be helpful to use the large folios for 
>>> tmpfs,
>>> which is similar to the 'transparent_hugepage_shmem' cmdline for shmem.
>>
>>
>> I think it would be good to include a summary of why tmpfs is not
>> enabling large folios by default as the other fs. David has been
>> pretty good at repeating the reasons over and over and it would be very
>> valuable to have them included here.


OK. I'd like to directly quote David's previous comments. So I hope Andrew 
can help include the updated commit message:

=====
Now tmpfs can allocate large folios of any size, but the preferred 
default huge policy is still 'never', because tmpfs does not 
behave like other file systems in some cases, as previously explained 
by David[1]:

"
I think I raised this in the past, but tmpfs/shmem is just like any
other file system .. except it sometimes really isn't and behaves much
more like (swappable) anonymous memory. (or mlocked files)

There are many systems out there that run without swap enabled, or with
extremely minimal swap (IIRC until recently kubernetes was completely
incompatible with swapping). Swap can even be disabled today for shmem
using a mount option.

That's a big difference to all other file systems where you are
guaranteed to have backend storage where you can simply evict under
memory pressure (might temporarily fail, of course).

I *think* that's the reason why we have the "huge=" parameter that also
controls the THP allocations during page faults (IOW possible memory
over-allocation). Maybe also because it was a new feature, and we only
had a single THP size.
"

Thus adding a new command line parameter to change the default huge policy 
will be helpful for using large folios with tmpfs, which is similar to the 
'transparent_hugepage_shmem' cmdline for shmem.

[1] 
https://lore.kernel.org/all/cbadd5fe-69d5-4c21-8eb8-3344ed36c721@redhat.com/

> Yes. We also discussed in v4 the idea of having a Kconfig option to just 
> change the default policy to "always". We could mention that here as well.
diff mbox series

Patch

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index b48d744d99b0..007e6cfada3e 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -6943,6 +6943,13 @@ 
 			See Documentation/admin-guide/mm/transhuge.rst
 			for more details.
 
+	transparent_hugepage_tmpfs= [KNL]
+			Format: [always|within_size|advise|never]
+			Can be used to control the default hugepage allocation policy
+			for the tmpfs mount.
+			See Documentation/admin-guide/mm/transhuge.rst
+			for more details.
+
 	trusted.source=	[KEYS]
 			Format: <string>
 			This parameter identifies the trust source as a backend
diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
index 5034915f4e8e..9ae775eaacbe 100644
--- a/Documentation/admin-guide/mm/transhuge.rst
+++ b/Documentation/admin-guide/mm/transhuge.rst
@@ -332,6 +332,12 @@  allocation policy for the internal shmem mount by using the kernel parameter
 seven valid policies for shmem (``always``, ``within_size``, ``advise``,
 ``never``, ``deny``, and ``force``).
 
+Similarly to ``transparent_hugepage_shmem``, you can control the default
+hugepage allocation policy for the tmpfs mount by using the kernel parameter
+``transparent_hugepage_tmpfs=<policy>``, where ``<policy>`` is one of the
+four valid policies for tmpfs (``always``, ``within_size``, ``advise``,
+``never``). The tmpfs mount default policy is ``never``.
+
 In the same manner as ``thp_anon`` controls each supported anonymous THP
 size, ``thp_shmem`` controls each supported shmem THP size. ``thp_shmem``
 has the same format as ``thp_anon``, but also supports the policy
diff --git a/mm/shmem.c b/mm/shmem.c
index a3203cf8860f..021760e91cea 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -548,6 +548,7 @@  static bool shmem_confirm_swap(struct address_space *mapping,
 /* ifdef here to avoid bloating shmem.o when not necessary */
 
 static int shmem_huge __read_mostly = SHMEM_HUGE_NEVER;
+static int tmpfs_huge __read_mostly = SHMEM_HUGE_NEVER;
 
 /**
  * shmem_mapping_size_orders - Get allowable folio orders for the given file size.
@@ -4780,7 +4781,12 @@  static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
 	sbinfo->gid = ctx->gid;
 	sbinfo->full_inums = ctx->full_inums;
 	sbinfo->mode = ctx->mode;
-	sbinfo->huge = ctx->huge;
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	if (ctx->seen & SHMEM_SEEN_HUGE)
+		sbinfo->huge = ctx->huge;
+	else
+		sbinfo->huge = tmpfs_huge;
+#endif
 	sbinfo->mpol = ctx->mpol;
 	ctx->mpol = NULL;
 
@@ -5259,6 +5265,21 @@  static int __init setup_transparent_hugepage_shmem(char *str)
 }
 __setup("transparent_hugepage_shmem=", setup_transparent_hugepage_shmem);
 
+static int __init setup_transparent_hugepage_tmpfs(char *str)
+{
+	int huge;
+
+	huge = shmem_parse_huge(str);
+	if (huge < 0) {
+		pr_warn("transparent_hugepage_tmpfs= cannot parse, ignored\n");
+		return huge;
+	}
+
+	tmpfs_huge = huge;
+	return 1;
+}
+__setup("transparent_hugepage_tmpfs=", setup_transparent_hugepage_tmpfs);
+
 static char str_dup[PAGE_SIZE] __initdata;
 static int __init setup_thp_shmem(char *str)
 {