diff mbox series

[v2,6/7] ext4: Add mount option for provisioning blocks during allocations

Message ID 20221229081252.452240-7-sarthakkukreti@chromium.org (mailing list archive)
State New, archived
Headers show
Series Introduce provisioning primitives for thinly provisioned storage | expand

Commit Message

Sarthak Kukreti Dec. 29, 2022, 8:12 a.m. UTC
Add a mount option that sets the default provisioning mode for
all files within the filesystem.

Signed-off-by: Sarthak Kukreti <sarthakkukreti@chromium.org>
---
 fs/ext4/ext4.h    | 1 +
 fs/ext4/extents.c | 7 +++++++
 fs/ext4/super.c   | 7 +++++++
 3 files changed, 15 insertions(+)

Comments

Brian Foster Jan. 9, 2023, 3:02 p.m. UTC | #1
On Thu, Dec 29, 2022 at 12:12:51AM -0800, Sarthak Kukreti wrote:
> Add a mount option that sets the default provisioning mode for
> all files within the filesystem.
> 

There's not much description here to explain what a user should expect
from this mode. Should the user expect -ENOSPC from the lower layers
once out of space? What about files that are provisioned by the fs and
then freed? Should the user/admin know to run fstrim or also enable an
online discard mechanism to ensure freed filesystem blocks are returned
to the free pool in the block/dm layer in order to avoid premature fs
-ENOSPC conditions?

Also, what about dealing with block level snapshots? There is some
discussion on previous patches wrt to expectations on how provision
might handle unsharing of cow'd blocks. If the fs only provisions on
initial allocation, then a subsequent snapshot means we run into the
same sort of ENOSPC problem on overwrites of already allocated blocks.
That also doesn't consider things like an internal log, which may have
been physically allocated (provisioned?) at mkfs time and yet is subject
to the same general problem.

So what is the higher level goal with this sort of mode? Is
provision-on-alloc sufficient to provide a practical benefit to users,
or should this perhaps consider other scenarios where a provision might
be warranted before submitting writes to a thinly provisioned device?

FWIW, it seems reasonable to me to introduce this without snapshot
support and work toward it later, but it should be made clear what is
being advertised in the meantime. Unless there's some nice way to
explicitly limit the scope of use, such as preventing snapshots or
something, the fs might want to consider this sort of feature
experimental until it is more fully functional.

Brian

> Signed-off-by: Sarthak Kukreti <sarthakkukreti@chromium.org>
> ---
>  fs/ext4/ext4.h    | 1 +
>  fs/ext4/extents.c | 7 +++++++
>  fs/ext4/super.c   | 7 +++++++
>  3 files changed, 15 insertions(+)
> 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 49832e90b62f..29cab2e2ea20 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -1269,6 +1269,7 @@ struct ext4_inode_info {
>  #define EXT4_MOUNT2_MB_OPTIMIZE_SCAN	0x00000080 /* Optimize group
>  						    * scanning in mballoc
>  						    */
> +#define EXT4_MOUNT2_PROVISION		0x00000100 /* Provision while allocating file blocks */
>  
>  #define clear_opt(sb, opt)		EXT4_SB(sb)->s_mount_opt &= \
>  						~EXT4_MOUNT_##opt
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 2e64a9211792..a73f44264fe2 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -4441,6 +4441,13 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
>  	unsigned int credits;
>  	loff_t epos;
>  
> +	/*
> +	 * Attempt to provision file blocks if the mount is mounted with
> +	 * provision.
> +	 */
> +	if (test_opt2(inode->i_sb, PROVISION))
> +		flags |= EXT4_GET_BLOCKS_PROVISION;
> +
>  	BUG_ON(!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS));
>  	map.m_lblk = offset;
>  	map.m_len = len;
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 260c1b3e3ef2..5bc376f6a6f0 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -1591,6 +1591,7 @@ enum {
>  	Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
>  	Opt_no_prefetch_block_bitmaps, Opt_mb_optimize_scan,
>  	Opt_errors, Opt_data, Opt_data_err, Opt_jqfmt, Opt_dax_type,
> +	Opt_provision, Opt_noprovision,
>  #ifdef CONFIG_EXT4_DEBUG
>  	Opt_fc_debug_max_replay, Opt_fc_debug_force
>  #endif
> @@ -1737,6 +1738,8 @@ static const struct fs_parameter_spec ext4_param_specs[] = {
>  	fsparam_flag	("reservation",		Opt_removed),	/* mount option from ext2/3 */
>  	fsparam_flag	("noreservation",	Opt_removed),	/* mount option from ext2/3 */
>  	fsparam_u32	("journal",		Opt_removed),	/* mount option from ext2/3 */
> +	fsparam_flag	("provision",		Opt_provision),
> +	fsparam_flag	("noprovision",		Opt_noprovision),
>  	{}
>  };
>  
> @@ -1826,6 +1829,8 @@ static const struct mount_opts {
>  	{Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
>  	{Opt_no_prefetch_block_bitmaps, EXT4_MOUNT_NO_PREFETCH_BLOCK_BITMAPS,
>  	 MOPT_SET},
> +	{Opt_provision, EXT4_MOUNT2_PROVISION, MOPT_SET | MOPT_2},
> +	{Opt_noprovision, EXT4_MOUNT2_PROVISION, MOPT_CLEAR | MOPT_2},
>  #ifdef CONFIG_EXT4_DEBUG
>  	{Opt_fc_debug_force, EXT4_MOUNT2_JOURNAL_FAST_COMMIT,
>  	 MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY},
> @@ -2977,6 +2982,8 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
>  		SEQ_OPTS_PUTS("dax=never");
>  	} else if (test_opt2(sb, DAX_INODE)) {
>  		SEQ_OPTS_PUTS("dax=inode");
> +	} else if (test_opt2(sb, PROVISION)) {
> +		SEQ_OPTS_PUTS("provision");
>  	}
>  
>  	if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD &&
> -- 
> 2.37.3
>
diff mbox series

Patch

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 49832e90b62f..29cab2e2ea20 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1269,6 +1269,7 @@  struct ext4_inode_info {
 #define EXT4_MOUNT2_MB_OPTIMIZE_SCAN	0x00000080 /* Optimize group
 						    * scanning in mballoc
 						    */
+#define EXT4_MOUNT2_PROVISION		0x00000100 /* Provision while allocating file blocks */
 
 #define clear_opt(sb, opt)		EXT4_SB(sb)->s_mount_opt &= \
 						~EXT4_MOUNT_##opt
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 2e64a9211792..a73f44264fe2 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4441,6 +4441,13 @@  static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
 	unsigned int credits;
 	loff_t epos;
 
+	/*
+	 * Attempt to provision file blocks if the mount is mounted with
+	 * provision.
+	 */
+	if (test_opt2(inode->i_sb, PROVISION))
+		flags |= EXT4_GET_BLOCKS_PROVISION;
+
 	BUG_ON(!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS));
 	map.m_lblk = offset;
 	map.m_len = len;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 260c1b3e3ef2..5bc376f6a6f0 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1591,6 +1591,7 @@  enum {
 	Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
 	Opt_no_prefetch_block_bitmaps, Opt_mb_optimize_scan,
 	Opt_errors, Opt_data, Opt_data_err, Opt_jqfmt, Opt_dax_type,
+	Opt_provision, Opt_noprovision,
 #ifdef CONFIG_EXT4_DEBUG
 	Opt_fc_debug_max_replay, Opt_fc_debug_force
 #endif
@@ -1737,6 +1738,8 @@  static const struct fs_parameter_spec ext4_param_specs[] = {
 	fsparam_flag	("reservation",		Opt_removed),	/* mount option from ext2/3 */
 	fsparam_flag	("noreservation",	Opt_removed),	/* mount option from ext2/3 */
 	fsparam_u32	("journal",		Opt_removed),	/* mount option from ext2/3 */
+	fsparam_flag	("provision",		Opt_provision),
+	fsparam_flag	("noprovision",		Opt_noprovision),
 	{}
 };
 
@@ -1826,6 +1829,8 @@  static const struct mount_opts {
 	{Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
 	{Opt_no_prefetch_block_bitmaps, EXT4_MOUNT_NO_PREFETCH_BLOCK_BITMAPS,
 	 MOPT_SET},
+	{Opt_provision, EXT4_MOUNT2_PROVISION, MOPT_SET | MOPT_2},
+	{Opt_noprovision, EXT4_MOUNT2_PROVISION, MOPT_CLEAR | MOPT_2},
 #ifdef CONFIG_EXT4_DEBUG
 	{Opt_fc_debug_force, EXT4_MOUNT2_JOURNAL_FAST_COMMIT,
 	 MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY},
@@ -2977,6 +2982,8 @@  static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
 		SEQ_OPTS_PUTS("dax=never");
 	} else if (test_opt2(sb, DAX_INODE)) {
 		SEQ_OPTS_PUTS("dax=inode");
+	} else if (test_opt2(sb, PROVISION)) {
+		SEQ_OPTS_PUTS("provision");
 	}
 
 	if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD &&