diff mbox series

[3/5] tmpfs: Create casefold mount options

Message ID 20240823173332.281211-4-andrealmeid@igalia.com (mailing list archive)
State New
Headers show
Series tmpfs: Add case-insensitive support for tmpfs | expand

Commit Message

André Almeida Aug. 23, 2024, 5:33 p.m. UTC
Most filesystems have their data stored on disk, so the casefold option needs
to be enabled when building a filesystem on a device (via mkfs).
However, as tmpfs is a RAM backed filesystem, there's no disk
information and thus no mkfs to store information about casefold.

For tmpfs, create casefold options for mounting. Userspace can then
enable casefold support for a mount point using:

$ mount -t tmpfs -o casefold=utf8-12.1.0 fs_name mount_dir/

Userspace must specify which version of the Unicode standard it is targeting.
The available options depend on what the kernel Unicode subsystem supports.

And for strict encoding:

$ mount -t tmpfs -o casefold=utf8-12.1.0,strict_encoding fs_name mount_dir/

Strict encoding means that tmpfs will refuse to create invalid UTF-8
sequences. When this option is not enabled, any invalid sequence will be
treated as an opaque byte sequence, ignoring the encoding, and thus cannot
be looked up in a case-insensitive way.

Signed-off-by: André Almeida <andrealmeid@igalia.com>
---
 mm/shmem.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)

Comments

Dan Carpenter Aug. 29, 2024, 9:25 a.m. UTC | #1
Hi André,

kernel test robot noticed the following build warnings:

https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Andr-Almeida/tmpfs-Add-casefold-lookup-support/20240826-135457
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link:    https://lore.kernel.org/r/20240823173332.281211-4-andrealmeid%40igalia.com
patch subject: [PATCH 3/5] tmpfs: Create casefold mount options
config: x86_64-randconfig-161-20240827 (https://download.01.org/0day-ci/archive/20240829/202408290349.lp2Eq74b-lkp@intel.com/config)
compiler: clang version 18.1.5 (https://github.com/llvm/llvm-project 617a15a9eac96088ae5e9134248d8236e34b91b1)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
| Closes: https://lore.kernel.org/r/202408290349.lp2Eq74b-lkp@intel.com/

smatch warnings:
mm/shmem.c:4307 shmem_parse_opt_casefold() error: uninitialized symbol 'maj'.
mm/shmem.c:4307 shmem_parse_opt_casefold() error: uninitialized symbol 'min'.
mm/shmem.c:4307 shmem_parse_opt_casefold() error: uninitialized symbol 'rev'.

vim +/maj +4307 mm/shmem.c

a024e87c2944676 André Almeida 2024-08-23  4291  static int shmem_parse_opt_casefold(struct fs_context *fc, struct fs_parameter *param)
a024e87c2944676 André Almeida 2024-08-23  4292  {
a024e87c2944676 André Almeida 2024-08-23  4293  	struct shmem_options *ctx = fc->fs_private;
a024e87c2944676 André Almeida 2024-08-23  4294  	unsigned int maj, min, rev, version_number;
a024e87c2944676 André Almeida 2024-08-23  4295  	char version[10];
a024e87c2944676 André Almeida 2024-08-23  4296  	int ret;
a024e87c2944676 André Almeida 2024-08-23  4297  	struct unicode_map *encoding;
a024e87c2944676 André Almeida 2024-08-23  4298  
a024e87c2944676 André Almeida 2024-08-23  4299  	if (strncmp(param->string, "utf8-", 5))
a024e87c2944676 André Almeida 2024-08-23  4300  		return invalfc(fc, "Only utf8 encondings are supported");
a024e87c2944676 André Almeida 2024-08-23  4301  	ret = strscpy(version, param->string + 5, sizeof(version));
a024e87c2944676 André Almeida 2024-08-23  4302  	if (ret < 0)
a024e87c2944676 André Almeida 2024-08-23  4303  		return invalfc(fc, "Invalid enconding argument: %s",
a024e87c2944676 André Almeida 2024-08-23  4304  			       param->string);
a024e87c2944676 André Almeida 2024-08-23  4305  
a024e87c2944676 André Almeida 2024-08-23  4306  	utf8_parse_version(version, &maj, &min, &rev);

No error checking

a024e87c2944676 André Almeida 2024-08-23 @4307  	version_number = UNICODE_AGE(maj, min, rev);
                                                                                     ^^^^^^^^^^^^^

a024e87c2944676 André Almeida 2024-08-23  4308  	encoding = utf8_load(version_number);
a024e87c2944676 André Almeida 2024-08-23  4309  	if (IS_ERR(encoding))
a024e87c2944676 André Almeida 2024-08-23  4310  		return invalfc(fc, "Invalid utf8 version: %s", version);
a024e87c2944676 André Almeida 2024-08-23  4311  	pr_info("tmpfs: Using encoding provided by mount options: %s\n",
a024e87c2944676 André Almeida 2024-08-23  4312  		param->string);
a024e87c2944676 André Almeida 2024-08-23  4313  	ctx->encoding = encoding;
a024e87c2944676 André Almeida 2024-08-23  4314  
a024e87c2944676 André Almeida 2024-08-23  4315  	return 0;
a024e87c2944676 André Almeida 2024-08-23  4316  }
a024e87c2944676 André Almeida 2024-08-23  4317  #else
a024e87c2944676 André Almeida 2024-08-23  4318  static int shmem_parse_opt_casefold(struct fs_context *fc, struct fs_parameter *param)
a024e87c2944676 André Almeida 2024-08-23  4319  {
a024e87c2944676 André Almeida 2024-08-23  4320  	return invalfc(fc, "tmpfs: No kernel support for casefold filesystems\n");
a024e87c2944676 André Almeida 2024-08-23  4321  }
diff mbox series

Patch

diff --git a/mm/shmem.c b/mm/shmem.c
index 67b6ab580ca2..5c77b4e73204 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -4102,6 +4102,8 @@  enum shmem_param {
 	Opt_usrquota_inode_hardlimit,
 	Opt_grpquota_block_hardlimit,
 	Opt_grpquota_inode_hardlimit,
+	Opt_casefold,
+	Opt_strict_encoding,
 };
 
 static const struct constant_table shmem_param_enums_huge[] = {
@@ -4133,9 +4135,67 @@  const struct fs_parameter_spec shmem_fs_parameters[] = {
 	fsparam_string("grpquota_block_hardlimit", Opt_grpquota_block_hardlimit),
 	fsparam_string("grpquota_inode_hardlimit", Opt_grpquota_inode_hardlimit),
 #endif
+	fsparam_string("casefold",	Opt_casefold),
+	fsparam_flag  ("strict_encoding", Opt_strict_encoding),
 	{}
 };
 
+#if IS_ENABLED(CONFIG_UNICODE)
+/*
+ * Parse a "<major>.<minor>.<revision>" version string.
+ *
+ * On success the three components are stored through @maj, @min and @rev
+ * and 0 is returned.  If the string does not match that pattern, or one of
+ * the components is rejected by match_int(), -EINVAL is returned; in that
+ * case the outputs may be unwritten or only partly written.
+ */
+static int utf8_parse_version(const char *version, unsigned int *maj,
+			      unsigned int *min, unsigned int *rev)
+{
+	static const struct match_token version_pattern[] = {
+		{1, "%d.%d.%d"},
+		{0, NULL}
+	};
+	char buf[12];
+	substring_t parts[3];
+
+	/* Work on a bounded local copy of the caller's string. */
+	strscpy(buf, version, sizeof(buf));
+
+	if (match_token(buf, version_pattern, parts) != 1)
+		return -EINVAL;
+
+	/* Convert the components in order, stopping at the first failure. */
+	if (match_int(&parts[0], maj))
+		return -EINVAL;
+	if (match_int(&parts[1], min))
+		return -EINVAL;
+	if (match_int(&parts[2], rev))
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Handle the "casefold=utf8-<maj>.<min>.<rev>" mount option.
+ *
+ * Loads the requested encoding with utf8_load() and stores it in the
+ * shmem_options attached to @fc.  Returns 0 on success, or the value of
+ * invalfc() when the argument does not start with "utf8-", the version part
+ * does not fit the local buffer, the version cannot be parsed, or
+ * utf8_load() fails.
+ */
+static int shmem_parse_opt_casefold(struct fs_context *fc, struct fs_parameter *param)
+{
+	struct shmem_options *ctx = fc->fs_private;
+	unsigned int maj, min, rev, version_number;
+	char version[10];
+	int ret;
+	struct unicode_map *encoding;
+
+	if (strncmp(param->string, "utf8-", 5))
+		return invalfc(fc, "Only utf8 encodings are supported");
+	/* A negative result means the version part did not fit in version[]. */
+	ret = strscpy(version, param->string + 5, sizeof(version));
+	if (ret < 0)
+		return invalfc(fc, "Invalid encoding argument: %s",
+			       param->string);
+
+	/*
+	 * maj/min/rev are only written when parsing succeeds, so bail out
+	 * here rather than feed uninitialized values to UNICODE_AGE().
+	 */
+	ret = utf8_parse_version(version, &maj, &min, &rev);
+	if (ret)
+		return invalfc(fc, "Invalid utf8 version: %s", version);
+
+	version_number = UNICODE_AGE(maj, min, rev);
+	encoding = utf8_load(version_number);
+	if (IS_ERR(encoding))
+		return invalfc(fc, "Invalid utf8 version: %s", version);
+	pr_info("tmpfs: Using encoding provided by mount options: %s\n",
+		param->string);
+	ctx->encoding = encoding;
+
+	return 0;
+}
+#else
+/*
+ * Stub used when the kernel is built without CONFIG_UNICODE: the casefold
+ * mount option is always rejected with the value of invalfc().
+ */
+static int shmem_parse_opt_casefold(struct fs_context *fc, struct fs_parameter *param)
+{
+	/*
+	 * No "tmpfs: " prefix or trailing newline, matching the other
+	 * invalfc() messages in this file.
+	 * NOTE(review): the fs_context log helpers are expected to add both;
+	 * confirm against logfc().
+	 */
+	return invalfc(fc, "No kernel support for casefold filesystems");
+}
+#endif
+
 static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
 {
 	struct shmem_options *ctx = fc->fs_private;
@@ -4294,6 +4354,11 @@  static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
 				       "Group quota inode hardlimit too large.");
 		ctx->qlimits.grpquota_ihardlimit = size;
 		break;
+	case Opt_casefold:
+		return shmem_parse_opt_casefold(fc, param);
+	case Opt_strict_encoding:
+		ctx->strict_encoding = true;
+		break;
 	}
 	return 0;