diff mbox series

ksmbd: make utf-8 file name comparison work in __caseless_lookup()

Message ID 20220924022313.281318-1-atteh.mailbox@gmail.com (mailing list archive)
State New, archived
Headers show
Series ksmbd: make utf-8 file name comparison work in __caseless_lookup() | expand

Commit Message

Atte Heikkilä Sept. 24, 2022, 2:23 a.m. UTC
Case-insensitive file name lookups with __caseless_lookup() use
strncasecmp() for file name comparison. strncasecmp() assumes an
ISO8859-1-compatible encoding, which is not the case here as UTF-8
is always used. As such, use of strncasecmp() here produces correct
results only if both strings consist solely of ASCII characters.
Fix this by using utf8_strncasecmp() if CONFIG_UNICODE is set. On
failure or if CONFIG_UNICODE is not set, fall back to strncasecmp().
Also, as we are adding an include for `linux/unicode.h', include it
in `fs/ksmbd/connection.h' as well since it should be explicit there.

Signed-off-by: Atte Heikkilä <atteh.mailbox@gmail.com>
---
 fs/ksmbd/connection.h |  1 +
 fs/ksmbd/vfs.c        | 20 +++++++++++++++++---
 fs/ksmbd/vfs.h        |  2 ++
 3 files changed, 20 insertions(+), 3 deletions(-)

Comments

Namjae Jeon Sept. 25, 2022, 1:18 p.m. UTC | #1
2022-09-24 11:23 GMT+09:00, Atte Heikkilä <atteh.mailbox@gmail.com>:
> Case-insensitive file name lookups with __caseless_lookup() use
> strncasecmp() for file name comparison. strncasecmp() assumes an
> ISO8859-1-compatible encoding, which is not the case here as UTF-8
> is always used. As such, use of strncasecmp() here produces correct
> results only if both strings use characters in the ASCII range only.
> Fix this by using utf8_strncasecmp() if CONFIG_UNICODE is set. On
> failure or if CONFIG_UNICODE is not set, fallback to strncasecmp().
> Also, as we are adding an include for `linux/unicode.h', include it
> in `fs/ksmbd/connection.h' as well since it should be explicit there.
>
> Signed-off-by: Atte Heikkilä <atteh.mailbox@gmail.com>
> ---
>  fs/ksmbd/connection.h |  1 +
>  fs/ksmbd/vfs.c        | 20 +++++++++++++++++---
>  fs/ksmbd/vfs.h        |  2 ++
>  3 files changed, 20 insertions(+), 3 deletions(-)
>
> diff --git a/fs/ksmbd/connection.h b/fs/ksmbd/connection.h
> index 41d96f5cef06..3643354a3fa7 100644
> --- a/fs/ksmbd/connection.h
> +++ b/fs/ksmbd/connection.h
> @@ -14,6 +14,7 @@
>  #include <net/request_sock.h>
>  #include <linux/kthread.h>
>  #include <linux/nls.h>
> +#include <linux/unicode.h>
>
>  #include "smb_common.h"
>  #include "ksmbd_work.h"
> diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c
> index 4fcf96a01c16..a3269df7c7b3 100644
> --- a/fs/ksmbd/vfs.c
> +++ b/fs/ksmbd/vfs.c
> @@ -1145,12 +1145,23 @@ static int __caseless_lookup(struct dir_context
> *ctx, const char *name,
>  			     unsigned int d_type)
>  {
>  	struct ksmbd_readdir_data *buf;
> +	int cmp;
Should cmp be initialized to -EINVAL so that we fall back to strncasecmp()?

>
>  	buf = container_of(ctx, struct ksmbd_readdir_data, ctx);
>
>  	if (buf->used != namlen)
>  		return 0;
> -	if (!strncasecmp((char *)buf->private, name, namlen)) {
> +	if (IS_ENABLED(CONFIG_UNICODE) && buf->um) {
> +		const struct qstr q_buf = {.name = buf->private,
> +					   .len = buf->used};
> +		const struct qstr q_name = {.name = name,
> +					    .len = namlen};
> +
> +		cmp = utf8_strncasecmp(buf->um, &q_buf, &q_name);
> +	}
> +	if (!(IS_ENABLED(CONFIG_UNICODE) && buf->um) || cmp < 0)
I wonder why ->um is checked along with CONFIG_UNICODE here.

Thanks.
> +		cmp = strncasecmp((char *)buf->private, name, namlen);
> +	if (!cmp) {
>  		memcpy((char *)buf->private, name, namlen);
>  		buf->dirent_count = 1;
>  		return -EEXIST;
> @@ -1166,7 +1177,8 @@ static int __caseless_lookup(struct dir_context *ctx,
> const char *name,
>   *
>   * Return:	0 on success, otherwise error
>   */
> -static int ksmbd_vfs_lookup_in_dir(const struct path *dir, char *name,
> size_t namelen)
> +static int ksmbd_vfs_lookup_in_dir(const struct path *dir, char *name,
> +				   size_t namelen, struct unicode_map *um)
>  {
>  	int ret;
>  	struct file *dfilp;
> @@ -1176,6 +1188,7 @@ static int ksmbd_vfs_lookup_in_dir(const struct path
> *dir, char *name, size_t na
>  		.private	= name,
>  		.used		= namelen,
>  		.dirent_count	= 0,
> +		.um		= um,
>  	};
>
>  	dfilp = dentry_open(dir, flags, current_cred());
> @@ -1238,7 +1251,8 @@ int ksmbd_vfs_kern_path(struct ksmbd_work *work, char
> *name,
>  				break;
>
>  			err = ksmbd_vfs_lookup_in_dir(&parent, filename,
> -						      filename_len);
> +						      filename_len,
> +						      work->conn->um);
>  			path_put(&parent);
>  			if (err)
>  				goto out;
> diff --git a/fs/ksmbd/vfs.h b/fs/ksmbd/vfs.h
> index d7542a2dab52..593059ca8511 100644
> --- a/fs/ksmbd/vfs.h
> +++ b/fs/ksmbd/vfs.h
> @@ -12,6 +12,7 @@
>  #include <linux/namei.h>
>  #include <uapi/linux/xattr.h>
>  #include <linux/posix_acl.h>
> +#include <linux/unicode.h>
>
>  #include "smbacl.h"
>  #include "xattr.h"
> @@ -60,6 +61,7 @@ struct ksmbd_readdir_data {
>  	unsigned int		used;
>  	unsigned int		dirent_count;
>  	unsigned int		file_attr;
> +	struct unicode_map	*um;
>  };
>
>  /* ksmbd kstat wrapper to get valid create time when reading dir entry */
> --
> 2.37.3
>
>
Atte Heikkilä Sept. 25, 2022, 2:23 p.m. UTC | #2
On Sun, 25 Sep 2022 22:18:19 +0900, Namjae Jeon wrote:
> 2022-09-24 11:23 GMT+09:00, Atte Heikkilä <atteh.mailbox@gmail.com>:
>> Case-insensitive file name lookups with __caseless_lookup() use
>> strncasecmp() for file name comparison. strncasecmp() assumes an
>> ISO8859-1-compatible encoding, which is not the case here as UTF-8
>> is always used. As such, use of strncasecmp() here produces correct
>> results only if both strings use characters in the ASCII range only.
>> Fix this by using utf8_strncasecmp() if CONFIG_UNICODE is set. On
>> failure or if CONFIG_UNICODE is not set, fallback to strncasecmp().
>> Also, as we are adding an include for `linux/unicode.h', include it
>> in `fs/ksmbd/connection.h' as well since it should be explicit there.
>>
>> Signed-off-by: Atte Heikkilä <atteh.mailbox@gmail.com>
>> ---
>>  fs/ksmbd/connection.h |  1 +
>>  fs/ksmbd/vfs.c        | 20 +++++++++++++++++---
>>  fs/ksmbd/vfs.h        |  2 ++
>>  3 files changed, 20 insertions(+), 3 deletions(-)
>>
>> diff --git a/fs/ksmbd/connection.h b/fs/ksmbd/connection.h
>> index 41d96f5cef06..3643354a3fa7 100644
>> --- a/fs/ksmbd/connection.h
>> +++ b/fs/ksmbd/connection.h
>> @@ -14,6 +14,7 @@
>>  #include <net/request_sock.h>
>>  #include <linux/kthread.h>
>>  #include <linux/nls.h>
>> +#include <linux/unicode.h>
>>
>>  #include "smb_common.h"
>>  #include "ksmbd_work.h"
>> diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c
>> index 4fcf96a01c16..a3269df7c7b3 100644
>> --- a/fs/ksmbd/vfs.c
>> +++ b/fs/ksmbd/vfs.c
>> @@ -1145,12 +1145,23 @@ static int __caseless_lookup(struct dir_context
>> *ctx, const char *name,
>>  			     unsigned int d_type)
>>  {
>>  	struct ksmbd_readdir_data *buf;
>> +	int cmp;
> cmp should be initialized with -EINVAL to fallback strncasecmp() ?

Please see below for the explanation.
>
>>
>>  	buf = container_of(ctx, struct ksmbd_readdir_data, ctx);
>>
>>  	if (buf->used != namlen)
>>  		return 0;
>> -	if (!strncasecmp((char *)buf->private, name, namlen)) {
>> +	if (IS_ENABLED(CONFIG_UNICODE) && buf->um) {
>> +		const struct qstr q_buf = {.name = buf->private,
>> +					   .len = buf->used};
>> +		const struct qstr q_name = {.name = name,
>> +					    .len = namlen};
>> +
>> +		cmp = utf8_strncasecmp(buf->um, &q_buf, &q_name);
>> +	}
>> +	if (!(IS_ENABLED(CONFIG_UNICODE) && buf->um) || cmp < 0)
> I wonder why ->um is checked with CONFIG_UNICODE.

If !(IS_ENABLED(CONFIG_UNICODE) && buf->um) is true, then utf8_strncasecmp()
was not called. If !(IS_ENABLED(CONFIG_UNICODE) && buf->um) is false, then
utf8_strncasecmp() was called and we check for an error with `cmp < 0'.
Alternatively, `cmp' can be initialized to -EINVAL and then
!(IS_ENABLED(CONFIG_UNICODE) && buf->um) can be removed.
The latter is preferred, right?
>
> Thanks.
>> +		cmp = strncasecmp((char *)buf->private, name, namlen);
>> +	if (!cmp) {
>>  		memcpy((char *)buf->private, name, namlen);
>>  		buf->dirent_count = 1;
>>  		return -EEXIST;
>> @@ -1166,7 +1177,8 @@ static int __caseless_lookup(struct dir_context *ctx,
>> const char *name,
>>   *
>>   * Return:	0 on success, otherwise error
>>   */
>> -static int ksmbd_vfs_lookup_in_dir(const struct path *dir, char *name,
>> size_t namelen)
>> +static int ksmbd_vfs_lookup_in_dir(const struct path *dir, char *name,
>> +				   size_t namelen, struct unicode_map *um)
>>  {
>>  	int ret;
>>  	struct file *dfilp;
>> @@ -1176,6 +1188,7 @@ static int ksmbd_vfs_lookup_in_dir(const struct path
>> *dir, char *name, size_t na
>>  		.private	= name,
>>  		.used		= namelen,
>>  		.dirent_count	= 0,
>> +		.um		= um,
>>  	};
>>
>>  	dfilp = dentry_open(dir, flags, current_cred());
>> @@ -1238,7 +1251,8 @@ int ksmbd_vfs_kern_path(struct ksmbd_work *work, char
>> *name,
>>  				break;
>>
>>  			err = ksmbd_vfs_lookup_in_dir(&parent, filename,
>> -						      filename_len);
>> +						      filename_len,
>> +						      work->conn->um);
>>  			path_put(&parent);
>>  			if (err)
>>  				goto out;
>> diff --git a/fs/ksmbd/vfs.h b/fs/ksmbd/vfs.h
>> index d7542a2dab52..593059ca8511 100644
>> --- a/fs/ksmbd/vfs.h
>> +++ b/fs/ksmbd/vfs.h
>> @@ -12,6 +12,7 @@
>>  #include <linux/namei.h>
>>  #include <uapi/linux/xattr.h>
>>  #include <linux/posix_acl.h>
>> +#include <linux/unicode.h>
>>
>>  #include "smbacl.h"
>>  #include "xattr.h"
>> @@ -60,6 +61,7 @@ struct ksmbd_readdir_data {
>>  	unsigned int		used;
>>  	unsigned int		dirent_count;
>>  	unsigned int		file_attr;
>> +	struct unicode_map	*um;
>>  };
>>
>>  /* ksmbd kstat wrapper to get valid create time when reading dir entry */
>> --
>> 2.37.3
>>
>>
>
Namjae Jeon Sept. 27, 2022, 1:27 a.m. UTC | #3
2022-09-25 23:23 GMT+09:00, Atte Heikkilä <atteh.mailbox@gmail.com>:
> On Sun, 25 Sep 2022 22:18:19 +0900, Namjae Jeon wrote:
>> 2022-09-24 11:23 GMT+09:00, Atte Heikkilä <atteh.mailbox@gmail.com>:
>>> Case-insensitive file name lookups with __caseless_lookup() use
>>> strncasecmp() for file name comparison. strncasecmp() assumes an
>>> ISO8859-1-compatible encoding, which is not the case here as UTF-8
>>> is always used. As such, use of strncasecmp() here produces correct
>>> results only if both strings use characters in the ASCII range only.
>>> Fix this by using utf8_strncasecmp() if CONFIG_UNICODE is set. On
>>> failure or if CONFIG_UNICODE is not set, fallback to strncasecmp().
>>> Also, as we are adding an include for `linux/unicode.h', include it
>>> in `fs/ksmbd/connection.h' as well since it should be explicit there.
>>>
>>> Signed-off-by: Atte Heikkilä <atteh.mailbox@gmail.com>
>>> ---
>>>  fs/ksmbd/connection.h |  1 +
>>>  fs/ksmbd/vfs.c        | 20 +++++++++++++++++---
>>>  fs/ksmbd/vfs.h        |  2 ++
>>>  3 files changed, 20 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/fs/ksmbd/connection.h b/fs/ksmbd/connection.h
>>> index 41d96f5cef06..3643354a3fa7 100644
>>> --- a/fs/ksmbd/connection.h
>>> +++ b/fs/ksmbd/connection.h
>>> @@ -14,6 +14,7 @@
>>>  #include <net/request_sock.h>
>>>  #include <linux/kthread.h>
>>>  #include <linux/nls.h>
>>> +#include <linux/unicode.h>
>>>
>>>  #include "smb_common.h"
>>>  #include "ksmbd_work.h"
>>> diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c
>>> index 4fcf96a01c16..a3269df7c7b3 100644
>>> --- a/fs/ksmbd/vfs.c
>>> +++ b/fs/ksmbd/vfs.c
>>> @@ -1145,12 +1145,23 @@ static int __caseless_lookup(struct dir_context
>>> *ctx, const char *name,
>>>  			     unsigned int d_type)
>>>  {
>>>  	struct ksmbd_readdir_data *buf;
>>> +	int cmp;
>> cmp should be initialized with -EINVAL to fallback strncasecmp() ?
>
> Please see below for the explanation.
>>
>>>
>>>  	buf = container_of(ctx, struct ksmbd_readdir_data, ctx);
>>>
>>>  	if (buf->used != namlen)
>>>  		return 0;
>>> -	if (!strncasecmp((char *)buf->private, name, namlen)) {
>>> +	if (IS_ENABLED(CONFIG_UNICODE) && buf->um) {
>>> +		const struct qstr q_buf = {.name = buf->private,
>>> +					   .len = buf->used};
>>> +		const struct qstr q_name = {.name = name,
>>> +					    .len = namlen};
>>> +
>>> +		cmp = utf8_strncasecmp(buf->um, &q_buf, &q_name);
>>> +	}
>>> +	if (!(IS_ENABLED(CONFIG_UNICODE) && buf->um) || cmp < 0)
>> I wonder why ->um is checked with CONFIG_UNICODE.
>
> If !(IS_ENABLED(CONFIG_UNICODE) && buf->um) is true, then
> utf8_strncasecmp()
> was not called. If !(IS_ENABLED(CONFIG_UNICODE) && buf->um) is false, then
> utf8_strncasecmp() was called and we check for an error with `cmp < 0'.
> Alternatively, `cmp' can be initialized to -EINVAL and then
> !(IS_ENABLED(CONFIG_UNICODE) && buf->um) can be removed.
> The latter is preferred, right?
Yes:) The latter would be better.

>>
>> Thanks.
>>> +		cmp = strncasecmp((char *)buf->private, name, namlen);
>>> +	if (!cmp) {
>>>  		memcpy((char *)buf->private, name, namlen);
>>>  		buf->dirent_count = 1;
>>>  		return -EEXIST;
>>> @@ -1166,7 +1177,8 @@ static int __caseless_lookup(struct dir_context
>>> *ctx,
>>> const char *name,
>>>   *
>>>   * Return:	0 on success, otherwise error
>>>   */
>>> -static int ksmbd_vfs_lookup_in_dir(const struct path *dir, char *name,
>>> size_t namelen)
>>> +static int ksmbd_vfs_lookup_in_dir(const struct path *dir, char *name,
>>> +				   size_t namelen, struct unicode_map *um)
>>>  {
>>>  	int ret;
>>>  	struct file *dfilp;
>>> @@ -1176,6 +1188,7 @@ static int ksmbd_vfs_lookup_in_dir(const struct
>>> path
>>> *dir, char *name, size_t na
>>>  		.private	= name,
>>>  		.used		= namelen,
>>>  		.dirent_count	= 0,
>>> +		.um		= um,
>>>  	};
>>>
>>>  	dfilp = dentry_open(dir, flags, current_cred());
>>> @@ -1238,7 +1251,8 @@ int ksmbd_vfs_kern_path(struct ksmbd_work *work,
>>> char
>>> *name,
>>>  				break;
>>>
>>>  			err = ksmbd_vfs_lookup_in_dir(&parent, filename,
>>> -						      filename_len);
>>> +						      filename_len,
>>> +						      work->conn->um);
>>>  			path_put(&parent);
>>>  			if (err)
>>>  				goto out;
>>> diff --git a/fs/ksmbd/vfs.h b/fs/ksmbd/vfs.h
>>> index d7542a2dab52..593059ca8511 100644
>>> --- a/fs/ksmbd/vfs.h
>>> +++ b/fs/ksmbd/vfs.h
>>> @@ -12,6 +12,7 @@
>>>  #include <linux/namei.h>
>>>  #include <uapi/linux/xattr.h>
>>>  #include <linux/posix_acl.h>
>>> +#include <linux/unicode.h>
>>>
>>>  #include "smbacl.h"
>>>  #include "xattr.h"
>>> @@ -60,6 +61,7 @@ struct ksmbd_readdir_data {
>>>  	unsigned int		used;
>>>  	unsigned int		dirent_count;
>>>  	unsigned int		file_attr;
>>> +	struct unicode_map	*um;
>>>  };
>>>
>>>  /* ksmbd kstat wrapper to get valid create time when reading dir entry
>>> */
>>> --
>>> 2.37.3
>>>
>>>
>>
>
diff mbox series

Patch

diff --git a/fs/ksmbd/connection.h b/fs/ksmbd/connection.h
index 41d96f5cef06..3643354a3fa7 100644
--- a/fs/ksmbd/connection.h
+++ b/fs/ksmbd/connection.h
@@ -14,6 +14,7 @@ 
 #include <net/request_sock.h>
 #include <linux/kthread.h>
 #include <linux/nls.h>
+#include <linux/unicode.h>
 
 #include "smb_common.h"
 #include "ksmbd_work.h"
diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c
index 4fcf96a01c16..a3269df7c7b3 100644
--- a/fs/ksmbd/vfs.c
+++ b/fs/ksmbd/vfs.c
@@ -1145,12 +1145,23 @@  static int __caseless_lookup(struct dir_context *ctx, const char *name,
 			     unsigned int d_type)
 {
 	struct ksmbd_readdir_data *buf;
+	int cmp;
 
 	buf = container_of(ctx, struct ksmbd_readdir_data, ctx);
 
 	if (buf->used != namlen)
 		return 0;
-	if (!strncasecmp((char *)buf->private, name, namlen)) {
+	if (IS_ENABLED(CONFIG_UNICODE) && buf->um) {
+		const struct qstr q_buf = {.name = buf->private,
+					   .len = buf->used};
+		const struct qstr q_name = {.name = name,
+					    .len = namlen};
+
+		cmp = utf8_strncasecmp(buf->um, &q_buf, &q_name);
+	}
+	if (!(IS_ENABLED(CONFIG_UNICODE) && buf->um) || cmp < 0)
+		cmp = strncasecmp((char *)buf->private, name, namlen);
+	if (!cmp) {
 		memcpy((char *)buf->private, name, namlen);
 		buf->dirent_count = 1;
 		return -EEXIST;
@@ -1166,7 +1177,8 @@  static int __caseless_lookup(struct dir_context *ctx, const char *name,
  *
  * Return:	0 on success, otherwise error
  */
-static int ksmbd_vfs_lookup_in_dir(const struct path *dir, char *name, size_t namelen)
+static int ksmbd_vfs_lookup_in_dir(const struct path *dir, char *name,
+				   size_t namelen, struct unicode_map *um)
 {
 	int ret;
 	struct file *dfilp;
@@ -1176,6 +1188,7 @@  static int ksmbd_vfs_lookup_in_dir(const struct path *dir, char *name, size_t na
 		.private	= name,
 		.used		= namelen,
 		.dirent_count	= 0,
+		.um		= um,
 	};
 
 	dfilp = dentry_open(dir, flags, current_cred());
@@ -1238,7 +1251,8 @@  int ksmbd_vfs_kern_path(struct ksmbd_work *work, char *name,
 				break;
 
 			err = ksmbd_vfs_lookup_in_dir(&parent, filename,
-						      filename_len);
+						      filename_len,
+						      work->conn->um);
 			path_put(&parent);
 			if (err)
 				goto out;
diff --git a/fs/ksmbd/vfs.h b/fs/ksmbd/vfs.h
index d7542a2dab52..593059ca8511 100644
--- a/fs/ksmbd/vfs.h
+++ b/fs/ksmbd/vfs.h
@@ -12,6 +12,7 @@ 
 #include <linux/namei.h>
 #include <uapi/linux/xattr.h>
 #include <linux/posix_acl.h>
+#include <linux/unicode.h>
 
 #include "smbacl.h"
 #include "xattr.h"
@@ -60,6 +61,7 @@  struct ksmbd_readdir_data {
 	unsigned int		used;
 	unsigned int		dirent_count;
 	unsigned int		file_attr;
+	struct unicode_map	*um;
 };
 
 /* ksmbd kstat wrapper to get valid create time when reading dir entry */