Message ID | 20220924022313.281318-1-atteh.mailbox@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | ksmbd: make utf-8 file name comparison work in __caseless_lookup() | expand |
2022-09-24 11:23 GMT+09:00, Atte Heikkilä <atteh.mailbox@gmail.com>: > Case-insensitive file name lookups with __caseless_lookup() use > strncasecmp() for file name comparison. strncasecmp() assumes an > ISO8859-1-compatible encoding, which is not the case here as UTF-8 > is always used. As such, use of strncasecmp() here produces correct > results only if both strings use characters in the ASCII range only. > Fix this by using utf8_strncasecmp() if CONFIG_UNICODE is set. On > failure or if CONFIG_UNICODE is not set, fallback to strncasecmp(). > Also, as we are adding an include for `linux/unicode.h', include it > in `fs/ksmbd/connection.h' as well since it should be explicit there. > > Signed-off-by: Atte Heikkilä <atteh.mailbox@gmail.com> > --- > fs/ksmbd/connection.h | 1 + > fs/ksmbd/vfs.c | 20 +++++++++++++++++--- > fs/ksmbd/vfs.h | 2 ++ > 3 files changed, 20 insertions(+), 3 deletions(-) > > diff --git a/fs/ksmbd/connection.h b/fs/ksmbd/connection.h > index 41d96f5cef06..3643354a3fa7 100644 > --- a/fs/ksmbd/connection.h > +++ b/fs/ksmbd/connection.h > @@ -14,6 +14,7 @@ > #include <net/request_sock.h> > #include <linux/kthread.h> > #include <linux/nls.h> > +#include <linux/unicode.h> > > #include "smb_common.h" > #include "ksmbd_work.h" > diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c > index 4fcf96a01c16..a3269df7c7b3 100644 > --- a/fs/ksmbd/vfs.c > +++ b/fs/ksmbd/vfs.c > @@ -1145,12 +1145,23 @@ static int __caseless_lookup(struct dir_context > *ctx, const char *name, > unsigned int d_type) > { > struct ksmbd_readdir_data *buf; > + int cmp; cmp should be initialized with -EINVAL to fallback strncasecmp() ? > > buf = container_of(ctx, struct ksmbd_readdir_data, ctx); > > if (buf->used != namlen) > return 0; > - if (!strncasecmp((char *)buf->private, name, namlen)) { > + if (IS_ENABLED(CONFIG_UNICODE) && buf->um) { > + const struct qstr q_buf = {.name = buf->private, > + .len = buf->used}; > + const struct qstr q_name = {.name = name, > + .len = namlen}; > + > + cmp = utf8_strncasecmp(buf->um, &q_buf, &q_name); > + } > + if (!(IS_ENABLED(CONFIG_UNICODE) && buf->um) || cmp < 0) I wonder why ->um is checked with CONFIG_UNICODE. Thanks. > + cmp = strncasecmp((char *)buf->private, name, namlen); > + if (!cmp) { > memcpy((char *)buf->private, name, namlen); > buf->dirent_count = 1; > return -EEXIST; > @@ -1166,7 +1177,8 @@ static int __caseless_lookup(struct dir_context *ctx, > const char *name, > * > * Return: 0 on success, otherwise error > */ > -static int ksmbd_vfs_lookup_in_dir(const struct path *dir, char *name, > size_t namelen) > +static int ksmbd_vfs_lookup_in_dir(const struct path *dir, char *name, > + size_t namelen, struct unicode_map *um) > { > int ret; > struct file *dfilp; > @@ -1176,6 +1188,7 @@ static int ksmbd_vfs_lookup_in_dir(const struct path > *dir, char *name, size_t na > .private = name, > .used = namelen, > .dirent_count = 0, > + .um = um, > }; > > dfilp = dentry_open(dir, flags, current_cred()); > @@ -1238,7 +1251,8 @@ int ksmbd_vfs_kern_path(struct ksmbd_work *work, char > *name, > break; > > err = ksmbd_vfs_lookup_in_dir(&parent, filename, > - filename_len); > + filename_len, > + work->conn->um); > path_put(&parent); > if (err) > goto out; > diff --git a/fs/ksmbd/vfs.h b/fs/ksmbd/vfs.h > index d7542a2dab52..593059ca8511 100644 > --- a/fs/ksmbd/vfs.h > +++ b/fs/ksmbd/vfs.h > @@ -12,6 +12,7 @@ > #include <linux/namei.h> > #include <uapi/linux/xattr.h> > #include <linux/posix_acl.h> > +#include <linux/unicode.h> > > #include "smbacl.h" > #include "xattr.h" > @@ -60,6 +61,7 @@ struct ksmbd_readdir_data { > unsigned int used; > unsigned int dirent_count; > unsigned int file_attr; > + struct unicode_map *um; > }; > > /* ksmbd kstat wrapper to get valid create time when reading dir entry */ > -- > 2.37.3 > >
On Sun, 25 Sep 2022 22:18:19 +0900, Namjae Jeon wrote: > 2022-09-24 11:23 GMT+09:00, Atte Heikkilä <atteh.mailbox@gmail.com>: >> Case-insensitive file name lookups with __caseless_lookup() use >> strncasecmp() for file name comparison. strncasecmp() assumes an >> ISO8859-1-compatible encoding, which is not the case here as UTF-8 >> is always used. As such, use of strncasecmp() here produces correct >> results only if both strings use characters in the ASCII range only. >> Fix this by using utf8_strncasecmp() if CONFIG_UNICODE is set. On >> failure or if CONFIG_UNICODE is not set, fallback to strncasecmp(). >> Also, as we are adding an include for `linux/unicode.h', include it >> in `fs/ksmbd/connection.h' as well since it should be explicit there. >> >> Signed-off-by: Atte Heikkilä <atteh.mailbox@gmail.com> >> --- >> fs/ksmbd/connection.h | 1 + >> fs/ksmbd/vfs.c | 20 +++++++++++++++++--- >> fs/ksmbd/vfs.h | 2 ++ >> 3 files changed, 20 insertions(+), 3 deletions(-) >> >> diff --git a/fs/ksmbd/connection.h b/fs/ksmbd/connection.h >> index 41d96f5cef06..3643354a3fa7 100644 >> --- a/fs/ksmbd/connection.h >> +++ b/fs/ksmbd/connection.h >> @@ -14,6 +14,7 @@ >> #include <net/request_sock.h> >> #include <linux/kthread.h> >> #include <linux/nls.h> >> +#include <linux/unicode.h> >> >> #include "smb_common.h" >> #include "ksmbd_work.h" >> diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c >> index 4fcf96a01c16..a3269df7c7b3 100644 >> --- a/fs/ksmbd/vfs.c >> +++ b/fs/ksmbd/vfs.c >> @@ -1145,12 +1145,23 @@ static int __caseless_lookup(struct dir_context >> *ctx, const char *name, >> unsigned int d_type) >> { >> struct ksmbd_readdir_data *buf; >> + int cmp; > cmp should be initialized with -EINVAL to fallback strncasecmp() ? Please see below for the explanation. > >> >> buf = container_of(ctx, struct ksmbd_readdir_data, ctx); >> >> if (buf->used != namlen) >> return 0; >> - if (!strncasecmp((char *)buf->private, name, namlen)) { >> + if (IS_ENABLED(CONFIG_UNICODE) && buf->um) { >> + const struct qstr q_buf = {.name = buf->private, >> + .len = buf->used}; >> + const struct qstr q_name = {.name = name, >> + .len = namlen}; >> + >> + cmp = utf8_strncasecmp(buf->um, &q_buf, &q_name); >> + } >> + if (!(IS_ENABLED(CONFIG_UNICODE) && buf->um) || cmp < 0) > I wonder why ->um is checked with CONFIG_UNICODE. If !(IS_ENABLED(CONFIG_UNICODE) && buf->um) is true, then utf8_strncasecmp() was not called. If !(IS_ENABLED(CONFIG_UNICODE) && buf->um) is false, then utf8_strncasecmp() was called and we check for an error with `cmp < 0'. Alternatively, `cmp' can be initialized to -EINVAL and then !(IS_ENABLED(CONFIG_UNICODE) && buf->um) can be removed. The latter is preferred, right? > > Thanks. >> + cmp = strncasecmp((char *)buf->private, name, namlen); >> + if (!cmp) { >> memcpy((char *)buf->private, name, namlen); >> buf->dirent_count = 1; >> return -EEXIST; >> @@ -1166,7 +1177,8 @@ static int __caseless_lookup(struct dir_context *ct> x, >> const char *name, >> * >> * Return: 0 on success, otherwise error >> */ >> -static int ksmbd_vfs_lookup_in_dir(const struct path *dir, char *name, >> size_t namelen) >> +static int ksmbd_vfs_lookup_in_dir(const struct path *dir, char *name, >> + size_t namelen, struct unicode_map *um) >> { >> int ret; >> struct file *dfilp; >> @@ -1176,6 +1188,7 @@ static int ksmbd_vfs_lookup_in_dir(const struct pat> h >> *dir, char *name, size_t na >> .private = name, >> .used = namelen, >> .dirent_count = 0, >> + .um = um, >> }; >> >> dfilp = dentry_open(dir, flags, current_cred()); >> @@ -1238,7 +1251,8 @@ int ksmbd_vfs_kern_path(struct ksmbd_work *work, ch> ar >> *name, >> break; >> >> err = ksmbd_vfs_lookup_in_dir(&parent, filename, >> - filename_len); >> + filename_len, >> + work->conn->um); >> path_put(&parent); >> if (err) >> goto out; >> diff --git a/fs/ksmbd/vfs.h b/fs/ksmbd/vfs.h >> index d7542a2dab52..593059ca8511 100644 >> --- a/fs/ksmbd/vfs.h >> +++ b/fs/ksmbd/vfs.h >> @@ -12,6 +12,7 @@ >> #include <linux/namei.h> >> #include <uapi/linux/xattr.h> >> #include <linux/posix_acl.h> >> +#include <linux/unicode.h> >> >> #include "smbacl.h" >> #include "xattr.h" >> @@ -60,6 +61,7 @@ struct ksmbd_readdir_data { >> unsigned int used; >> unsigned int dirent_count; >> unsigned int file_attr; >> + struct unicode_map *um; >> }; >> >> /* ksmbd kstat wrapper to get valid create time when reading dir entry *> / >> -- >> 2.37.3 >> >> >
2022-09-25 23:23 GMT+09:00, Atte Heikkilä <atteh.mailbox@gmail.com>: > On Sun, 25 Sep 2022 22:18:19 +0900, Namjae Jeon wrote: >> 2022-09-24 11:23 GMT+09:00, Atte Heikkilä <atteh.mailbox@gmail.com>: >>> Case-insensitive file name lookups with __caseless_lookup() use >>> strncasecmp() for file name comparison. strncasecmp() assumes an >>> ISO8859-1-compatible encoding, which is not the case here as UTF-8 >>> is always used. As such, use of strncasecmp() here produces correct >>> results only if both strings use characters in the ASCII range only. >>> Fix this by using utf8_strncasecmp() if CONFIG_UNICODE is set. On >>> failure or if CONFIG_UNICODE is not set, fallback to strncasecmp(). >>> Also, as we are adding an include for `linux/unicode.h', include it >>> in `fs/ksmbd/connection.h' as well since it should be explicit there. >>> >>> Signed-off-by: Atte Heikkilä <atteh.mailbox@gmail.com> >>> --- >>> fs/ksmbd/connection.h | 1 + >>> fs/ksmbd/vfs.c | 20 +++++++++++++++++--- >>> fs/ksmbd/vfs.h | 2 ++ >>> 3 files changed, 20 insertions(+), 3 deletions(-) >>> >>> diff --git a/fs/ksmbd/connection.h b/fs/ksmbd/connection.h >>> index 41d96f5cef06..3643354a3fa7 100644 >>> --- a/fs/ksmbd/connection.h >>> +++ b/fs/ksmbd/connection.h >>> @@ -14,6 +14,7 @@ >>> #include <net/request_sock.h> >>> #include <linux/kthread.h> >>> #include <linux/nls.h> >>> +#include <linux/unicode.h> >>> >>> #include "smb_common.h" >>> #include "ksmbd_work.h" >>> diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c >>> index 4fcf96a01c16..a3269df7c7b3 100644 >>> --- a/fs/ksmbd/vfs.c >>> +++ b/fs/ksmbd/vfs.c >>> @@ -1145,12 +1145,23 @@ static int __caseless_lookup(struct dir_context >>> *ctx, const char *name, >>> unsigned int d_type) >>> { >>> struct ksmbd_readdir_data *buf; >>> + int cmp; >> cmp should be initialized with -EINVAL to fallback strncasecmp() ? > > Please see below for the explanation. >> >>> >>> buf = container_of(ctx, struct ksmbd_readdir_data, ctx); >>> >>> if (buf->used != namlen) >>> return 0; >>> - if (!strncasecmp((char *)buf->private, name, namlen)) { >>> + if (IS_ENABLED(CONFIG_UNICODE) && buf->um) { >>> + const struct qstr q_buf = {.name = buf->private, >>> + .len = buf->used}; >>> + const struct qstr q_name = {.name = name, >>> + .len = namlen}; >>> + >>> + cmp = utf8_strncasecmp(buf->um, &q_buf, &q_name); >>> + } >>> + if (!(IS_ENABLED(CONFIG_UNICODE) && buf->um) || cmp < 0) >> I wonder why ->um is checked with CONFIG_UNICODE. > > If !(IS_ENABLED(CONFIG_UNICODE) && buf->um) is true, then > utf8_strncasecmp() > was not called. If !(IS_ENABLED(CONFIG_UNICODE) && buf->um) is false, then > utf8_strncasecmp() was called and we check for an error with `cmp < 0'. > Alternatively, `cmp' can be initialized to -EINVAL and then > !(IS_ENABLED(CONFIG_UNICODE) && buf->um) can be removed. > The latter is preferred, right? Yes:) The latter would be better. >> >> Thanks. >>> + cmp = strncasecmp((char *)buf->private, name, namlen); >>> + if (!cmp) { >>> memcpy((char *)buf->private, name, namlen); >>> buf->dirent_count = 1; >>> return -EEXIST; >>> @@ -1166,7 +1177,8 @@ static int __caseless_lookup(struct dir_context >>> *ct> x, >>> const char *name, >>> * >>> * Return: 0 on success, otherwise error >>> */ >>> -static int ksmbd_vfs_lookup_in_dir(const struct path *dir, char *name, >>> size_t namelen) >>> +static int ksmbd_vfs_lookup_in_dir(const struct path *dir, char *name, >>> + size_t namelen, struct unicode_map *um) >>> { >>> int ret; >>> struct file *dfilp; >>> @@ -1176,6 +1188,7 @@ static int ksmbd_vfs_lookup_in_dir(const struct >>> pat> h >>> *dir, char *name, size_t na >>> .private = name, >>> .used = namelen, >>> .dirent_count = 0, >>> + .um = um, >>> }; >>> >>> dfilp = dentry_open(dir, flags, current_cred()); >>> @@ -1238,7 +1251,8 @@ int ksmbd_vfs_kern_path(struct ksmbd_work *work, >>> ch> ar >>> *name, >>> break; >>> >>> err = ksmbd_vfs_lookup_in_dir(&parent, filename, >>> - filename_len); >>> + filename_len, >>> + work->conn->um); >>> path_put(&parent); >>> if (err) >>> goto out; >>> diff --git a/fs/ksmbd/vfs.h b/fs/ksmbd/vfs.h >>> index d7542a2dab52..593059ca8511 100644 >>> --- a/fs/ksmbd/vfs.h >>> +++ b/fs/ksmbd/vfs.h >>> @@ -12,6 +12,7 @@ >>> #include <linux/namei.h> >>> #include <uapi/linux/xattr.h> >>> #include <linux/posix_acl.h> >>> +#include <linux/unicode.h> >>> >>> #include "smbacl.h" >>> #include "xattr.h" >>> @@ -60,6 +61,7 @@ struct ksmbd_readdir_data { >>> unsigned int used; >>> unsigned int dirent_count; >>> unsigned int file_attr; >>> + struct unicode_map *um; >>> }; >>> >>> /* ksmbd kstat wrapper to get valid create time when reading dir entry >>> *> / >>> -- >>> 2.37.3 >>> >>> >> >
diff --git a/fs/ksmbd/connection.h b/fs/ksmbd/connection.h index 41d96f5cef06..3643354a3fa7 100644 --- a/fs/ksmbd/connection.h +++ b/fs/ksmbd/connection.h @@ -14,6 +14,7 @@ #include <net/request_sock.h> #include <linux/kthread.h> #include <linux/nls.h> +#include <linux/unicode.h> #include "smb_common.h" #include "ksmbd_work.h" diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c index 4fcf96a01c16..a3269df7c7b3 100644 --- a/fs/ksmbd/vfs.c +++ b/fs/ksmbd/vfs.c @@ -1145,12 +1145,23 @@ static int __caseless_lookup(struct dir_context *ctx, const char *name, unsigned int d_type) { struct ksmbd_readdir_data *buf; + int cmp; buf = container_of(ctx, struct ksmbd_readdir_data, ctx); if (buf->used != namlen) return 0; - if (!strncasecmp((char *)buf->private, name, namlen)) { + if (IS_ENABLED(CONFIG_UNICODE) && buf->um) { + const struct qstr q_buf = {.name = buf->private, + .len = buf->used}; + const struct qstr q_name = {.name = name, + .len = namlen}; + + cmp = utf8_strncasecmp(buf->um, &q_buf, &q_name); + } + if (!(IS_ENABLED(CONFIG_UNICODE) && buf->um) || cmp < 0) + cmp = strncasecmp((char *)buf->private, name, namlen); + if (!cmp) { memcpy((char *)buf->private, name, namlen); buf->dirent_count = 1; return -EEXIST; @@ -1166,7 +1177,8 @@ static int __caseless_lookup(struct dir_context *ctx, const char *name, * * Return: 0 on success, otherwise error */ -static int ksmbd_vfs_lookup_in_dir(const struct path *dir, char *name, size_t namelen) +static int ksmbd_vfs_lookup_in_dir(const struct path *dir, char *name, + size_t namelen, struct unicode_map *um) { int ret; struct file *dfilp; @@ -1176,6 +1188,7 @@ static int ksmbd_vfs_lookup_in_dir(const struct path *dir, char *name, size_t na .private = name, .used = namelen, .dirent_count = 0, + .um = um, }; dfilp = dentry_open(dir, flags, current_cred()); @@ -1238,7 +1251,8 @@ int ksmbd_vfs_kern_path(struct ksmbd_work *work, char *name, break; err = ksmbd_vfs_lookup_in_dir(&parent, filename, - filename_len); + filename_len, + work->conn->um); path_put(&parent); if (err) goto out; diff --git a/fs/ksmbd/vfs.h b/fs/ksmbd/vfs.h index d7542a2dab52..593059ca8511 100644 --- a/fs/ksmbd/vfs.h +++ b/fs/ksmbd/vfs.h @@ -12,6 +12,7 @@ #include <linux/namei.h> #include <uapi/linux/xattr.h> #include <linux/posix_acl.h> +#include <linux/unicode.h> #include "smbacl.h" #include "xattr.h" @@ -60,6 +61,7 @@ struct ksmbd_readdir_data { unsigned int used; unsigned int dirent_count; unsigned int file_attr; + struct unicode_map *um; }; /* ksmbd kstat wrapper to get valid create time when reading dir entry */
Case-insensitive file name lookups with __caseless_lookup() use strncasecmp() for file name comparison. strncasecmp() assumes an ISO8859-1-compatible encoding, which is not the case here as UTF-8 is always used. As such, use of strncasecmp() here produces correct results only if both strings use characters in the ASCII range only. Fix this by using utf8_strncasecmp() if CONFIG_UNICODE is set. On failure or if CONFIG_UNICODE is not set, fallback to strncasecmp(). Also, as we are adding an include for `linux/unicode.h', include it in `fs/ksmbd/connection.h' as well since it should be explicit there. Signed-off-by: Atte Heikkilä <atteh.mailbox@gmail.com> --- fs/ksmbd/connection.h | 1 + fs/ksmbd/vfs.c | 20 +++++++++++++++++--- fs/ksmbd/vfs.h | 2 ++ 3 files changed, 20 insertions(+), 3 deletions(-)