Message ID | 20230609073239.957184-2-amir73il@gmail.com (mailing list archive) |
---|---|
State | Under Review |
Headers | show |
Series | Reduce impact of overlayfs fake path files | expand |
On Fri, Jun 09, 2023 at 10:32:37AM +0300, Amir Goldstein wrote: > Overlayfs and cachefiles use open_with_fake_path() to allocate internal > files, where overlayfs also puts a "fake" path in f_path - a path which > is not on the same fs as f_inode. > > Allocate a container struct file_fake for those internal files, that > will be used to hold the fake path qlong with the real path. > > This commit does not populate the extra fake_path field and leaves the > overlayfs internal file's f_path fake. > > Signed-off-by: Amir Goldstein <amir73il@gmail.com> > --- > fs/cachefiles/namei.c | 2 +- > fs/file_table.c | 85 +++++++++++++++++++++++++++++++++++-------- > fs/internal.h | 5 ++- > fs/namei.c | 2 +- > fs/open.c | 11 +++--- > fs/overlayfs/file.c | 2 +- > include/linux/fs.h | 13 ++++--- > 7 files changed, 90 insertions(+), 30 deletions(-) > > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c > index 82219a8f6084..a71bdf2d03ba 100644 > --- a/fs/cachefiles/namei.c > +++ b/fs/cachefiles/namei.c > @@ -561,7 +561,7 @@ static bool cachefiles_open_file(struct cachefiles_object *object, > path.mnt = cache->mnt; > path.dentry = dentry; > file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT, > - d_backing_inode(dentry), cache->cache_cred); > + &path, cache->cache_cred); > if (IS_ERR(file)) { > trace_cachefiles_vfs_error(object, d_backing_inode(dentry), > PTR_ERR(file), > diff --git a/fs/file_table.c b/fs/file_table.c > index 372653b92617..adc2a92faa52 100644 > --- a/fs/file_table.c > +++ b/fs/file_table.c > @@ -44,18 +44,48 @@ static struct kmem_cache *filp_cachep __read_mostly; > > static struct percpu_counter nr_files __cacheline_aligned_in_smp; > > +/* Container for file with optional fake path to display in /proc files */ > +struct file_fake { > + struct file file; > + struct path fake_path; > +}; > + > +static inline struct file_fake *file_fake(struct file *f) > +{ > + return container_of(f, struct file_fake, file); > +} > + > +/* Returns fake_path if one exists, f_path otherwise */ > +const struct path *file_fake_path(struct file *f) > +{ > + struct file_fake *ff = file_fake(f); > + > + if (!(f->f_mode & FMODE_FAKE_PATH) || !ff->fake_path.dentry) > + return &f->f_path; > + > + return &ff->fake_path; > +} > +EXPORT_SYMBOL(file_fake_path); > + > static void file_free_rcu(struct rcu_head *head) > { > struct file *f = container_of(head, struct file, f_rcuhead); > > put_cred(f->f_cred); > - kmem_cache_free(filp_cachep, f); > + if (f->f_mode & FMODE_FAKE_PATH) > + kfree(file_fake(f)); > + else > + kmem_cache_free(filp_cachep, f); > } > > static inline void file_free(struct file *f) > { > + struct file_fake *ff = file_fake(f); > + > security_file_free(f); > - if (!(f->f_mode & FMODE_NOACCOUNT)) > + if (f->f_mode & FMODE_FAKE_PATH) > + path_put(&ff->fake_path); > + else > percpu_counter_dec(&nr_files); > call_rcu(&f->f_rcuhead, file_free_rcu); > } > @@ -131,20 +161,15 @@ static int __init init_fs_stat_sysctls(void) > fs_initcall(init_fs_stat_sysctls); > #endif > > -static struct file *__alloc_file(int flags, const struct cred *cred) > +static int init_file(struct file *f, int flags, const struct cred *cred) > { > - struct file *f; > int error; > > - f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); > - if (unlikely(!f)) > - return ERR_PTR(-ENOMEM); > - > f->f_cred = get_cred(cred); > error = security_file_alloc(f); > if (unlikely(error)) { > file_free_rcu(&f->f_rcuhead); > - return ERR_PTR(error); > + return error; > } > > atomic_long_set(&f->f_count, 1); > @@ -155,6 +180,22 @@ static struct file *__alloc_file(int flags, const struct cred *cred) > f->f_mode = OPEN_FMODE(flags); > /* f->f_version: 0 */ > > + return 0; > +} > + > +static struct file *__alloc_file(int flags, const struct cred *cred) > +{ > + struct file *f; > + int error; > + > + f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); > + if (unlikely(!f)) > + return ERR_PTR(-ENOMEM); > + > + error = init_file(f, flags, cred); > + if (unlikely(error)) > + return ERR_PTR(error); > + > return f; > } > > @@ -201,18 +242,32 @@ struct file *alloc_empty_file(int flags, const struct cred *cred) > } > > /* > - * Variant of alloc_empty_file() that doesn't check and modify nr_files. > + * Variant of alloc_empty_file() that allocates a file_fake container > + * and doesn't check and modify nr_files. > * > * Should not be used unless there's a very good reason to do so. > */ > -struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred) > +struct file *alloc_empty_file_fake(const struct path *fake_path, int flags, > + const struct cred *cred) > { > - struct file *f = __alloc_file(flags, cred); > + struct file_fake *ff; > + int error; > > - if (!IS_ERR(f)) > - f->f_mode |= FMODE_NOACCOUNT; > + ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL); > + if (unlikely(!ff)) > + return ERR_PTR(-ENOMEM); > > - return f; > + error = init_file(&ff->file, flags, cred); > + if (unlikely(error)) > + return ERR_PTR(error); > + > + ff->file.f_mode |= FMODE_FAKE_PATH; > + if (fake_path) { > + path_get(fake_path); > + ff->fake_path = *fake_path; > + } Hm, I see that this check is mostly done for vfs_tmpfile_open() which only fills in file->f_path in vfs_tmpfile() but leaves ff->fake_path NULL. So really I think having FMODE_FAKE_PATH set but ff->fake_path be NULL is an invitation for NULL derefs sooner or later. I would simply document that it's required to set ff->fake_path. For callers such as vfs_tmpfile_open() it can just be path itself. IOW, vfs_tmpfile_open() should set ff->fake_path to file->f_path.
On Fri, Jun 9, 2023 at 2:32 PM Christian Brauner <brauner@kernel.org> wrote: > > On Fri, Jun 09, 2023 at 10:32:37AM +0300, Amir Goldstein wrote: > > Overlayfs and cachefiles use open_with_fake_path() to allocate internal > > files, where overlayfs also puts a "fake" path in f_path - a path which > > is not on the same fs as f_inode. > > > > Allocate a container struct file_fake for those internal files, that > > will be used to hold the fake path qlong with the real path. > > > > This commit does not populate the extra fake_path field and leaves the > > overlayfs internal file's f_path fake. > > > > Signed-off-by: Amir Goldstein <amir73il@gmail.com> > > --- > > fs/cachefiles/namei.c | 2 +- > > fs/file_table.c | 85 +++++++++++++++++++++++++++++++++++-------- > > fs/internal.h | 5 ++- > > fs/namei.c | 2 +- > > fs/open.c | 11 +++--- > > fs/overlayfs/file.c | 2 +- > > include/linux/fs.h | 13 ++++--- > > 7 files changed, 90 insertions(+), 30 deletions(-) > > > > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c > > index 82219a8f6084..a71bdf2d03ba 100644 > > --- a/fs/cachefiles/namei.c > > +++ b/fs/cachefiles/namei.c > > @@ -561,7 +561,7 @@ static bool cachefiles_open_file(struct cachefiles_object *object, > > path.mnt = cache->mnt; > > path.dentry = dentry; > > file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT, > > - d_backing_inode(dentry), cache->cache_cred); > > + &path, cache->cache_cred); > > if (IS_ERR(file)) { > > trace_cachefiles_vfs_error(object, d_backing_inode(dentry), > > PTR_ERR(file), > > diff --git a/fs/file_table.c b/fs/file_table.c > > index 372653b92617..adc2a92faa52 100644 > > --- a/fs/file_table.c > > +++ b/fs/file_table.c > > @@ -44,18 +44,48 @@ static struct kmem_cache *filp_cachep __read_mostly; > > > > static struct percpu_counter nr_files __cacheline_aligned_in_smp; > > > > +/* Container for file with optional fake path to display in /proc files */ > > +struct file_fake { > > + struct file file; > > + struct path fake_path; > > +}; > > + > > +static inline struct file_fake *file_fake(struct file *f) > > +{ > > + return container_of(f, struct file_fake, file); > > +} > > + > > +/* Returns fake_path if one exists, f_path otherwise */ > > +const struct path *file_fake_path(struct file *f) > > +{ > > + struct file_fake *ff = file_fake(f); > > + > > + if (!(f->f_mode & FMODE_FAKE_PATH) || !ff->fake_path.dentry) > > + return &f->f_path; > > + > > + return &ff->fake_path; > > +} > > +EXPORT_SYMBOL(file_fake_path); > > + > > static void file_free_rcu(struct rcu_head *head) > > { > > struct file *f = container_of(head, struct file, f_rcuhead); > > > > put_cred(f->f_cred); > > - kmem_cache_free(filp_cachep, f); > > + if (f->f_mode & FMODE_FAKE_PATH) > > + kfree(file_fake(f)); > > + else > > + kmem_cache_free(filp_cachep, f); > > } > > > > static inline void file_free(struct file *f) > > { > > + struct file_fake *ff = file_fake(f); > > + > > security_file_free(f); > > - if (!(f->f_mode & FMODE_NOACCOUNT)) > > + if (f->f_mode & FMODE_FAKE_PATH) > > + path_put(&ff->fake_path); > > + else > > percpu_counter_dec(&nr_files); > > call_rcu(&f->f_rcuhead, file_free_rcu); > > } > > @@ -131,20 +161,15 @@ static int __init init_fs_stat_sysctls(void) > > fs_initcall(init_fs_stat_sysctls); > > #endif > > > > -static struct file *__alloc_file(int flags, const struct cred *cred) > > +static int init_file(struct file *f, int flags, const struct cred *cred) > > { > > - struct file *f; > > int error; > > > > - f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); > > - if (unlikely(!f)) > > - return ERR_PTR(-ENOMEM); > > - > > f->f_cred = get_cred(cred); > > error = security_file_alloc(f); > > if (unlikely(error)) { > > file_free_rcu(&f->f_rcuhead); > > - return ERR_PTR(error); > > + return error; > > } > > > > atomic_long_set(&f->f_count, 1); > > @@ -155,6 +180,22 @@ static struct file *__alloc_file(int flags, const struct cred *cred) > > f->f_mode = OPEN_FMODE(flags); > > /* f->f_version: 0 */ > > > > + return 0; > > +} > > + > > +static struct file *__alloc_file(int flags, const struct cred *cred) > > +{ > > + struct file *f; > > + int error; > > + > > + f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); > > + if (unlikely(!f)) > > + return ERR_PTR(-ENOMEM); > > + > > + error = init_file(f, flags, cred); > > + if (unlikely(error)) > > + return ERR_PTR(error); > > + > > return f; > > } > > > > @@ -201,18 +242,32 @@ struct file *alloc_empty_file(int flags, const struct cred *cred) > > } > > > > /* > > - * Variant of alloc_empty_file() that doesn't check and modify nr_files. > > + * Variant of alloc_empty_file() that allocates a file_fake container > > + * and doesn't check and modify nr_files. > > * > > * Should not be used unless there's a very good reason to do so. > > */ > > -struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred) > > +struct file *alloc_empty_file_fake(const struct path *fake_path, int flags, > > + const struct cred *cred) > > { > > - struct file *f = __alloc_file(flags, cred); > > + struct file_fake *ff; > > + int error; > > > > - if (!IS_ERR(f)) > > - f->f_mode |= FMODE_NOACCOUNT; > > + ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL); > > + if (unlikely(!ff)) > > + return ERR_PTR(-ENOMEM); > > > > - return f; > > + error = init_file(&ff->file, flags, cred); > > + if (unlikely(error)) > > + return ERR_PTR(error); > > + > > + ff->file.f_mode |= FMODE_FAKE_PATH; > > + if (fake_path) { > > + path_get(fake_path); > > + ff->fake_path = *fake_path; > > + } > > Hm, I see that this check is mostly done for vfs_tmpfile_open() which > only fills in file->f_path in vfs_tmpfile() but leaves ff->fake_path > NULL. > > So really I think having FMODE_FAKE_PATH set but ff->fake_path be NULL > is an invitation for NULL derefs sooner or later. I would simply > document that it's required to set ff->fake_path. For callers such as > vfs_tmpfile_open() it can just be path itself. IOW, vfs_tmpfile_open() > should set ff->fake_path to file->f_path. Makes sense. I also took the liberty to re-arrange vfs_tmpfile_open() without the unneeded if (!error) { nesting depth. Thanks, Amir.
On Fri, Jun 09, 2023 at 02:57:20PM +0300, Amir Goldstein wrote: > On Fri, Jun 9, 2023 at 2:32 PM Christian Brauner <brauner@kernel.org> wrote: > > > > On Fri, Jun 09, 2023 at 10:32:37AM +0300, Amir Goldstein wrote: > > > Overlayfs and cachefiles use open_with_fake_path() to allocate internal > > > files, where overlayfs also puts a "fake" path in f_path - a path which > > > is not on the same fs as f_inode. > > > > > > Allocate a container struct file_fake for those internal files, that > > > will be used to hold the fake path qlong with the real path. > > > > > > This commit does not populate the extra fake_path field and leaves the > > > overlayfs internal file's f_path fake. > > > > > > Signed-off-by: Amir Goldstein <amir73il@gmail.com> > > > --- > > > fs/cachefiles/namei.c | 2 +- > > > fs/file_table.c | 85 +++++++++++++++++++++++++++++++++++-------- > > > fs/internal.h | 5 ++- > > > fs/namei.c | 2 +- > > > fs/open.c | 11 +++--- > > > fs/overlayfs/file.c | 2 +- > > > include/linux/fs.h | 13 ++++--- > > > 7 files changed, 90 insertions(+), 30 deletions(-) > > > > > > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c > > > index 82219a8f6084..a71bdf2d03ba 100644 > > > --- a/fs/cachefiles/namei.c > > > +++ b/fs/cachefiles/namei.c > > > @@ -561,7 +561,7 @@ static bool cachefiles_open_file(struct cachefiles_object *object, > > > path.mnt = cache->mnt; > > > path.dentry = dentry; > > > file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT, > > > - d_backing_inode(dentry), cache->cache_cred); > > > + &path, cache->cache_cred); > > > if (IS_ERR(file)) { > > > trace_cachefiles_vfs_error(object, d_backing_inode(dentry), > > > PTR_ERR(file), > > > diff --git a/fs/file_table.c b/fs/file_table.c > > > index 372653b92617..adc2a92faa52 100644 > > > --- a/fs/file_table.c > > > +++ b/fs/file_table.c > > > @@ -44,18 +44,48 @@ static struct kmem_cache *filp_cachep __read_mostly; > > > > > > static struct percpu_counter nr_files __cacheline_aligned_in_smp; > > > > > > +/* Container for file with optional fake path to display in /proc files */ > > > +struct file_fake { > > > + struct file file; > > > + struct path fake_path; > > > +}; > > > + > > > +static inline struct file_fake *file_fake(struct file *f) > > > +{ > > > + return container_of(f, struct file_fake, file); > > > +} > > > + > > > +/* Returns fake_path if one exists, f_path otherwise */ > > > +const struct path *file_fake_path(struct file *f) > > > +{ > > > + struct file_fake *ff = file_fake(f); > > > + > > > + if (!(f->f_mode & FMODE_FAKE_PATH) || !ff->fake_path.dentry) > > > + return &f->f_path; > > > + > > > + return &ff->fake_path; > > > +} > > > +EXPORT_SYMBOL(file_fake_path); > > > + > > > static void file_free_rcu(struct rcu_head *head) > > > { > > > struct file *f = container_of(head, struct file, f_rcuhead); > > > > > > put_cred(f->f_cred); > > > - kmem_cache_free(filp_cachep, f); > > > + if (f->f_mode & FMODE_FAKE_PATH) > > > + kfree(file_fake(f)); > > > + else > > > + kmem_cache_free(filp_cachep, f); > > > } > > > > > > static inline void file_free(struct file *f) > > > { > > > + struct file_fake *ff = file_fake(f); > > > + > > > security_file_free(f); > > > - if (!(f->f_mode & FMODE_NOACCOUNT)) > > > + if (f->f_mode & FMODE_FAKE_PATH) > > > + path_put(&ff->fake_path); > > > + else > > > percpu_counter_dec(&nr_files); > > > call_rcu(&f->f_rcuhead, file_free_rcu); > > > } > > > @@ -131,20 +161,15 @@ static int __init init_fs_stat_sysctls(void) > > > fs_initcall(init_fs_stat_sysctls); > > > #endif > > > > > > -static struct file *__alloc_file(int flags, const struct cred *cred) > > > +static int init_file(struct file *f, int flags, const struct cred *cred) > > > { > > > - struct file *f; > > > int error; > > > > > > - f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); > > > - if (unlikely(!f)) > > > - return ERR_PTR(-ENOMEM); > > > - > > > f->f_cred = get_cred(cred); > > > error = security_file_alloc(f); > > > if (unlikely(error)) { > > > file_free_rcu(&f->f_rcuhead); > > > - return ERR_PTR(error); > > > + return error; > > > } > > > > > > atomic_long_set(&f->f_count, 1); > > > @@ -155,6 +180,22 @@ static struct file *__alloc_file(int flags, const struct cred *cred) > > > f->f_mode = OPEN_FMODE(flags); > > > /* f->f_version: 0 */ > > > > > > + return 0; > > > +} > > > + > > > +static struct file *__alloc_file(int flags, const struct cred *cred) > > > +{ > > > + struct file *f; > > > + int error; > > > + > > > + f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); > > > + if (unlikely(!f)) > > > + return ERR_PTR(-ENOMEM); > > > + > > > + error = init_file(f, flags, cred); > > > + if (unlikely(error)) > > > + return ERR_PTR(error); > > > + > > > return f; > > > } > > > > > > @@ -201,18 +242,32 @@ struct file *alloc_empty_file(int flags, const struct cred *cred) > > > } > > > > > > /* > > > - * Variant of alloc_empty_file() that doesn't check and modify nr_files. > > > + * Variant of alloc_empty_file() that allocates a file_fake container > > > + * and doesn't check and modify nr_files. > > > * > > > * Should not be used unless there's a very good reason to do so. > > > */ > > > -struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred) > > > +struct file *alloc_empty_file_fake(const struct path *fake_path, int flags, > > > + const struct cred *cred) > > > { > > > - struct file *f = __alloc_file(flags, cred); > > > + struct file_fake *ff; > > > + int error; > > > > > > - if (!IS_ERR(f)) > > > - f->f_mode |= FMODE_NOACCOUNT; > > > + ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL); > > > + if (unlikely(!ff)) > > > + return ERR_PTR(-ENOMEM); > > > > > > - return f; > > > + error = init_file(&ff->file, flags, cred); > > > + if (unlikely(error)) > > > + return ERR_PTR(error); > > > + > > > + ff->file.f_mode |= FMODE_FAKE_PATH; > > > + if (fake_path) { > > > + path_get(fake_path); > > > + ff->fake_path = *fake_path; > > > + } > > > > Hm, I see that this check is mostly done for vfs_tmpfile_open() which > > only fills in file->f_path in vfs_tmpfile() but leaves ff->fake_path > > NULL. > > > > So really I think having FMODE_FAKE_PATH set but ff->fake_path be NULL > > is an invitation for NULL derefs sooner or later. I would simply > > document that it's required to set ff->fake_path. For callers such as > > vfs_tmpfile_open() it can just be path itself. IOW, vfs_tmpfile_open() > > should set ff->fake_path to file->f_path. > > Makes sense. > I also took the liberty to re-arrange vfs_tmpfile_open() without the > unneeded if (!error) { nesting depth. Yes, please. I had a rough sketch just for my own amusement... fs/namei.c struct file *vfs_tmpfile_open(struct mnt_idmap *idmap, const struct path *parentpath, umode_t mode, int open_flag, const struct cred *cred) { struct file *file; int error; file = alloc_empty_file_fake(open_flag, cred); if (IS_ERR(file)) return file; error = vfs_tmpfile(idmap, parentpath, file, mode); if (error) { fput(file); return ERR_PTR(error); } return file_set_fake_path(file, &file->f_path); } EXPORT_SYMBOL(vfs_tmpfile_open); fs/internal.h struct file *file_set_fake_path(struct file *file, const struct path *fake_path); fs/open.c struct file *open_with_fake_path(const struct path *fake_path, int flags, const struct path *path, const struct cred *cred) { int error; struct file *file; file = alloc_empty_file_fake(flags, cred); if (IS_ERR(file)) return file; file->f_path = *path; error = do_dentry_open(file, d_inode(path->dentry), NULL); if (error) { fput(file); return ERR_PTR(error); } return file_set_fake_path(file, fake_path); } fs/file_table.c struct file *alloc_empty_file_fake(int flags, const struct cred *cred) { struct file_fake *ff; int error; ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL); if (unlikely(!ff)) return ERR_PTR(-ENOMEM); error = init_file(&ff->file, flags, cred); if (unlikely(error)) return ERR_PTR(error); ff->file.f_mode |= FMODE_FAKE_PATH; return &ff->file; } struct file *file_set_fake_path(struct file *file, const struct path *fake_path) { if (file->f_mode & FMODE_FAKE_PATH) { struct file_fake *ff = file_fake(file); ff->fake_path = *fake_path; } return file; }
On Fri, Jun 9, 2023 at 3:12 PM Christian Brauner <brauner@kernel.org> wrote: > > On Fri, Jun 09, 2023 at 02:57:20PM +0300, Amir Goldstein wrote: > > On Fri, Jun 9, 2023 at 2:32 PM Christian Brauner <brauner@kernel.org> wrote: > > > > > > On Fri, Jun 09, 2023 at 10:32:37AM +0300, Amir Goldstein wrote: > > > > Overlayfs and cachefiles use open_with_fake_path() to allocate internal > > > > files, where overlayfs also puts a "fake" path in f_path - a path which > > > > is not on the same fs as f_inode. > > > > > > > > Allocate a container struct file_fake for those internal files, that > > > > will be used to hold the fake path qlong with the real path. > > > > > > > > This commit does not populate the extra fake_path field and leaves the > > > > overlayfs internal file's f_path fake. > > > > > > > > Signed-off-by: Amir Goldstein <amir73il@gmail.com> > > > > --- > > > > fs/cachefiles/namei.c | 2 +- > > > > fs/file_table.c | 85 +++++++++++++++++++++++++++++++++++-------- > > > > fs/internal.h | 5 ++- > > > > fs/namei.c | 2 +- > > > > fs/open.c | 11 +++--- > > > > fs/overlayfs/file.c | 2 +- > > > > include/linux/fs.h | 13 ++++--- > > > > 7 files changed, 90 insertions(+), 30 deletions(-) > > > > > > > > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c > > > > index 82219a8f6084..a71bdf2d03ba 100644 > > > > --- a/fs/cachefiles/namei.c > > > > +++ b/fs/cachefiles/namei.c > > > > @@ -561,7 +561,7 @@ static bool cachefiles_open_file(struct cachefiles_object *object, > > > > path.mnt = cache->mnt; > > > > path.dentry = dentry; > > > > file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT, > > > > - d_backing_inode(dentry), cache->cache_cred); > > > > + &path, cache->cache_cred); > > > > if (IS_ERR(file)) { > > > > trace_cachefiles_vfs_error(object, d_backing_inode(dentry), > > > > PTR_ERR(file), > > > > diff --git a/fs/file_table.c b/fs/file_table.c > > > > index 372653b92617..adc2a92faa52 100644 > > > > --- a/fs/file_table.c > > > > +++ b/fs/file_table.c > > > > @@ -44,18 +44,48 @@ static struct kmem_cache *filp_cachep __read_mostly; > > > > > > > > static struct percpu_counter nr_files __cacheline_aligned_in_smp; > > > > > > > > +/* Container for file with optional fake path to display in /proc files */ > > > > +struct file_fake { > > > > + struct file file; > > > > + struct path fake_path; > > > > +}; > > > > + > > > > +static inline struct file_fake *file_fake(struct file *f) > > > > +{ > > > > + return container_of(f, struct file_fake, file); > > > > +} > > > > + > > > > +/* Returns fake_path if one exists, f_path otherwise */ > > > > +const struct path *file_fake_path(struct file *f) > > > > +{ > > > > + struct file_fake *ff = file_fake(f); > > > > + > > > > + if (!(f->f_mode & FMODE_FAKE_PATH) || !ff->fake_path.dentry) > > > > + return &f->f_path; > > > > + > > > > + return &ff->fake_path; > > > > +} > > > > +EXPORT_SYMBOL(file_fake_path); > > > > + > > > > static void file_free_rcu(struct rcu_head *head) > > > > { > > > > struct file *f = container_of(head, struct file, f_rcuhead); > > > > > > > > put_cred(f->f_cred); > > > > - kmem_cache_free(filp_cachep, f); > > > > + if (f->f_mode & FMODE_FAKE_PATH) > > > > + kfree(file_fake(f)); > > > > + else > > > > + kmem_cache_free(filp_cachep, f); > > > > } > > > > > > > > static inline void file_free(struct file *f) > > > > { > > > > + struct file_fake *ff = file_fake(f); > > > > + > > > > security_file_free(f); > > > > - if (!(f->f_mode & FMODE_NOACCOUNT)) > > > > + if (f->f_mode & FMODE_FAKE_PATH) > > > > + path_put(&ff->fake_path); > > > > + else > > > > percpu_counter_dec(&nr_files); > > > > call_rcu(&f->f_rcuhead, file_free_rcu); > > > > } > > > > @@ -131,20 +161,15 @@ static int __init init_fs_stat_sysctls(void) > > > > fs_initcall(init_fs_stat_sysctls); > > > > #endif > > > > > > > > -static struct file *__alloc_file(int flags, const struct cred *cred) > > > > +static int init_file(struct file *f, int flags, const struct cred *cred) > > > > { > > > > - struct file *f; > > > > int error; > > > > > > > > - f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); > > > > - if (unlikely(!f)) > > > > - return ERR_PTR(-ENOMEM); > > > > - > > > > f->f_cred = get_cred(cred); > > > > error = security_file_alloc(f); > > > > if (unlikely(error)) { > > > > file_free_rcu(&f->f_rcuhead); > > > > - return ERR_PTR(error); > > > > + return error; > > > > } > > > > > > > > atomic_long_set(&f->f_count, 1); > > > > @@ -155,6 +180,22 @@ static struct file *__alloc_file(int flags, const struct cred *cred) > > > > f->f_mode = OPEN_FMODE(flags); > > > > /* f->f_version: 0 */ > > > > > > > > + return 0; > > > > +} > > > > + > > > > +static struct file *__alloc_file(int flags, const struct cred *cred) > > > > +{ > > > > + struct file *f; > > > > + int error; > > > > + > > > > + f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); > > > > + if (unlikely(!f)) > > > > + return ERR_PTR(-ENOMEM); > > > > + > > > > + error = init_file(f, flags, cred); > > > > + if (unlikely(error)) > > > > + return ERR_PTR(error); > > > > + > > > > return f; > > > > } > > > > > > > > @@ -201,18 +242,32 @@ struct file *alloc_empty_file(int flags, const struct cred *cred) > > > > } > > > > > > > > /* > > > > - * Variant of alloc_empty_file() that doesn't check and modify nr_files. > > > > + * Variant of alloc_empty_file() that allocates a file_fake container > > > > + * and doesn't check and modify nr_files. > > > > * > > > > * Should not be used unless there's a very good reason to do so. > > > > */ > > > > -struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred) > > > > +struct file *alloc_empty_file_fake(const struct path *fake_path, int flags, > > > > + const struct cred *cred) > > > > { > > > > - struct file *f = __alloc_file(flags, cred); > > > > + struct file_fake *ff; > > > > + int error; > > > > > > > > - if (!IS_ERR(f)) > > > > - f->f_mode |= FMODE_NOACCOUNT; > > > > + ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL); > > > > + if (unlikely(!ff)) > > > > + return ERR_PTR(-ENOMEM); > > > > > > > > - return f; > > > > + error = init_file(&ff->file, flags, cred); > > > > + if (unlikely(error)) > > > > + return ERR_PTR(error); > > > > + > > > > + ff->file.f_mode |= FMODE_FAKE_PATH; > > > > + if (fake_path) { > > > > + path_get(fake_path); > > > > + ff->fake_path = *fake_path; > > > > + } > > > > > > Hm, I see that this check is mostly done for vfs_tmpfile_open() which > > > only fills in file->f_path in vfs_tmpfile() but leaves ff->fake_path > > > NULL. > > > > > > So really I think having FMODE_FAKE_PATH set but ff->fake_path be NULL > > > is an invitation for NULL derefs sooner or later. I would simply > > > document that it's required to set ff->fake_path. For callers such as > > > vfs_tmpfile_open() it can just be path itself. IOW, vfs_tmpfile_open() > > > should set ff->fake_path to file->f_path. > > > > Makes sense. > > I also took the liberty to re-arrange vfs_tmpfile_open() without the > > unneeded if (!error) { nesting depth. > > Yes, please. I had a rough sketch just for my own amusement... Happy to make your Friday more amusing :-D > > fs/namei.c > struct file *vfs_tmpfile_open(struct mnt_idmap *idmap, > const struct path *parentpath, umode_t mode, > int open_flag, const struct cred *cred) > { > struct file *file; > int error; > > file = alloc_empty_file_fake(open_flag, cred); > if (IS_ERR(file)) > return file; > > error = vfs_tmpfile(idmap, parentpath, file, mode); > if (error) { > fput(file); > return ERR_PTR(error); > } > > return file_set_fake_path(file, &file->f_path); > } > EXPORT_SYMBOL(vfs_tmpfile_open); > > fs/internal.h > struct file *file_set_fake_path(struct file *file, const struct path *fake_path); > > fs/open.c > struct file *open_with_fake_path(const struct path *fake_path, int flags, > const struct path *path, > const struct cred *cred) > { > int error; > struct file *file; > > file = alloc_empty_file_fake(flags, cred); > if (IS_ERR(file)) > return file; > > file->f_path = *path; > error = do_dentry_open(file, d_inode(path->dentry), NULL); > if (error) { > fput(file); > return ERR_PTR(error); > } > > return file_set_fake_path(file, fake_path); > } > > fs/file_table.c > struct file *alloc_empty_file_fake(int flags, const struct cred *cred) > { > struct file_fake *ff; > int error; > > ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL); > if (unlikely(!ff)) > return ERR_PTR(-ENOMEM); > > error = init_file(&ff->file, flags, cred); > if (unlikely(error)) > return ERR_PTR(error); > > ff->file.f_mode |= FMODE_FAKE_PATH; > return &ff->file; > } > > struct file *file_set_fake_path(struct file *file, const struct path *fake_path) > { > if (file->f_mode & FMODE_FAKE_PATH) { > struct file_fake *ff = file_fake(file); > ff->fake_path = *fake_path; > } > > return file; > } > Heh, I also started with file_set_fake_path() but I decided that it's not worth it, because no code should be messing with this and I just changed file_fake_path() to be non-const and used *file_fake_path(file) = fake_path in these two helpers. I will add file_set_fake_path *only* if you insist. Thanks, Amir.
On Fri, Jun 09, 2023 at 03:20:14PM +0300, Amir Goldstein wrote: > On Fri, Jun 9, 2023 at 3:12 PM Christian Brauner <brauner@kernel.org> wrote: > > > > On Fri, Jun 09, 2023 at 02:57:20PM +0300, Amir Goldstein wrote: > > > On Fri, Jun 9, 2023 at 2:32 PM Christian Brauner <brauner@kernel.org> wrote: > > > > > > > > On Fri, Jun 09, 2023 at 10:32:37AM +0300, Amir Goldstein wrote: > > > > > Overlayfs and cachefiles use open_with_fake_path() to allocate internal > > > > > files, where overlayfs also puts a "fake" path in f_path - a path which > > > > > is not on the same fs as f_inode. > > > > > > > > > > Allocate a container struct file_fake for those internal files, that > > > > > will be used to hold the fake path qlong with the real path. > > > > > > > > > > This commit does not populate the extra fake_path field and leaves the > > > > > overlayfs internal file's f_path fake. > > > > > > > > > > Signed-off-by: Amir Goldstein <amir73il@gmail.com> > > > > > --- > > > > > fs/cachefiles/namei.c | 2 +- > > > > > fs/file_table.c | 85 +++++++++++++++++++++++++++++++++++-------- > > > > > fs/internal.h | 5 ++- > > > > > fs/namei.c | 2 +- > > > > > fs/open.c | 11 +++--- > > > > > fs/overlayfs/file.c | 2 +- > > > > > include/linux/fs.h | 13 ++++--- > > > > > 7 files changed, 90 insertions(+), 30 deletions(-) > > > > > > > > > > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c > > > > > index 82219a8f6084..a71bdf2d03ba 100644 > > > > > --- a/fs/cachefiles/namei.c > > > > > +++ b/fs/cachefiles/namei.c > > > > > @@ -561,7 +561,7 @@ static bool cachefiles_open_file(struct cachefiles_object *object, > > > > > path.mnt = cache->mnt; > > > > > path.dentry = dentry; > > > > > file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT, > > > > > - d_backing_inode(dentry), cache->cache_cred); > > > > > + &path, cache->cache_cred); > > > > > if (IS_ERR(file)) { > > > > > trace_cachefiles_vfs_error(object, d_backing_inode(dentry), > > > > > PTR_ERR(file), > > > > > diff --git a/fs/file_table.c b/fs/file_table.c > > > > > index 372653b92617..adc2a92faa52 100644 > > > > > --- a/fs/file_table.c > > > > > +++ b/fs/file_table.c > > > > > @@ -44,18 +44,48 @@ static struct kmem_cache *filp_cachep __read_mostly; > > > > > > > > > > static struct percpu_counter nr_files __cacheline_aligned_in_smp; > > > > > > > > > > +/* Container for file with optional fake path to display in /proc files */ > > > > > +struct file_fake { > > > > > + struct file file; > > > > > + struct path fake_path; > > > > > +}; > > > > > + > > > > > +static inline struct file_fake *file_fake(struct file *f) > > > > > +{ > > > > > + return container_of(f, struct file_fake, file); > > > > > +} > > > > > + > > > > > +/* Returns fake_path if one exists, f_path otherwise */ > > > > > +const struct path *file_fake_path(struct file *f) > > > > > +{ > > > > > + struct file_fake *ff = file_fake(f); > > > > > + > > > > > + if (!(f->f_mode & FMODE_FAKE_PATH) || !ff->fake_path.dentry) > > > > > + return &f->f_path; > > > > > + > > > > > + return &ff->fake_path; > > > > > +} > > > > > +EXPORT_SYMBOL(file_fake_path); > > > > > + > > > > > static void file_free_rcu(struct rcu_head *head) > > > > > { > > > > > struct file *f = container_of(head, struct file, f_rcuhead); > > > > > > > > > > put_cred(f->f_cred); > > > > > - kmem_cache_free(filp_cachep, f); > > > > > + if (f->f_mode & FMODE_FAKE_PATH) > > > > > + kfree(file_fake(f)); > > > > > + else > > > > > + kmem_cache_free(filp_cachep, f); > > > > > } > > > > > > > > > > static inline void file_free(struct file *f) > > > > > { > > > > > + struct file_fake *ff = file_fake(f); > > > > > + > > > > > security_file_free(f); > > > > > - if (!(f->f_mode & FMODE_NOACCOUNT)) > > > > > + if (f->f_mode & FMODE_FAKE_PATH) > > > > > + path_put(&ff->fake_path); > > > > > + else > > > > > percpu_counter_dec(&nr_files); > > > > > call_rcu(&f->f_rcuhead, file_free_rcu); > > > > > } > > > > > @@ -131,20 +161,15 @@ static int __init init_fs_stat_sysctls(void) > > > > > fs_initcall(init_fs_stat_sysctls); > > > > > #endif > > > > > > > > > > -static struct file *__alloc_file(int flags, const struct cred *cred) > > > > > +static int init_file(struct file *f, int flags, const struct cred *cred) > > > > > { > > > > > - struct file *f; > > > > > int error; > > > > > > > > > > - f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); > > > > > - if (unlikely(!f)) > > > > > - return ERR_PTR(-ENOMEM); > > > > > - > > > > > f->f_cred = get_cred(cred); > > > > > error = security_file_alloc(f); > > > > > if (unlikely(error)) { > > > > > file_free_rcu(&f->f_rcuhead); > > > > > - return ERR_PTR(error); > > > > > + return error; > > > > > } > > > > > > > > > > atomic_long_set(&f->f_count, 1); > > > > > @@ -155,6 +180,22 @@ static struct file *__alloc_file(int flags, const struct cred *cred) > > > > > f->f_mode = OPEN_FMODE(flags); > > > > > /* f->f_version: 0 */ > > > > > > > > > > + return 0; > > > > > +} > > > > > + > > > > > +static struct file *__alloc_file(int flags, const struct cred *cred) > > > > > +{ > > > > > + struct file *f; > > > > > + int error; > > > > > + > > > > > + f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); > > > > > + if (unlikely(!f)) > > > > > + return ERR_PTR(-ENOMEM); > > > > > + > > > > > + error = init_file(f, flags, cred); > > > > > + if (unlikely(error)) > > > > > + return ERR_PTR(error); > > > > > + > > > > > return f; > > > > > } > > > > > > > > > > @@ -201,18 +242,32 @@ struct file *alloc_empty_file(int flags, const struct cred *cred) > > > > > } > > > > > > > > > > /* > > > > > - * Variant of alloc_empty_file() that doesn't check and modify nr_files. > > > > > + * Variant of alloc_empty_file() that allocates a file_fake container > > > > > + * and doesn't check and modify nr_files. > > > > > * > > > > > * Should not be used unless there's a very good reason to do so. > > > > > */ > > > > > -struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred) > > > > > +struct file *alloc_empty_file_fake(const struct path *fake_path, int flags, > > > > > + const struct cred *cred) > > > > > { > > > > > - struct file *f = __alloc_file(flags, cred); > > > > > + struct file_fake *ff; > > > > > + int error; > > > > > > > > > > - if (!IS_ERR(f)) > > > > > - f->f_mode |= FMODE_NOACCOUNT; > > > > > + ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL); > > > > > + if (unlikely(!ff)) > > > > > + return ERR_PTR(-ENOMEM); > > > > > > > > > > - return f; > > > > > + error = init_file(&ff->file, flags, cred); > > > > > + if (unlikely(error)) > > > > > + return ERR_PTR(error); > > > > > + > > > > > + ff->file.f_mode |= FMODE_FAKE_PATH; > > > > > + if (fake_path) { > > > > > + path_get(fake_path); > > > > > + ff->fake_path = *fake_path; > > > > > + } > > > > > > > > Hm, I see that this check is mostly done for vfs_tmpfile_open() which > > > > only fills in file->f_path in vfs_tmpfile() but leaves ff->fake_path > > > > NULL. > > > > > > > > So really I think having FMODE_FAKE_PATH set but ff->fake_path be NULL > > > > is an invitation for NULL derefs sooner or later. I would simply > > > > document that it's required to set ff->fake_path. For callers such as > > > > vfs_tmpfile_open() it can just be path itself. IOW, vfs_tmpfile_open() > > > > should set ff->fake_path to file->f_path. > > > > > > Makes sense. > > > I also took the liberty to re-arrange vfs_tmpfile_open() without the > > > unneeded if (!error) { nesting depth. > > > > Yes, please. I had a rough sketch just for my own amusement... > > Happy to make your Friday more amusing :-D > > > > > fs/namei.c > > struct file *vfs_tmpfile_open(struct mnt_idmap *idmap, > > const struct path *parentpath, umode_t mode, > > int open_flag, const struct cred *cred) > > { > > struct file *file; > > int error; > > > > file = alloc_empty_file_fake(open_flag, cred); > > if (IS_ERR(file)) > > return file; > > > > error = vfs_tmpfile(idmap, parentpath, file, mode); > > if (error) { > > fput(file); > > return ERR_PTR(error); > > } > > > > return file_set_fake_path(file, &file->f_path); > > } > > EXPORT_SYMBOL(vfs_tmpfile_open); > > > > fs/internal.h > > struct file *file_set_fake_path(struct file *file, const struct path *fake_path); > > > > fs/open.c > > struct file *open_with_fake_path(const struct path *fake_path, int flags, > > const struct path *path, > > const struct cred *cred) > > { > > int error; > > struct file *file; > > > > file = alloc_empty_file_fake(flags, cred); > > if (IS_ERR(file)) > > return file; > > > > file->f_path = *path; > > error = do_dentry_open(file, d_inode(path->dentry), NULL); > > if (error) { > > fput(file); > > return ERR_PTR(error); > > } > > > > return file_set_fake_path(file, fake_path); > > } > > > > fs/file_table.c > > struct file *alloc_empty_file_fake(int flags, const struct cred *cred) > > { > > struct file_fake *ff; > > int error; > > > > ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL); > > if (unlikely(!ff)) > > return ERR_PTR(-ENOMEM); > > > > error = init_file(&ff->file, flags, cred); > > if (unlikely(error)) > > return ERR_PTR(error); > > > > ff->file.f_mode |= FMODE_FAKE_PATH; > > return &ff->file; > > } > > > > struct file *file_set_fake_path(struct file *file, const struct path *fake_path) > > { > > if (file->f_mode & FMODE_FAKE_PATH) { > > struct file_fake *ff = file_fake(file); > > ff->fake_path = *fake_path; > > } > > > > return file; > > } > > > > Heh, I also started with file_set_fake_path() but I decided that it's not > worth it, because no code should be messing with this and I just changed > file_fake_path() to be non-const and used *file_fake_path(file) = fake_path > in these two helpers. Hm, I don't understand. This is non-exported and only visible in thing that can use internal.h so only core vfs coe. The only places that use this are vfs_tmpfile_open() and open_with_fake_path(). It allows us to remove the additional argument from alloc_empty_file_fake() and that's what I really like because now we don't have this pass NULL in vfs_tmpfile_open() and fill in later vs doing it in one step. Hm, one thing I realized is that this moves vfs_tmpfile_open() out of filp_cachep which isn't great, no? That's a pretty heavily used codepath so it feels that it should probably continue to use the cache? What about keeping FMODE_NOACCOUNT and adding FMODE_FAKE_PATH. I think that might be better. Christoph has just removed 3 FMODE_* flags with his decoupling of block specific flags from file generic flags. So as far as I'm concerned this wouldn't be a problem.
On Fri, Jun 09, 2023 at 02:54:32PM +0200, Christian Brauner wrote: > On Fri, Jun 09, 2023 at 03:20:14PM +0300, Amir Goldstein wrote: > > On Fri, Jun 9, 2023 at 3:12 PM Christian Brauner <brauner@kernel.org> wrote: > > > > > > On Fri, Jun 09, 2023 at 02:57:20PM +0300, Amir Goldstein wrote: > > > > On Fri, Jun 9, 2023 at 2:32 PM Christian Brauner <brauner@kernel.org> wrote: > > > > > > > > > > On Fri, Jun 09, 2023 at 10:32:37AM +0300, Amir Goldstein wrote: > > > > > > Overlayfs and cachefiles use open_with_fake_path() to allocate internal > > > > > > files, where overlayfs also puts a "fake" path in f_path - a path which > > > > > > is not on the same fs as f_inode. > > > > > > > > > > > > Allocate a container struct file_fake for those internal files, that > > > > > > will be used to hold the fake path qlong with the real path. > > > > > > > > > > > > This commit does not populate the extra fake_path field and leaves the > > > > > > overlayfs internal file's f_path fake. > > > > > > > > > > > > Signed-off-by: Amir Goldstein <amir73il@gmail.com> > > > > > > --- > > > > > > fs/cachefiles/namei.c | 2 +- > > > > > > fs/file_table.c | 85 +++++++++++++++++++++++++++++++++++-------- > > > > > > fs/internal.h | 5 ++- > > > > > > fs/namei.c | 2 +- > > > > > > fs/open.c | 11 +++--- > > > > > > fs/overlayfs/file.c | 2 +- > > > > > > include/linux/fs.h | 13 ++++--- > > > > > > 7 files changed, 90 insertions(+), 30 deletions(-) > > > > > > > > > > > > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c > > > > > > index 82219a8f6084..a71bdf2d03ba 100644 > > > > > > --- a/fs/cachefiles/namei.c > > > > > > +++ b/fs/cachefiles/namei.c > > > > > > @@ -561,7 +561,7 @@ static bool cachefiles_open_file(struct cachefiles_object *object, > > > > > > path.mnt = cache->mnt; > > > > > > path.dentry = dentry; > > > > > > file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT, > > > > > > - d_backing_inode(dentry), cache->cache_cred); > > > > > > + &path, cache->cache_cred); > > > > > > if (IS_ERR(file)) { > > > > > > trace_cachefiles_vfs_error(object, d_backing_inode(dentry), > > > > > > PTR_ERR(file), > > > > > > diff --git a/fs/file_table.c b/fs/file_table.c > > > > > > index 372653b92617..adc2a92faa52 100644 > > > > > > --- a/fs/file_table.c > > > > > > +++ b/fs/file_table.c > > > > > > @@ -44,18 +44,48 @@ static struct kmem_cache *filp_cachep __read_mostly; > > > > > > > > > > > > static struct percpu_counter nr_files __cacheline_aligned_in_smp; > > > > > > > > > > > > +/* Container for file with optional fake path to display in /proc files */ > > > > > > +struct file_fake { > > > > > > + struct file file; > > > > > > + struct path fake_path; > > > > > > +}; > > > > > > + > > > > > > +static inline struct file_fake *file_fake(struct file *f) > > > > > > +{ > > > > > > + return container_of(f, struct file_fake, file); > > > > > > +} > > > > > > + > > > > > > +/* Returns fake_path if one exists, f_path otherwise */ > > > > > > +const struct path *file_fake_path(struct file *f) > > > > > > +{ > > > > > > + struct file_fake *ff = file_fake(f); > > > > > > + > > > > > > + if (!(f->f_mode & FMODE_FAKE_PATH) || !ff->fake_path.dentry) > > > > > > + return &f->f_path; > > > > > > + > > > > > > + return &ff->fake_path; > > > > > > +} > > > > > > +EXPORT_SYMBOL(file_fake_path); > > > > > > + > > > > > > static void file_free_rcu(struct rcu_head *head) > > > > > > { > > > > > > struct file *f = container_of(head, struct file, f_rcuhead); > > > > > > > > > > > > put_cred(f->f_cred); > > > > > > - kmem_cache_free(filp_cachep, f); > > > > > > + if (f->f_mode & FMODE_FAKE_PATH) > > > > > > + kfree(file_fake(f)); > > > > > > + else > > > > > > + kmem_cache_free(filp_cachep, f); > > > > > > } > > > > > > > > > > > > static inline void file_free(struct file *f) > > > > > > { > > > > > > + struct file_fake *ff = file_fake(f); > > > > > > + > > > > > > security_file_free(f); > > > > > > - if (!(f->f_mode & FMODE_NOACCOUNT)) > > > > > > + if (f->f_mode & FMODE_FAKE_PATH) > > > > > > + path_put(&ff->fake_path); > > > > > > + else > > > > > > percpu_counter_dec(&nr_files); > > > > > > call_rcu(&f->f_rcuhead, file_free_rcu); > > > > > > } > > > > > > @@ -131,20 +161,15 @@ static int __init init_fs_stat_sysctls(void) > > > > > > fs_initcall(init_fs_stat_sysctls); > > > > > > #endif > > > > > > > > > > > > -static struct file *__alloc_file(int flags, const struct cred *cred) > > > > > > +static int init_file(struct file *f, int flags, const struct cred *cred) > > > > > > { > > > > > > - struct file *f; > > > > > > int error; > > > > > > > > > > > > - f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); > > > > > > - if (unlikely(!f)) > > > > > > - return ERR_PTR(-ENOMEM); > > > > > > - > > > > > > f->f_cred = get_cred(cred); > > > > > > error = security_file_alloc(f); > > > > > > if (unlikely(error)) { > > > > > > file_free_rcu(&f->f_rcuhead); > > > > > > - return ERR_PTR(error); > > > > > > + return error; > > > > > > } > > > > > > > > > > > > atomic_long_set(&f->f_count, 1); > > > > > > @@ -155,6 +180,22 @@ static struct file *__alloc_file(int flags, const struct cred *cred) > > > > > > f->f_mode = OPEN_FMODE(flags); > > > > > > /* f->f_version: 0 */ > > > > > > > > > > > > + return 0; > > > > > > +} > > > > > > + > > > > > > +static struct file *__alloc_file(int flags, const struct cred *cred) > > > > > > +{ > > > > > > + struct file *f; > > > > > > + int error; > > > > > > + > > > > > > + f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); > > > > > > + if (unlikely(!f)) > > > > > > + return ERR_PTR(-ENOMEM); > > > > > > + > > > > > > + error = init_file(f, flags, cred); > > > > > > + if (unlikely(error)) > > > > > > + return ERR_PTR(error); > > > > > > + > > > > > > return f; > > > > > > } > > > > > > > > > > > > @@ -201,18 +242,32 @@ struct file *alloc_empty_file(int flags, const struct cred *cred) > > > > > > } > > > > > > > > > > > > /* > > > > > > - * Variant of alloc_empty_file() that doesn't check and modify nr_files. > > > > > > + * Variant of alloc_empty_file() that allocates a file_fake container > > > > > > + * and doesn't check and modify nr_files. > > > > > > * > > > > > > * Should not be used unless there's a very good reason to do so. > > > > > > */ > > > > > > -struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred) > > > > > > +struct file *alloc_empty_file_fake(const struct path *fake_path, int flags, > > > > > > + const struct cred *cred) > > > > > > { > > > > > > - struct file *f = __alloc_file(flags, cred); > > > > > > + struct file_fake *ff; > > > > > > + int error; > > > > > > > > > > > > - if (!IS_ERR(f)) > > > > > > - f->f_mode |= FMODE_NOACCOUNT; > > > > > > + ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL); > > > > > > + if (unlikely(!ff)) > > > > > > + return ERR_PTR(-ENOMEM); > > > > > > > > > > > > - return f; > > > > > > + error = init_file(&ff->file, flags, cred); > > > > > > + if (unlikely(error)) > > > > > > + return ERR_PTR(error); > > > > > > + > > > > > > + ff->file.f_mode |= FMODE_FAKE_PATH; > > > > > > + if (fake_path) { > > > > > > + path_get(fake_path); > > > > > > + ff->fake_path = *fake_path; > > > > > > + } > > > > > > > > > > Hm, I see that this check is mostly done for vfs_tmpfile_open() which > > > > > only fills in file->f_path in vfs_tmpfile() but leaves ff->fake_path > > > > > NULL. > > > > > > > > > > So really I think having FMODE_FAKE_PATH set but ff->fake_path be NULL > > > > > is an invitation for NULL derefs sooner or later. I would simply > > > > > document that it's required to set ff->fake_path. For callers such as > > > > > vfs_tmpfile_open() it can just be path itself. IOW, vfs_tmpfile_open() > > > > > should set ff->fake_path to file->f_path. > > > > > > > > Makes sense. > > > > I also took the liberty to re-arrange vfs_tmpfile_open() without the > > > > unneeded if (!error) { nesting depth. > > > > > > Yes, please. I had a rough sketch just for my own amusement... > > > > Happy to make your Friday more amusing :-D > > > > > > > > fs/namei.c > > > struct file *vfs_tmpfile_open(struct mnt_idmap *idmap, > > > const struct path *parentpath, umode_t mode, > > > int open_flag, const struct cred *cred) > > > { > > > struct file *file; > > > int error; > > > > > > file = alloc_empty_file_fake(open_flag, cred); > > > if (IS_ERR(file)) > > > return file; > > > > > > error = vfs_tmpfile(idmap, parentpath, file, mode); > > > if (error) { > > > fput(file); > > > return ERR_PTR(error); > > > } > > > > > > return file_set_fake_path(file, &file->f_path); > > > } > > > EXPORT_SYMBOL(vfs_tmpfile_open); > > > > > > fs/internal.h > > > struct file *file_set_fake_path(struct file *file, const struct path *fake_path); > > > > > > fs/open.c > > > struct file *open_with_fake_path(const struct path *fake_path, int flags, > > > const struct path *path, > > > const struct cred *cred) > > > { > > > int error; > > > struct file *file; > > > > > > file = alloc_empty_file_fake(flags, cred); > > > if (IS_ERR(file)) > > > return file; > > > > > > file->f_path = *path; > > > error = do_dentry_open(file, d_inode(path->dentry), NULL); > > > if (error) { > > > fput(file); > > > return ERR_PTR(error); > > > } > > > > > > return file_set_fake_path(file, fake_path); > > > } > > > > > > fs/file_table.c > > > struct file *alloc_empty_file_fake(int flags, const struct cred *cred) > > > { > > > struct file_fake *ff; > > > int error; > > > > > > ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL); > > > if (unlikely(!ff)) > > > return ERR_PTR(-ENOMEM); > > > > > > error = init_file(&ff->file, flags, cred); > > > if (unlikely(error)) > > > return ERR_PTR(error); > > > > > > ff->file.f_mode |= FMODE_FAKE_PATH; > > > return &ff->file; > > > } > > > > > > struct file *file_set_fake_path(struct file *file, const struct path *fake_path) > > > { > > > if (file->f_mode & FMODE_FAKE_PATH) { > > > struct file_fake *ff = file_fake(file); > > > ff->fake_path = *fake_path; > > > } > > > > > > return file; > > > } > > > > > > > Heh, I also started with file_set_fake_path() but I decided that it's not > > worth it, because no code should be messing with this and I just changed > > file_fake_path() to be non-const and used *file_fake_path(file) = fake_path > > in these two helpers. > > Hm, I don't understand. This is non-exported and only visible in thing > that can use internal.h so only core vfs coe. > > The only places that use this are vfs_tmpfile_open() and > open_with_fake_path(). It allows us to remove the additional argument > from alloc_empty_file_fake() and that's what I really like because now > we don't have this pass NULL in vfs_tmpfile_open() and fill in later vs > doing it in one step. > > Hm, one thing I realized is that this moves vfs_tmpfile_open() out of > filp_cachep which isn't great, no? That's a pretty heavily used codepath > so it feels that it should probably continue to use the cache? Uh, I misread as that's only used in cachefiles and in overlayfs so it's probably fine. I thought this was the generic version. Though it might still be preferable to keep FMODE_NOACCOUNT and FMODE_FAKE_PATH distinct since there's really no reason why tmpfiles should partake in the fake path stuff...
On Fri, Jun 9, 2023 at 4:00 PM Christian Brauner <brauner@kernel.org> wrote: > > On Fri, Jun 09, 2023 at 02:54:32PM +0200, Christian Brauner wrote: [...] > > Uh, I misread as that's only used in cachefiles and in overlayfs so it's > probably fine. I thought this was the generic version. Though it might > still be preferable to keep FMODE_NOACCOUNT and FMODE_FAKE_PATH distinct > since there's really no reason why tmpfiles should partake in the fake > path stuff... The reason is (wait for it) no more available bits in f_flags. Yeh, there is one place left in 0x4000000, but I didn't want to waste it given that FMODE_NOACCOUNT and FMODE_FAKE_PATH use cases are pretty close. BTW, you reminded me that I forgot to CC dhowells (add now). Thanks, Amir.
On Fri, Jun 9, 2023 at 3:12 PM Christian Brauner <brauner@kernel.org> wrote: > > On Fri, Jun 09, 2023 at 02:57:20PM +0300, Amir Goldstein wrote: > > On Fri, Jun 9, 2023 at 2:32 PM Christian Brauner <brauner@kernel.org> wrote: > > > > > > On Fri, Jun 09, 2023 at 10:32:37AM +0300, Amir Goldstein wrote: > > > > Overlayfs and cachefiles use open_with_fake_path() to allocate internal > > > > files, where overlayfs also puts a "fake" path in f_path - a path which > > > > is not on the same fs as f_inode. > > > > > > > > Allocate a container struct file_fake for those internal files, that > > > > will be used to hold the fake path qlong with the real path. > > > > > > > > This commit does not populate the extra fake_path field and leaves the > > > > overlayfs internal file's f_path fake. > > > > > > > > Signed-off-by: Amir Goldstein <amir73il@gmail.com> > > > > --- > > > > fs/cachefiles/namei.c | 2 +- > > > > fs/file_table.c | 85 +++++++++++++++++++++++++++++++++++-------- > > > > fs/internal.h | 5 ++- > > > > fs/namei.c | 2 +- > > > > fs/open.c | 11 +++--- > > > > fs/overlayfs/file.c | 2 +- > > > > include/linux/fs.h | 13 ++++--- > > > > 7 files changed, 90 insertions(+), 30 deletions(-) > > > > > > > > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c > > > > index 82219a8f6084..a71bdf2d03ba 100644 > > > > --- a/fs/cachefiles/namei.c > > > > +++ b/fs/cachefiles/namei.c > > > > @@ -561,7 +561,7 @@ static bool cachefiles_open_file(struct cachefiles_object *object, > > > > path.mnt = cache->mnt; > > > > path.dentry = dentry; > > > > file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT, > > > > - d_backing_inode(dentry), cache->cache_cred); > > > > + &path, cache->cache_cred); > > > > if (IS_ERR(file)) { > > > > trace_cachefiles_vfs_error(object, d_backing_inode(dentry), > > > > PTR_ERR(file), > > > > diff --git a/fs/file_table.c b/fs/file_table.c > > > > index 372653b92617..adc2a92faa52 100644 > > > > --- a/fs/file_table.c > > > > +++ b/fs/file_table.c > > > > @@ -44,18 +44,48 @@ static struct kmem_cache *filp_cachep __read_mostly; > > > > > > > > static struct percpu_counter nr_files __cacheline_aligned_in_smp; > > > > > > > > +/* Container for file with optional fake path to display in /proc files */ > > > > +struct file_fake { > > > > + struct file file; > > > > + struct path fake_path; > > > > +}; > > > > + > > > > +static inline struct file_fake *file_fake(struct file *f) > > > > +{ > > > > + return container_of(f, struct file_fake, file); > > > > +} > > > > + > > > > +/* Returns fake_path if one exists, f_path otherwise */ > > > > +const struct path *file_fake_path(struct file *f) > > > > +{ > > > > + struct file_fake *ff = file_fake(f); > > > > + > > > > + if (!(f->f_mode & FMODE_FAKE_PATH) || !ff->fake_path.dentry) > > > > + return &f->f_path; > > > > + > > > > + return &ff->fake_path; > > > > +} > > > > +EXPORT_SYMBOL(file_fake_path); > > > > + > > > > static void file_free_rcu(struct rcu_head *head) > > > > { > > > > struct file *f = container_of(head, struct file, f_rcuhead); > > > > > > > > put_cred(f->f_cred); > > > > - kmem_cache_free(filp_cachep, f); > > > > + if (f->f_mode & FMODE_FAKE_PATH) > > > > + kfree(file_fake(f)); > > > > + else > > > > + kmem_cache_free(filp_cachep, f); > > > > } > > > > > > > > static inline void file_free(struct file *f) > > > > { > > > > + struct file_fake *ff = file_fake(f); > > > > + > > > > security_file_free(f); > > > > - if (!(f->f_mode & FMODE_NOACCOUNT)) > > > > + if (f->f_mode & FMODE_FAKE_PATH) > > > > + path_put(&ff->fake_path); > > > > + else > > > > percpu_counter_dec(&nr_files); > > > > call_rcu(&f->f_rcuhead, file_free_rcu); > > > > } > > > > @@ -131,20 +161,15 @@ static int __init init_fs_stat_sysctls(void) > > > > fs_initcall(init_fs_stat_sysctls); > > > > #endif > > > > > > > > -static struct file *__alloc_file(int flags, const struct cred *cred) > > > > +static int init_file(struct file *f, int flags, const struct cred *cred) > > > > { > > > > - struct file *f; > > > > int error; > > > > > > > > - f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); > > > > - if (unlikely(!f)) > > > > - return ERR_PTR(-ENOMEM); > > > > - > > > > f->f_cred = get_cred(cred); > > > > error = security_file_alloc(f); > > > > if (unlikely(error)) { > > > > file_free_rcu(&f->f_rcuhead); > > > > - return ERR_PTR(error); > > > > + return error; > > > > } > > > > > > > > atomic_long_set(&f->f_count, 1); > > > > @@ -155,6 +180,22 @@ static struct file *__alloc_file(int flags, const struct cred *cred) > > > > f->f_mode = OPEN_FMODE(flags); > > > > /* f->f_version: 0 */ > > > > > > > > + return 0; > > > > +} > > > > + > > > > +static struct file *__alloc_file(int flags, const struct cred *cred) > > > > +{ > > > > + struct file *f; > > > > + int error; > > > > + > > > > + f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); > > > > + if (unlikely(!f)) > > > > + return ERR_PTR(-ENOMEM); > > > > + > > > > + error = init_file(f, flags, cred); > > > > + if (unlikely(error)) > > > > + return ERR_PTR(error); > > > > + > > > > return f; > > > > } > > > > > > > > @@ -201,18 +242,32 @@ struct file *alloc_empty_file(int flags, const struct cred *cred) > > > > } > > > > > > > > /* > > > > - * Variant of alloc_empty_file() that doesn't check and modify nr_files. > > > > + * Variant of alloc_empty_file() that allocates a file_fake container > > > > + * and doesn't check and modify nr_files. > > > > * > > > > * Should not be used unless there's a very good reason to do so. > > > > */ > > > > -struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred) > > > > +struct file *alloc_empty_file_fake(const struct path *fake_path, int flags, > > > > + const struct cred *cred) > > > > { > > > > - struct file *f = __alloc_file(flags, cred); > > > > + struct file_fake *ff; > > > > + int error; > > > > > > > > - if (!IS_ERR(f)) > > > > - f->f_mode |= FMODE_NOACCOUNT; > > > > + ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL); > > > > + if (unlikely(!ff)) > > > > + return ERR_PTR(-ENOMEM); > > > > > > > > - return f; > > > > + error = init_file(&ff->file, flags, cred); > > > > + if (unlikely(error)) > > > > + return ERR_PTR(error); > > > > + > > > > + ff->file.f_mode |= FMODE_FAKE_PATH; > > > > + if (fake_path) { > > > > + path_get(fake_path); > > > > + ff->fake_path = *fake_path; > > > > + } > > > > > > Hm, I see that this check is mostly done for vfs_tmpfile_open() which > > > only fills in file->f_path in vfs_tmpfile() but leaves ff->fake_path > > > NULL. > > > > > > So really I think having FMODE_FAKE_PATH set but ff->fake_path be NULL > > > is an invitation for NULL derefs sooner or later. I would simply > > > document that it's required to set ff->fake_path. For callers such as > > > vfs_tmpfile_open() it can just be path itself. IOW, vfs_tmpfile_open() > > > should set ff->fake_path to file->f_path. > > > > Makes sense. > > I also took the liberty to re-arrange vfs_tmpfile_open() without the > > unneeded if (!error) { nesting depth. > > Yes, please. I had a rough sketch just for my own amusement... > > fs/namei.c > struct file *vfs_tmpfile_open(struct mnt_idmap *idmap, > const struct path *parentpath, umode_t mode, > int open_flag, const struct cred *cred) > { > struct file *file; > int error; > > file = alloc_empty_file_fake(open_flag, cred); > if (IS_ERR(file)) > return file; > > error = vfs_tmpfile(idmap, parentpath, file, mode); > if (error) { > fput(file); > return ERR_PTR(error); > } > > return file_set_fake_path(file, &file->f_path); FYI, this is not enough to guarantee that the fake_path cannot be empty, for example in fput() above. So I did keep the real_path empty in this case in v3 and I have an accessor that verifies that real_path is not empty before returning it. Thanks, Amir.
On Sun, Jun 11, 2023 at 10:11:29PM +0300, Amir Goldstein wrote: > On Fri, Jun 9, 2023 at 3:12 PM Christian Brauner <brauner@kernel.org> wrote: > > > > On Fri, Jun 09, 2023 at 02:57:20PM +0300, Amir Goldstein wrote: > > > On Fri, Jun 9, 2023 at 2:32 PM Christian Brauner <brauner@kernel.org> wrote: > > > > > > > > On Fri, Jun 09, 2023 at 10:32:37AM +0300, Amir Goldstein wrote: > > > > > Overlayfs and cachefiles use open_with_fake_path() to allocate internal > > > > > files, where overlayfs also puts a "fake" path in f_path - a path which > > > > > is not on the same fs as f_inode. > > > > > > > > > > Allocate a container struct file_fake for those internal files, that > > > > > will be used to hold the fake path qlong with the real path. > > > > > > > > > > This commit does not populate the extra fake_path field and leaves the > > > > > overlayfs internal file's f_path fake. > > > > > > > > > > Signed-off-by: Amir Goldstein <amir73il@gmail.com> > > > > > --- > > > > > fs/cachefiles/namei.c | 2 +- > > > > > fs/file_table.c | 85 +++++++++++++++++++++++++++++++++++-------- > > > > > fs/internal.h | 5 ++- > > > > > fs/namei.c | 2 +- > > > > > fs/open.c | 11 +++--- > > > > > fs/overlayfs/file.c | 2 +- > > > > > include/linux/fs.h | 13 ++++--- > > > > > 7 files changed, 90 insertions(+), 30 deletions(-) > > > > > > > > > > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c > > > > > index 82219a8f6084..a71bdf2d03ba 100644 > > > > > --- a/fs/cachefiles/namei.c > > > > > +++ b/fs/cachefiles/namei.c > > > > > @@ -561,7 +561,7 @@ static bool cachefiles_open_file(struct cachefiles_object *object, > > > > > path.mnt = cache->mnt; > > > > > path.dentry = dentry; > > > > > file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT, > > > > > - d_backing_inode(dentry), cache->cache_cred); > > > > > + &path, cache->cache_cred); > > > > > if (IS_ERR(file)) { > > > > > trace_cachefiles_vfs_error(object, d_backing_inode(dentry), > > > > > PTR_ERR(file), > > > > > diff --git a/fs/file_table.c b/fs/file_table.c > > > > > index 372653b92617..adc2a92faa52 100644 > > > > > --- a/fs/file_table.c > > > > > +++ b/fs/file_table.c > > > > > @@ -44,18 +44,48 @@ static struct kmem_cache *filp_cachep __read_mostly; > > > > > > > > > > static struct percpu_counter nr_files __cacheline_aligned_in_smp; > > > > > > > > > > +/* Container for file with optional fake path to display in /proc files */ > > > > > +struct file_fake { > > > > > + struct file file; > > > > > + struct path fake_path; > > > > > +}; > > > > > + > > > > > +static inline struct file_fake *file_fake(struct file *f) > > > > > +{ > > > > > + return container_of(f, struct file_fake, file); > > > > > +} > > > > > + > > > > > +/* Returns fake_path if one exists, f_path otherwise */ > > > > > +const struct path *file_fake_path(struct file *f) > > > > > +{ > > > > > + struct file_fake *ff = file_fake(f); > > > > > + > > > > > + if (!(f->f_mode & FMODE_FAKE_PATH) || !ff->fake_path.dentry) > > > > > + return &f->f_path; > > > > > + > > > > > + return &ff->fake_path; > > > > > +} > > > > > +EXPORT_SYMBOL(file_fake_path); > > > > > + > > > > > static void file_free_rcu(struct rcu_head *head) > > > > > { > > > > > struct file *f = container_of(head, struct file, f_rcuhead); > > > > > > > > > > put_cred(f->f_cred); > > > > > - kmem_cache_free(filp_cachep, f); > > > > > + if (f->f_mode & FMODE_FAKE_PATH) > > > > > + kfree(file_fake(f)); > > > > > + else > > > > > + kmem_cache_free(filp_cachep, f); > > > > > } > > > > > > > > > > static inline void file_free(struct file *f) > > > > > { > > > > > + struct file_fake *ff = file_fake(f); > > > > > + > > > > > security_file_free(f); > > > > > - if (!(f->f_mode & FMODE_NOACCOUNT)) > > > > > + if (f->f_mode & FMODE_FAKE_PATH) > > > > > + path_put(&ff->fake_path); > > > > > + else > > > > > percpu_counter_dec(&nr_files); > > > > > call_rcu(&f->f_rcuhead, file_free_rcu); > > > > > } > > > > > @@ -131,20 +161,15 @@ static int __init init_fs_stat_sysctls(void) > > > > > fs_initcall(init_fs_stat_sysctls); > > > > > #endif > > > > > > > > > > -static struct file *__alloc_file(int flags, const struct cred *cred) > > > > > +static int init_file(struct file *f, int flags, const struct cred *cred) > > > > > { > > > > > - struct file *f; > > > > > int error; > > > > > > > > > > - f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); > > > > > - if (unlikely(!f)) > > > > > - return ERR_PTR(-ENOMEM); > > > > > - > > > > > f->f_cred = get_cred(cred); > > > > > error = security_file_alloc(f); > > > > > if (unlikely(error)) { > > > > > file_free_rcu(&f->f_rcuhead); > > > > > - return ERR_PTR(error); > > > > > + return error; > > > > > } > > > > > > > > > > atomic_long_set(&f->f_count, 1); > > > > > @@ -155,6 +180,22 @@ static struct file *__alloc_file(int flags, const struct cred *cred) > > > > > f->f_mode = OPEN_FMODE(flags); > > > > > /* f->f_version: 0 */ > > > > > > > > > > + return 0; > > > > > +} > > > > > + > > > > > +static struct file *__alloc_file(int flags, const struct cred *cred) > > > > > +{ > > > > > + struct file *f; > > > > > + int error; > > > > > + > > > > > + f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); > > > > > + if (unlikely(!f)) > > > > > + return ERR_PTR(-ENOMEM); > > > > > + > > > > > + error = init_file(f, flags, cred); > > > > > + if (unlikely(error)) > > > > > + return ERR_PTR(error); > > > > > + > > > > > return f; > > > > > } > > > > > > > > > > @@ -201,18 +242,32 @@ struct file *alloc_empty_file(int flags, const struct cred *cred) > > > > > } > > > > > > > > > > /* > > > > > - * Variant of alloc_empty_file() that doesn't check and modify nr_files. > > > > > + * Variant of alloc_empty_file() that allocates a file_fake container > > > > > + * and doesn't check and modify nr_files. > > > > > * > > > > > * Should not be used unless there's a very good reason to do so. > > > > > */ > > > > > -struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred) > > > > > +struct file *alloc_empty_file_fake(const struct path *fake_path, int flags, > > > > > + const struct cred *cred) > > > > > { > > > > > - struct file *f = __alloc_file(flags, cred); > > > > > + struct file_fake *ff; > > > > > + int error; > > > > > > > > > > - if (!IS_ERR(f)) > > > > > - f->f_mode |= FMODE_NOACCOUNT; > > > > > + ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL); > > > > > + if (unlikely(!ff)) > > > > > + return ERR_PTR(-ENOMEM); > > > > > > > > > > - return f; > > > > > + error = init_file(&ff->file, flags, cred); > > > > > + if (unlikely(error)) > > > > > + return ERR_PTR(error); > > > > > + > > > > > + ff->file.f_mode |= FMODE_FAKE_PATH; > > > > > + if (fake_path) { > > > > > + path_get(fake_path); > > > > > + ff->fake_path = *fake_path; > > > > > + } > > > > > > > > Hm, I see that this check is mostly done for vfs_tmpfile_open() which > > > > only fills in file->f_path in vfs_tmpfile() but leaves ff->fake_path > > > > NULL. > > > > > > > > So really I think having FMODE_FAKE_PATH set but ff->fake_path be NULL > > > > is an invitation for NULL derefs sooner or later. I would simply > > > > document that it's required to set ff->fake_path. For callers such as > > > > vfs_tmpfile_open() it can just be path itself. IOW, vfs_tmpfile_open() > > > > should set ff->fake_path to file->f_path. > > > > > > Makes sense. > > > I also took the liberty to re-arrange vfs_tmpfile_open() without the > > > unneeded if (!error) { nesting depth. > > > > Yes, please. I had a rough sketch just for my own amusement... > > > > fs/namei.c > > struct file *vfs_tmpfile_open(struct mnt_idmap *idmap, > > const struct path *parentpath, umode_t mode, > > int open_flag, const struct cred *cred) > > { > > struct file *file; > > int error; > > > > file = alloc_empty_file_fake(open_flag, cred); > > if (IS_ERR(file)) > > return file; > > > > error = vfs_tmpfile(idmap, parentpath, file, mode); > > if (error) { > > fput(file); > > return ERR_PTR(error); > > } > > > > return file_set_fake_path(file, &file->f_path); > > FYI, this is not enough to guarantee that the fake_path cannot > be empty, for example in fput() above. > So I did keep the real_path empty in this case in v3 and > I have an accessor that verifies that real_path is not empty > before returning it. Ok, I'm just making it to v3 now.
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 82219a8f6084..a71bdf2d03ba 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -561,7 +561,7 @@ static bool cachefiles_open_file(struct cachefiles_object *object, path.mnt = cache->mnt; path.dentry = dentry; file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT, - d_backing_inode(dentry), cache->cache_cred); + &path, cache->cache_cred); if (IS_ERR(file)) { trace_cachefiles_vfs_error(object, d_backing_inode(dentry), PTR_ERR(file), diff --git a/fs/file_table.c b/fs/file_table.c index 372653b92617..adc2a92faa52 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -44,18 +44,48 @@ static struct kmem_cache *filp_cachep __read_mostly; static struct percpu_counter nr_files __cacheline_aligned_in_smp; +/* Container for file with optional fake path to display in /proc files */ +struct file_fake { + struct file file; + struct path fake_path; +}; + +static inline struct file_fake *file_fake(struct file *f) +{ + return container_of(f, struct file_fake, file); +} + +/* Returns fake_path if one exists, f_path otherwise */ +const struct path *file_fake_path(struct file *f) +{ + struct file_fake *ff = file_fake(f); + + if (!(f->f_mode & FMODE_FAKE_PATH) || !ff->fake_path.dentry) + return &f->f_path; + + return &ff->fake_path; +} +EXPORT_SYMBOL(file_fake_path); + static void file_free_rcu(struct rcu_head *head) { struct file *f = container_of(head, struct file, f_rcuhead); put_cred(f->f_cred); - kmem_cache_free(filp_cachep, f); + if (f->f_mode & FMODE_FAKE_PATH) + kfree(file_fake(f)); + else + kmem_cache_free(filp_cachep, f); } static inline void file_free(struct file *f) { + struct file_fake *ff = file_fake(f); + security_file_free(f); - if (!(f->f_mode & FMODE_NOACCOUNT)) + if (f->f_mode & FMODE_FAKE_PATH) + path_put(&ff->fake_path); + else percpu_counter_dec(&nr_files); call_rcu(&f->f_rcuhead, file_free_rcu); } @@ -131,20 +161,15 @@ static int __init init_fs_stat_sysctls(void) fs_initcall(init_fs_stat_sysctls); #endif -static struct file *__alloc_file(int flags, const struct cred *cred) +static int init_file(struct file *f, int flags, const struct cred *cred) { - struct file *f; int error; - f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); - if (unlikely(!f)) - return ERR_PTR(-ENOMEM); - f->f_cred = get_cred(cred); error = security_file_alloc(f); if (unlikely(error)) { file_free_rcu(&f->f_rcuhead); - return ERR_PTR(error); + return error; } atomic_long_set(&f->f_count, 1); @@ -155,6 +180,22 @@ static struct file *__alloc_file(int flags, const struct cred *cred) f->f_mode = OPEN_FMODE(flags); /* f->f_version: 0 */ + return 0; +} + +static struct file *__alloc_file(int flags, const struct cred *cred) +{ + struct file *f; + int error; + + f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); + if (unlikely(!f)) + return ERR_PTR(-ENOMEM); + + error = init_file(f, flags, cred); + if (unlikely(error)) + return ERR_PTR(error); + return f; } @@ -201,18 +242,32 @@ struct file *alloc_empty_file(int flags, const struct cred *cred) } /* - * Variant of alloc_empty_file() that doesn't check and modify nr_files. + * Variant of alloc_empty_file() that allocates a file_fake container + * and doesn't check and modify nr_files. * * Should not be used unless there's a very good reason to do so. */ -struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred) +struct file *alloc_empty_file_fake(const struct path *fake_path, int flags, + const struct cred *cred) { - struct file *f = __alloc_file(flags, cred); + struct file_fake *ff; + int error; - if (!IS_ERR(f)) - f->f_mode |= FMODE_NOACCOUNT; + ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL); + if (unlikely(!ff)) + return ERR_PTR(-ENOMEM); - return f; + error = init_file(&ff->file, flags, cred); + if (unlikely(error)) + return ERR_PTR(error); + + ff->file.f_mode |= FMODE_FAKE_PATH; + if (fake_path) { + path_get(fake_path); + ff->fake_path = *fake_path; + } + + return &ff->file; } /** diff --git a/fs/internal.h b/fs/internal.h index bd3b2810a36b..8890c694745b 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -97,8 +97,9 @@ extern void chroot_fs_refs(const struct path *, const struct path *); /* * file_table.c */ -extern struct file *alloc_empty_file(int, const struct cred *); -extern struct file *alloc_empty_file_noaccount(int, const struct cred *); +extern struct file *alloc_empty_file(int flags, const struct cred *cred); +extern struct file *alloc_empty_file_fake(const struct path *fake_path, + int flags, const struct cred *cred); static inline void put_file_access(struct file *file) { diff --git a/fs/namei.c b/fs/namei.c index e4fe0879ae55..5e6de1f29f4e 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3721,7 +3721,7 @@ struct file *vfs_tmpfile_open(struct mnt_idmap *idmap, struct file *file; int error; - file = alloc_empty_file_noaccount(open_flag, cred); + file = alloc_empty_file_fake(NULL, open_flag, cred); if (!IS_ERR(file)) { error = vfs_tmpfile(idmap, parentpath, file, mode); if (error) { diff --git a/fs/open.c b/fs/open.c index 4478adcc4f3a..c9e2300a037d 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1116,15 +1116,16 @@ struct file *dentry_create(const struct path *path, int flags, umode_t mode, } EXPORT_SYMBOL(dentry_create); -struct file *open_with_fake_path(const struct path *path, int flags, - struct inode *inode, const struct cred *cred) +struct file *open_with_fake_path(const struct path *fake_path, int flags, + const struct path *path, + const struct cred *cred) { - struct file *f = alloc_empty_file_noaccount(flags, cred); + struct file *f = alloc_empty_file_fake(NULL, flags, cred); if (!IS_ERR(f)) { int error; - f->f_path = *path; - error = do_dentry_open(f, inode, NULL); + f->f_path = *fake_path; + error = do_dentry_open(f, d_inode(path->dentry), NULL); if (error) { fput(f); f = ERR_PTR(error); diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 7c04f033aadd..f5987377e9eb 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -61,7 +61,7 @@ static struct file *ovl_open_realfile(const struct file *file, if (!inode_owner_or_capable(real_idmap, realinode)) flags &= ~O_NOATIME; - realfile = open_with_fake_path(&file->f_path, flags, realinode, + realfile = open_with_fake_path(&file->f_path, flags, realpath, current_cred()); } revert_creds(old_cred); diff --git a/include/linux/fs.h b/include/linux/fs.h index 21a981680856..b112a3c9b499 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -180,8 +180,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* File represents mount that needs unmounting */ #define FMODE_NEED_UNMOUNT ((__force fmode_t)0x10000000) -/* File does not contribute to nr_files count */ -#define FMODE_NOACCOUNT ((__force fmode_t)0x20000000) +/* File is embedded in file_fake and doesn't contribute to nr_files */ +#define FMODE_FAKE_PATH ((__force fmode_t)0x20000000) /* File supports async buffered reads */ #define FMODE_BUF_RASYNC ((__force fmode_t)0x40000000) @@ -2349,11 +2349,14 @@ static inline struct file *file_open_root_mnt(struct vfsmount *mnt, return file_open_root(&(struct path){.mnt = mnt, .dentry = mnt->mnt_root}, name, flags, mode); } -extern struct file * dentry_open(const struct path *, int, const struct cred *); +extern struct file *dentry_open(const struct path *path, int flags, + const struct cred *creds); extern struct file *dentry_create(const struct path *path, int flags, umode_t mode, const struct cred *cred); -extern struct file * open_with_fake_path(const struct path *, int, - struct inode*, const struct cred *); +extern struct file *open_with_fake_path(const struct path *fake_path, int flags, + const struct path *path, + const struct cred *cred); +extern const struct path *file_fake_path(struct file *file); static inline struct file *file_clone_open(struct file *file) { return dentry_open(&file->f_path, file->f_flags, file->f_cred);
Overlayfs and cachefiles use open_with_fake_path() to allocate internal files, where overlayfs also puts a "fake" path in f_path - a path which is not on the same fs as f_inode. Allocate a container struct file_fake for those internal files, that will be used to hold the fake path qlong with the real path. This commit does not populate the extra fake_path field and leaves the overlayfs internal file's f_path fake. Signed-off-by: Amir Goldstein <amir73il@gmail.com> --- fs/cachefiles/namei.c | 2 +- fs/file_table.c | 85 +++++++++++++++++++++++++++++++++++-------- fs/internal.h | 5 ++- fs/namei.c | 2 +- fs/open.c | 11 +++--- fs/overlayfs/file.c | 2 +- include/linux/fs.h | 13 ++++--- 7 files changed, 90 insertions(+), 30 deletions(-)