Message ID | 1484588765-9397-2-git-send-email-amir73il@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Mon, Jan 16, 2017 at 6:46 PM, Amir Goldstein <amir73il@gmail.com> wrote: > Factor out some common vfs bits from do_tmpfile() > to be used by overlayfs for concurrent copy up. > > Signed-off-by: Amir Goldstein <amir73il@gmail.com> > --- > fs/namei.c | 66 +++++++++++++++++++++++++++++++++++------------------- > include/linux/fs.h | 3 +++ > 2 files changed, 46 insertions(+), 23 deletions(-) > > diff --git a/fs/namei.c b/fs/namei.c > index ad74877..3e7c7a6 100644 > --- a/fs/namei.c > +++ b/fs/namei.c > @@ -3353,11 +3353,49 @@ static int do_last(struct nameidata *nd, > return error; > } > > +struct dentry *vfs_tmpfile(struct inode *dir, struct dentry *dentry, dir and dentry refer to the same thing; can just pass the dentry. > + umode_t mode, int open_flag) > +{ > + static const struct qstr name = QSTR_INIT("/", 1); > + struct dentry *child = NULL; > + struct inode *inode; > + int error; > + > + /* we want directory to be writable */ > + error = inode_permission(dir, MAY_WRITE | MAY_EXEC); This is not in the scope of this patch, but shoudln't we be using may_create() here? Or at least a variant without the audit thing... Al? 
Thanks, Miklos > + if (error) > + goto out_err; > + error = -EOPNOTSUPP; > + if (!dir->i_op->tmpfile) > + goto out_err; > + error = -ENOMEM; > + child = d_alloc(dentry, &name); > + if (unlikely(!child)) > + goto out_err; > + error = dir->i_op->tmpfile(dir, child, mode); > + if (error) > + goto out_err; > + error = -ENOENT; > + inode = child->d_inode; > + if (unlikely(!inode)) > + goto out_err; > + if (!(open_flag & O_EXCL)) { > + spin_lock(&inode->i_lock); > + inode->i_state |= I_LINKABLE; > + spin_unlock(&inode->i_lock); > + } > + return child; > + > +out_err: > + dput(child); > + return ERR_PTR(error); > +} > +EXPORT_SYMBOL(vfs_tmpfile); > + > static int do_tmpfile(struct nameidata *nd, unsigned flags, > const struct open_flags *op, > struct file *file, int *opened) > { > - static const struct qstr name = QSTR_INIT("/", 1); > struct dentry *child; > struct inode *dir; > struct path path; > @@ -3368,24 +3406,12 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags, > if (unlikely(error)) > goto out; > dir = path.dentry->d_inode; > - /* we want directory to be writable */ > - error = inode_permission(dir, MAY_WRITE | MAY_EXEC); > - if (error) > - goto out2; > - if (!dir->i_op->tmpfile) { > - error = -EOPNOTSUPP; > - goto out2; > - } > - child = d_alloc(path.dentry, &name); > - if (unlikely(!child)) { > - error = -ENOMEM; > + child = vfs_tmpfile(dir, path.dentry, op->mode, op->open_flag); > + error = PTR_ERR(child); > + if (unlikely(IS_ERR(child))) > goto out2; > - } > dput(path.dentry); > path.dentry = child; > - error = dir->i_op->tmpfile(dir, child, op->mode); > - if (error) > - goto out2; > audit_inode(nd->name, child, 0); > /* Don't check for other permissions, the inode was just created */ > error = may_open(&path, 0, op->open_flag); > @@ -3396,14 +3422,8 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags, > if (error) > goto out2; > error = open_check_o_direct(file); > - if (error) { > + if (error) > fput(file); > - } else if 
(!(op->open_flag & O_EXCL)) { > - struct inode *inode = file_inode(file); > - spin_lock(&inode->i_lock); > - inode->i_state |= I_LINKABLE; > - spin_unlock(&inode->i_lock); > - } > out2: > mnt_drop_write(path.mnt); > out: > diff --git a/include/linux/fs.h b/include/linux/fs.h > index 2ba0743..8c7cbcb 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -1561,6 +1561,9 @@ extern int vfs_unlink(struct inode *, struct dentry *, struct inode **); > extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int); > extern int vfs_whiteout(struct inode *, struct dentry *); > > +extern struct dentry *vfs_tmpfile(struct inode *dir, struct dentry *dentry, > + umode_t mode, int open_flag); > + > /* > * VFS file helper functions. > */ > -- > 2.7.4 > -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Jan 16, 2017 at 08:47:32PM +0100, Miklos Szeredi wrote: > > + umode_t mode, int open_flag) > > +{ > > + static const struct qstr name = QSTR_INIT("/", 1); > > + struct dentry *child = NULL; > > + struct inode *inode; > > + int error; > > + > > + /* we want directory to be writable */ > > + error = inode_permission(dir, MAY_WRITE | MAY_EXEC); > > This is not in the scope of this patch, but shoudln't we be using > may_create() here? Or at least a variant without the audit thing... > > Al? may_create() expects directory + child dentry; here we have only parent. IS_DEADDIR is rather pointless here - directory is not locked, for starters, so rmdir might happen right under you. Or right after you've returned from your function, for that matter. userns checks... FWIW, no such checks are done in ->atomic_open() paths, so I'm not sure how much are those worth...
On Sun, Feb 19, 2017 at 4:27 AM, Al Viro <viro@zeniv.linux.org.uk> wrote: > On Mon, Jan 16, 2017 at 08:47:32PM +0100, Miklos Szeredi wrote: > >> > + umode_t mode, int open_flag) >> > +{ >> > + static const struct qstr name = QSTR_INIT("/", 1); >> > + struct dentry *child = NULL; >> > + struct inode *inode; >> > + int error; >> > + >> > + /* we want directory to be writable */ >> > + error = inode_permission(dir, MAY_WRITE | MAY_EXEC); >> >> This is not in the scope of this patch, but shoudln't we be using >> may_create() here? Or at least a variant without the audit thing... >> >> Al? > > may_create() expects directory + child dentry; here we have only parent. > IS_DEADDIR is rather pointless here - directory is not locked, for > starters, so rmdir might happen right under you. Or right after you've > returned from your function, for that matter. userns checks... > FWIW, no such checks are done in ->atomic_open() paths, so I'm not sure > how much are those worth... Eric would know since he added those checks. Thanks, Miklos
Miklos Szeredi <miklos@szeredi.hu> writes: > On Sun, Feb 19, 2017 at 4:27 AM, Al Viro <viro@zeniv.linux.org.uk> wrote: >> On Mon, Jan 16, 2017 at 08:47:32PM +0100, Miklos Szeredi wrote: >> >>> > + umode_t mode, int open_flag) >>> > +{ >>> > + static const struct qstr name = QSTR_INIT("/", 1); >>> > + struct dentry *child = NULL; >>> > + struct inode *inode; >>> > + int error; >>> > + >>> > + /* we want directory to be writable */ >>> > + error = inode_permission(dir, MAY_WRITE | MAY_EXEC); >>> >>> This is not in the scope of this patch, but shoudln't we be using >>> may_create() here? Or at least a variant without the audit thing... >>> >>> Al? >> >> may_create() expects directory + child dentry; here we have only parent. >> IS_DEADDIR is rather pointless here - directory is not locked, for >> starters, so rmdir might happen right under you. Or right after you've >> returned from your function, for that matter. userns checks... >> FWIW, no such checks are done in ->atomic_open() paths, so I'm not sure >> how much are those worth... > > Eric would know since he added those checks. Unless I am missing something, the atomic_open path was fixed this merge window when may_o_create was fixed. Missing these checks in any place where we create files is an oversight. The point of those checks is that, when we have a filesystem mounted by root in a user namespace (like tmpfs or, hopefully soon, fuse), the vfs can filter out uids and gids that the filesystem does not know how to map and thus has no hope of understanding. Since the filesystem does not care about those uids and gids, odds are filesystems won't bother to test or deal with that case, and corruption will result. As far as I can see, not filtering out unmappable uids and gids is just laying a trap for filesystem developers. Which means vfs_tmpfile is definitely something that needs to be patched to verify that the current_fsuid and current_fsgid are valid from the filesystem's point of view. 
At the same time, this only matters for filesystems that set FS_USERNS_MOUNT and implement tmpfile, which right now is tmpfs. Given that tmpfs actually only uses the vfs inode, there are no corruption or other filesystem misbehaviors right now. So it won't kill us if we don't fix this for 4.11. I am hoping things are far enough along that we can merge the patches to fuse that make it safe to set FS_USERNS_MOUNT for 4.12-rc1, and have truly unprivileged fuse mounts, at which point this will matter more. Eric
diff --git a/fs/namei.c b/fs/namei.c index ad74877..3e7c7a6 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3353,11 +3353,49 @@ static int do_last(struct nameidata *nd, return error; } +struct dentry *vfs_tmpfile(struct inode *dir, struct dentry *dentry, + umode_t mode, int open_flag) +{ + static const struct qstr name = QSTR_INIT("/", 1); + struct dentry *child = NULL; + struct inode *inode; + int error; + + /* we want directory to be writable */ + error = inode_permission(dir, MAY_WRITE | MAY_EXEC); + if (error) + goto out_err; + error = -EOPNOTSUPP; + if (!dir->i_op->tmpfile) + goto out_err; + error = -ENOMEM; + child = d_alloc(dentry, &name); + if (unlikely(!child)) + goto out_err; + error = dir->i_op->tmpfile(dir, child, mode); + if (error) + goto out_err; + error = -ENOENT; + inode = child->d_inode; + if (unlikely(!inode)) + goto out_err; + if (!(open_flag & O_EXCL)) { + spin_lock(&inode->i_lock); + inode->i_state |= I_LINKABLE; + spin_unlock(&inode->i_lock); + } + return child; + +out_err: + dput(child); + return ERR_PTR(error); +} +EXPORT_SYMBOL(vfs_tmpfile); + static int do_tmpfile(struct nameidata *nd, unsigned flags, const struct open_flags *op, struct file *file, int *opened) { - static const struct qstr name = QSTR_INIT("/", 1); struct dentry *child; struct inode *dir; struct path path; @@ -3368,24 +3406,12 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags, if (unlikely(error)) goto out; dir = path.dentry->d_inode; - /* we want directory to be writable */ - error = inode_permission(dir, MAY_WRITE | MAY_EXEC); - if (error) - goto out2; - if (!dir->i_op->tmpfile) { - error = -EOPNOTSUPP; - goto out2; - } - child = d_alloc(path.dentry, &name); - if (unlikely(!child)) { - error = -ENOMEM; + child = vfs_tmpfile(dir, path.dentry, op->mode, op->open_flag); + error = PTR_ERR(child); + if (unlikely(IS_ERR(child))) goto out2; - } dput(path.dentry); path.dentry = child; - error = dir->i_op->tmpfile(dir, child, op->mode); - if (error) - goto out2; 
audit_inode(nd->name, child, 0); /* Don't check for other permissions, the inode was just created */ error = may_open(&path, 0, op->open_flag); @@ -3396,14 +3422,8 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags, if (error) goto out2; error = open_check_o_direct(file); - if (error) { + if (error) fput(file); - } else if (!(op->open_flag & O_EXCL)) { - struct inode *inode = file_inode(file); - spin_lock(&inode->i_lock); - inode->i_state |= I_LINKABLE; - spin_unlock(&inode->i_lock); - } out2: mnt_drop_write(path.mnt); out: diff --git a/include/linux/fs.h b/include/linux/fs.h index 2ba0743..8c7cbcb 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1561,6 +1561,9 @@ extern int vfs_unlink(struct inode *, struct dentry *, struct inode **); extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int); extern int vfs_whiteout(struct inode *, struct dentry *); +extern struct dentry *vfs_tmpfile(struct inode *dir, struct dentry *dentry, + umode_t mode, int open_flag); + /* * VFS file helper functions. */
Factor out some common vfs bits from do_tmpfile() to be used by overlayfs for concurrent copy up. Signed-off-by: Amir Goldstein <amir73il@gmail.com> --- fs/namei.c | 66 +++++++++++++++++++++++++++++++++++------------------- include/linux/fs.h | 3 +++ 2 files changed, 46 insertions(+), 23 deletions(-)