Message ID | 20190515192715.18000-3-vgoyal@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | virtio-fs: shared file system for virtual machines | expand |
On Wed, May 15, 2019 at 03:26:47PM -0400, Vivek Goyal wrote: > If fuse daemon is started with cache=never, fuse falls back to direct IO. > In that write path we don't call file_remove_privs() and that means setuid > bit is not cleared if unpriviliged user writes to a file with setuid bit set. > > pjdfstest chmod test 12.t tests this and fails. I think better sulution is to tell the server if the suid bit needs to be removed, so it can do so in a race free way. Here's the kernel patch, and I'll reply with the libfuse patch. --- fs/fuse2/file.c | 2 ++ include/uapi/linux/fuse.h | 3 +++ 2 files changed, 5 insertions(+) --- a/fs/fuse2/file.c +++ b/fs/fuse2/file.c @@ -363,6 +363,8 @@ static ssize_t fuse_send_write(struct fu inarg->flags |= O_DSYNC; if (iocb->ki_flags & IOCB_SYNC) inarg->flags |= O_SYNC; + if (!capable(CAP_FSETID)) + inarg->write_flags |= FUSE_WRITE_KILL_PRIV; req->inh.opcode = FUSE_WRITE; req->inh.nodeid = ff->nodeid; req->inh.len = req->inline_inlen + count; --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -125,6 +125,7 @@ * * 7.29 * - add FUSE_NO_OPENDIR_SUPPORT flag + * - add FUSE_WRITE_KILL_PRIV flag */ #ifndef _LINUX_FUSE_H @@ -318,9 +319,11 @@ struct fuse_file_lock { * * FUSE_WRITE_CACHE: delayed write from page cache, file handle is guessed * FUSE_WRITE_LOCKOWNER: lock_owner field is valid + * FUSE_WRITE_KILL_PRIV: kill suid and sgid bits */ #define FUSE_WRITE_CACHE (1 << 0) #define FUSE_WRITE_LOCKOWNER (1 << 1) +#define FUSE_WRITE_KILL_PRIV (1 << 2) /** * Read flags
On Mon, May 20, 2019 at 04:41:37PM +0200, Miklos Szeredi wrote: > On Wed, May 15, 2019 at 03:26:47PM -0400, Vivek Goyal wrote: > > If fuse daemon is started with cache=never, fuse falls back to direct IO. > > In that write path we don't call file_remove_privs() and that means setuid > > bit is not cleared if unpriviliged user writes to a file with setuid bit set. > > > > pjdfstest chmod test 12.t tests this and fails. > > I think better sulution is to tell the server if the suid bit needs to be > removed, so it can do so in a race free way. > > Here's the kernel patch, and I'll reply with the libfuse patch. Here are the patches for libfuse and passthrough_ll. --- include/fuse_common.h | 5 ++++- include/fuse_kernel.h | 2 ++ lib/fuse_lowlevel.c | 12 ++++++++---- 3 files changed, 14 insertions(+), 5 deletions(-) --- a/include/fuse_common.h +++ b/include/fuse_common.h @@ -64,8 +64,11 @@ struct fuse_file_info { May only be set in ->release(). */ unsigned int flock_release : 1; + /* Kill suid and sgid bits on write */ + unsigned int write_kill_priv : 1; + /** Padding. Do not use*/ - unsigned int padding : 27; + unsigned int padding : 26; /** File handle. May be filled in by filesystem in open(). Available in all other file operations */ --- a/include/fuse_kernel.h +++ b/include/fuse_kernel.h @@ -304,9 +304,11 @@ struct fuse_file_lock { * * FUSE_WRITE_CACHE: delayed write from page cache, file handle is guessed * FUSE_WRITE_LOCKOWNER: lock_owner field is valid + * FUSE_WRITE_KILL_PRIV: kill suid and sgid bits */ #define FUSE_WRITE_CACHE (1 << 0) #define FUSE_WRITE_LOCKOWNER (1 << 1) +#define FUSE_WRITE_KILL_PRIV (1 << 2) /** * Read flags --- a/lib/fuse_lowlevel.c +++ b/lib/fuse_lowlevel.c @@ -1315,12 +1315,14 @@ static void do_write(fuse_req_t req, fus memset(&fi, 0, sizeof(fi)); fi.fh = arg->fh; - fi.writepage = (arg->write_flags & 1) != 0; + fi.writepage = (arg->write_flags & FUSE_WRITE_CACHE) != 0; + fi.write_kill_priv = (arg->write_flags & FUSE_WRITE_KILL_PRIV) != 0; if (req->se->conn.proto_minor < 9) { param = ((char *) arg) + FUSE_COMPAT_WRITE_IN_SIZE; } else { - fi.lock_owner = arg->lock_owner; + if (arg->write_flags & FUSE_WRITE_LOCKOWNER) + fi.lock_owner = arg->lock_owner; fi.flags = arg->flags; param = PARAM(arg); } @@ -1345,7 +1347,8 @@ static void do_write_buf(fuse_req_t req, memset(&fi, 0, sizeof(fi)); fi.fh = arg->fh; - fi.writepage = arg->write_flags & 1; + fi.writepage = (arg->write_flags & FUSE_WRITE_CACHE) != 0; + fi.write_kill_priv = (arg->write_flags & FUSE_WRITE_KILL_PRIV) != 0; if (se->conn.proto_minor < 9) { bufv.buf[0].mem = ((char *) arg) + FUSE_COMPAT_WRITE_IN_SIZE; @@ -1353,7 +1356,8 @@ static void do_write_buf(fuse_req_t req, FUSE_COMPAT_WRITE_IN_SIZE; assert(!(bufv.buf[0].flags & FUSE_BUF_IS_FD)); } else { - fi.lock_owner = arg->lock_owner; + if (arg->write_flags & FUSE_WRITE_LOCKOWNER) + fi.lock_owner = arg->lock_owner; fi.flags = arg->flags; if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) bufv.buf[0].mem = PARAM(arg); --- example/passthrough_ll.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) --- a/example/passthrough_ll.c +++ b/example/passthrough_ll.c @@ -56,6 +56,7 @@ #include <sys/file.h> #include <sys/xattr.h> #include <sys/syscall.h> +#include <sys/capability.h> /* We are re-using pointers to our `struct lo_inode` and `struct lo_dirp` elements as inodes. This means that we must be able to @@ -965,6 +966,11 @@ static void lo_write_buf(fuse_req_t req, (void) ino; ssize_t res; struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); + struct __user_cap_header_struct cap_hdr = { + .version = _LINUX_CAPABILITY_VERSION_1, + }; + struct __user_cap_data_struct cap_orig; + struct __user_cap_data_struct cap_new; out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; out_buf.buf[0].fd = fi->fh; @@ -974,7 +980,28 @@ static void lo_write_buf(fuse_req_t req, fprintf(stderr, "lo_write(ino=%" PRIu64 ", size=%zd, off=%lu)\n", ino, out_buf.buf[0].size, (unsigned long) off); + if (fi->write_kill_priv) { + res = capget(&cap_hdr, &cap_orig); + if (res == -1) { + fuse_reply_err(req, errno); + return; + } + cap_new = cap_orig; + cap_new.effective &= ~(1 << CAP_FSETID); + res = capset(&cap_hdr, &cap_new); + if (res == -1) { + fuse_reply_err(req, errno); + return; + } + } + res = fuse_buf_copy(&out_buf, in_buf, 0); + + if (fi->write_kill_priv) { + if (capset(&cap_hdr, &cap_orig) != 0) + abort(); + } + if(res < 0) fuse_reply_err(req, -res); else @@ -1215,7 +1242,7 @@ static void lo_copy_file_range(fuse_req_ res = copy_file_range(fi_in->fh, &off_in, fi_out->fh, &off_out, len, flags); if (res < 0) - fuse_reply_err(req, -errno); + fuse_reply_err(req, errno); else fuse_reply_write(req, res); }
On May 20 2019, Miklos Szeredi <miklos@szeredi.hu> wrote: > On Mon, May 20, 2019 at 04:41:37PM +0200, Miklos Szeredi wrote: >> On Wed, May 15, 2019 at 03:26:47PM -0400, Vivek Goyal wrote: >> > If fuse daemon is started with cache=never, fuse falls back to direct IO. >> > In that write path we don't call file_remove_privs() and that means setuid >> > bit is not cleared if unpriviliged user writes to a file with setuid bit set. >> > >> > pjdfstest chmod test 12.t tests this and fails. >> >> I think better sulution is to tell the server if the suid bit needs to be >> removed, so it can do so in a race free way. >> >> Here's the kernel patch, and I'll reply with the libfuse patch. > > Here are the patches for libfuse and passthrough_ll. Could you also submit them as pull requests at https://github.com/libfuse/libfuse/pulls? Best, -Nikolaus
On Mon, May 20, 2019 at 04:41:37PM +0200, Miklos Szeredi wrote: > On Wed, May 15, 2019 at 03:26:47PM -0400, Vivek Goyal wrote: > > If fuse daemon is started with cache=never, fuse falls back to direct IO. > > In that write path we don't call file_remove_privs() and that means setuid > > bit is not cleared if unpriviliged user writes to a file with setuid bit set. > > > > pjdfstest chmod test 12.t tests this and fails. > > I think better sulution is to tell the server if the suid bit needs to be > removed, so it can do so in a race free way. > > Here's the kernel patch, and I'll reply with the libfuse patch. Hi Miklos, I tested and it works for me. Vivek > > --- > fs/fuse2/file.c | 2 ++ > include/uapi/linux/fuse.h | 3 +++ > 2 files changed, 5 insertions(+) > > --- a/fs/fuse2/file.c > +++ b/fs/fuse2/file.c > @@ -363,6 +363,8 @@ static ssize_t fuse_send_write(struct fu > inarg->flags |= O_DSYNC; > if (iocb->ki_flags & IOCB_SYNC) > inarg->flags |= O_SYNC; > + if (!capable(CAP_FSETID)) > + inarg->write_flags |= FUSE_WRITE_KILL_PRIV; > req->inh.opcode = FUSE_WRITE; > req->inh.nodeid = ff->nodeid; > req->inh.len = req->inline_inlen + count; > --- a/include/uapi/linux/fuse.h > +++ b/include/uapi/linux/fuse.h > @@ -125,6 +125,7 @@ > * > * 7.29 > * - add FUSE_NO_OPENDIR_SUPPORT flag > + * - add FUSE_WRITE_KILL_PRIV flag > */ > > #ifndef _LINUX_FUSE_H > @@ -318,9 +319,11 @@ struct fuse_file_lock { > * > * FUSE_WRITE_CACHE: delayed write from page cache, file handle is guessed > * FUSE_WRITE_LOCKOWNER: lock_owner field is valid > + * FUSE_WRITE_KILL_PRIV: kill suid and sgid bits > */ > #define FUSE_WRITE_CACHE (1 << 0) > #define FUSE_WRITE_LOCKOWNER (1 << 1) > +#define FUSE_WRITE_KILL_PRIV (1 << 2) > > /** > * Read flags > >
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 06096b60f1df..5baf07fd2876 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1456,14 +1456,18 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from) /* Don't allow parallel writes to the same file */ inode_lock(inode); res = generic_write_checks(iocb, from); - if (res > 0) { - if (!is_sync_kiocb(iocb) && iocb->ki_flags & IOCB_DIRECT) { - res = fuse_direct_IO(iocb, from); - } else { - res = fuse_direct_io(&io, from, &iocb->ki_pos, - FUSE_DIO_WRITE); - } + if (res <= 0) + goto out; + + res = file_remove_privs(iocb->ki_filp); + if (res) + goto out; + if (!is_sync_kiocb(iocb) && iocb->ki_flags & IOCB_DIRECT) { + res = fuse_direct_IO(iocb, from); + } else { + res = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE); } +out: fuse_invalidate_attr(inode); if (res > 0) fuse_write_update_size(inode, iocb->ki_pos);
If fuse daemon is started with cache=never, fuse falls back to direct IO. In that write path we don't call file_remove_privs() and that means setuid bit is not cleared if unpriviliged user writes to a file with setuid bit set. pjdfstest chmod test 12.t tests this and fails. Fix this by calling fuse_remove_privs() even for direct I/O path. I tested this as follows. - Run fuse example pasthrough fs. $ passthrough_ll /mnt/pasthrough-mnt -o default_permissions,allow_other,cache=never $ mkdir /mnt/pasthrough-mnt/testdir $ cd /mnt/pasthrough-mnt/testdir $ prove -rv pjdfstests/tests/chmod/12.t Signed-off-by: Vivek Goyal <vgoyal@redhat.com> --- fs/fuse/file.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-)