Message ID | 20181203083416.28978-5-david@fromorbit.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | fs: fixes for major copy_file_range() issues | expand |
On Mon, Dec 3, 2018 at 10:34 AM Dave Chinner <david@fromorbit.com> wrote: > > From: Dave Chinner <dchinner@redhat.com> > > Like the clone and dedupe interfaces we've recently fixed, the > copy_file_range() implementation is missing basic sanity, limits and > boundary condition tests on the parameters that are passed to it > from userspace. Create a new "generic_copy_file_checks()" function > modelled on the generic_remap_checks() function to provide this > missing functionality. > > Signed-off-by: Dave Chinner <dchinner@redhat.com> > --- Looks good. Reviewed-by: Amir Goldstein <amir73il@gmail.com>
On Mon, Dec 03, 2018 at 07:34:09PM +1100, Dave Chinner wrote: > From: Dave Chinner <dchinner@redhat.com> > > Like the clone and dedupe interfaces we've recently fixed, the > copy_file_range() implementation is missing basic sanity, limits and > boundary condition tests on the parameters that are passed to it > from userspace. Create a new "generic_copy_file_checks()" function > modelled on the generic_remap_checks() function to provide this > missing functionality. > > Signed-off-by: Dave Chinner <dchinner@redhat.com> > --- > fs/read_write.c | 27 ++++++------------ > include/linux/fs.h | 3 ++ > mm/filemap.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 81 insertions(+), 18 deletions(-) > > diff --git a/fs/read_write.c b/fs/read_write.c > index 44339b44accc..69809345977e 100644 > --- a/fs/read_write.c > +++ b/fs/read_write.c > @@ -1578,7 +1578,7 @@ static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, > return file_out->f_op->copy_file_range(file_in, pos_in, file_out, > pos_out, len, flags); > > - return generic_copy_file_range(file_in, &pos_in, file_out, &pos_out, > + return generic_copy_file_range(file_in, pos_in, file_out, pos_out, > len, flags); > } > > @@ -1598,10 +1598,14 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, > if (flags != 0) > return -EINVAL; > > - if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) > - return -EISDIR; > - if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) > - return -EINVAL; > + /* this could be relaxed once a method supports cross-fs copies */ > + if (inode_in->i_sb != inode_out->i_sb) > + return -EXDEV; > + > + ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len, > + flags); > + if (ret < 0) > + return ret; > > ret = rw_verify_area(READ, file_in, &pos_in, len); > if (unlikely(ret)) > @@ -1611,22 +1615,9 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, > if (unlikely(ret)) > return ret; > > - if (!(file_in->f_mode & FMODE_READ) || > - !(file_out->f_mode & FMODE_WRITE) || > - (file_out->f_flags & O_APPEND)) > - return -EBADF; > - > - /* this could be relaxed once a method supports cross-fs copies */ > - if (inode_in->i_sb != inode_out->i_sb) > - return -EXDEV; > - > if (len == 0) > return 0; > > - /* If the source range crosses EOF, fail the copy */ > - if (pos_in >= i_size(inode_in) || pos_in + len > i_size(inode_in)) > - return -EINVAL; > - > file_start_write(file_out); > > /* > diff --git a/include/linux/fs.h b/include/linux/fs.h > index a4478764cf63..0d9d2d93d4df 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -3022,6 +3022,9 @@ extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); > extern int generic_remap_checks(struct file *file_in, loff_t pos_in, > struct file *file_out, loff_t pos_out, > loff_t *count, unsigned int remap_flags); > +extern int generic_copy_file_checks(struct file *file_in, loff_t pos_in, > + struct file *file_out, loff_t pos_out, > + size_t *count, unsigned int flags); > extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); > extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); > extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); > diff --git a/mm/filemap.c b/mm/filemap.c > index 81adec8ee02c..0a170425935b 100644 > --- a/mm/filemap.c > +++ b/mm/filemap.c > @@ -2975,6 +2975,75 @@ int generic_remap_checks(struct file *file_in, loff_t pos_in, > return 0; > } > > + > +/* > + * Performs necessary checks before doing a file copy > + * > + * Can adjust amount of bytes to copy > + * Returns appropriate error code that caller should return or > + * zero in case the copy should be allowed. > + */ > +int generic_copy_file_checks(struct file *file_in, loff_t pos_in, > + struct file *file_out, loff_t pos_out, > + size_t *req_count, unsigned int flags) > +{ > + struct inode *inode_in = file_inode(file_in); > + struct inode *inode_out = file_inode(file_out); > + uint64_t count = *req_count; > + uint64_t bcount; > + loff_t size_in, size_out; > + loff_t bs = inode_out->i_sb->s_blocksize; > + int ret; > + > + /* Don't touch certain kinds of inodes */ > + if (IS_IMMUTABLE(inode_out)) > + return -EPERM; > + > + if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out)) > + return -ETXTBSY; > + > + /* Don't copy dirs, pipes, sockets... */ > + if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) > + return -EISDIR; > + if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) > + return -EINVAL; > + > + if (!(file_in->f_mode & FMODE_READ) || > + !(file_out->f_mode & FMODE_WRITE) || > + (file_out->f_flags & O_APPEND)) > + return -EBADF; These five checks are the same as the ones that are split between do_clone_file_range and generic_remap_file_range_prep. Perhaps they should be factored into a single function that can be called from the do_clone_file_range function as well as do_copy_file_range? (I suspect also vfs_dedupe_file_range_one() should call it too, but the dedupe code is so grotty and weird...) --D > + > + /* Ensure offsets don't wrap. */ > + if (pos_in + count < pos_in || pos_out + count < pos_out) > + return -EOVERFLOW; > + > + size_in = i_size_read(inode_in); > + size_out = i_size_read(inode_out); > + > + /* If the source range crosses EOF, fail the copy */ > + if (pos_in >= size_in) > + return -EINVAL; > + if (pos_in + count > size_in) > + return -EINVAL; > + > + ret = generic_access_check_limits(file_in, pos_in, &count); > + if (ret) > + return ret; > + > + ret = generic_write_check_limits(file_out, pos_out, &count); > + if (ret) > + return ret; > + > + /* Don't allow overlapped copying within the same file. */ > + if (inode_in == inode_out && > + pos_out + count > pos_in && > + pos_out < pos_in + count) > + return -EINVAL; > + > + *req_count = count; > + return 0; > +} > + > int pagecache_write_begin(struct file *file, struct address_space *mapping, > loff_t pos, unsigned len, unsigned flags, > struct page **pagep, void **fsdata) > -- > 2.19.1 >
On Mon, Dec 3, 2018 at 3:34 AM Dave Chinner <david@fromorbit.com> wrote: > > From: Dave Chinner <dchinner@redhat.com> > > Like the clone and dedupe interfaces we've recently fixed, the > copy_file_range() implementation is missing basic sanity, limits and > boundary condition tests on the parameters that are passed to it > from userspace. Create a new "generic_copy_file_checks()" function > modelled on the generic_remap_checks() function to provide this > missing functionality. > > Signed-off-by: Dave Chinner <dchinner@redhat.com> > --- > fs/read_write.c | 27 ++++++------------ > include/linux/fs.h | 3 ++ > mm/filemap.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 81 insertions(+), 18 deletions(-) > > diff --git a/fs/read_write.c b/fs/read_write.c > index 44339b44accc..69809345977e 100644 > --- a/fs/read_write.c > +++ b/fs/read_write.c > @@ -1578,7 +1578,7 @@ static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, > return file_out->f_op->copy_file_range(file_in, pos_in, file_out, > pos_out, len, flags); > > - return generic_copy_file_range(file_in, &pos_in, file_out, &pos_out, > + return generic_copy_file_range(file_in, pos_in, file_out, pos_out, > len, flags); > } > > @@ -1598,10 +1598,14 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, > if (flags != 0) > return -EINVAL; > > - if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) > - return -EISDIR; > - if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) > - return -EINVAL; > + /* this could be relaxed once a method supports cross-fs copies */ > + if (inode_in->i_sb != inode_out->i_sb) > + return -EXDEV; > + > + ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len, > + flags); > + if (ret < 0) > + return ret; > > ret = rw_verify_area(READ, file_in, &pos_in, len); > if (unlikely(ret)) > @@ -1611,22 +1615,9 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, > if (unlikely(ret)) > return ret; > > - if (!(file_in->f_mode & FMODE_READ) || > - !(file_out->f_mode & FMODE_WRITE) || > - (file_out->f_flags & O_APPEND)) > - return -EBADF; > - > - /* this could be relaxed once a method supports cross-fs copies */ > - if (inode_in->i_sb != inode_out->i_sb) > - return -EXDEV; > - > if (len == 0) > return 0; > > - /* If the source range crosses EOF, fail the copy */ > - if (pos_in >= i_size(inode_in) || pos_in + len > i_size(inode_in)) > - return -EINVAL; > - > file_start_write(file_out); > > /* > diff --git a/include/linux/fs.h b/include/linux/fs.h > index a4478764cf63..0d9d2d93d4df 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -3022,6 +3022,9 @@ extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); > extern int generic_remap_checks(struct file *file_in, loff_t pos_in, > struct file *file_out, loff_t pos_out, > loff_t *count, unsigned int remap_flags); > +extern int generic_copy_file_checks(struct file *file_in, loff_t pos_in, > + struct file *file_out, loff_t pos_out, > + size_t *count, unsigned int flags); > extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); > extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); > extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); > diff --git a/mm/filemap.c b/mm/filemap.c > index 81adec8ee02c..0a170425935b 100644 > --- a/mm/filemap.c > +++ b/mm/filemap.c > @@ -2975,6 +2975,75 @@ int generic_remap_checks(struct file *file_in, loff_t pos_in, > return 0; > } > > + > +/* > + * Performs necessary checks before doing a file copy > + * > + * Can adjust amount of bytes to copy > + * Returns appropriate error code that caller should return or > + * zero in case the copy should be allowed. > + */ > +int generic_copy_file_checks(struct file *file_in, loff_t pos_in, > + struct file *file_out, loff_t pos_out, > + size_t *req_count, unsigned int flags) > +{ > + struct inode *inode_in = file_inode(file_in); > + struct inode *inode_out = file_inode(file_out); > + uint64_t count = *req_count; > + uint64_t bcount; > + loff_t size_in, size_out; > + loff_t bs = inode_out->i_sb->s_blocksize; > + int ret; I got compile warnings: mm/filemap.c: In function ‘generic_copy_file_checks’: mm/filemap.c:2995:9: warning: unused variable ‘bs’ [-Wunused-variable] loff_t bs = inode_out->i_sb->s_blocksize; ^ mm/filemap.c:2993:11: warning: unused variable ‘bcount’ [-Wunused-variable] uint64_t bcount; > + > + /* Don't touch certain kinds of inodes */ > + if (IS_IMMUTABLE(inode_out)) > + return -EPERM; > + > + if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out)) > + return -ETXTBSY; > + > + /* Don't copy dirs, pipes, sockets... */ > + if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) > + return -EISDIR; > + if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) > + return -EINVAL; > + > + if (!(file_in->f_mode & FMODE_READ) || > + !(file_out->f_mode & FMODE_WRITE) || > + (file_out->f_flags & O_APPEND)) > + return -EBADF; > + > + /* Ensure offsets don't wrap. */ > + if (pos_in + count < pos_in || pos_out + count < pos_out) > + return -EOVERFLOW; > + > + size_in = i_size_read(inode_in); > + size_out = i_size_read(inode_out); > + > + /* If the source range crosses EOF, fail the copy */ > + if (pos_in >= size_in) > + return -EINVAL; > + if (pos_in + count > size_in) > + return -EINVAL; > + > + ret = generic_access_check_limits(file_in, pos_in, &count); > + if (ret) > + return ret; > + > + ret = generic_write_check_limits(file_out, pos_out, &count); > + if (ret) > + return ret; > + > + /* Don't allow overlapped copying within the same file. */ > + if (inode_in == inode_out && > + pos_out + count > pos_in && > + pos_out < pos_in + count) > + return -EINVAL; > + > + *req_count = count; > + return 0; > +} > + > int pagecache_write_begin(struct file *file, struct address_space *mapping, > loff_t pos, unsigned len, unsigned flags, > struct page **pagep, void **fsdata) > -- > 2.19.1 >
On Mon, Dec 03, 2018 at 04:33:30PM -0500, Olga Kornievskaia wrote: > On Mon, Dec 3, 2018 at 3:34 AM Dave Chinner <david@fromorbit.com> wrote: > > --- a/include/linux/fs.h > > +++ b/include/linux/fs.h > > @@ -3022,6 +3022,9 @@ extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); > > extern int generic_remap_checks(struct file *file_in, loff_t pos_in, > > struct file *file_out, loff_t pos_out, > > loff_t *count, unsigned int remap_flags); > > +extern int generic_copy_file_checks(struct file *file_in, loff_t pos_in, > > + struct file *file_out, loff_t pos_out, > > + size_t *count, unsigned int flags); > > extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); > > extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); > > extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); > > diff --git a/mm/filemap.c b/mm/filemap.c > > index 81adec8ee02c..0a170425935b 100644 > > --- a/mm/filemap.c > > +++ b/mm/filemap.c > > @@ -2975,6 +2975,75 @@ int generic_remap_checks(struct file *file_in, loff_t pos_in, > > return 0; > > } > > > > + > > +/* > > + * Performs necessary checks before doing a file copy > > + * > > + * Can adjust amount of bytes to copy > > + * Returns appropriate error code that caller should return or > > + * zero in case the copy should be allowed. > > + */ > > +int generic_copy_file_checks(struct file *file_in, loff_t pos_in, > > + struct file *file_out, loff_t pos_out, > > + size_t *req_count, unsigned int flags) > > +{ > > + struct inode *inode_in = file_inode(file_in); > > + struct inode *inode_out = file_inode(file_out); > > + uint64_t count = *req_count; > > + uint64_t bcount; > > + loff_t size_in, size_out; > > + loff_t bs = inode_out->i_sb->s_blocksize; > > + int ret; > > I got compile warnings: > > mm/filemap.c: In function ‘generic_copy_file_checks’: > mm/filemap.c:2995:9: warning: unused variable ‘bs’ [-Wunused-variable] > loff_t bs = inode_out->i_sb->s_blocksize; > ^ > mm/filemap.c:2993:11: warning: unused variable ‘bcount’ [-Wunused-variable] > uint64_t bcount; Strange. Yes, they certainly are there when I compile my stack up to this point, but when I compile the whole series they aren't there. I'll fix it up. Cheers, Dave.
Ok, this fixes the earlier i_size() compile failure..
Dave Chinner <david@fromorbit.com> writes: <snip> > +int generic_copy_file_checks(struct file *file_in, loff_t pos_in, > + struct file *file_out, loff_t pos_out, > + size_t *req_count, unsigned int flags) > +{ <snip> > + /* Don't allow overlapped copying within the same file. */ > + if (inode_in == inode_out && > + pos_out + count > pos_in && > + pos_out < pos_in + count) > + return -EINVAL; I was wondering if, with the above check, it would make sense to also have an extra patch changing some filesystems (ceph, nfs and cifs) to simply return -EOPNOTSUPP (instead of -EINVAL) when inode_in == inode_out. Something like the diff below (not tested!). This caught my attention when I was running the latest generic xfstests on ceph and realised that I had some new failures due to the recently added copy_file_range support in fsx by Darrick. The failures were caused by the usage of the same fd both as source and destination. Cheers,
On Wed, Dec 12, 2018 at 11:31:23AM +0000, Luis Henriques wrote: > Dave Chinner <david@fromorbit.com> writes: > > <snip> > > > +int generic_copy_file_checks(struct file *file_in, loff_t pos_in, > > + struct file *file_out, loff_t pos_out, > > + size_t *req_count, unsigned int flags) > > +{ > > <snip> > > > + /* Don't allow overlapped copying within the same file. */ > > + if (inode_in == inode_out && > > + pos_out + count > pos_in && > > + pos_out < pos_in + count) > > + return -EINVAL; > > I was wondering if, with the above check, it would make sense to also > have an extra patch changing some filesystems (ceph, nfs and cifs) to > simply return -EOPNOTSUPP (instead of -EINVAL) when inode_in == > inode_out. Something like the diff below (not tested!). > > This caught my attention when I was running the latest generic xfstests > on ceph and realised that I had some new failures due to the recently > added copy_file_range support in fsx by Darrick. The failures were > caused by the usage of the same fd both as source and destination. Looks reasonable to /me/, since EOPNOTSUPP currently triggers the splice fallback. --D > Cheers, > -- > Luis > > > diff --git a/fs/ceph/file.c b/fs/ceph/file.c > index 189df668b6a0..c22ac60ec0ba 100644 > --- a/fs/ceph/file.c > +++ b/fs/ceph/file.c > @@ -1904,7 +1904,7 @@ static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off, > bool do_final_copy = false; > > if (src_inode == dst_inode) > - return -EINVAL; > + return -EOPNOTSUPP; > if (ceph_snap(dst_inode) != CEPH_NOSNAP) > return -EROFS; > > diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c > index 865706edb307..d4f63eae531e 100644 > --- a/fs/cifs/cifsfs.c > +++ b/fs/cifs/cifsfs.c > @@ -1068,7 +1068,7 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, > cifs_dbg(FYI, "copychunk range\n"); > > if (src_inode == target_inode) { > - rc = -EINVAL; > + rc = -EOPNOTSUPP; > goto out; > } > > diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c > index 46d691ba04bc..910a2abade92 100644 > --- a/fs/nfs/nfs4file.c > +++ b/fs/nfs/nfs4file.c > @@ -136,7 +136,7 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in, > ssize_t ret; > > if (file_inode(file_in) == file_inode(file_out)) > - return -EINVAL; > + return -EOPNOTSUPP; > retry: > ret = nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count); > if (ret == -EAGAIN)
On Wed, Dec 12, 2018 at 6:31 AM Luis Henriques <lhenriques@suse.com> wrote: > > Dave Chinner <david@fromorbit.com> writes: > > <snip> > > > +int generic_copy_file_checks(struct file *file_in, loff_t pos_in, > > + struct file *file_out, loff_t pos_out, > > + size_t *req_count, unsigned int flags) > > +{ > > <snip> > > > + /* Don't allow overlapped copying within the same file. */ > > + if (inode_in == inode_out && > > + pos_out + count > pos_in && > > + pos_out < pos_in + count) > > + return -EINVAL; > > I was wondering if, with the above check, it would make sense to also > have an extra patch changing some filesystems (ceph, nfs and cifs) to > simply return -EOPNOTSUPP (instead of -EINVAL) when inode_in == > inode_out. Something like the diff below (not tested!). > > This caught my attention when I was running the latest generic xfstests > on ceph and realised that I had some new failures due to the recently > added copy_file_range support in fsx by Darrick. The failures were > caused by the usage of the same fd both as source and destination. > > Cheers, > -- > Luis > > > diff --git a/fs/ceph/file.c b/fs/ceph/file.c > index 189df668b6a0..c22ac60ec0ba 100644 > --- a/fs/ceph/file.c > +++ b/fs/ceph/file.c > @@ -1904,7 +1904,7 @@ static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off, > bool do_final_copy = false; > > if (src_inode == dst_inode) > - return -EINVAL; > + return -EOPNOTSUPP; > if (ceph_snap(dst_inode) != CEPH_NOSNAP) > return -EROFS; > > diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c > index 865706edb307..d4f63eae531e 100644 > --- a/fs/cifs/cifsfs.c > +++ b/fs/cifs/cifsfs.c > @@ -1068,7 +1068,7 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, > cifs_dbg(FYI, "copychunk range\n"); > > if (src_inode == target_inode) { > - rc = -EINVAL; > + rc = -EOPNOTSUPP; > goto out; > } > > diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c > index 46d691ba04bc..910a2abade92 100644 > --- a/fs/nfs/nfs4file.c > +++ b/fs/nfs/nfs4file.c > @@ -136,7 +136,7 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in, > ssize_t ret; > > if (file_inode(file_in) == file_inode(file_out)) > - return -EINVAL; > + return -EOPNOTSUPP; Please don't change the NFS bits. This is against the NFS specifications. RFC 7862 15.2.3 (snippet) SAVED_FH and CURRENT_FH must be different files. If SAVED_FH and CURRENT_FH refer to the same file, the operation MUST fail with NFS4ERR_INVAL. > retry: > ret = nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count); > if (ret == -EAGAIN)
On Wed, Dec 12, 2018 at 01:55:28PM -0500, Olga Kornievskaia wrote: > On Wed, Dec 12, 2018 at 6:31 AM Luis Henriques <lhenriques@suse.com> wrote: > > I was wondering if, with the above check, it would make sense to also > > have an extra patch changing some filesystems (ceph, nfs and cifs) to > > simply return -EOPNOTSUPP (instead of -EINVAL) when inode_in == > > inode_out. Something like the diff below (not tested!). > > +++ b/fs/nfs/nfs4file.c > > @@ -136,7 +136,7 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in, > > ssize_t ret; > > > > if (file_inode(file_in) == file_inode(file_out)) > > - return -EINVAL; > > + return -EOPNOTSUPP; > > Please don't change the NFS bits. This is against the NFS > specifications. RFC 7862 15.2.3 > > (snippet) > SAVED_FH and CURRENT_FH must be different files. If SAVED_FH and > CURRENT_FH refer to the same file, the operation MUST fail with > NFS4ERR_INVAL. I don't see how that applies. That refers to a requirement _in the protocol_ that determines what the server MUST do if the client sends it two FHs which refer to the same file. What we're talking about here is how a Linux filesystem behaves when receiving a copy_file_range() referring to the same file. As long as the Linux filesystem doesn't react by sending out one of these invalid protocol messages, I don't see the problem.
On Wed, Dec 12, 2018 at 2:43 PM Matthew Wilcox <willy@infradead.org> wrote: > > On Wed, Dec 12, 2018 at 01:55:28PM -0500, Olga Kornievskaia wrote: > > On Wed, Dec 12, 2018 at 6:31 AM Luis Henriques <lhenriques@suse.com> wrote: > > > I was wondering if, with the above check, it would make sense to also > > > have an extra patch changing some filesystems (ceph, nfs and cifs) to > > > simply return -EOPNOTSUPP (instead of -EINVAL) when inode_in == > > > inode_out. Something like the diff below (not tested!). > > > > +++ b/fs/nfs/nfs4file.c > > > @@ -136,7 +136,7 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in, > > > ssize_t ret; > > > > > > if (file_inode(file_in) == file_inode(file_out)) > > > - return -EINVAL; > > > + return -EOPNOTSUPP; > > > > Please don't change the NFS bits. This is against the NFS > > specifications. RFC 7862 15.2.3 > > > > (snippet) > > SAVED_FH and CURRENT_FH must be different files. If SAVED_FH and > > CURRENT_FH refer to the same file, the operation MUST fail with > > NFS4ERR_INVAL. > > I don't see how that applies. That refers to a requirement _in the > protocol_ that determines what the server MUST do if the client sends > it two FHs which refer to the same file. > > What we're talking about here is how a Linux filesystem behaves when > receiving a copy_file_range() referring to the same file. As long as > the Linux filesystem doesn't react by sending out one of these invalid > protocol messages, I don't see the problem. Ok then this should be changed to call generic_copy_file_range() not returning the EOPNOTSUPP since there is no longer fallback in vfs to call the generic_copy_file_range() and in turn responsibility of each file system.
Olga Kornievskaia <olga.kornievskaia@gmail.com> writes: > On Wed, Dec 12, 2018 at 2:43 PM Matthew Wilcox <willy@infradead.org> wrote: >> >> On Wed, Dec 12, 2018 at 01:55:28PM -0500, Olga Kornievskaia wrote: >> > On Wed, Dec 12, 2018 at 6:31 AM Luis Henriques <lhenriques@suse.com> wrote: >> > > I was wondering if, with the above check, it would make sense to also >> > > have an extra patch changing some filesystems (ceph, nfs and cifs) to >> > > simply return -EOPNOTSUPP (instead of -EINVAL) when inode_in == >> > > inode_out. Something like the diff below (not tested!). >> >> > > +++ b/fs/nfs/nfs4file.c >> > > @@ -136,7 +136,7 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in, >> > > ssize_t ret; >> > > >> > > if (file_inode(file_in) == file_inode(file_out)) >> > > - return -EINVAL; >> > > + return -EOPNOTSUPP; >> > >> > Please don't change the NFS bits. This is against the NFS >> > specifications. RFC 7862 15.2.3 >> > >> > (snippet) >> > SAVED_FH and CURRENT_FH must be different files. If SAVED_FH and >> > CURRENT_FH refer to the same file, the operation MUST fail with >> > NFS4ERR_INVAL. >> >> I don't see how that applies. That refers to a requirement _in the >> protocol_ that determines what the server MUST do if the client sends >> it two FHs which refer to the same file. >> >> What we're talking about here is how a Linux filesystem behaves when >> receiving a copy_file_range() referring to the same file. As long as >> the Linux filesystem doesn't react by sending out one of these invalid >> protocol messages, I don't see the problem. > > Ok then this should be changed to call generic_copy_file_range() not > returning the EOPNOTSUPP since there is no longer fallback in vfs to > call the generic_copy_file_range() and in turn responsibility of each > file system. Ah, I didn't look close enough and didn't realised the nfs code was doing something slightly different from the other 2 FSs. In that case simply deleting that check seems to be enough to fallback to the vfs generic_copy_file_range. Anyway, please find below an updated patch (with proper changelog). Cheers,
diff --git a/fs/read_write.c b/fs/read_write.c index 44339b44accc..69809345977e 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1578,7 +1578,7 @@ static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, return file_out->f_op->copy_file_range(file_in, pos_in, file_out, pos_out, len, flags); - return generic_copy_file_range(file_in, &pos_in, file_out, &pos_out, + return generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, flags); } @@ -1598,10 +1598,14 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, if (flags != 0) return -EINVAL; - if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) - return -EISDIR; - if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) - return -EINVAL; + /* this could be relaxed once a method supports cross-fs copies */ + if (inode_in->i_sb != inode_out->i_sb) + return -EXDEV; + + ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len, + flags); + if (ret < 0) + return ret; ret = rw_verify_area(READ, file_in, &pos_in, len); if (unlikely(ret)) @@ -1611,22 +1615,9 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, if (unlikely(ret)) return ret; - if (!(file_in->f_mode & FMODE_READ) || - !(file_out->f_mode & FMODE_WRITE) || - (file_out->f_flags & O_APPEND)) - return -EBADF; - - /* this could be relaxed once a method supports cross-fs copies */ - if (inode_in->i_sb != inode_out->i_sb) - return -EXDEV; - if (len == 0) return 0; - /* If the source range crosses EOF, fail the copy */ - if (pos_in >= i_size(inode_in) || pos_in + len > i_size(inode_in)) - return -EINVAL; - file_start_write(file_out); /* diff --git a/include/linux/fs.h b/include/linux/fs.h index a4478764cf63..0d9d2d93d4df 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3022,6 +3022,9 @@ extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); extern int generic_remap_checks(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t *count, unsigned int remap_flags); +extern int generic_copy_file_checks(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t *count, unsigned int flags); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); diff --git a/mm/filemap.c b/mm/filemap.c index 81adec8ee02c..0a170425935b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2975,6 +2975,75 @@ int generic_remap_checks(struct file *file_in, loff_t pos_in, return 0; } + +/* + * Performs necessary checks before doing a file copy + * + * Can adjust amount of bytes to copy + * Returns appropriate error code that caller should return or + * zero in case the copy should be allowed. + */ +int generic_copy_file_checks(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t *req_count, unsigned int flags) +{ + struct inode *inode_in = file_inode(file_in); + struct inode *inode_out = file_inode(file_out); + uint64_t count = *req_count; + uint64_t bcount; + loff_t size_in, size_out; + loff_t bs = inode_out->i_sb->s_blocksize; + int ret; + + /* Don't touch certain kinds of inodes */ + if (IS_IMMUTABLE(inode_out)) + return -EPERM; + + if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out)) + return -ETXTBSY; + + /* Don't copy dirs, pipes, sockets... */ + if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) + return -EISDIR; + if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) + return -EINVAL; + + if (!(file_in->f_mode & FMODE_READ) || + !(file_out->f_mode & FMODE_WRITE) || + (file_out->f_flags & O_APPEND)) + return -EBADF; + + /* Ensure offsets don't wrap. */ + if (pos_in + count < pos_in || pos_out + count < pos_out) + return -EOVERFLOW; + + size_in = i_size_read(inode_in); + size_out = i_size_read(inode_out); + + /* If the source range crosses EOF, fail the copy */ + if (pos_in >= size_in) + return -EINVAL; + if (pos_in + count > size_in) + return -EINVAL; + + ret = generic_access_check_limits(file_in, pos_in, &count); + if (ret) + return ret; + + ret = generic_write_check_limits(file_out, pos_out, &count); + if (ret) + return ret; + + /* Don't allow overlapped copying within the same file. */ + if (inode_in == inode_out && + pos_out + count > pos_in && + pos_out < pos_in + count) + return -EINVAL; + + *req_count = count; + return 0; +} + int pagecache_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata)