Message ID | 20181203083416.28978-10-david@fromorbit.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | fs: fixes for major copy_file_range() issues | expand |
On Mon, Dec 3, 2018 at 10:34 AM Dave Chinner <david@fromorbit.com> wrote: > > From: Dave Chinner <dchinner@redhat.com> > > We want to enable cross-filesystem copy_file_range functionality > where possible, so push the "same superblock only" checks down to > the individual filesystem callouts so they can make their own > decisions about cross-superblock copy offload. > > Signed-off-by: Dave Chinner <dchinner@redhat.com> Looks good. You may add Reviewed-by: Amir Goldstein <amir73il@gmail.com> Similar comment about overlayfs as patch 3. diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c > index 68736e5d6a56..34fb0398d016 100644 > --- a/fs/overlayfs/file.c > +++ b/fs/overlayfs/file.c > @@ -443,6 +443,14 @@ static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in, > const struct cred *old_cred; > loff_t ret; > > + /* > + * Temporary. Cross device copy checks should be left to the copy file > + * call on the real inodes, but existing behaviour checks the upper > + * files only. > + */ > + if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) > + return -EXDEV; > + > ret = ovl_real_fdget(file_out, &real_out); > if (ret) > return ret; > @@ -491,7 +499,7 @@ static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in, > ret = ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags, > OVL_COPY); > > - if (ret == -EOPNOTSUPP) > + if (ret == -EOPNOTSUPP || ret == -EXDEV) > ret = generic_copy_file_range(file_in, pos_in, file_out, > pos_out, len, flags); This fallback is already provided by vfs_copy_file_range(). Thanks, Amir.
On Mon, Dec 3, 2018 at 3:34 AM Dave Chinner <david@fromorbit.com> wrote: > > From: Dave Chinner <dchinner@redhat.com> > > We want to enable cross-filesystem copy_file_range functionality > where possible, so push the "same superblock only" checks down to > the individual filesystem callouts so they can make their own > decisions about cross-superblock copy offload. > > Signed-off-by: Dave Chinner <dchinner@redhat.com> Overall VFS/NFS bits look good to me. I'm re-basing my client and server patch series on top of this and will test it out. Thank you. > --- > fs/ceph/file.c | 4 +++- > fs/cifs/cifsfs.c | 8 +++++++- > fs/fuse/file.c | 5 ++++- > fs/nfs/nfs4file.c | 16 ++++++++++------ > fs/overlayfs/file.c | 10 +++++++++- > fs/read_write.c | 10 ++++------ > 6 files changed, 37 insertions(+), 16 deletions(-) > > diff --git a/fs/ceph/file.c b/fs/ceph/file.c > index cf29f0410dcb..eb876e19c1dc 100644 > --- a/fs/ceph/file.c > +++ b/fs/ceph/file.c > @@ -1905,6 +1905,8 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, > > if (src_inode == dst_inode) > return -EINVAL; > + if (src_inode->i_sb != dst_inode->i_sb) > + return -EXDEV; > if (ceph_snap(dst_inode) != CEPH_NOSNAP) > return -EROFS; > > @@ -2105,7 +2107,7 @@ static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off, > ret = __ceph_copy_file_range(src_file, src_off, dst_file, dst_off, > len, flags); > > - if (ret == -EOPNOTSUPP) > + if (ret == -EOPNOTSUPP || ret == -EXDEV) > ret = generic_copy_file_range(src_file, src_off, dst_file, > dst_off, len, flags); > return ret; > diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c > index 5ef4baec6234..03e4b9eacbd1 100644 > --- a/fs/cifs/cifsfs.c > +++ b/fs/cifs/cifsfs.c > @@ -1072,6 +1072,12 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, > goto out; > } > > + if (src_inode->i_sb != target_inode->i_sb) { > + rc = -EXDEV; > + goto out; > + } > + > + > if (!src_file->private_data || !dst_file->private_data) { > rc = -EBADF; > cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n"); > @@ -1142,7 +1148,7 @@ static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off, > len, flags); > free_xid(xid); > > - if (rc == -EOPNOTSUPP) > + if (rc == -EOPNOTSUPP || rc == -EXDEV) > rc = generic_copy_file_range(src_file, off, dst_file, > destoff, len, flags); > return rc; > diff --git a/fs/fuse/file.c b/fs/fuse/file.c > index b86fb0298739..0758f831a4eb 100644 > --- a/fs/fuse/file.c > +++ b/fs/fuse/file.c > @@ -3053,6 +3053,9 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in, > if (fc->no_copy_file_range) > return -EOPNOTSUPP; > > + if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) > + return -EXDEV; > + > inode_lock(inode_out); > > if (fc->writeback_cache) { > @@ -3109,7 +3112,7 @@ static ssize_t fuse_copy_file_range(struct file *src_file, loff_t src_off, > ret = __fuse_copy_file_range(src_file, src_off, dst_file, dst_off, > len, flags); > > - if (ret == -EOPNOTSUPP) > + if (ret == -EOPNOTSUPP || ret == -EXDEV) > ret = generic_copy_file_range(src_file, src_off, dst_file, > dst_off, len, flags); > return ret; > diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c > index d7766a6eb0f4..4783c0c1c49e 100644 > --- a/fs/nfs/nfs4file.c > +++ b/fs/nfs/nfs4file.c > @@ -133,16 +133,20 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in, > struct file *file_out, loff_t pos_out, > size_t count, unsigned int flags) > { > - ssize_t ret; > + ssize_t ret = -EXDEV; > > if (file_inode(file_in) == file_inode(file_out)) > return -EINVAL; > -retry: > - ret = nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count); > - if (ret == -EAGAIN) > - goto retry; > > - if (ret == -EOPNOTSUPP) > + /* only offload copy if superblock is the same */ > + if (file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) { > + do { > + ret = nfs42_proc_copy(file_in, pos_in, file_out, > + pos_out, count); > + } while (ret == -EAGAIN); > + } > + > + if (ret == -EOPNOTSUPP || ret == -EXDEV) > ret = generic_copy_file_range(file_in, pos_in, file_out, > pos_out, count, flags); > return ret; > diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c > index 68736e5d6a56..34fb0398d016 100644 > --- a/fs/overlayfs/file.c > +++ b/fs/overlayfs/file.c > @@ -443,6 +443,14 @@ static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in, > const struct cred *old_cred; > loff_t ret; > > + /* > + * Temporary. Cross device copy checks should be left to the copy file > + * call on the real inodes, but existing behaviour checks the upper > + * files only. > + */ > + if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) > + return -EXDEV; > + > ret = ovl_real_fdget(file_out, &real_out); > if (ret) > return ret; > @@ -491,7 +499,7 @@ static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in, > ret = ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags, > OVL_COPY); > > - if (ret == -EOPNOTSUPP) > + if (ret == -EOPNOTSUPP || ret == -EXDEV) > ret = generic_copy_file_range(file_in, pos_in, file_out, > pos_out, len, flags); > return ret; > diff --git a/fs/read_write.c b/fs/read_write.c > index 174cf92eea1d..4e0666de0d69 100644 > --- a/fs/read_write.c > +++ b/fs/read_write.c > @@ -1565,6 +1565,10 @@ ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, > struct file *file_out, loff_t pos_out, > size_t len, unsigned int flags) > { > + /* Temporary, do_splice_direct supports cross-sb copies */ > + if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) > + return -EXDEV; > + > return do_splice_direct(file_in, &pos_in, file_out, &pos_out, > len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); > } > @@ -1611,17 +1615,11 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, > struct file *file_out, loff_t pos_out, > size_t len, unsigned int flags) > { > - struct inode *inode_in = file_inode(file_in); > - struct inode *inode_out = file_inode(file_out); > ssize_t ret; > > if (flags != 0) > return -EINVAL; > > - /* this could be relaxed once a method supports cross-fs copies */ > - if (inode_in->i_sb != inode_out->i_sb) > - return -EXDEV; > - > ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len, > flags); > if (ret < 0) > -- > 2.19.1 >
On Mon, 2018-12-03 at 19:34 +1100, Dave Chinner wrote: > From: Dave Chinner <dchinner@redhat.com> > > We want to enable cross-filesystem copy_file_range functionality > where possible, so push the "same superblock only" checks down to > the individual filesystem callouts so they can make their own > decisions about cross-superblock copy offload. > > Signed-off-by: Dave Chinner <dchinner@redhat.com> > --- > fs/ceph/file.c | 4 +++- > fs/cifs/cifsfs.c | 8 +++++++- > fs/fuse/file.c | 5 ++++- > fs/nfs/nfs4file.c | 16 ++++++++++------ > fs/overlayfs/file.c | 10 +++++++++- > fs/read_write.c | 10 ++++------ > 6 files changed, 37 insertions(+), 16 deletions(-) > > diff --git a/fs/ceph/file.c b/fs/ceph/file.c > index cf29f0410dcb..eb876e19c1dc 100644 > --- a/fs/ceph/file.c > +++ b/fs/ceph/file.c > @@ -1905,6 +1905,8 @@ static ssize_t __ceph_copy_file_range(struct file > *src_file, loff_t src_off, > > if (src_inode == dst_inode) > return -EINVAL; > + if (src_inode->i_sb != dst_inode->i_sb) > + return -EXDEV; > if (ceph_snap(dst_inode) != CEPH_NOSNAP) > return -EROFS; > > @@ -2105,7 +2107,7 @@ static ssize_t ceph_copy_file_range(struct file > *src_file, loff_t src_off, > ret = __ceph_copy_file_range(src_file, src_off, dst_file, dst_off, > len, flags); > > - if (ret == -EOPNOTSUPP) > + if (ret == -EOPNOTSUPP || ret == -EXDEV) > ret = generic_copy_file_range(src_file, src_off, dst_file, > dst_off, len, flags); > return ret; > diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c > index 5ef4baec6234..03e4b9eacbd1 100644 > --- a/fs/cifs/cifsfs.c > +++ b/fs/cifs/cifsfs.c > @@ -1072,6 +1072,12 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, > goto out; > } > > + if (src_inode->i_sb != target_inode->i_sb) { > + rc = -EXDEV; > + goto out; > + } > + > + > if (!src_file->private_data || !dst_file->private_data) { > rc = -EBADF; > cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n"); > @@ -1142,7 +1148,7 @@ static ssize_t cifs_copy_file_range(struct file > *src_file, loff_t off, > len, flags); > free_xid(xid); > > - if (rc == -EOPNOTSUPP) > + if (rc == -EOPNOTSUPP || rc == -EXDEV) > rc = generic_copy_file_range(src_file, off, dst_file, > destoff, len, flags); > return rc; > diff --git a/fs/fuse/file.c b/fs/fuse/file.c > index b86fb0298739..0758f831a4eb 100644 > --- a/fs/fuse/file.c > +++ b/fs/fuse/file.c > @@ -3053,6 +3053,9 @@ static ssize_t __fuse_copy_file_range(struct file > *file_in, loff_t pos_in, > if (fc->no_copy_file_range) > return -EOPNOTSUPP; > > + if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) > + return -EXDEV; > + > inode_lock(inode_out); > > if (fc->writeback_cache) { > @@ -3109,7 +3112,7 @@ static ssize_t fuse_copy_file_range(struct file > *src_file, loff_t src_off, > ret = __fuse_copy_file_range(src_file, src_off, dst_file, dst_off, > len, flags); > > - if (ret == -EOPNOTSUPP) > + if (ret == -EOPNOTSUPP || ret == -EXDEV) > ret = generic_copy_file_range(src_file, src_off, dst_file, > dst_off, len, flags); > return ret; > diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c > index d7766a6eb0f4..4783c0c1c49e 100644 > --- a/fs/nfs/nfs4file.c > +++ b/fs/nfs/nfs4file.c > @@ -133,16 +133,20 @@ static ssize_t nfs4_copy_file_range(struct file > *file_in, loff_t pos_in, > struct file *file_out, loff_t pos_out, > size_t count, unsigned int flags) > { > - ssize_t ret; > + ssize_t ret = -EXDEV; > > if (file_inode(file_in) == file_inode(file_out)) > return -EINVAL; > -retry: > - ret = nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count); > - if (ret == -EAGAIN) > - goto retry; > > - if (ret == -EOPNOTSUPP) > + /* only offload copy if superblock is the same */ > + if (file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) { > + do { > + ret = nfs42_proc_copy(file_in, pos_in, file_out, > + pos_out, count); > + } while (ret == -EAGAIN); I'm not convinced we can actually return -EAGAIN from nfs42_proc_copy(). The nfs_get_lock_context() function doesn't return it, and if _nfs42_proc_copy() returns -EAGAIN it's immediately retried by nfs42_proc_copy() instead of returning. Olga, am I missing something here? Anna > + } > + > + if (ret == -EOPNOTSUPP || ret == -EXDEV) > ret = generic_copy_file_range(file_in, pos_in, file_out, > pos_out, count, flags); > return ret; > diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c > index 68736e5d6a56..34fb0398d016 100644 > --- a/fs/overlayfs/file.c > +++ b/fs/overlayfs/file.c > @@ -443,6 +443,14 @@ static loff_t ovl_copyfile(struct file *file_in, loff_t > pos_in, > const struct cred *old_cred; > loff_t ret; > > + /* > + * Temporary. Cross device copy checks should be left to the copy file > + * call on the real inodes, but existing behaviour checks the upper > + * files only. > + */ > + if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) > + return -EXDEV; > + > ret = ovl_real_fdget(file_out, &real_out); > if (ret) > return ret; > @@ -491,7 +499,7 @@ static ssize_t ovl_copy_file_range(struct file *file_in, > loff_t pos_in, > ret = ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags, > OVL_COPY); > > - if (ret == -EOPNOTSUPP) > + if (ret == -EOPNOTSUPP || ret == -EXDEV) > ret = generic_copy_file_range(file_in, pos_in, file_out, > pos_out, len, flags); > return ret; > diff --git a/fs/read_write.c b/fs/read_write.c > index 174cf92eea1d..4e0666de0d69 100644 > --- a/fs/read_write.c > +++ b/fs/read_write.c > @@ -1565,6 +1565,10 @@ ssize_t generic_copy_file_range(struct file *file_in, > loff_t pos_in, > struct file *file_out, loff_t pos_out, > size_t len, unsigned int flags) > { > + /* Temporary, do_splice_direct supports cross-sb copies */ > + if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) > + return -EXDEV; > + > return do_splice_direct(file_in, &pos_in, file_out, &pos_out, > len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); > } > @@ -1611,17 +1615,11 @@ ssize_t vfs_copy_file_range(struct file *file_in, > loff_t pos_in, > struct file *file_out, loff_t pos_out, > size_t len, unsigned int flags) > { > - struct inode *inode_in = file_inode(file_in); > - struct inode *inode_out = file_inode(file_out); > ssize_t ret; > > if (flags != 0) > return -EINVAL; > > - /* this could be relaxed once a method supports cross-fs copies */ > - if (inode_in->i_sb != inode_out->i_sb) > - return -EXDEV; > - > ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len, > flags); > if (ret < 0)
On Mon, Dec 3, 2018 at 1:53 PM Anna Schumaker <schumaker.anna@gmail.com> wrote: > > On Mon, 2018-12-03 at 19:34 +1100, Dave Chinner wrote: > > From: Dave Chinner <dchinner@redhat.com> > > > > We want to enable cross-filesystem copy_file_range functionality > > where possible, so push the "same superblock only" checks down to > > the individual filesystem callouts so they can make their own > > decisions about cross-superblock copy offload. > > > > Signed-off-by: Dave Chinner <dchinner@redhat.com> > > --- > > fs/ceph/file.c | 4 +++- > > fs/cifs/cifsfs.c | 8 +++++++- > > fs/fuse/file.c | 5 ++++- > > fs/nfs/nfs4file.c | 16 ++++++++++------ > > fs/overlayfs/file.c | 10 +++++++++- > > fs/read_write.c | 10 ++++------ > > 6 files changed, 37 insertions(+), 16 deletions(-) > > > > diff --git a/fs/ceph/file.c b/fs/ceph/file.c > > index cf29f0410dcb..eb876e19c1dc 100644 > > --- a/fs/ceph/file.c > > +++ b/fs/ceph/file.c > > @@ -1905,6 +1905,8 @@ static ssize_t __ceph_copy_file_range(struct file > > *src_file, loff_t src_off, > > > > if (src_inode == dst_inode) > > return -EINVAL; > > + if (src_inode->i_sb != dst_inode->i_sb) > > + return -EXDEV; > > if (ceph_snap(dst_inode) != CEPH_NOSNAP) > > return -EROFS; > > > > @@ -2105,7 +2107,7 @@ static ssize_t ceph_copy_file_range(struct file > > *src_file, loff_t src_off, > > ret = __ceph_copy_file_range(src_file, src_off, dst_file, dst_off, > > len, flags); > > > > - if (ret == -EOPNOTSUPP) > > + if (ret == -EOPNOTSUPP || ret == -EXDEV) > > ret = generic_copy_file_range(src_file, src_off, dst_file, > > dst_off, len, flags); > > return ret; > > diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c > > index 5ef4baec6234..03e4b9eacbd1 100644 > > --- a/fs/cifs/cifsfs.c > > +++ b/fs/cifs/cifsfs.c > > @@ -1072,6 +1072,12 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, > > goto out; > > } > > > > + if (src_inode->i_sb != target_inode->i_sb) { > > + rc = -EXDEV; > > + goto out; > > + } > > + > > + > > if (!src_file->private_data || !dst_file->private_data) { > > rc = -EBADF; > > cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n"); > > @@ -1142,7 +1148,7 @@ static ssize_t cifs_copy_file_range(struct file > > *src_file, loff_t off, > > len, flags); > > free_xid(xid); > > > > - if (rc == -EOPNOTSUPP) > > + if (rc == -EOPNOTSUPP || rc == -EXDEV) > > rc = generic_copy_file_range(src_file, off, dst_file, > > destoff, len, flags); > > return rc; > > diff --git a/fs/fuse/file.c b/fs/fuse/file.c > > index b86fb0298739..0758f831a4eb 100644 > > --- a/fs/fuse/file.c > > +++ b/fs/fuse/file.c > > @@ -3053,6 +3053,9 @@ static ssize_t __fuse_copy_file_range(struct file > > *file_in, loff_t pos_in, > > if (fc->no_copy_file_range) > > return -EOPNOTSUPP; > > > > + if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) > > + return -EXDEV; > > + > > inode_lock(inode_out); > > > > if (fc->writeback_cache) { > > @@ -3109,7 +3112,7 @@ static ssize_t fuse_copy_file_range(struct file > > *src_file, loff_t src_off, > > ret = __fuse_copy_file_range(src_file, src_off, dst_file, dst_off, > > len, flags); > > > > - if (ret == -EOPNOTSUPP) > > + if (ret == -EOPNOTSUPP || ret == -EXDEV) > > ret = generic_copy_file_range(src_file, src_off, dst_file, > > dst_off, len, flags); > > return ret; > > diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c > > index d7766a6eb0f4..4783c0c1c49e 100644 > > --- a/fs/nfs/nfs4file.c > > +++ b/fs/nfs/nfs4file.c > > @@ -133,16 +133,20 @@ static ssize_t nfs4_copy_file_range(struct file > > *file_in, loff_t pos_in, > > struct file *file_out, loff_t pos_out, > > size_t count, unsigned int flags) > > { > > - ssize_t ret; > > + ssize_t ret = -EXDEV; > > > > if (file_inode(file_in) == file_inode(file_out)) > > return -EINVAL; > > -retry: > > - ret = nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count); > > - if (ret == -EAGAIN) > > - goto retry; > > > > - if (ret == -EOPNOTSUPP) > > + /* only offload copy if superblock is the same */ > > + if (file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) { > > + do { > > + ret = nfs42_proc_copy(file_in, pos_in, file_out, > > + pos_out, count); > > + } while (ret == -EAGAIN); > > I'm not convinced we can actually return -EAGAIN from nfs42_proc_copy(). The > nfs_get_lock_context() function doesn't return it, and if _nfs42_proc_copy() > returns -EAGAIN it's immediately retried by nfs42_proc_copy() instead of > returning. > > Olga, am I missing something here? I'll update it in the client patches that are coming out. > Anna > > > + } > > + > > + if (ret == -EOPNOTSUPP || ret == -EXDEV) > > ret = generic_copy_file_range(file_in, pos_in, file_out, > > pos_out, count, flags); > > return ret; > > diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c > > index 68736e5d6a56..34fb0398d016 100644 > > --- a/fs/overlayfs/file.c > > +++ b/fs/overlayfs/file.c > > @@ -443,6 +443,14 @@ static loff_t ovl_copyfile(struct file *file_in, loff_t > > pos_in, > > const struct cred *old_cred; > > loff_t ret; > > > > + /* > > + * Temporary. Cross device copy checks should be left to the copy file > > + * call on the real inodes, but existing behaviour checks the upper > > + * files only. > > + */ > > + if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) > > + return -EXDEV; > > + > > ret = ovl_real_fdget(file_out, &real_out); > > if (ret) > > return ret; > > @@ -491,7 +499,7 @@ static ssize_t ovl_copy_file_range(struct file *file_in, > > loff_t pos_in, > > ret = ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags, > > OVL_COPY); > > > > - if (ret == -EOPNOTSUPP) > > + if (ret == -EOPNOTSUPP || ret == -EXDEV) > > ret = generic_copy_file_range(file_in, pos_in, file_out, > > pos_out, len, flags); > > return ret; > > diff --git a/fs/read_write.c b/fs/read_write.c > > index 174cf92eea1d..4e0666de0d69 100644 > > --- a/fs/read_write.c > > +++ b/fs/read_write.c > > @@ -1565,6 +1565,10 @@ ssize_t generic_copy_file_range(struct file *file_in, > > loff_t pos_in, > > struct file *file_out, loff_t pos_out, > > size_t len, unsigned int flags) > > { > > + /* Temporary, do_splice_direct supports cross-sb copies */ > > + if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) > > + return -EXDEV; > > + > > return do_splice_direct(file_in, &pos_in, file_out, &pos_out, > > len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); > > } > > @@ -1611,17 +1615,11 @@ ssize_t vfs_copy_file_range(struct file *file_in, > > loff_t pos_in, > > struct file *file_out, loff_t pos_out, > > size_t len, unsigned int flags) > > { > > - struct inode *inode_in = file_inode(file_in); > > - struct inode *inode_out = file_inode(file_out); > > ssize_t ret; > > > > if (flags != 0) > > return -EINVAL; > > > > - /* this could be relaxed once a method supports cross-fs copies */ > > - if (inode_in->i_sb != inode_out->i_sb) > > - return -EXDEV; > > - > > ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len, > > flags); > > if (ret < 0) >
On Mon, Dec 03, 2018 at 01:53:35PM -0500, Anna Schumaker wrote: > On Mon, 2018-12-03 at 19:34 +1100, Dave Chinner wrote: > > From: Dave Chinner <dchinner@redhat.com> > > > > We want to enable cross-filesystem copy_file_range functionality > > where possible, so push the "same superblock only" checks down to > > the individual filesystem callouts so they can make their own > > decisions about cross-superblock copy offload. .... > > diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c > > index d7766a6eb0f4..4783c0c1c49e 100644 > > --- a/fs/nfs/nfs4file.c > > +++ b/fs/nfs/nfs4file.c > > @@ -133,16 +133,20 @@ static ssize_t nfs4_copy_file_range(struct file > > *file_in, loff_t pos_in, > > struct file *file_out, loff_t pos_out, > > size_t count, unsigned int flags) > > { > > - ssize_t ret; > > + ssize_t ret = -EXDEV; > > > > if (file_inode(file_in) == file_inode(file_out)) > > return -EINVAL; > > -retry: > > - ret = nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count); > > - if (ret == -EAGAIN) > > - goto retry; > > > > - if (ret == -EOPNOTSUPP) > > + /* only offload copy if superblock is the same */ > > + if (file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) { > > + do { > > + ret = nfs42_proc_copy(file_in, pos_in, file_out, > > + pos_out, count); > > + } while (ret == -EAGAIN); > > I'm not convinced we can actually return -EAGAIN from nfs42_proc_copy(). The > nfs_get_lock_context() function doesn't return it, and if _nfs42_proc_copy() > returns -EAGAIN it's immediately retried by nfs42_proc_copy() instead of > returning. Not really my concern, nor something that should be fixed in this patchset. i.e. the function does the same thing before and after this patch, so whether EAGAIN can occurr or not is irrelevant to this patchset.... Cheers, Dave.
Well, this isn't bugfixes anymore, but adding new features..
On Tue, Dec 04, 2018 at 07:43:47AM -0800, Christoph Hellwig wrote:
> Well, this isn't bugfixes anymore, but adding new features..
I made that perfectly clear in the cover description. I called it
twice, one of them explicitly stating that this series made these
infrastructure changes because we have pending functionality that
dependents on cross-device copies being supported in a sane manner.
I'll drop it if you want, but then I'll just have to come back after
all the NFS code is merged and do yet more cleanup work.
Cheers,
Dave.
On Tue, Dec 4, 2018 at 5:18 PM Dave Chinner <david@fromorbit.com> wrote: > > On Tue, Dec 04, 2018 at 07:43:47AM -0800, Christoph Hellwig wrote: > > Well, this isn't bugfixes anymore, but adding new features.. > > I made that perfectly clear in the cover description. I called it > twice, one of them explicitly stating that this series made these > infrastructure changes because we have pending functionality that > dependents on cross-device copies being supported in a sane manner. > > I'll drop it if you want, but then I'll just have to come back after > all the NFS code is merged and do yet more cleanup work. This doesn't needs to be fixed in this patch series. I think Anna was pointing out to me for something to take a look at. > > Cheers, > > Dave. > -- > Dave Chinner > david@fromorbit.com
On Wed, Dec 05, 2018 at 09:18:47AM +1100, Dave Chinner wrote: > I'll drop it if you want, but then I'll just have to come back after > all the NFS code is merged and do yet more cleanup work. IFF we want these NFS "features" we'll have to get it right before merging the code. But even with that I'd rather fix the glaring issues you are fixing in your first patches as a priority before adding more features. In other words: don't worry about NFS, lets get the existing code right before worrying about the next round of potential issues.
On Wed, Dec 5, 2018 at 9:09 AM Christoph Hellwig <hch@infradead.org> wrote: > > On Wed, Dec 05, 2018 at 09:18:47AM +1100, Dave Chinner wrote: > > I'll drop it if you want, but then I'll just have to come back after > > all the NFS code is merged and do yet more cleanup work. > > IFF we want these NFS "features" we'll have to get it right before > merging the code. But even with that I'd rather fix the glaring > issues you are fixing in your first patches as a priority before > adding more features. In other words: don't worry about NFS, lets > get the existing code right before worrying about the next round > of potential issues. Dave, Do you mind in v2 removing the 'retry, ret=EAGAIN' piece and leave the call to the nfs42_copy_file_range() (with the superblock block check)? If not, I could provide the patch. This is a piece of code that got in as a part of the async copy patches and it was meant for the upcoming server-to-server series. This code will go right back in with the next series. But since dead piece of code is glaring wrong currently by all means let's fix it. Thank you.
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index cf29f0410dcb..eb876e19c1dc 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1905,6 +1905,8 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, if (src_inode == dst_inode) return -EINVAL; + if (src_inode->i_sb != dst_inode->i_sb) + return -EXDEV; if (ceph_snap(dst_inode) != CEPH_NOSNAP) return -EROFS; @@ -2105,7 +2107,7 @@ static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off, ret = __ceph_copy_file_range(src_file, src_off, dst_file, dst_off, len, flags); - if (ret == -EOPNOTSUPP) + if (ret == -EOPNOTSUPP || ret == -EXDEV) ret = generic_copy_file_range(src_file, src_off, dst_file, dst_off, len, flags); return ret; diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 5ef4baec6234..03e4b9eacbd1 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1072,6 +1072,12 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, goto out; } + if (src_inode->i_sb != target_inode->i_sb) { + rc = -EXDEV; + goto out; + } + + if (!src_file->private_data || !dst_file->private_data) { rc = -EBADF; cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n"); @@ -1142,7 +1148,7 @@ static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off, len, flags); free_xid(xid); - if (rc == -EOPNOTSUPP) + if (rc == -EOPNOTSUPP || rc == -EXDEV) rc = generic_copy_file_range(src_file, off, dst_file, destoff, len, flags); return rc; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index b86fb0298739..0758f831a4eb 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -3053,6 +3053,9 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in, if (fc->no_copy_file_range) return -EOPNOTSUPP; + if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) + return -EXDEV; + inode_lock(inode_out); if (fc->writeback_cache) { @@ -3109,7 +3112,7 @@ static ssize_t fuse_copy_file_range(struct file *src_file, loff_t src_off, ret = __fuse_copy_file_range(src_file, src_off, dst_file, dst_off, len, flags); - if (ret == -EOPNOTSUPP) + if (ret == -EOPNOTSUPP || ret == -EXDEV) ret = generic_copy_file_range(src_file, src_off, dst_file, dst_off, len, flags); return ret; diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index d7766a6eb0f4..4783c0c1c49e 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -133,16 +133,20 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t count, unsigned int flags) { - ssize_t ret; + ssize_t ret = -EXDEV; if (file_inode(file_in) == file_inode(file_out)) return -EINVAL; -retry: - ret = nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count); - if (ret == -EAGAIN) - goto retry; - if (ret == -EOPNOTSUPP) + /* only offload copy if superblock is the same */ + if (file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) { + do { + ret = nfs42_proc_copy(file_in, pos_in, file_out, + pos_out, count); + } while (ret == -EAGAIN); + } + + if (ret == -EOPNOTSUPP || ret == -EXDEV) ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, count, flags); return ret; diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 68736e5d6a56..34fb0398d016 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -443,6 +443,14 @@ static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in, const struct cred *old_cred; loff_t ret; + /* + * Temporary. Cross device copy checks should be left to the copy file + * call on the real inodes, but existing behaviour checks the upper + * files only. + */ + if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) + return -EXDEV; + ret = ovl_real_fdget(file_out, &real_out); if (ret) return ret; @@ -491,7 +499,7 @@ static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in, ret = ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags, OVL_COPY); - if (ret == -EOPNOTSUPP) + if (ret == -EOPNOTSUPP || ret == -EXDEV) ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, flags); return ret; diff --git a/fs/read_write.c b/fs/read_write.c index 174cf92eea1d..4e0666de0d69 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1565,6 +1565,10 @@ ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t len, unsigned int flags) { + /* Temporary, do_splice_direct supports cross-sb copies */ + if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) + return -EXDEV; + return do_splice_direct(file_in, &pos_in, file_out, &pos_out, len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); } @@ -1611,17 +1615,11 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t len, unsigned int flags) { - struct inode *inode_in = file_inode(file_in); - struct inode *inode_out = file_inode(file_out); ssize_t ret; if (flags != 0) return -EINVAL; - /* this could be relaxed once a method supports cross-fs copies */ - if (inode_in->i_sb != inode_out->i_sb) - return -EXDEV; - ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len, flags); if (ret < 0)