Message ID | 20181203083416.28978-4-david@fromorbit.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | fs: fixes for major copy_file_range() issues | expand |
On Mon, Dec 3, 2018 at 10:34 AM Dave Chinner <david@fromorbit.com> wrote: > > From: Dave Chinner <dchinner@redhat.com> > > Now that we have generic_copy_file_range(), remove it as a fallback > case when offloads fail. This puts the responsibility for executing > fallbacks on the filesystems that implement ->copy_file_range and > allows us to add operational validity checks to > generic_copy_file_range(). > > Rework vfs_copy_file_range() to call a new do_copy_file_range() > helper to exceute the copying callout, and move calls to > generic_file_copy_range() into filesystem methods where they > currently return failures. > > Signed-off-by: Dave Chinner <dchinner@redhat.com> You may add Reviewed-by: Amir Goldstein <amir73il@gmail.com> After fixing the overlayfs issue below. ... > diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c > index 84dd957efa24..68736e5d6a56 100644 > --- a/fs/overlayfs/file.c > +++ b/fs/overlayfs/file.c > @@ -486,8 +486,15 @@ static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in, > struct file *file_out, loff_t pos_out, > size_t len, unsigned int flags) > { > - return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags, > + ssize_t ret; > + > + ret = ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags, > OVL_COPY); > + > + if (ret == -EOPNOTSUPP) > + ret = generic_copy_file_range(file_in, pos_in, file_out, > + pos_out, len, flags); > + return ret; > } > This is unneeded, because ovl_copyfile(OVL_COPY) is implemented by calling vfs_copy_file_range() (on the underlying files) and it is not possible to get EOPNOTSUPP from vfs_copy_file_range(). Thanks, Amir.
Hi Dave, On Mon, 2018-12-03 at 19:34 +1100, Dave Chinner wrote: > From: Dave Chinner <dchinner@redhat.com> > > Now that we have generic_copy_file_range(), remove it as a fallback > case when offloads fail. This puts the responsibility for executing > fallbacks on the filesystems that implement ->copy_file_range and > allows us to add operational validity checks to > generic_copy_file_range(). > > Rework vfs_copy_file_range() to call a new do_copy_file_range() > helper to exceute the copying callout, and move calls to > generic_file_copy_range() into filesystem methods where they > currently return failures. > > Signed-off-by: Dave Chinner <dchinner@redhat.com> > --- > fs/ceph/file.c | 17 ++++++++++++++++- > fs/cifs/cifsfs.c | 4 ++++ > fs/fuse/file.c | 17 ++++++++++++++++- > fs/nfs/nfs4file.c | 4 ++++ The NFS bits look okay to me: Acked-by: Anna Schumaker <Anna.Schumaker@Netapp.com > fs/overlayfs/file.c | 9 ++++++++- > fs/read_write.c | 24 +++++++++++++++--------- > 6 files changed, 63 insertions(+), 12 deletions(-) > > diff --git a/fs/ceph/file.c b/fs/ceph/file.c > index 189df668b6a0..cf29f0410dcb 100644 > --- a/fs/ceph/file.c > +++ b/fs/ceph/file.c > @@ -1885,7 +1885,7 @@ static int is_file_size_ok(struct inode *src_inode, > struct inode *dst_inode, > return 0; > } > > -static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off, > +static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, > struct file *dst_file, loff_t dst_off, > size_t len, unsigned int flags) > { > @@ -2096,6 +2096,21 @@ static ssize_t ceph_copy_file_range(struct file > *src_file, loff_t src_off, > return ret; > } > > +static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off, > + struct file *dst_file, loff_t dst_off, > + size_t len, unsigned int flags) > +{ > + ssize_t ret; > + > + ret = __ceph_copy_file_range(src_file, src_off, dst_file, dst_off, > + len, flags); > + > + if (ret == -EOPNOTSUPP) > + ret = generic_copy_file_range(src_file, 
src_off, dst_file, > + dst_off, len, flags); > + return ret; > +} > + > const struct file_operations ceph_file_fops = { > .open = ceph_open, > .release = ceph_release, > diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c > index 865706edb307..5ef4baec6234 100644 > --- a/fs/cifs/cifsfs.c > +++ b/fs/cifs/cifsfs.c > @@ -1141,6 +1141,10 @@ static ssize_t cifs_copy_file_range(struct file > *src_file, loff_t off, > rc = cifs_file_copychunk_range(xid, src_file, off, dst_file, destoff, > len, flags); > free_xid(xid); > + > + if (rc == -EOPNOTSUPP) > + rc = generic_copy_file_range(src_file, off, dst_file, > + destoff, len, flags); > return rc; > } > > diff --git a/fs/fuse/file.c b/fs/fuse/file.c > index b52f9baaa3e7..b86fb0298739 100644 > --- a/fs/fuse/file.c > +++ b/fs/fuse/file.c > @@ -3024,7 +3024,7 @@ static long fuse_file_fallocate(struct file *file, int > mode, loff_t offset, > return err; > } > > -static ssize_t fuse_copy_file_range(struct file *file_in, loff_t pos_in, > +static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in, > struct file *file_out, loff_t pos_out, > size_t len, unsigned int flags) > { > @@ -3100,6 +3100,21 @@ static ssize_t fuse_copy_file_range(struct file > *file_in, loff_t pos_in, > return err; > } > > +static ssize_t fuse_copy_file_range(struct file *src_file, loff_t src_off, > + struct file *dst_file, loff_t dst_off, > + size_t len, unsigned int flags) > +{ > + ssize_t ret; > + > + ret = __fuse_copy_file_range(src_file, src_off, dst_file, dst_off, > + len, flags); > + > + if (ret == -EOPNOTSUPP) > + ret = generic_copy_file_range(src_file, src_off, dst_file, > + dst_off, len, flags); > + return ret; > +} > + > static const struct file_operations fuse_file_operations = { > .llseek = fuse_file_llseek, > .read_iter = fuse_file_read_iter, > diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c > index 46d691ba04bc..d7766a6eb0f4 100644 > --- a/fs/nfs/nfs4file.c > +++ b/fs/nfs/nfs4file.c > @@ -141,6 +141,10 @@ static ssize_t 
nfs4_copy_file_range(struct file *file_in, > loff_t pos_in, > ret = nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count); > if (ret == -EAGAIN) > goto retry; > + > + if (ret == -EOPNOTSUPP) > + ret = generic_copy_file_range(file_in, pos_in, file_out, > + pos_out, count, flags); > return ret; > } > > diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c > index 84dd957efa24..68736e5d6a56 100644 > --- a/fs/overlayfs/file.c > +++ b/fs/overlayfs/file.c > @@ -486,8 +486,15 @@ static ssize_t ovl_copy_file_range(struct file *file_in, > loff_t pos_in, > struct file *file_out, loff_t pos_out, > size_t len, unsigned int flags) > { > - return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags, > + ssize_t ret; > + > + ret = ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags, > OVL_COPY); > + > + if (ret == -EOPNOTSUPP) > + ret = generic_copy_file_range(file_in, pos_in, file_out, > + pos_out, len, flags); > + return ret; > } > > static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in, > diff --git a/fs/read_write.c b/fs/read_write.c > index 50114694c98b..44339b44accc 100644 > --- a/fs/read_write.c > +++ b/fs/read_write.c > @@ -1570,6 +1570,18 @@ ssize_t generic_copy_file_range(struct file *file_in, > loff_t pos_in, > } > EXPORT_SYMBOL(generic_copy_file_range); > > +static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, > + struct file *file_out, loff_t pos_out, > + size_t len, unsigned int flags) > +{ > + if (file_out->f_op->copy_file_range) > + return file_out->f_op->copy_file_range(file_in, pos_in, > file_out, > + pos_out, len, flags); > + > + return generic_copy_file_range(file_in, &pos_in, file_out, &pos_out, > + len, flags); > +} > + > /* > * copy_file_range() differs from regular file read and write in that it > * specifically allows return partial success. 
When it does so is up to > @@ -1634,15 +1646,9 @@ ssize_t vfs_copy_file_range(struct file *file_in, > loff_t pos_in, > } > } > > - if (file_out->f_op->copy_file_range) { > - ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out, > - pos_out, len, flags); > - if (ret != -EOPNOTSUPP) > - goto done; > - } > - > - ret = generic_copy_file_range(file_in, &pos_in, file_out, &pos_out, > - len, flags); > + ret = do_copy_file_range(file_in, pos_in, file_out, pos_out, len, > + flags); > + WARN_ON_ONCE(ret == -EOPNOTSUPP); > done: > if (ret > 0) { > fsnotify_access(file_in);
On Mon, Dec 03, 2018 at 12:22:21PM +0200, Amir Goldstein wrote: > On Mon, Dec 3, 2018 at 10:34 AM Dave Chinner <david@fromorbit.com> wrote: > > > > From: Dave Chinner <dchinner@redhat.com> > > > > Now that we have generic_copy_file_range(), remove it as a fallback > > case when offloads fail. This puts the responsibility for executing > > fallbacks on the filesystems that implement ->copy_file_range and > > allows us to add operational validity checks to > > generic_copy_file_range(). > > > > Rework vfs_copy_file_range() to call a new do_copy_file_range() > > helper to exceute the copying callout, and move calls to > > generic_file_copy_range() into filesystem methods where they > > currently return failures. > > > > Signed-off-by: Dave Chinner <dchinner@redhat.com> > > You may add > Reviewed-by: Amir Goldstein <amir73il@gmail.com> > > After fixing the overlayfs issue below. > ... > > > diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c > > index 84dd957efa24..68736e5d6a56 100644 > > --- a/fs/overlayfs/file.c > > +++ b/fs/overlayfs/file.c > > @@ -486,8 +486,15 @@ static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in, > > struct file *file_out, loff_t pos_out, > > size_t len, unsigned int flags) > > { > > - return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags, > > + ssize_t ret; > > + > > + ret = ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags, > > OVL_COPY); > > + > > + if (ret == -EOPNOTSUPP) > > + ret = generic_copy_file_range(file_in, pos_in, file_out, > > + pos_out, len, flags); > > + return ret; > > } > > > > This is unneeded, because ovl_copyfile(OVL_COPY) is implemented > by calling vfs_copy_file_range() (on the underlying files) and it is > not possible > to get EOPNOTSUPP from vfs_copy_file_range(). Except that it is possible. e.g. If the underlying filesystem tries a copy offload, gets a "not supported" failure from the remote server and then doesn't implement a fallback. Cheers, Dave.
Looks fine,
Reviewed-by: Christoph Hellwig <hch@lst.de>
On Tue, Dec 4, 2018 at 1:02 AM Dave Chinner <david@fromorbit.com> wrote: > > On Mon, Dec 03, 2018 at 12:22:21PM +0200, Amir Goldstein wrote: > > On Mon, Dec 3, 2018 at 10:34 AM Dave Chinner <david@fromorbit.com> wrote: > > > > > > From: Dave Chinner <dchinner@redhat.com> > > > > > > Now that we have generic_copy_file_range(), remove it as a fallback > > > case when offloads fail. This puts the responsibility for executing > > > fallbacks on the filesystems that implement ->copy_file_range and > > > allows us to add operational validity checks to > > > generic_copy_file_range(). > > > > > > Rework vfs_copy_file_range() to call a new do_copy_file_range() > > > helper to exceute the copying callout, and move calls to > > > generic_file_copy_range() into filesystem methods where they > > > currently return failures. > > > > > > Signed-off-by: Dave Chinner <dchinner@redhat.com> > > > > You may add > > Reviewed-by: Amir Goldstein <amir73il@gmail.com> > > > > After fixing the overlayfs issue below. > > ... > > > > > diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c > > > index 84dd957efa24..68736e5d6a56 100644 > > > --- a/fs/overlayfs/file.c > > > +++ b/fs/overlayfs/file.c > > > @@ -486,8 +486,15 @@ static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in, > > > struct file *file_out, loff_t pos_out, > > > size_t len, unsigned int flags) > > > { > > > - return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags, > > > + ssize_t ret; > > > + > > > + ret = ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags, > > > OVL_COPY); > > > + > > > + if (ret == -EOPNOTSUPP) > > > + ret = generic_copy_file_range(file_in, pos_in, file_out, > > > + pos_out, len, flags); > > > + return ret; > > > } > > > > > > > This is unneeded, because ovl_copyfile(OVL_COPY) is implemented > > by calling vfs_copy_file_range() (on the underlying files) and it is > > not possible > > to get EOPNOTSUPP from vfs_copy_file_range(). > > Except that it is possible. e.g. 
If the underlying filesystem tries > a copy offload, gets a "not supported" failure from the remote > server and then doesn't implement a fallback. > I'm of the opinion that ovl_copy_file_range() and do_copy_file_range() are alike. If you choose not to fall back in the latter to generic_copy_file_range() for a misbehaving filesystem and WARN_ON this case, there is no reason for overlayfs to cover up for the misbehaving underlying filesystem. If you want to cover up for a misbehaving filesystem, please do it in do_copy_file_range() and drop the WARN_ON_ONCE(). Come to think of it, I understand your reasoning for pushing generic_copy_file_range() down to filesystems so they can fall back to it in several error conditions. I do not follow the reasoning of NOT falling back to generic_copy_file_range() in vfs if EOPNOTSUPP is returned from the filesystem. IOW, if we want to cover up for a misbehaving filesystem, this would have been more robust code: +static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t len, unsigned int flags) +{ + ssize_t ret; + + if (file_out->f_op->copy_file_range) { + ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out, + pos_out, len, flags); + if (!WARN_ON_ONCE(ret == -EOPNOTSUPP)) + return ret; + } + return generic_copy_file_range(file_in, &pos_in, file_out, &pos_out, + len, flags); +} + Thanks, Amir.
On Thu, Dec 06, 2018 at 06:16:46AM +0200, Amir Goldstein wrote: > On Tue, Dec 4, 2018 at 1:02 AM Dave Chinner <david@fromorbit.com> wrote: > > > > On Mon, Dec 03, 2018 at 12:22:21PM +0200, Amir Goldstein wrote: > > > On Mon, Dec 3, 2018 at 10:34 AM Dave Chinner <david@fromorbit.com> wrote: > > > > > > > > From: Dave Chinner <dchinner@redhat.com> > > > > > > > > Now that we have generic_copy_file_range(), remove it as a fallback > > > > case when offloads fail. This puts the responsibility for executing > > > > fallbacks on the filesystems that implement ->copy_file_range and > > > > allows us to add operational validity checks to > > > > generic_copy_file_range(). > > > > > > > > Rework vfs_copy_file_range() to call a new do_copy_file_range() > > > > helper to exceute the copying callout, and move calls to > > > > generic_file_copy_range() into filesystem methods where they > > > > currently return failures. > > > > > > > > Signed-off-by: Dave Chinner <dchinner@redhat.com> > > > > > > You may add > > > Reviewed-by: Amir Goldstein <amir73il@gmail.com> > > > > > > After fixing the overlayfs issue below. > > > ... 
> > > > > > > diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c > > > > index 84dd957efa24..68736e5d6a56 100644 > > > > --- a/fs/overlayfs/file.c > > > > +++ b/fs/overlayfs/file.c > > > > @@ -486,8 +486,15 @@ static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in, > > > > struct file *file_out, loff_t pos_out, > > > > size_t len, unsigned int flags) > > > > { > > > > - return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags, > > > > + ssize_t ret; > > > > + > > > > + ret = ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags, > > > > OVL_COPY); > > > > + > > > > + if (ret == -EOPNOTSUPP) > > > > + ret = generic_copy_file_range(file_in, pos_in, file_out, > > > > + pos_out, len, flags); > > > > + return ret; > > > > } > > > > > > > > > > This is unneeded, because ovl_copyfile(OVL_COPY) is implemented > > > by calling vfs_copy_file_range() (on the underlying files) and it is > > > not possible > > > to get EOPNOTSUPP from vfs_copy_file_range(). > > > > Except that it is possible. e.g. If the underlying filesystem tries > > a copy offload, gets a "not supported" failure from the remote > > server and then doesn't implement a fallback. > > > > I'm in the opinion that ovl_copy_file_range() and do_copy_file_range() > are a like. If you choose not to fallback in the latter to > generic_copy_file_range() for misbehaving filesystem and WARN_ON > this case, there is no reason for overlayfs to cover up for the > misbehaving underlying filesystem. > > If you want to cover up for misbehaving filesystem, please do it > in do_copy_file_range() and drop the WARN_ON_ONCE(). > Come to think about it, I understand your reasoning for pushing > generic_copy_file_range() down to filesystems so they can fallback to > it in several error conditions. > I do not follow the reasoning of NOT falling back to > generic_copy_file_range() in vfs if EOPNOTSUPP is returned from > filesystem. 
IOW, if we want to cover up for misbehaving filesystem, > this would have been a more robust code: Since when have we defined a filesystem returning -EOPNOTSUPP as a "misbehaving filesystem"? Userspace has to handle errors in copy_file_range() with its own fallback copy code (i.e. it cannot rely on the kernel actually supporting copy_file_range at all). Hence it's perfectly fine for a filesystem implementation to encode "offload or fail entirely" semantics if they want. Yes, I've been shouted at by developers quite recently who *demanded* that copy_file_range (and other offloads like fallocate(ZERO_RANGE)) *fail* if they cannot "offload" the operation to make it "fast". The application developers want to use different algorithms if the kernel offload isn't any faster than userspace doing the dumb thing and physically pushing bytes around itself. I've pushed back on this as much as I can, but it doesn't change the fact that for many situations doing do_splice_direct() is exactly the wrong thing to do (e.g. because copy_file_range() on a TB+ scale file couldn't be offloaded by the filesystem because the server said EOPNOTSUPP) IOWs, there are some filesystems or situations where it makes sense to have fail-fast semantics and leave the decision of what to do next in the hands of the userspace application that has the context necessary to determine what the best action to take is. And to do that, we need to give control of the fallback to the filesystems. Flexibility is what is needed here, not a dumb, hard coded "the VFS always knows what's right for you" policy that triggers when nobody really wants it to. Cheers, Dave.
On Thu, Dec 6, 2018 at 11:31 PM Dave Chinner <david@fromorbit.com> wrote: > > On Thu, Dec 06, 2018 at 06:16:46AM +0200, Amir Goldstein wrote: > > On Tue, Dec 4, 2018 at 1:02 AM Dave Chinner <david@fromorbit.com> wrote: > > > > > > On Mon, Dec 03, 2018 at 12:22:21PM +0200, Amir Goldstein wrote: > > > > On Mon, Dec 3, 2018 at 10:34 AM Dave Chinner <david@fromorbit.com> wrote: > > > > > > > > > > From: Dave Chinner <dchinner@redhat.com> > > > > > > > > > > Now that we have generic_copy_file_range(), remove it as a fallback > > > > > case when offloads fail. This puts the responsibility for executing > > > > > fallbacks on the filesystems that implement ->copy_file_range and > > > > > allows us to add operational validity checks to > > > > > generic_copy_file_range(). > > > > > > > > > > Rework vfs_copy_file_range() to call a new do_copy_file_range() > > > > > helper to exceute the copying callout, and move calls to > > > > > generic_file_copy_range() into filesystem methods where they > > > > > currently return failures. > > > > > > > > > > Signed-off-by: Dave Chinner <dchinner@redhat.com> > > > > > > > > You may add > > > > Reviewed-by: Amir Goldstein <amir73il@gmail.com> > > > > > > > > After fixing the overlayfs issue below. > > > > ... 
> > > > > > > > > diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c > > > > > index 84dd957efa24..68736e5d6a56 100644 > > > > > --- a/fs/overlayfs/file.c > > > > > +++ b/fs/overlayfs/file.c > > > > > @@ -486,8 +486,15 @@ static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in, > > > > > struct file *file_out, loff_t pos_out, > > > > > size_t len, unsigned int flags) > > > > > { > > > > > - return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags, > > > > > + ssize_t ret; > > > > > + > > > > > + ret = ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags, > > > > > OVL_COPY); > > > > > + > > > > > + if (ret == -EOPNOTSUPP) > > > > > + ret = generic_copy_file_range(file_in, pos_in, file_out, > > > > > + pos_out, len, flags); > > > > > + return ret; > > > > > } > > > > > > > > > > > > > This is unneeded, because ovl_copyfile(OVL_COPY) is implemented > > > > by calling vfs_copy_file_range() (on the underlying files) and it is > > > > not possible > > > > to get EOPNOTSUPP from vfs_copy_file_range(). > > > > > > Except that it is possible. e.g. If the underlying filesystem tries > > > a copy offload, gets a "not supported" failure from the remote > > > server and then doesn't implement a fallback. > > > > > > > I'm in the opinion that ovl_copy_file_range() and do_copy_file_range() > > are a like. If you choose not to fallback in the latter to > > generic_copy_file_range() for misbehaving filesystem and WARN_ON > > this case, there is no reason for overlayfs to cover up for the > > misbehaving underlying filesystem. > > > > If you want to cover up for misbehaving filesystem, please do it > > in do_copy_file_range() and drop the WARN_ON_ONCE(). > > Come to think about it, I understand your reasoning for pushing > > generic_copy_file_range() down to filesystems so they can fallback to > > it in several error conditions. 
> > I do not follow the reasoning of NOT falling back to > > generic_copy_file_range() in vfs if EOPNOTSUPP is returned from > > filesystem. IOW, if we want to cover up for misbehaving filesystem, > > this would have been a more robust code: > > Since when have we defined a filesystem returning -EOPNOTSUPP as a > "misbehaving filesystem"? Since you wrote: WARN_ON_ONCE(ret == -EOPNOTSUPP); If filesystem is allowed to return EOPNOTSUPP from ->copy_file_range() then what is this warning about? > Userspace has to handle errors in > copy_file_range() with it's own fallback copy code (i.e. it cannot > rely on the kernel actually supporting copy_file_range at all). > Hence it's perfectly fine for a filesystem implementation to encode > "offload or fail entirely" semantics if they want. > > Yes, I've been shouted at by developers quite recently who > *demanded* that copy_file_range (and other offloads like > fallocate(ZERO_RANGE)) *fail* if they cannot "offload" the operation > to make it "fast". The application developers want to use different > algorithms if the kernel offload isn't any faster than userspace > doing the dumb thing and phsyically pushing bytes around itself. > > I've pushed back on this as much as I can, but it doesn't change the > fact that for many situations doing do_splice_direct() is exactly > the wrong thing to do (e.g. because copy_file_range() on a TB+ scale > file couldn't be offloaded by the filesystem because the server said > EOPNOTSUPP) > > IOWs, for some filesystems or situations where it makes sense to > have fail-fast semantics and leave the decision of what to do next > in the hands of the userspace application that has the context > necessary to determine what the best action to take is. And to do > that, we need to give control of the fallback to the filesystems. > > Flexibility is what is needed here, not a dumb, hard coded "the VFS > always know what's right for you" policy that triggers when nobody > really wants it to. 
> You misunderstood me. Please remove the fallback to generic_copy_file_range() in ovl_copy_file_range() as I requested in initial review for the exact same reasons that you list above. The overlayfs implementation of ovl_copy_file_range() is just handing over the call to the underlying vfs_copy_file_range(). If the latter is expected to return EOPNOTSUPP, so is the overlayfs implementation. Thanks, Amir.
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 189df668b6a0..cf29f0410dcb 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1885,7 +1885,7 @@ static int is_file_size_ok(struct inode *src_inode, struct inode *dst_inode, return 0; } -static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off, +static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, struct file *dst_file, loff_t dst_off, size_t len, unsigned int flags) { @@ -2096,6 +2096,21 @@ static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off, return ret; } +static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off, + struct file *dst_file, loff_t dst_off, + size_t len, unsigned int flags) +{ + ssize_t ret; + + ret = __ceph_copy_file_range(src_file, src_off, dst_file, dst_off, + len, flags); + + if (ret == -EOPNOTSUPP) + ret = generic_copy_file_range(src_file, src_off, dst_file, + dst_off, len, flags); + return ret; +} + const struct file_operations ceph_file_fops = { .open = ceph_open, .release = ceph_release, diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 865706edb307..5ef4baec6234 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1141,6 +1141,10 @@ static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off, rc = cifs_file_copychunk_range(xid, src_file, off, dst_file, destoff, len, flags); free_xid(xid); + + if (rc == -EOPNOTSUPP) + rc = generic_copy_file_range(src_file, off, dst_file, + destoff, len, flags); return rc; } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index b52f9baaa3e7..b86fb0298739 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -3024,7 +3024,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, return err; } -static ssize_t fuse_copy_file_range(struct file *file_in, loff_t pos_in, +static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t len, unsigned int flags) { @@ -3100,6 +3100,21 @@ static 
ssize_t fuse_copy_file_range(struct file *file_in, loff_t pos_in, return err; } +static ssize_t fuse_copy_file_range(struct file *src_file, loff_t src_off, + struct file *dst_file, loff_t dst_off, + size_t len, unsigned int flags) +{ + ssize_t ret; + + ret = __fuse_copy_file_range(src_file, src_off, dst_file, dst_off, + len, flags); + + if (ret == -EOPNOTSUPP) + ret = generic_copy_file_range(src_file, src_off, dst_file, + dst_off, len, flags); + return ret; +} + static const struct file_operations fuse_file_operations = { .llseek = fuse_file_llseek, .read_iter = fuse_file_read_iter, diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 46d691ba04bc..d7766a6eb0f4 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -141,6 +141,10 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in, ret = nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count); if (ret == -EAGAIN) goto retry; + + if (ret == -EOPNOTSUPP) + ret = generic_copy_file_range(file_in, pos_in, file_out, + pos_out, count, flags); return ret; } diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 84dd957efa24..68736e5d6a56 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -486,8 +486,15 @@ static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t len, unsigned int flags) { - return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags, + ssize_t ret; + + ret = ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags, OVL_COPY); + + if (ret == -EOPNOTSUPP) + ret = generic_copy_file_range(file_in, pos_in, file_out, + pos_out, len, flags); + return ret; } static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in, diff --git a/fs/read_write.c b/fs/read_write.c index 50114694c98b..44339b44accc 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1570,6 +1570,18 @@ ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, } EXPORT_SYMBOL(generic_copy_file_range); +static 
ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t len, unsigned int flags) +{ + if (file_out->f_op->copy_file_range) + return file_out->f_op->copy_file_range(file_in, pos_in, file_out, + pos_out, len, flags); + + return generic_copy_file_range(file_in, &pos_in, file_out, &pos_out, + len, flags); +} + /* * copy_file_range() differs from regular file read and write in that it * specifically allows return partial success. When it does so is up to @@ -1634,15 +1646,9 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, } } - if (file_out->f_op->copy_file_range) { - ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out, - pos_out, len, flags); - if (ret != -EOPNOTSUPP) - goto done; - } - - ret = generic_copy_file_range(file_in, &pos_in, file_out, &pos_out, - len, flags); + ret = do_copy_file_range(file_in, pos_in, file_out, pos_out, len, + flags); + WARN_ON_ONCE(ret == -EOPNOTSUPP); done: if (ret > 0) { fsnotify_access(file_in);