Message ID | 153870034158.29072.8943691140742142494.stgit@magnolia (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | fs: fixes for serious clone/dedupe problems | expand |
On Fri, Oct 5, 2018 at 3:46 AM Darrick J. Wong <darrick.wong@oracle.com> wrote: > > From: Darrick J. Wong <darrick.wong@oracle.com> > > Pass operational flags to the per-filesystem clone and dedupe > implementations. This enables the vfs to signal when it can deal with > short clone and short dedupe operations. > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> > --- [...] > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -1761,7 +1761,7 @@ struct file_operations { > loff_t, size_t, unsigned int); > s64 (*clone_file_range)(struct file *file_in, loff_t pos_in, > struct file *file_out, loff_t pos_out, > - u64 count); > + u64 count, unsigned int flags); > s64 (*dedupe_file_range)(struct file *file_in, loff_t pos_in, > struct file *file_out, loff_t pos_out, > u64 count); > @@ -1827,9 +1827,15 @@ extern ssize_t vfs_readv(struct file *, const struct iovec __user *, > unsigned long, loff_t *, rwf_t); > extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, > loff_t, size_t, unsigned int); > +/* Caller can handle a shortened operation. */ > +#define CLONERANGE_SHORT (1 << 0) > +/* End operation at the source file's EOF. */ > +#define CLONERANGE_EOF (1 << 1) > +/* Operation is actually dedupe, not clone. */ > +#define CLONERANGE_DEDUPE (1 << 2) That's cool. But you know what's going to be the next step, right? Merging the 3 file operation interfaces into a single one. copy_file_range() already has the flags arg for future extensions and as you wrote somewhere, clone is really an optimized copy. ovl_copyfile() already does that internally. So the only takeaway for this patch series, please use constant names COPYRANGE_* and also explicitly define: /* Operation is actually clone, not copy. */ #define COPYRANGE_CLONE (1 << 2) /* Operation is actually dedupe, not copy. */ #define COPYRANGE_DEDUPE (1 << 3) Thanks, Amir.
On Fri, Oct 05, 2018 at 10:07:43AM +0300, Amir Goldstein wrote: > On Fri, Oct 5, 2018 at 3:46 AM Darrick J. Wong <darrick.wong@oracle.com> wrote: > > > > From: Darrick J. Wong <darrick.wong@oracle.com> > > > > Pass operational flags to the per-filesystem clone and dedupe > > implementations. This enables the vfs to signal when it can deal with > > short clone and short dedupe operations. > > > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> > > --- > [...] > > --- a/include/linux/fs.h > > +++ b/include/linux/fs.h > > @@ -1761,7 +1761,7 @@ struct file_operations { > > loff_t, size_t, unsigned int); > > s64 (*clone_file_range)(struct file *file_in, loff_t pos_in, > > struct file *file_out, loff_t pos_out, > > - u64 count); > > + u64 count, unsigned int flags); > > s64 (*dedupe_file_range)(struct file *file_in, loff_t pos_in, > > struct file *file_out, loff_t pos_out, > > u64 count); > > @@ -1827,9 +1827,15 @@ extern ssize_t vfs_readv(struct file *, const struct iovec __user *, > > unsigned long, loff_t *, rwf_t); > > extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, > > loff_t, size_t, unsigned int); > > +/* Caller can handle a shortened operation. */ > > +#define CLONERANGE_SHORT (1 << 0) > > +/* End operation at the source file's EOF. */ > > +#define CLONERANGE_EOF (1 << 1) > > +/* Operation is actually dedupe, not clone. */ > > +#define CLONERANGE_DEDUPE (1 << 2) > > That's cool. But you know what's going to be the next step, right? > Merging the 3 file operation interfaces into a single one. > copy_file_range() already has the flags arg for future extensions > and as you wrote somewhere, clone is really an optimized copy. > ovl_copyfile() already does that internally. > > So the only take away for this patch series, please use constant > names COPYRANGE_* and also explicitly define: > > /* Operation is actually clone, not copy. */ > #define COPYRANGE_CLONE (1 << 2) > /* Operation is actually dedupe, not copy. 
*/ > #define COPYRANGE_DEDUPE (1 << 3) Yeah, I was too tired to try to throw that one on top of the flaming garbage pile. But I guess since I have a bunch more work to do to the previous patch I might as well do that... --D > > Thanks, > Amir.
On Fri, Oct 05, 2018 at 10:50:08AM -0700, Darrick J. Wong wrote: > > That's cool. But you know what's going to be the next step, right? > > Merging the 3 file operation interfaces into a single one. > > copy_file_range() already has the flags arg for future extensions > > and as you wrote somewhere, clone is really an optimized copy. > > ovl_copyfile() already does that internally. > > > > So the only take away for this patch series, please use constant > > names COPYRANGE_* and also explicitly define: > > > > /* Operation is actually clone, not copy. */ > > #define COPYRANGE_CLONE (1 << 2) > > /* Operation is actually dedupe, not copy. */ > > #define COPYRANGE_DEDUPE (1 << 3) > > Yeah, I was too tired to try to throw that one on top of the flaming > garbage pile. But I guess since I have a bunch more work to do to the > previous patch I might as well do that... I'm not totally sold on just merging everything, but I very much despise what is done in this patch, as it creates a completely confusing interface.
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 864651257142..e8c9b871709d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3251,7 +3251,8 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages, struct extent_state **cached); int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end); s64 btrfs_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len); + struct file *file_out, loff_t pos_out, u64 len, + unsigned int flags); /* tree-defrag.c */ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 35ba974f1333..b41a65622b93 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4351,7 +4351,8 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src, } s64 btrfs_clone_file_range(struct file *src_file, loff_t off, - struct file *dst_file, loff_t destoff, u64 len) + struct file *dst_file, loff_t destoff, u64 len, + unsigned int flags) { int ret; diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index f914861f844f..f8ff06fc1c73 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -181,7 +181,8 @@ static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t } static s64 nfs42_clone_file_range(struct file *src_file, loff_t src_off, - struct file *dst_file, loff_t dst_off, u64 count) + struct file *dst_file, loff_t dst_off, u64 count, + unsigned int flags) { struct inode *dst_inode = file_inode(dst_file); struct nfs_server *server = NFS_SERVER(dst_inode); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index c4b78ee4a593..1ee6d3ecdac6 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2531,7 +2531,8 @@ static s64 ocfs2_file_clone_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - u64 len) + u64 len, + unsigned int flags) { int ret; diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 11e4aad7b783..3758954f2377 100644 --- 
a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -4843,7 +4843,7 @@ int ocfs2_reflink_remap_range(struct file *file_in, goto out_unlock; ret = vfs_clone_file_prep(file_in, pos_in, file_out, pos_out, - &len, is_dedupe); + &len, is_dedupe ? CLONERANGE_DEDUPE : 0); if (ret <= 0) goto out_unlock; diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 6d792d817538..440cb7a82834 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -488,7 +488,8 @@ static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in, } static s64 ovl_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len) + struct file *file_out, loff_t pos_out, u64 len, + unsigned int flags) { int ret; diff --git a/fs/read_write.c b/fs/read_write.c index f51751281454..7cfff497263b 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1592,7 +1592,8 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, s64 cloned; cloned = file_in->f_op->clone_file_range(file_in, pos_in, - file_out, pos_out, min(MAX_RW_COUNT, len)); + file_out, pos_out, min(MAX_RW_COUNT, len), + CLONERANGE_SHORT); if (cloned >= 0) { ret = cloned; goto done; @@ -1721,13 +1722,14 @@ static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write) */ int vfs_clone_file_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - u64 *len, bool is_dedupe) + u64 *len, unsigned int flags) { struct inode *inode_in = file_inode(file_in); struct inode *inode_out = file_inode(file_out); uint64_t nlen; loff_t isize; bool same_inode = (inode_in == inode_out); + bool is_dedupe = (flags & CLONERANGE_DEDUPE); int ret; /* Don't touch certain kinds of inodes */ @@ -1802,6 +1804,7 @@ int vfs_clone_file_range(struct file *file_in, loff_t pos_in, struct inode *inode_in = file_inode(file_in); struct inode *inode_out = file_inode(file_out); s64 cloned; + unsigned int flags = 0; int ret; if (S_ISDIR(inode_in->i_mode) || 
S_ISDIR(inode_out->i_mode)) @@ -1834,7 +1837,7 @@ int vfs_clone_file_range(struct file *file_in, loff_t pos_in, return ret; cloned = file_in->f_op->clone_file_range(file_in, pos_in, - file_out, pos_out, len); + file_out, pos_out, len, flags); if (cloned < 0) return cloned; else if (len && cloned != len) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index efa95e0d8cee..d5d6681ca714 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -925,7 +925,8 @@ xfs_file_clone_range( loff_t pos_in, struct file *file_out, loff_t pos_out, - u64 len) + u64 len, + unsigned int flags) { int ret; diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 1955e093e9ea..40684dd011ee 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1278,7 +1278,7 @@ xfs_reflink_remap_prep( goto out_unlock; ret = vfs_clone_file_prep(file_in, pos_in, file_out, pos_out, - &len, is_dedupe); + &len, is_dedupe ? CLONERANGE_DEDUPE : 0); if (ret <= 0) goto out_unlock; diff --git a/include/linux/fs.h b/include/linux/fs.h index e5755340e825..ae5685c31270 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1761,7 +1761,7 @@ struct file_operations { loff_t, size_t, unsigned int); s64 (*clone_file_range)(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - u64 count); + u64 count, unsigned int flags); s64 (*dedupe_file_range)(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, u64 count); @@ -1827,9 +1827,15 @@ extern ssize_t vfs_readv(struct file *, const struct iovec __user *, unsigned long, loff_t *, rwf_t); extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, loff_t, size_t, unsigned int); +/* Caller can handle a shortened operation. */ +#define CLONERANGE_SHORT (1 << 0) +/* End operation at the source file's EOF. */ +#define CLONERANGE_EOF (1 << 1) +/* Operation is actually dedupe, not clone. 
*/ +#define CLONERANGE_DEDUPE (1 << 2) extern int vfs_clone_file_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - u64 *count, bool is_dedupe); + u64 *count, unsigned int flags); extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, u64 len); extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,