Message ID | 20240520102033.9361-7-nj.shetty@samsung.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Implement copy offload support | expand |
On Mon, May 20, 2024 at 03:50:19PM +0530, Nitesh Shetty wrote: > For direct block device opened with O_DIRECT, use blkdev_copy_offload to > issue device copy offload, or use splice_copy_file_range in case > device copy offload capability is absent or the device files are not open > with O_DIRECT. > > Reviewed-by: Hannes Reinecke <hare@suse.de> > Signed-off-by: Anuj Gupta <anuj20.g@samsung.com> > Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com> > --- > block/fops.c | 26 ++++++++++++++++++++++++++ > 1 file changed, 26 insertions(+) > > diff --git a/block/fops.c b/block/fops.c > index 376265935714..5a4bba4f43aa 100644 > --- a/block/fops.c > +++ b/block/fops.c > @@ -17,6 +17,7 @@ > #include <linux/fs.h> > #include <linux/iomap.h> > #include <linux/module.h> > +#include <linux/splice.h> > #include "blk.h" > > static inline struct inode *bdev_file_inode(struct file *file) > @@ -754,6 +755,30 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) > return ret; > } > > +static ssize_t blkdev_copy_file_range(struct file *file_in, loff_t pos_in, > + struct file *file_out, loff_t pos_out, > + size_t len, unsigned int flags) > +{ > + struct block_device *in_bdev = I_BDEV(bdev_file_inode(file_in)); > + struct block_device *out_bdev = I_BDEV(bdev_file_inode(file_out)); > + ssize_t copied = 0; > + > + if ((in_bdev == out_bdev) && bdev_max_copy_sectors(in_bdev) && > + (file_in->f_iocb_flags & IOCB_DIRECT) && > + (file_out->f_iocb_flags & IOCB_DIRECT)) { > + copied = blkdev_copy_offload(in_bdev, pos_in, pos_out, len, > + NULL, NULL, GFP_KERNEL); > + if (copied < 0) > + copied = 0; > + } else { > + copied = splice_copy_file_range(file_in, pos_in + copied, > + file_out, pos_out + copied, > + len - copied); > + } This should not fall back to a page cache copy. We keep being told by application developers that if the fast hardware/filesystem offload fails, then an error should be returned so the application can determine what the fallback operation should be. It may well be that the application falls back to "copy through the page cache", but that is an application policy choice, not a something the kernel offload driver should be making mandatory. Userspace has to handle copy offload failure anyway, so they a fallback path regardless of whether copy_file_range() works on block devices or not... Cheers, Dave.
On 26/05/24 09:09AM, Dave Chinner wrote: >On Mon, May 20, 2024 at 03:50:19PM +0530, Nitesh Shetty wrote: >> For direct block device opened with O_DIRECT, use blkdev_copy_offload to >> issue device copy offload, or use splice_copy_file_range in case >> device copy offload capability is absent or the device files are not open >> with O_DIRECT. >> >> Reviewed-by: Hannes Reinecke <hare@suse.de> >> Signed-off-by: Anuj Gupta <anuj20.g@samsung.com> >> Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com> >> --- >> block/fops.c | 26 ++++++++++++++++++++++++++ >> 1 file changed, 26 insertions(+) >> >> diff --git a/block/fops.c b/block/fops.c >> index 376265935714..5a4bba4f43aa 100644 >> --- a/block/fops.c >> +++ b/block/fops.c >> @@ -17,6 +17,7 @@ >> #include <linux/fs.h> >> #include <linux/iomap.h> >> #include <linux/module.h> >> +#include <linux/splice.h> >> #include "blk.h" >> >> static inline struct inode *bdev_file_inode(struct file *file) >> @@ -754,6 +755,30 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) >> return ret; >> } >> >> +static ssize_t blkdev_copy_file_range(struct file *file_in, loff_t pos_in, >> + struct file *file_out, loff_t pos_out, >> + size_t len, unsigned int flags) >> +{ >> + struct block_device *in_bdev = I_BDEV(bdev_file_inode(file_in)); >> + struct block_device *out_bdev = I_BDEV(bdev_file_inode(file_out)); >> + ssize_t copied = 0; >> + >> + if ((in_bdev == out_bdev) && bdev_max_copy_sectors(in_bdev) && >> + (file_in->f_iocb_flags & IOCB_DIRECT) && >> + (file_out->f_iocb_flags & IOCB_DIRECT)) { >> + copied = blkdev_copy_offload(in_bdev, pos_in, pos_out, len, >> + NULL, NULL, GFP_KERNEL); >> + if (copied < 0) >> + copied = 0; >> + } else { >> + copied = splice_copy_file_range(file_in, pos_in + copied, >> + file_out, pos_out + copied, >> + len - copied); >> + } > >This should not fall back to a page cache copy. > >We keep being told by application developers that if the fast >hardware/filesystem offload fails, then an error should be returned >so the application can determine what the fallback operation should >be. > >It may well be that the application falls back to "copy through the >page cache", but that is an application policy choice, not a >something the kernel offload driver should be making mandatory. > >Userspace has to handle copy offload failure anyway, so they a >fallback path regardless of whether copy_file_range() works on block >devices or not... > Makes sense, We will remove fallback part in next version. Thank you, Nitesh Shetty
diff --git a/block/fops.c b/block/fops.c index 376265935714..5a4bba4f43aa 100644 --- a/block/fops.c +++ b/block/fops.c @@ -17,6 +17,7 @@ #include <linux/fs.h> #include <linux/iomap.h> #include <linux/module.h> +#include <linux/splice.h> #include "blk.h" static inline struct inode *bdev_file_inode(struct file *file) @@ -754,6 +755,30 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) return ret; } +static ssize_t blkdev_copy_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t len, unsigned int flags) +{ + struct block_device *in_bdev = I_BDEV(bdev_file_inode(file_in)); + struct block_device *out_bdev = I_BDEV(bdev_file_inode(file_out)); + ssize_t copied = 0; + + if ((in_bdev == out_bdev) && bdev_max_copy_sectors(in_bdev) && + (file_in->f_iocb_flags & IOCB_DIRECT) && + (file_out->f_iocb_flags & IOCB_DIRECT)) { + copied = blkdev_copy_offload(in_bdev, pos_in, pos_out, len, + NULL, NULL, GFP_KERNEL); + if (copied < 0) + copied = 0; + } else { + copied = splice_copy_file_range(file_in, pos_in + copied, + file_out, pos_out + copied, + len - copied); + } + + return copied; +} + #define BLKDEV_FALLOC_FL_SUPPORTED \ (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \ FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE) @@ -859,6 +884,7 @@ const struct file_operations def_blk_fops = { .splice_write = iter_file_splice_write, .fallocate = blkdev_fallocate, .fop_flags = FOP_BUFFER_RASYNC, + .copy_file_range = blkdev_copy_file_range, }; static __init int blkdev_init(void)