Message ID | 20230627183629.26571-5-nj.shetty@samsung.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Implement copy offload support | expand |
On 6/28/23 03:36, Nitesh Shetty wrote: > For direct block device opened with O_DIRECT, use copy_file_range to > issue device copy offload, and fallback to generic_copy_file_range incase > device copy offload capability is absent. ...if the device does not support copy offload or the device files are not open with O_DIRECT. No ? > Modify checks to allow bdevs to use copy_file_range. > > Suggested-by: Ming Lei <ming.lei@redhat.com> > Signed-off-by: Anuj Gupta <anuj20.g@samsung.com> > Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com> > --- > block/blk-lib.c | 26 ++++++++++++++++++++++++++ > block/fops.c | 20 ++++++++++++++++++++ > fs/read_write.c | 7 +++++-- > include/linux/blkdev.h | 4 ++++ > 4 files changed, 55 insertions(+), 2 deletions(-) > > diff --git a/block/blk-lib.c b/block/blk-lib.c > index 09e0d5d51d03..7d8e09a99254 100644 > --- a/block/blk-lib.c > +++ b/block/blk-lib.c > @@ -473,6 +473,32 @@ ssize_t blkdev_copy_offload( > } > EXPORT_SYMBOL_GPL(blkdev_copy_offload); > > +/* Copy source offset from source block device to destination block > + * device. Returns the length of bytes copied. > + */ Multi-line comment style: start with a "/*" line please. > +ssize_t blkdev_copy_offload_failfast( What is the "failfast" in the name for ? > + struct block_device *bdev_in, loff_t pos_in, > + struct block_device *bdev_out, loff_t pos_out, > + size_t len, gfp_t gfp_mask) > +{ > + struct request_queue *in_q = bdev_get_queue(bdev_in); > + struct request_queue *out_q = bdev_get_queue(bdev_out); > + ssize_t ret = 0; You do not need this initialization. > + > + if (blkdev_copy_sanity_check(bdev_in, pos_in, bdev_out, pos_out, len)) > + return 0; > + > + if (blk_queue_copy(in_q) && blk_queue_copy(out_q)) { Given that I think we do not allow copies between different devices, in_q and out_q should always be the same, no ? > + ret = __blkdev_copy_offload(bdev_in, pos_in, bdev_out, pos_out, > + len, NULL, NULL, gfp_mask); Same here. 
Why pass 2 bdevs if we only allow copies within the same device ? > + if (ret < 0) > + return 0; > + } > + > + return ret; return 0; > +} > +EXPORT_SYMBOL_GPL(blkdev_copy_offload_failfast); > + > static int __blkdev_issue_write_zeroes(struct block_device *bdev, > sector_t sector, sector_t nr_sects, gfp_t gfp_mask, > struct bio **biop, unsigned flags) > diff --git a/block/fops.c b/block/fops.c > index a286bf3325c5..a1576304f269 100644 > --- a/block/fops.c > +++ b/block/fops.c > @@ -621,6 +621,25 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) > return ret; > } > > +static ssize_t blkdev_copy_file_range(struct file *file_in, loff_t pos_in, > + struct file *file_out, loff_t pos_out, > + size_t len, unsigned int flags) > +{ > + struct block_device *in_bdev = I_BDEV(bdev_file_inode(file_in)); > + struct block_device *out_bdev = I_BDEV(bdev_file_inode(file_out)); > + ssize_t comp_len = 0; > + > + if ((file_in->f_iocb_flags & IOCB_DIRECT) && > + (file_out->f_iocb_flags & IOCB_DIRECT)) > + comp_len = blkdev_copy_offload_failfast(in_bdev, pos_in, > + out_bdev, pos_out, len, GFP_KERNEL); > + if (comp_len != len) > + comp_len = generic_copy_file_range(file_in, pos_in + comp_len, > + file_out, pos_out + comp_len, len - comp_len, flags); > + > + return comp_len; > +} > + > #define BLKDEV_FALLOC_FL_SUPPORTED \ > (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \ > FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE) > @@ -714,6 +733,7 @@ const struct file_operations def_blk_fops = { > .splice_read = filemap_splice_read, > .splice_write = iter_file_splice_write, > .fallocate = blkdev_fallocate, > + .copy_file_range = blkdev_copy_file_range, > }; > > static __init int blkdev_init(void) > diff --git a/fs/read_write.c b/fs/read_write.c > index b07de77ef126..d27148a2543f 100644 > --- a/fs/read_write.c > +++ b/fs/read_write.c > @@ -1447,7 +1447,8 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in, > return -EOVERFLOW; > > /* Shorten the copy 
to EOF */ > - size_in = i_size_read(inode_in); > + size_in = i_size_read(file_in->f_mapping->host); > + > if (pos_in >= size_in) > count = 0; > else > @@ -1708,7 +1709,9 @@ int generic_file_rw_checks(struct file *file_in, struct file *file_out) > /* Don't copy dirs, pipes, sockets... */ > if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) > return -EISDIR; > - if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) > + > + if ((!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) && > + (!S_ISBLK(inode_in->i_mode) || !S_ISBLK(inode_out->i_mode))) > return -EINVAL; > > if (!(file_in->f_mode & FMODE_READ) || > diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h > index c176bf6173c5..850168cad080 100644 > --- a/include/linux/blkdev.h > +++ b/include/linux/blkdev.h > @@ -1047,6 +1047,10 @@ ssize_t blkdev_copy_offload( > struct block_device *bdev_in, loff_t pos_in, > struct block_device *bdev_out, loff_t pos_out, > size_t len, cio_iodone_t end_io, void *private, gfp_t gfp_mask); > +ssize_t blkdev_copy_offload_failfast( > + struct block_device *bdev_in, loff_t pos_in, > + struct block_device *bdev_out, loff_t pos_out, > + size_t len, gfp_t gfp_mask); > struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len, > gfp_t gfp_mask); > void bio_map_kern_endio(struct bio *bio);
On 23/06/28 03:51PM, Damien Le Moal wrote: >On 6/28/23 03:36, Nitesh Shetty wrote: >> For direct block device opened with O_DIRECT, use copy_file_range to >> issue device copy offload, and fallback to generic_copy_file_range incase >> device copy offload capability is absent. > >...if the device does not support copy offload or the device files are not open >with O_DIRECT. > >No ? > Yes, you're right. We will fall back to generic_copy_file_range in either of these cases. >> Modify checks to allow bdevs to use copy_file_range. >> >> Suggested-by: Ming Lei <ming.lei@redhat.com> >> Signed-off-by: Anuj Gupta <anuj20.g@samsung.com> >> Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com> >> --- >> block/blk-lib.c | 26 ++++++++++++++++++++++++++ >> block/fops.c | 20 ++++++++++++++++++++ >> fs/read_write.c | 7 +++++-- >> include/linux/blkdev.h | 4 ++++ >> 4 files changed, 55 insertions(+), 2 deletions(-) >> >> diff --git a/block/blk-lib.c b/block/blk-lib.c >> index 09e0d5d51d03..7d8e09a99254 100644 >> --- a/block/blk-lib.c >> +++ b/block/blk-lib.c >> @@ -473,6 +473,32 @@ ssize_t blkdev_copy_offload( >> } >> EXPORT_SYMBOL_GPL(blkdev_copy_offload); >> >> +/* Copy source offset from source block device to destination block >> + * device. Returns the length of bytes copied. >> + */ > >Multi-line comment style: start with a "/*" line please. > acked >> +ssize_t blkdev_copy_offload_failfast( > >What is the "failfast" in the name for ? We don't want failed copy offload IOs to fall back to block layer copy emulation. We wanted an API that returns an error if offload fails. > >> + struct block_device *bdev_in, loff_t pos_in, >> + struct block_device *bdev_out, loff_t pos_out, >> + size_t len, gfp_t gfp_mask) >> +{ >> + struct request_queue *in_q = bdev_get_queue(bdev_in); >> + struct request_queue *out_q = bdev_get_queue(bdev_out); >> + ssize_t ret = 0; > >You do not need this initialization. 
> We need this initialization, because __blkdev_copy_offload returns the number of bytes copied or an error value. So we cannot return 0 in case of success/partial completion. blkdev_copy_offload_failfast is expected to return the number of bytes copied. >> + >> + if (blkdev_copy_sanity_check(bdev_in, pos_in, bdev_out, pos_out, len)) >> + return 0; >> + >> + if (blk_queue_copy(in_q) && blk_queue_copy(out_q)) { > >Given that I think we do not allow copies between different devices, in_q and >out_q should always be the same, no ? Acked, will update this. > >> + ret = __blkdev_copy_offload(bdev_in, pos_in, bdev_out, pos_out, >> + len, NULL, NULL, gfp_mask); > >Same here. Why pass 2 bdevs if we only allow copies within the same device ? > Acked, will update the function arguments to take a single bdev. >> + if (ret < 0) >> + return 0; >> + } >> + >> + return ret; > >return 0; > Nack, explained above. Thank you, Nitesh Shetty -- dm-devel mailing list dm-devel@redhat.com https://listman.redhat.com/mailman/listinfo/dm-devel
> +/* Copy source offset from source block device to destination block > + * device. Returns the length of bytes copied. > + */ > +ssize_t blkdev_copy_offload_failfast( > + struct block_device *bdev_in, loff_t pos_in, > + struct block_device *bdev_out, loff_t pos_out, > + size_t len, gfp_t gfp_mask) This is an odd and very misnamed interface. Either we have a blkdev_copy() interface that automatically falls back to a fallback (maybe with an opt-out), or we have separate blkdev_copy_offload/blkdev_copy_emulated interfaces and let the caller decide. But none of that really is "failfast". Also this needs to go into the helpers patch and not a patch that is supposed to just wire copying up for block device nodes. > index b07de77ef126..d27148a2543f 100644 > --- a/fs/read_write.c > +++ b/fs/read_write.c > @@ -1447,7 +1447,8 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in, > return -EOVERFLOW; > > /* Shorten the copy to EOF */ > - size_in = i_size_read(inode_in); > + size_in = i_size_read(file_in->f_mapping->host); generic_copy_file_checks needs to be fixed to use ->mapping->host both for inode_in and inode_out at the top of the file instead of this band aid. And that needs to be a separate patch with a Fixes tag. > @@ -1708,7 +1709,9 @@ int generic_file_rw_checks(struct file *file_in, struct file *file_out) > /* Don't copy dirs, pipes, sockets... */ > if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) > return -EISDIR; > - if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) > + > + if ((!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) && > + (!S_ISBLK(inode_in->i_mode) || !S_ISBLK(inode_out->i_mode))) This is using weird indentation, and might also not be doing exactly what we want. I think the better thing to do here is to: 1) check for the acceptable types only on the in inode 2) have a check that the mode matches for the in and out inodes And please do this as a separate prep patch instead of hiding it here. 
-- dm-devel mailing list dm-devel@redhat.com https://listman.redhat.com/mailman/listinfo/dm-devel
On 23/07/20 09:57AM, Christoph Hellwig wrote: >> +/* Copy source offset from source block device to destination block >> + * device. Returns the length of bytes copied. >> + */ >> +ssize_t blkdev_copy_offload_failfast( >> + struct block_device *bdev_in, loff_t pos_in, >> + struct block_device *bdev_out, loff_t pos_out, >> + size_t len, gfp_t gfp_mask) > >This is an odd and very misnamed interface. > >Either we have a blkdev_copy() interface that automatically falls back >to a fallback (maybe with an opt-out), or we have separate >blkdev_copy_offload/blkdev_copy_emulated interface and let the caller >decide. But none of that really is "failfast". > >Also this needs to go into the helpers patch and not a patch that is >supposed to just wire copying up for block device node. > Acked. >> index b07de77ef126..d27148a2543f 100644 >> --- a/fs/read_write.c >> +++ b/fs/read_write.c >> @@ -1447,7 +1447,8 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in, >> return -EOVERFLOW; >> >> /* Shorten the copy to EOF */ >> - size_in = i_size_read(inode_in); >> + size_in = i_size_read(file_in->f_mapping->host); > >generic_copy_file_checks needs to be fixed to use ->mapping->host both >for inode_in and inode_out at the top of the file instead of this >band aid. And that needs to be a separate patch with a Fixes tag. > Addressed below. >> @@ -1708,7 +1709,9 @@ int generic_file_rw_checks(struct file *file_in, struct file *file_out) >> /* Don't copy dirs, pipes, sockets... */ >> if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) >> return -EISDIR; >> - if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) >> + >> + if ((!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) && >> + (!S_ISBLK(inode_in->i_mode) || !S_ISBLK(inode_out->i_mode))) > >This is using weird indentation, and might also not be doing >exactly what we want. 
I think the better thing to do here is to: > > 1) check for the acceptable types only on the in inode > 2) have a check that the mode matches for the in and out inodes > >And please do this as a separate prep patch instead of hiding it here. > Agreed. We will send a separate patch that enables copy_file_range on block devices. Thank you, Nitesh Shetty -- dm-devel mailing list dm-devel@redhat.com https://listman.redhat.com/mailman/listinfo/dm-devel
diff --git a/block/blk-lib.c b/block/blk-lib.c index 09e0d5d51d03..7d8e09a99254 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -473,6 +473,32 @@ ssize_t blkdev_copy_offload( } EXPORT_SYMBOL_GPL(blkdev_copy_offload); +/* Copy source offset from source block device to destination block + * device. Returns the length of bytes copied. + */ +ssize_t blkdev_copy_offload_failfast( + struct block_device *bdev_in, loff_t pos_in, + struct block_device *bdev_out, loff_t pos_out, + size_t len, gfp_t gfp_mask) +{ + struct request_queue *in_q = bdev_get_queue(bdev_in); + struct request_queue *out_q = bdev_get_queue(bdev_out); + ssize_t ret = 0; + + if (blkdev_copy_sanity_check(bdev_in, pos_in, bdev_out, pos_out, len)) + return 0; + + if (blk_queue_copy(in_q) && blk_queue_copy(out_q)) { + ret = __blkdev_copy_offload(bdev_in, pos_in, bdev_out, pos_out, + len, NULL, NULL, gfp_mask); + if (ret < 0) + return 0; + } + + return ret; +} +EXPORT_SYMBOL_GPL(blkdev_copy_offload_failfast); + static int __blkdev_issue_write_zeroes(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct bio **biop, unsigned flags) diff --git a/block/fops.c b/block/fops.c index a286bf3325c5..a1576304f269 100644 --- a/block/fops.c +++ b/block/fops.c @@ -621,6 +621,25 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) return ret; } +static ssize_t blkdev_copy_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t len, unsigned int flags) +{ + struct block_device *in_bdev = I_BDEV(bdev_file_inode(file_in)); + struct block_device *out_bdev = I_BDEV(bdev_file_inode(file_out)); + ssize_t comp_len = 0; + + if ((file_in->f_iocb_flags & IOCB_DIRECT) && + (file_out->f_iocb_flags & IOCB_DIRECT)) + comp_len = blkdev_copy_offload_failfast(in_bdev, pos_in, + out_bdev, pos_out, len, GFP_KERNEL); + if (comp_len != len) + comp_len = generic_copy_file_range(file_in, pos_in + comp_len, + file_out, pos_out + comp_len, len - 
comp_len, flags); + + return comp_len; +} + #define BLKDEV_FALLOC_FL_SUPPORTED \ (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \ FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE) @@ -714,6 +733,7 @@ const struct file_operations def_blk_fops = { .splice_read = filemap_splice_read, .splice_write = iter_file_splice_write, .fallocate = blkdev_fallocate, + .copy_file_range = blkdev_copy_file_range, }; static __init int blkdev_init(void) diff --git a/fs/read_write.c b/fs/read_write.c index b07de77ef126..d27148a2543f 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1447,7 +1447,8 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in, return -EOVERFLOW; /* Shorten the copy to EOF */ - size_in = i_size_read(inode_in); + size_in = i_size_read(file_in->f_mapping->host); + if (pos_in >= size_in) count = 0; else @@ -1708,7 +1709,9 @@ int generic_file_rw_checks(struct file *file_in, struct file *file_out) /* Don't copy dirs, pipes, sockets... */ if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) return -EISDIR; - if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) + + if ((!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) && + (!S_ISBLK(inode_in->i_mode) || !S_ISBLK(inode_out->i_mode))) return -EINVAL; if (!(file_in->f_mode & FMODE_READ) || diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c176bf6173c5..850168cad080 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1047,6 +1047,10 @@ ssize_t blkdev_copy_offload( struct block_device *bdev_in, loff_t pos_in, struct block_device *bdev_out, loff_t pos_out, size_t len, cio_iodone_t end_io, void *private, gfp_t gfp_mask); +ssize_t blkdev_copy_offload_failfast( + struct block_device *bdev_in, loff_t pos_in, + struct block_device *bdev_out, loff_t pos_out, + size_t len, gfp_t gfp_mask); struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len, gfp_t gfp_mask); void bio_map_kern_endio(struct bio *bio);