Message ID | 20220207141348.4235-4-nj.shetty@samsung.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Add Copy offload support | expand |
Hi Nitesh, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on axboe-block/for-next] [also build test WARNING on next-20220207] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Nitesh-Shetty/block-make-bio_map_kern-non-static/20220207-231407 base: https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git for-next config: nios2-randconfig-r001-20220207 (https://download.01.org/0day-ci/archive/20220208/202202080650.48C9Ps00-lkp@intel.com/config) compiler: nios2-linux-gcc (GCC) 11.2.0 reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/0day-ci/linux/commit/12a9801a7301f1a1e2ea355c5a4438dab17894cf git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Nitesh-Shetty/block-make-bio_map_kern-non-static/20220207-231407 git checkout 12a9801a7301f1a1e2ea355c5a4438dab17894cf # save the config file to linux build tree mkdir build_dir COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross O=build_dir ARCH=nios2 SHELL=/bin/bash If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> All warnings (new ones prefixed by >>): >> block/blk-lib.c:185:5: warning: no previous prototype for 'blk_copy_offload' [-Wmissing-prototypes] 185 | int blk_copy_offload(struct block_device *src_bdev, int nr_srcs, | ^~~~~~~~~~~~~~~~ vim +/blk_copy_offload +185 block/blk-lib.c 180 181 /* 182 * blk_copy_offload - Use device's native copy offload feature 183 * Go through user provide payload, prepare new payload based on device's copy offload limits. 
184 */ > 185 int blk_copy_offload(struct block_device *src_bdev, int nr_srcs, 186 struct range_entry *rlist, struct block_device *dst_bdev, gfp_t gfp_mask) 187 { 188 struct request_queue *sq = bdev_get_queue(src_bdev); 189 struct request_queue *dq = bdev_get_queue(dst_bdev); 190 struct bio *read_bio, *write_bio; 191 struct copy_ctx *ctx; 192 struct cio *cio; 193 struct page *token; 194 sector_t src_blk, copy_len, dst_blk; 195 sector_t remaining, max_copy_len = LONG_MAX; 196 int ri = 0, ret = 0; 197 198 cio = kzalloc(sizeof(struct cio), GFP_KERNEL); 199 if (!cio) 200 return -ENOMEM; 201 atomic_set(&cio->refcount, 0); 202 cio->rlist = rlist; 203 204 max_copy_len = min3(max_copy_len, (sector_t)sq->limits.max_copy_sectors, 205 (sector_t)dq->limits.max_copy_sectors); 206 max_copy_len = min3(max_copy_len, (sector_t)sq->limits.max_copy_range_sectors, 207 (sector_t)dq->limits.max_copy_range_sectors) << SECTOR_SHIFT; 208 209 for (ri = 0; ri < nr_srcs; ri++) { 210 cio->rlist[ri].comp_len = rlist[ri].len; 211 for (remaining = rlist[ri].len, src_blk = rlist[ri].src, dst_blk = rlist[ri].dst; 212 remaining > 0; 213 remaining -= copy_len, src_blk += copy_len, dst_blk += copy_len) { 214 copy_len = min(remaining, max_copy_len); 215 216 token = alloc_page(gfp_mask); 217 if (unlikely(!token)) { 218 ret = -ENOMEM; 219 goto err_token; 220 } 221 222 read_bio = bio_alloc(src_bdev, 1, REQ_OP_READ | REQ_COPY | REQ_NOMERGE, 223 gfp_mask); 224 if (!read_bio) { 225 ret = -ENOMEM; 226 goto err_read_bio; 227 } 228 read_bio->bi_iter.bi_sector = src_blk >> SECTOR_SHIFT; 229 read_bio->bi_iter.bi_size = copy_len; 230 __bio_add_page(read_bio, token, PAGE_SIZE, 0); 231 ret = submit_bio_wait(read_bio); 232 if (ret) { 233 bio_put(read_bio); 234 goto err_read_bio; 235 } 236 bio_put(read_bio); 237 ctx = kzalloc(sizeof(struct copy_ctx), gfp_mask); 238 if (!ctx) { 239 ret = -ENOMEM; 240 goto err_read_bio; 241 } 242 ctx->cio = cio; 243 ctx->range_idx = ri; 244 ctx->start_sec = rlist[ri].src; 245 246 
write_bio = bio_alloc(dst_bdev, 1, REQ_OP_WRITE | REQ_COPY | REQ_NOMERGE, 247 gfp_mask); 248 if (!write_bio) { 249 ret = -ENOMEM; 250 goto err_read_bio; 251 } 252 253 write_bio->bi_iter.bi_sector = dst_blk >> SECTOR_SHIFT; 254 write_bio->bi_iter.bi_size = copy_len; 255 __bio_add_page(write_bio, token, PAGE_SIZE, 0); 256 write_bio->bi_end_io = bio_copy_end_io; 257 write_bio->bi_private = ctx; 258 atomic_inc(&cio->refcount); 259 submit_bio(write_bio); 260 } 261 } 262 263 /* Wait for completion of all IO's*/ 264 return cio_await_completion(cio); 265 266 err_read_bio: 267 __free_page(token); 268 err_token: 269 rlist[ri].comp_len = min_t(sector_t, rlist[ri].comp_len, (rlist[ri].len - remaining)); 270 271 cio->io_err = ret; 272 return cio_await_completion(cio); 273 } 274 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Hi Nitesh, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on axboe-block/for-next] [also build test WARNING on next-20220207] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Nitesh-Shetty/block-make-bio_map_kern-non-static/20220207-231407 base: https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git for-next config: hexagon-randconfig-r045-20220207 (https://download.01.org/0day-ci/archive/20220208/202202080735.lyaEe5Bq-lkp@intel.com/config) compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project 0d8850ae2cae85d49bea6ae0799fa41c7202c05c) reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/0day-ci/linux/commit/12a9801a7301f1a1e2ea355c5a4438dab17894cf git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Nitesh-Shetty/block-make-bio_map_kern-non-static/20220207-231407 git checkout 12a9801a7301f1a1e2ea355c5a4438dab17894cf # save the config file to linux build tree mkdir build_dir COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=hexagon SHELL=/bin/bash If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> All warnings (new ones prefixed by >>): >> block/blk-lib.c:185:5: warning: no previous prototype for function 'blk_copy_offload' [-Wmissing-prototypes] int blk_copy_offload(struct block_device *src_bdev, int nr_srcs, ^ block/blk-lib.c:185:1: note: declare 'static' if the function is not intended to be used outside of this translation unit int blk_copy_offload(struct block_device *src_bdev, int nr_srcs, ^ static 1 warning generated. 
vim +/blk_copy_offload +185 block/blk-lib.c 180 181 /* 182 * blk_copy_offload - Use device's native copy offload feature 183 * Go through user provide payload, prepare new payload based on device's copy offload limits. 184 */ > 185 int blk_copy_offload(struct block_device *src_bdev, int nr_srcs, 186 struct range_entry *rlist, struct block_device *dst_bdev, gfp_t gfp_mask) 187 { 188 struct request_queue *sq = bdev_get_queue(src_bdev); 189 struct request_queue *dq = bdev_get_queue(dst_bdev); 190 struct bio *read_bio, *write_bio; 191 struct copy_ctx *ctx; 192 struct cio *cio; 193 struct page *token; 194 sector_t src_blk, copy_len, dst_blk; 195 sector_t remaining, max_copy_len = LONG_MAX; 196 int ri = 0, ret = 0; 197 198 cio = kzalloc(sizeof(struct cio), GFP_KERNEL); 199 if (!cio) 200 return -ENOMEM; 201 atomic_set(&cio->refcount, 0); 202 cio->rlist = rlist; 203 204 max_copy_len = min3(max_copy_len, (sector_t)sq->limits.max_copy_sectors, 205 (sector_t)dq->limits.max_copy_sectors); 206 max_copy_len = min3(max_copy_len, (sector_t)sq->limits.max_copy_range_sectors, 207 (sector_t)dq->limits.max_copy_range_sectors) << SECTOR_SHIFT; 208 209 for (ri = 0; ri < nr_srcs; ri++) { 210 cio->rlist[ri].comp_len = rlist[ri].len; 211 for (remaining = rlist[ri].len, src_blk = rlist[ri].src, dst_blk = rlist[ri].dst; 212 remaining > 0; 213 remaining -= copy_len, src_blk += copy_len, dst_blk += copy_len) { 214 copy_len = min(remaining, max_copy_len); 215 216 token = alloc_page(gfp_mask); 217 if (unlikely(!token)) { 218 ret = -ENOMEM; 219 goto err_token; 220 } 221 222 read_bio = bio_alloc(src_bdev, 1, REQ_OP_READ | REQ_COPY | REQ_NOMERGE, 223 gfp_mask); 224 if (!read_bio) { 225 ret = -ENOMEM; 226 goto err_read_bio; 227 } 228 read_bio->bi_iter.bi_sector = src_blk >> SECTOR_SHIFT; 229 read_bio->bi_iter.bi_size = copy_len; 230 __bio_add_page(read_bio, token, PAGE_SIZE, 0); 231 ret = submit_bio_wait(read_bio); 232 if (ret) { 233 bio_put(read_bio); 234 goto err_read_bio; 235 } 236 
bio_put(read_bio); 237 ctx = kzalloc(sizeof(struct copy_ctx), gfp_mask); 238 if (!ctx) { 239 ret = -ENOMEM; 240 goto err_read_bio; 241 } 242 ctx->cio = cio; 243 ctx->range_idx = ri; 244 ctx->start_sec = rlist[ri].src; 245 246 write_bio = bio_alloc(dst_bdev, 1, REQ_OP_WRITE | REQ_COPY | REQ_NOMERGE, 247 gfp_mask); 248 if (!write_bio) { 249 ret = -ENOMEM; 250 goto err_read_bio; 251 } 252 253 write_bio->bi_iter.bi_sector = dst_blk >> SECTOR_SHIFT; 254 write_bio->bi_iter.bi_size = copy_len; 255 __bio_add_page(write_bio, token, PAGE_SIZE, 0); 256 write_bio->bi_end_io = bio_copy_end_io; 257 write_bio->bi_private = ctx; 258 atomic_inc(&cio->refcount); 259 submit_bio(write_bio); 260 } 261 } 262 263 /* Wait for completion of all IO's*/ 264 return cio_await_completion(cio); 265 266 err_read_bio: 267 __free_page(token); 268 err_token: 269 rlist[ri].comp_len = min_t(sector_t, rlist[ri].comp_len, (rlist[ri].len - remaining)); 270 271 cio->io_err = ret; 272 return cio_await_completion(cio); 273 } 274 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
On 2/7/22 23:13, Nitesh Shetty wrote: > Introduce blkdev_issue_copy which supports source and destination bdevs, > and a array of (source, destination and copy length) tuples. s/a/an > Introduce REQ_COP copy offload operation flag. Create a read-write REQ_COPY ? > bio pair with a token as payload and submitted to the device in order. > the read request populates token with source specific information which > is then passed with write request. > Ths design is courtsey Mikulas Patocka<mpatocka@>'s token based copy s/Ths design is courtsey/This design is courtesy of > > Larger copy operation may be divided if necessary by looking at device > limits. may or will ? by looking at -> depending on the ? > > Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com> > Signed-off-by: SelvaKumar S <selvakuma.s1@samsung.com> > Signed-off-by: Arnav Dawn <arnav.dawn@samsung.com> > --- > block/blk-lib.c | 216 ++++++++++++++++++++++++++++++++++++++ > block/blk-settings.c | 2 + > block/blk.h | 2 + > include/linux/blk_types.h | 20 ++++ > include/linux/blkdev.h | 3 + > include/uapi/linux/fs.h | 14 +++ > 6 files changed, 257 insertions(+) > > diff --git a/block/blk-lib.c b/block/blk-lib.c > index 1b8ced45e4e5..3ae2c27b566e 100644 > --- a/block/blk-lib.c > +++ b/block/blk-lib.c > @@ -135,6 +135,222 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, > } > EXPORT_SYMBOL(blkdev_issue_discard); > > +/* > + * Wait on and process all in-flight BIOs. This must only be called once > + * all bios have been issued so that the refcount can only decrease. > + * This just waits for all bios to make it through bio_copy_end_io. IO > + * errors are propagated through cio->io_error. 
> + */ > +static int cio_await_completion(struct cio *cio) > +{ > + int ret = 0; > + > + while (atomic_read(&cio->refcount)) { > + cio->waiter = current; > + __set_current_state(TASK_UNINTERRUPTIBLE); > + blk_io_schedule(); > + /* wake up sets us TASK_RUNNING */ > + cio->waiter = NULL; > + ret = cio->io_err; Why is this in the loop ? > + } > + kvfree(cio); > + > + return ret; > +} > + > +static void bio_copy_end_io(struct bio *bio) > +{ > + struct copy_ctx *ctx = bio->bi_private; > + struct cio *cio = ctx->cio; > + sector_t clen; > + int ri = ctx->range_idx; > + > + if (bio->bi_status) { > + cio->io_err = bio->bi_status; > + clen = (bio->bi_iter.bi_sector - ctx->start_sec) << SECTOR_SHIFT; > + cio->rlist[ri].comp_len = min_t(sector_t, clen, cio->rlist[ri].comp_len); > + } > + __free_page(bio->bi_io_vec[0].bv_page); > + kfree(ctx); > + bio_put(bio); > + > + if (atomic_dec_and_test(&cio->refcount) && cio->waiter) > + wake_up_process(cio->waiter); This looks racy: the cio->waiter test and wakeup are not atomic. > +} > + > +/* > + * blk_copy_offload - Use device's native copy offload feature > + * Go through user provide payload, prepare new payload based on device's copy offload limits. 
> + */ > +int blk_copy_offload(struct block_device *src_bdev, int nr_srcs, > + struct range_entry *rlist, struct block_device *dst_bdev, gfp_t gfp_mask) > +{ > + struct request_queue *sq = bdev_get_queue(src_bdev); > + struct request_queue *dq = bdev_get_queue(dst_bdev); > + struct bio *read_bio, *write_bio; > + struct copy_ctx *ctx; > + struct cio *cio; > + struct page *token; > + sector_t src_blk, copy_len, dst_blk; > + sector_t remaining, max_copy_len = LONG_MAX; > + int ri = 0, ret = 0; > + > + cio = kzalloc(sizeof(struct cio), GFP_KERNEL); > + if (!cio) > + return -ENOMEM; > + atomic_set(&cio->refcount, 0); > + cio->rlist = rlist; > + > + max_copy_len = min3(max_copy_len, (sector_t)sq->limits.max_copy_sectors, > + (sector_t)dq->limits.max_copy_sectors); sq->limits.max_copy_sectors is already by definition smaller than LONG_MAX, so there is no need for the min3 here. > + max_copy_len = min3(max_copy_len, (sector_t)sq->limits.max_copy_range_sectors, > + (sector_t)dq->limits.max_copy_range_sectors) << SECTOR_SHIFT;> + > + for (ri = 0; ri < nr_srcs; ri++) { > + cio->rlist[ri].comp_len = rlist[ri].len; > + for (remaining = rlist[ri].len, src_blk = rlist[ri].src, dst_blk = rlist[ri].dst; > + remaining > 0; > + remaining -= copy_len, src_blk += copy_len, dst_blk += copy_len) { This is unreadable. 
> + copy_len = min(remaining, max_copy_len); > + > + token = alloc_page(gfp_mask); > + if (unlikely(!token)) { > + ret = -ENOMEM; > + goto err_token; > + } > + > + read_bio = bio_alloc(src_bdev, 1, REQ_OP_READ | REQ_COPY | REQ_NOMERGE, > + gfp_mask); > + if (!read_bio) { > + ret = -ENOMEM; > + goto err_read_bio; > + } > + read_bio->bi_iter.bi_sector = src_blk >> SECTOR_SHIFT; > + read_bio->bi_iter.bi_size = copy_len; > + __bio_add_page(read_bio, token, PAGE_SIZE, 0); > + ret = submit_bio_wait(read_bio); > + if (ret) { > + bio_put(read_bio); > + goto err_read_bio; > + } > + bio_put(read_bio); > + ctx = kzalloc(sizeof(struct copy_ctx), gfp_mask); > + if (!ctx) { > + ret = -ENOMEM; > + goto err_read_bio; > + } This should be done before the read. > + ctx->cio = cio; > + ctx->range_idx = ri; > + ctx->start_sec = rlist[ri].src; > + > + write_bio = bio_alloc(dst_bdev, 1, REQ_OP_WRITE | REQ_COPY | REQ_NOMERGE, > + gfp_mask); > + if (!write_bio) { > + ret = -ENOMEM; > + goto err_read_bio; > + } > + > + write_bio->bi_iter.bi_sector = dst_blk >> SECTOR_SHIFT; > + write_bio->bi_iter.bi_size = copy_len; > + __bio_add_page(write_bio, token, PAGE_SIZE, 0); > + write_bio->bi_end_io = bio_copy_end_io; > + write_bio->bi_private = ctx; > + atomic_inc(&cio->refcount); > + submit_bio(write_bio); > + } > + } > + > + /* Wait for completion of all IO's*/ > + return cio_await_completion(cio); > + > +err_read_bio: > + __free_page(token); > +err_token: > + rlist[ri].comp_len = min_t(sector_t, rlist[ri].comp_len, (rlist[ri].len - remaining)); > + > + cio->io_err = ret; > + return cio_await_completion(cio); > +} > + > +static inline int blk_copy_sanity_check(struct block_device *src_bdev, > + struct block_device *dst_bdev, struct range_entry *rlist, int nr) > +{ > + unsigned int align_mask = max( > + bdev_logical_block_size(dst_bdev), bdev_logical_block_size(src_bdev)) - 1; > + sector_t len = 0; > + int i; > + > + for (i = 0; i < nr; i++) { > + if (rlist[i].len) > + len += rlist[i].len; > + 
else > + return -EINVAL; > + if ((rlist[i].dst & align_mask) || (rlist[i].src & align_mask) || > + (rlist[i].len & align_mask)) > + return -EINVAL; > + rlist[i].comp_len = 0; > + } > + > + if (!len && len >= MAX_COPY_TOTAL_LENGTH) > + return -EINVAL; > + > + return 0; > +} > + > +static inline bool blk_check_copy_offload(struct request_queue *src_q, > + struct request_queue *dest_q) > +{ > + if (dest_q->limits.copy_offload == BLK_COPY_OFFLOAD && > + src_q->limits.copy_offload == BLK_COPY_OFFLOAD) > + return true; > + > + return false; > +} > + > +/* > + * blkdev_issue_copy - queue a copy > + * @src_bdev: source block device > + * @nr_srcs: number of source ranges to copy > + * @src_rlist: array of source ranges > + * @dest_bdev: destination block device > + * @gfp_mask: memory allocation flags (for bio_alloc) > + * @flags: BLKDEV_COPY_* flags to control behaviour > + * > + * Description: > + * Copy source ranges from source block device to destination block device. > + * length of a source range cannot be zero. 
> + */ > +int blkdev_issue_copy(struct block_device *src_bdev, int nr, > + struct range_entry *rlist, struct block_device *dest_bdev, > + gfp_t gfp_mask, int flags) > +{ > + struct request_queue *src_q = bdev_get_queue(src_bdev); > + struct request_queue *dest_q = bdev_get_queue(dest_bdev); > + int ret = -EINVAL; > + > + if (!src_q || !dest_q) > + return -ENXIO; > + > + if (!nr) > + return -EINVAL; > + > + if (nr >= MAX_COPY_NR_RANGE) > + return -EINVAL; > + > + if (bdev_read_only(dest_bdev)) > + return -EPERM; > + > + ret = blk_copy_sanity_check(src_bdev, dest_bdev, rlist, nr); > + if (ret) > + return ret; > + > + if (blk_check_copy_offload(src_q, dest_q)) > + ret = blk_copy_offload(src_bdev, nr, rlist, dest_bdev, gfp_mask); > + > + return ret; > +} > +EXPORT_SYMBOL(blkdev_issue_copy); > + > /** > * __blkdev_issue_write_same - generate number of bios with same page > * @bdev: target blockdev > diff --git a/block/blk-settings.c b/block/blk-settings.c > index 818454552cf8..4c8d48b8af25 100644 > --- a/block/blk-settings.c > +++ b/block/blk-settings.c > @@ -545,6 +545,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, > t->max_segment_size = min_not_zero(t->max_segment_size, > b->max_segment_size); > > + t->max_copy_sectors = min_not_zero(t->max_copy_sectors, b->max_copy_sectors); Why min_not_zero ? If one of the underlying drive does not support copy offload, you cannot report that the top drive does. > + > t->misaligned |= b->misaligned; > > alignment = queue_limit_alignment_offset(b, start); > diff --git a/block/blk.h b/block/blk.h > index abb663a2a147..94d2b055750b 100644 > --- a/block/blk.h > +++ b/block/blk.h > @@ -292,6 +292,8 @@ static inline bool blk_may_split(struct request_queue *q, struct bio *bio) > break; > } > > + if (unlikely(op_is_copy(bio->bi_opf))) > + return false; > /* > * All drivers must accept single-segments bios that are <= PAGE_SIZE. 
> * This is a quick and dirty check that relies on the fact that > diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h > index 5561e58d158a..0a3fee8ad61c 100644 > --- a/include/linux/blk_types.h > +++ b/include/linux/blk_types.h > @@ -418,6 +418,7 @@ enum req_flag_bits { > /* for driver use */ > __REQ_DRV, > __REQ_SWAP, /* swapping request. */ > + __REQ_COPY, /* copy request*/ > __REQ_NR_BITS, /* stops here */ > }; > > @@ -442,6 +443,7 @@ enum req_flag_bits { > > #define REQ_DRV (1ULL << __REQ_DRV) > #define REQ_SWAP (1ULL << __REQ_SWAP) > +#define REQ_COPY (1ULL << __REQ_COPY) > > #define REQ_FAILFAST_MASK \ > (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) > @@ -498,6 +500,11 @@ static inline bool op_is_discard(unsigned int op) > return (op & REQ_OP_MASK) == REQ_OP_DISCARD; > } > > +static inline bool op_is_copy(unsigned int op) > +{ > + return (op & REQ_COPY); > +} > + > /* > * Check if a bio or request operation is a zone management operation, with > * the exception of REQ_OP_ZONE_RESET_ALL which is treated as a special case > @@ -532,4 +539,17 @@ struct blk_rq_stat { > u64 batch; > }; > > +struct cio { > + atomic_t refcount; > + blk_status_t io_err; > + struct range_entry *rlist; > + struct task_struct *waiter; /* waiting task (NULL if none) */ > +}; > + > +struct copy_ctx { > + int range_idx; > + sector_t start_sec; > + struct cio *cio; > +}; > + > #endif /* __LINUX_BLK_TYPES_H */ > diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h > index f63ae50f1de3..15597488040c 100644 > --- a/include/linux/blkdev.h > +++ b/include/linux/blkdev.h > @@ -1120,6 +1120,9 @@ extern int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, > struct bio **biop); > struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len, > gfp_t gfp_mask); > +int blkdev_issue_copy(struct block_device *src_bdev, int nr_srcs, > + struct range_entry *src_rlist, struct block_device *dest_bdev, > + gfp_t gfp_mask, int 
flags); > > #define BLKDEV_ZERO_NOUNMAP (1 << 0) /* do not free blocks */ > #define BLKDEV_ZERO_NOFALLBACK (1 << 1) /* don't write explicit zeroes */ > diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h > index bdf7b404b3e7..55bca8f6e8ed 100644 > --- a/include/uapi/linux/fs.h > +++ b/include/uapi/linux/fs.h > @@ -64,6 +64,20 @@ struct fstrim_range { > __u64 minlen; > }; > > +/* Maximum no of entries supported */ > +#define MAX_COPY_NR_RANGE (1 << 12) > + > +/* maximum total copy length */ > +#define MAX_COPY_TOTAL_LENGTH (1 << 21) > + > +/* Source range entry for copy */ > +struct range_entry { > + __u64 src; > + __u64 dst; > + __u64 len; > + __u64 comp_len; > +}; > + > /* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */ > #define FILE_DEDUPE_RANGE_SAME 0 > #define FILE_DEDUPE_RANGE_DIFFERS 1
Hi Nitesh, url: https://github.com/0day-ci/linux/commits/Nitesh-Shetty/block-make-bio_map_kern-non-static/20220207-231407 base: https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git for-next config: i386-randconfig-m021-20220207 (https://download.01.org/0day-ci/archive/20220209/202202090703.U5riBMIn-lkp@intel.com/config) compiler: gcc-9 (Debian 9.3.0-22) 9.3.0 If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> Reported-by: Dan Carpenter <dan.carpenter@oracle.com> smatch warnings: block/blk-lib.c:272 blk_copy_offload() warn: possible memory leak of 'ctx' vim +/ctx +272 block/blk-lib.c 12a9801a7301f1 Nitesh Shetty 2022-02-07 185 int blk_copy_offload(struct block_device *src_bdev, int nr_srcs, 12a9801a7301f1 Nitesh Shetty 2022-02-07 186 struct range_entry *rlist, struct block_device *dst_bdev, gfp_t gfp_mask) 12a9801a7301f1 Nitesh Shetty 2022-02-07 187 { 12a9801a7301f1 Nitesh Shetty 2022-02-07 188 struct request_queue *sq = bdev_get_queue(src_bdev); 12a9801a7301f1 Nitesh Shetty 2022-02-07 189 struct request_queue *dq = bdev_get_queue(dst_bdev); 12a9801a7301f1 Nitesh Shetty 2022-02-07 190 struct bio *read_bio, *write_bio; 12a9801a7301f1 Nitesh Shetty 2022-02-07 191 struct copy_ctx *ctx; 12a9801a7301f1 Nitesh Shetty 2022-02-07 192 struct cio *cio; 12a9801a7301f1 Nitesh Shetty 2022-02-07 193 struct page *token; 12a9801a7301f1 Nitesh Shetty 2022-02-07 194 sector_t src_blk, copy_len, dst_blk; 12a9801a7301f1 Nitesh Shetty 2022-02-07 195 sector_t remaining, max_copy_len = LONG_MAX; 12a9801a7301f1 Nitesh Shetty 2022-02-07 196 int ri = 0, ret = 0; 12a9801a7301f1 Nitesh Shetty 2022-02-07 197 12a9801a7301f1 Nitesh Shetty 2022-02-07 198 cio = kzalloc(sizeof(struct cio), GFP_KERNEL); 12a9801a7301f1 Nitesh Shetty 2022-02-07 199 if (!cio) 12a9801a7301f1 Nitesh Shetty 2022-02-07 200 return -ENOMEM; 12a9801a7301f1 Nitesh Shetty 2022-02-07 201 atomic_set(&cio->refcount, 0); 12a9801a7301f1 Nitesh Shetty 2022-02-07 
202 cio->rlist = rlist; 12a9801a7301f1 Nitesh Shetty 2022-02-07 203 12a9801a7301f1 Nitesh Shetty 2022-02-07 204 max_copy_len = min3(max_copy_len, (sector_t)sq->limits.max_copy_sectors, 12a9801a7301f1 Nitesh Shetty 2022-02-07 205 (sector_t)dq->limits.max_copy_sectors); 12a9801a7301f1 Nitesh Shetty 2022-02-07 206 max_copy_len = min3(max_copy_len, (sector_t)sq->limits.max_copy_range_sectors, 12a9801a7301f1 Nitesh Shetty 2022-02-07 207 (sector_t)dq->limits.max_copy_range_sectors) << SECTOR_SHIFT; 12a9801a7301f1 Nitesh Shetty 2022-02-07 208 12a9801a7301f1 Nitesh Shetty 2022-02-07 209 for (ri = 0; ri < nr_srcs; ri++) { 12a9801a7301f1 Nitesh Shetty 2022-02-07 210 cio->rlist[ri].comp_len = rlist[ri].len; 12a9801a7301f1 Nitesh Shetty 2022-02-07 211 for (remaining = rlist[ri].len, src_blk = rlist[ri].src, dst_blk = rlist[ri].dst; 12a9801a7301f1 Nitesh Shetty 2022-02-07 212 remaining > 0; 12a9801a7301f1 Nitesh Shetty 2022-02-07 213 remaining -= copy_len, src_blk += copy_len, dst_blk += copy_len) { 12a9801a7301f1 Nitesh Shetty 2022-02-07 214 copy_len = min(remaining, max_copy_len); 12a9801a7301f1 Nitesh Shetty 2022-02-07 215 12a9801a7301f1 Nitesh Shetty 2022-02-07 216 token = alloc_page(gfp_mask); 12a9801a7301f1 Nitesh Shetty 2022-02-07 217 if (unlikely(!token)) { 12a9801a7301f1 Nitesh Shetty 2022-02-07 218 ret = -ENOMEM; 12a9801a7301f1 Nitesh Shetty 2022-02-07 219 goto err_token; 12a9801a7301f1 Nitesh Shetty 2022-02-07 220 } 12a9801a7301f1 Nitesh Shetty 2022-02-07 221 12a9801a7301f1 Nitesh Shetty 2022-02-07 222 read_bio = bio_alloc(src_bdev, 1, REQ_OP_READ | REQ_COPY | REQ_NOMERGE, 12a9801a7301f1 Nitesh Shetty 2022-02-07 223 gfp_mask); 12a9801a7301f1 Nitesh Shetty 2022-02-07 224 if (!read_bio) { 12a9801a7301f1 Nitesh Shetty 2022-02-07 225 ret = -ENOMEM; 12a9801a7301f1 Nitesh Shetty 2022-02-07 226 goto err_read_bio; 12a9801a7301f1 Nitesh Shetty 2022-02-07 227 } 12a9801a7301f1 Nitesh Shetty 2022-02-07 228 read_bio->bi_iter.bi_sector = src_blk >> SECTOR_SHIFT; 12a9801a7301f1 
Nitesh Shetty 2022-02-07 229 read_bio->bi_iter.bi_size = copy_len; 12a9801a7301f1 Nitesh Shetty 2022-02-07 230 __bio_add_page(read_bio, token, PAGE_SIZE, 0); 12a9801a7301f1 Nitesh Shetty 2022-02-07 231 ret = submit_bio_wait(read_bio); 12a9801a7301f1 Nitesh Shetty 2022-02-07 232 if (ret) { 12a9801a7301f1 Nitesh Shetty 2022-02-07 233 bio_put(read_bio); 12a9801a7301f1 Nitesh Shetty 2022-02-07 234 goto err_read_bio; 12a9801a7301f1 Nitesh Shetty 2022-02-07 235 } 12a9801a7301f1 Nitesh Shetty 2022-02-07 236 bio_put(read_bio); 12a9801a7301f1 Nitesh Shetty 2022-02-07 237 ctx = kzalloc(sizeof(struct copy_ctx), gfp_mask); 12a9801a7301f1 Nitesh Shetty 2022-02-07 238 if (!ctx) { 12a9801a7301f1 Nitesh Shetty 2022-02-07 239 ret = -ENOMEM; 12a9801a7301f1 Nitesh Shetty 2022-02-07 240 goto err_read_bio; 12a9801a7301f1 Nitesh Shetty 2022-02-07 241 } 12a9801a7301f1 Nitesh Shetty 2022-02-07 242 ctx->cio = cio; 12a9801a7301f1 Nitesh Shetty 2022-02-07 243 ctx->range_idx = ri; 12a9801a7301f1 Nitesh Shetty 2022-02-07 244 ctx->start_sec = rlist[ri].src; 12a9801a7301f1 Nitesh Shetty 2022-02-07 245 12a9801a7301f1 Nitesh Shetty 2022-02-07 246 write_bio = bio_alloc(dst_bdev, 1, REQ_OP_WRITE | REQ_COPY | REQ_NOMERGE, 12a9801a7301f1 Nitesh Shetty 2022-02-07 247 gfp_mask); 12a9801a7301f1 Nitesh Shetty 2022-02-07 248 if (!write_bio) { Please call kfree(ctx) before the goto. 
12a9801a7301f1 Nitesh Shetty 2022-02-07 249 ret = -ENOMEM; 12a9801a7301f1 Nitesh Shetty 2022-02-07 250 goto err_read_bio; 12a9801a7301f1 Nitesh Shetty 2022-02-07 251 } 12a9801a7301f1 Nitesh Shetty 2022-02-07 252 12a9801a7301f1 Nitesh Shetty 2022-02-07 253 write_bio->bi_iter.bi_sector = dst_blk >> SECTOR_SHIFT; 12a9801a7301f1 Nitesh Shetty 2022-02-07 254 write_bio->bi_iter.bi_size = copy_len; 12a9801a7301f1 Nitesh Shetty 2022-02-07 255 __bio_add_page(write_bio, token, PAGE_SIZE, 0); 12a9801a7301f1 Nitesh Shetty 2022-02-07 256 write_bio->bi_end_io = bio_copy_end_io; 12a9801a7301f1 Nitesh Shetty 2022-02-07 257 write_bio->bi_private = ctx; 12a9801a7301f1 Nitesh Shetty 2022-02-07 258 atomic_inc(&cio->refcount); 12a9801a7301f1 Nitesh Shetty 2022-02-07 259 submit_bio(write_bio); 12a9801a7301f1 Nitesh Shetty 2022-02-07 260 } 12a9801a7301f1 Nitesh Shetty 2022-02-07 261 } 12a9801a7301f1 Nitesh Shetty 2022-02-07 262 12a9801a7301f1 Nitesh Shetty 2022-02-07 263 /* Wait for completion of all IO's*/ 12a9801a7301f1 Nitesh Shetty 2022-02-07 264 return cio_await_completion(cio); 12a9801a7301f1 Nitesh Shetty 2022-02-07 265 12a9801a7301f1 Nitesh Shetty 2022-02-07 266 err_read_bio: 12a9801a7301f1 Nitesh Shetty 2022-02-07 267 __free_page(token); 12a9801a7301f1 Nitesh Shetty 2022-02-07 268 err_token: 12a9801a7301f1 Nitesh Shetty 2022-02-07 269 rlist[ri].comp_len = min_t(sector_t, rlist[ri].comp_len, (rlist[ri].len - remaining)); 12a9801a7301f1 Nitesh Shetty 2022-02-07 270 12a9801a7301f1 Nitesh Shetty 2022-02-07 271 cio->io_err = ret; 12a9801a7301f1 Nitesh Shetty 2022-02-07 @272 return cio_await_completion(cio); 12a9801a7301f1 Nitesh Shetty 2022-02-07 273 } --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
On Tue, Feb 08, 2022 at 04:21:19PM +0900, Damien Le Moal wrote: > On 2/7/22 23:13, Nitesh Shetty wrote: > > Introduce blkdev_issue_copy which supports source and destination bdevs, > > and a array of (source, destination and copy length) tuples. > > s/a/an > acked > > Introduce REQ_COP copy offload operation flag. Create a read-write > > REQ_COPY ? > acked > > bio pair with a token as payload and submitted to the device in order. > > the read request populates token with source specific information which > > is then passed with write request. > > Ths design is courtsey Mikulas Patocka<mpatocka@>'s token based copy > > s/Ths design is courtsey/This design is courtesy of > acked > > > > Larger copy operation may be divided if necessary by looking at device > > limits. > > may or will ? > by looking at -> depending on the ? > Larger copy will be divided, based on max_copy_sectors, max_copy_range_sectors limits. Will add in next series. > > > > Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com> > > Signed-off-by: SelvaKumar S <selvakuma.s1@samsung.com> > > Signed-off-by: Arnav Dawn <arnav.dawn@samsung.com> > > --- > > block/blk-lib.c | 216 ++++++++++++++++++++++++++++++++++++++ > > block/blk-settings.c | 2 + > > block/blk.h | 2 + > > include/linux/blk_types.h | 20 ++++ > > include/linux/blkdev.h | 3 + > > include/uapi/linux/fs.h | 14 +++ > > 6 files changed, 257 insertions(+) > > > > diff --git a/block/blk-lib.c b/block/blk-lib.c > > index 1b8ced45e4e5..3ae2c27b566e 100644 > > --- a/block/blk-lib.c > > +++ b/block/blk-lib.c > > @@ -135,6 +135,222 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, > > } > > EXPORT_SYMBOL(blkdev_issue_discard); > > > > +/* > > + * Wait on and process all in-flight BIOs. This must only be called once > > + * all bios have been issued so that the refcount can only decrease. > > + * This just waits for all bios to make it through bio_copy_end_io. IO > > + * errors are propagated through cio->io_error. 
> > + */ > > +static int cio_await_completion(struct cio *cio) > > +{ > > + int ret = 0; > > + > > + while (atomic_read(&cio->refcount)) { > > + cio->waiter = current; > > + __set_current_state(TASK_UNINTERRUPTIBLE); > > + blk_io_schedule(); > > + /* wake up sets us TASK_RUNNING */ > > + cio->waiter = NULL; > > + ret = cio->io_err; > > Why is this in the loop ? > agree. > > + } > > + kvfree(cio); > > + > > + return ret; > > +} > > + > > +static void bio_copy_end_io(struct bio *bio) > > +{ > > + struct copy_ctx *ctx = bio->bi_private; > > + struct cio *cio = ctx->cio; > > + sector_t clen; > > + int ri = ctx->range_idx; > > + > > + if (bio->bi_status) { > > + cio->io_err = bio->bi_status; > > + clen = (bio->bi_iter.bi_sector - ctx->start_sec) << SECTOR_SHIFT; > > + cio->rlist[ri].comp_len = min_t(sector_t, clen, cio->rlist[ri].comp_len); > > + } > > + __free_page(bio->bi_io_vec[0].bv_page); > > + kfree(ctx); > > + bio_put(bio); > > + > > + if (atomic_dec_and_test(&cio->refcount) && cio->waiter) > > + wake_up_process(cio->waiter); > > This looks racy: the cio->waiter test and wakeup are not atomic. agreed, will remove atomic for refcount and add if check and wakeup in locks in next version. > > +} > > + > > +/* > > + * blk_copy_offload - Use device's native copy offload feature > > + * Go through user provide payload, prepare new payload based on device's copy offload limits. 
> > + */ > > +int blk_copy_offload(struct block_device *src_bdev, int nr_srcs, > > + struct range_entry *rlist, struct block_device *dst_bdev, gfp_t gfp_mask) > > +{ > > + struct request_queue *sq = bdev_get_queue(src_bdev); > > + struct request_queue *dq = bdev_get_queue(dst_bdev); > > + struct bio *read_bio, *write_bio; > > + struct copy_ctx *ctx; > > + struct cio *cio; > > + struct page *token; > > + sector_t src_blk, copy_len, dst_blk; > > + sector_t remaining, max_copy_len = LONG_MAX; > > + int ri = 0, ret = 0; > > + > > + cio = kzalloc(sizeof(struct cio), GFP_KERNEL); > > + if (!cio) > > + return -ENOMEM; > > + atomic_set(&cio->refcount, 0); > > + cio->rlist = rlist; > > + > > + max_copy_len = min3(max_copy_len, (sector_t)sq->limits.max_copy_sectors, > > + (sector_t)dq->limits.max_copy_sectors); > > sq->limits.max_copy_sectors is already by definition smaller than > LONG_MAX, so there is no need for the min3 here. > acked > > + max_copy_len = min3(max_copy_len, (sector_t)sq->limits.max_copy_range_sectors, > > + (sector_t)dq->limits.max_copy_range_sectors) << SECTOR_SHIFT; > > + > > + for (ri = 0; ri < nr_srcs; ri++) { > > + cio->rlist[ri].comp_len = rlist[ri].len; > > + for (remaining = rlist[ri].len, src_blk = rlist[ri].src, dst_blk = rlist[ri].dst; > > + remaining > 0; > > + remaining -= copy_len, src_blk += copy_len, dst_blk += copy_len) { > > This is unreadable. > Sure, I will simplify the loops in the next version. 
> > + copy_len = min(remaining, max_copy_len); > > + > > + token = alloc_page(gfp_mask); > > + if (unlikely(!token)) { > > + ret = -ENOMEM; > > + goto err_token; > > + } > > + > > + read_bio = bio_alloc(src_bdev, 1, REQ_OP_READ | REQ_COPY | REQ_NOMERGE, > > + gfp_mask); > > + if (!read_bio) { > > + ret = -ENOMEM; > > + goto err_read_bio; > > + } > > + read_bio->bi_iter.bi_sector = src_blk >> SECTOR_SHIFT; > > + read_bio->bi_iter.bi_size = copy_len; > > + __bio_add_page(read_bio, token, PAGE_SIZE, 0); > > + ret = submit_bio_wait(read_bio); > > + if (ret) { > > + bio_put(read_bio); > > + goto err_read_bio; > > + } > > + bio_put(read_bio); > > + ctx = kzalloc(sizeof(struct copy_ctx), gfp_mask); > > + if (!ctx) { > > + ret = -ENOMEM; > > + goto err_read_bio; > > + } > > This should be done before the read. > acked. > > + ctx->cio = cio; > > + ctx->range_idx = ri; > > + ctx->start_sec = rlist[ri].src; > > + > > + write_bio = bio_alloc(dst_bdev, 1, REQ_OP_WRITE | REQ_COPY | REQ_NOMERGE, > > + gfp_mask); > > + if (!write_bio) { > > + ret = -ENOMEM; > > + goto err_read_bio; > > + } > > + > > + write_bio->bi_iter.bi_sector = dst_blk >> SECTOR_SHIFT; > > + write_bio->bi_iter.bi_size = copy_len; > > + __bio_add_page(write_bio, token, PAGE_SIZE, 0); > > + write_bio->bi_end_io = bio_copy_end_io; > > + write_bio->bi_private = ctx; > > + atomic_inc(&cio->refcount); > > + submit_bio(write_bio); > > + } > > + } > > + > > + /* Wait for completion of all IO's*/ > > + return cio_await_completion(cio); > > + > > +err_read_bio: > > + __free_page(token); > > +err_token: > > + rlist[ri].comp_len = min_t(sector_t, rlist[ri].comp_len, (rlist[ri].len - remaining)); > > + > > + cio->io_err = ret; > > + return cio_await_completion(cio); > > +} > > + > > +static inline int blk_copy_sanity_check(struct block_device *src_bdev, > > + struct block_device *dst_bdev, struct range_entry *rlist, int nr) > > +{ > > + unsigned int align_mask = max( > > + bdev_logical_block_size(dst_bdev), 
bdev_logical_block_size(src_bdev)) - 1; > > + sector_t len = 0; > > + int i; > > + > > + for (i = 0; i < nr; i++) { > > + if (rlist[i].len) > > + len += rlist[i].len; > > + else > > + return -EINVAL; > > + if ((rlist[i].dst & align_mask) || (rlist[i].src & align_mask) || > > + (rlist[i].len & align_mask)) > > + return -EINVAL; > > + rlist[i].comp_len = 0; > > + } > > + > > + if (!len && len >= MAX_COPY_TOTAL_LENGTH) > > + return -EINVAL; > > + > > + return 0; > > +} > > + > > +static inline bool blk_check_copy_offload(struct request_queue *src_q, > > + struct request_queue *dest_q) > > +{ > > + if (dest_q->limits.copy_offload == BLK_COPY_OFFLOAD && > > + src_q->limits.copy_offload == BLK_COPY_OFFLOAD) > > + return true; > > + > > + return false; > > +} > > + > > +/* > > + * blkdev_issue_copy - queue a copy > > + * @src_bdev: source block device > > + * @nr_srcs: number of source ranges to copy > > + * @src_rlist: array of source ranges > > + * @dest_bdev: destination block device > > + * @gfp_mask: memory allocation flags (for bio_alloc) > > + * @flags: BLKDEV_COPY_* flags to control behaviour > > + * > > + * Description: > > + * Copy source ranges from source block device to destination block device. > > + * length of a source range cannot be zero. 
> > + */ > > +int blkdev_issue_copy(struct block_device *src_bdev, int nr, > > + struct range_entry *rlist, struct block_device *dest_bdev, > > + gfp_t gfp_mask, int flags) > > +{ > > + struct request_queue *src_q = bdev_get_queue(src_bdev); > > + struct request_queue *dest_q = bdev_get_queue(dest_bdev); > > + int ret = -EINVAL; > > + > > + if (!src_q || !dest_q) > > + return -ENXIO; > > + > > + if (!nr) > > + return -EINVAL; > > + > > + if (nr >= MAX_COPY_NR_RANGE) > > + return -EINVAL; > > + > > + if (bdev_read_only(dest_bdev)) > > + return -EPERM; > > + > > + ret = blk_copy_sanity_check(src_bdev, dest_bdev, rlist, nr); > > + if (ret) > > + return ret; > > + > > + if (blk_check_copy_offload(src_q, dest_q)) > > + ret = blk_copy_offload(src_bdev, nr, rlist, dest_bdev, gfp_mask); > > + > > + return ret; > > +} > > +EXPORT_SYMBOL(blkdev_issue_copy); > > + > > /** > > * __blkdev_issue_write_same - generate number of bios with same page > > * @bdev: target blockdev > > diff --git a/block/blk-settings.c b/block/blk-settings.c > > index 818454552cf8..4c8d48b8af25 100644 > > --- a/block/blk-settings.c > > +++ b/block/blk-settings.c > > @@ -545,6 +545,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, > > t->max_segment_size = min_not_zero(t->max_segment_size, > > b->max_segment_size); > > > > + t->max_copy_sectors = min_not_zero(t->max_copy_sectors, b->max_copy_sectors); > > Why min_not_zero ? If one of the underlying drive does not support copy > offload, you cannot report that the top drive does. > agreed. Will update in next series. 
> > + > > t->misaligned |= b->misaligned; > > > > alignment = queue_limit_alignment_offset(b, start); > > diff --git a/block/blk.h b/block/blk.h > > index abb663a2a147..94d2b055750b 100644 > > --- a/block/blk.h > > +++ b/block/blk.h > > @@ -292,6 +292,8 @@ static inline bool blk_may_split(struct request_queue *q, struct bio *bio) > > break; > > } > > > > + if (unlikely(op_is_copy(bio->bi_opf))) > > + return false; > > /* > > * All drivers must accept single-segments bios that are <= PAGE_SIZE. > > * This is a quick and dirty check that relies on the fact that > > diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h > > index 5561e58d158a..0a3fee8ad61c 100644 > > --- a/include/linux/blk_types.h > > +++ b/include/linux/blk_types.h > > @@ -418,6 +418,7 @@ enum req_flag_bits { > > /* for driver use */ > > __REQ_DRV, > > __REQ_SWAP, /* swapping request. */ > > + __REQ_COPY, /* copy request*/ > > __REQ_NR_BITS, /* stops here */ > > }; > > > > @@ -442,6 +443,7 @@ enum req_flag_bits { > > > > #define REQ_DRV (1ULL << __REQ_DRV) > > #define REQ_SWAP (1ULL << __REQ_SWAP) > > +#define REQ_COPY (1ULL << __REQ_COPY) > > > > #define REQ_FAILFAST_MASK \ > > (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) > > @@ -498,6 +500,11 @@ static inline bool op_is_discard(unsigned int op) > > return (op & REQ_OP_MASK) == REQ_OP_DISCARD; > > } > > > > +static inline bool op_is_copy(unsigned int op) > > +{ > > + return (op & REQ_COPY); > > +} > > + > > /* > > * Check if a bio or request operation is a zone management operation, with > > * the exception of REQ_OP_ZONE_RESET_ALL which is treated as a special case > > @@ -532,4 +539,17 @@ struct blk_rq_stat { > > u64 batch; > > }; > > > > +struct cio { > > + atomic_t refcount; > > + blk_status_t io_err; > > + struct range_entry *rlist; > > + struct task_struct *waiter; /* waiting task (NULL if none) */ > > +}; > > + > > +struct copy_ctx { > > + int range_idx; > > + sector_t start_sec; > > + struct cio *cio; > > 
+}; > > + > > #endif /* __LINUX_BLK_TYPES_H */ > > diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h > > index f63ae50f1de3..15597488040c 100644 > > --- a/include/linux/blkdev.h > > +++ b/include/linux/blkdev.h > > @@ -1120,6 +1120,9 @@ extern int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, > > struct bio **biop); > > struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len, > > gfp_t gfp_mask); > > +int blkdev_issue_copy(struct block_device *src_bdev, int nr_srcs, > > + struct range_entry *src_rlist, struct block_device *dest_bdev, > > + gfp_t gfp_mask, int flags); > > > > #define BLKDEV_ZERO_NOUNMAP (1 << 0) /* do not free blocks */ > > #define BLKDEV_ZERO_NOFALLBACK (1 << 1) /* don't write explicit zeroes */ > > diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h > > index bdf7b404b3e7..55bca8f6e8ed 100644 > > --- a/include/uapi/linux/fs.h > > +++ b/include/uapi/linux/fs.h > > @@ -64,6 +64,20 @@ struct fstrim_range { > > __u64 minlen; > > }; > > > > +/* Maximum no of entries supported */ > > +#define MAX_COPY_NR_RANGE (1 << 12) > > + > > +/* maximum total copy length */ > > +#define MAX_COPY_TOTAL_LENGTH (1 << 21) > > + > > +/* Source range entry for copy */ > > +struct range_entry { > > + __u64 src; > > + __u64 dst; > > + __u64 len; > > + __u64 comp_len; > > +}; > > + > > /* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */ > > #define FILE_DEDUPE_RANGE_SAME 0 > > #define FILE_DEDUPE_RANGE_DIFFERS 1 > > > -- > Damien Le Moal > Western Digital Research > -- Thank you Nitesh
On Wed, Feb 09, 2022 at 10:48:44AM +0300, Dan Carpenter wrote: > Hi Nitesh, > > url: https://protect2.fireeye.com/v1/url?k=483798a4-17aca1b5-483613eb-0cc47a31cdbc-db5fd22936f47f46&q=1&e=e5a0c082-878d-4bbf-be36-3c8e34773475&u=https%3A%2F%2Fgithub.com%2F0day-ci%2Flinux%2Fcommits%2FNitesh-Shetty%2Fblock-make-bio_map_kern-non-static%2F20220207-231407 > base: https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git for-next > config: i386-randconfig-m021-20220207 (https://protect2.fireeye.com/v1/url?k=24e309ba-7b7830ab-24e282f5-0cc47a31cdbc-9cc4e76aaefa8c0d&q=1&e=e5a0c082-878d-4bbf-be36-3c8e34773475&u=https%3A%2F%2Fdownload.01.org%2F0day-ci%2Farchive%2F20220209%2F202202090703.U5riBMIn-lkp%40intel.com%2Fconfig) > compiler: gcc-9 (Debian 9.3.0-22) 9.3.0 > > If you fix the issue, kindly add following tag as appropriate > Reported-by: kernel test robot <lkp@intel.com> > Reported-by: Dan Carpenter <dan.carpenter@oracle.com> > > smatch warnings: > block/blk-lib.c:272 blk_copy_offload() warn: possible memory leak of 'ctx' > > vim +/ctx +272 block/blk-lib.c > acked > 12a9801a7301f1 Nitesh Shetty 2022-02-07 185 int blk_copy_offload(struct block_device *src_bdev, int nr_srcs, > 12a9801a7301f1 Nitesh Shetty 2022-02-07 186 struct range_entry *rlist, struct block_device *dst_bdev, gfp_t gfp_mask) > 12a9801a7301f1 Nitesh Shetty 2022-02-07 187 { > 12a9801a7301f1 Nitesh Shetty 2022-02-07 188 struct request_queue *sq = bdev_get_queue(src_bdev); > 12a9801a7301f1 Nitesh Shetty 2022-02-07 189 struct request_queue *dq = bdev_get_queue(dst_bdev); > 12a9801a7301f1 Nitesh Shetty 2022-02-07 190 struct bio *read_bio, *write_bio; > 12a9801a7301f1 Nitesh Shetty 2022-02-07 191 struct copy_ctx *ctx; > 12a9801a7301f1 Nitesh Shetty 2022-02-07 192 struct cio *cio; > 12a9801a7301f1 Nitesh Shetty 2022-02-07 193 struct page *token; > 12a9801a7301f1 Nitesh Shetty 2022-02-07 194 sector_t src_blk, copy_len, dst_blk; > 12a9801a7301f1 Nitesh Shetty 2022-02-07 195 sector_t remaining, max_copy_len = 
LONG_MAX; > 12a9801a7301f1 Nitesh Shetty 2022-02-07 196 int ri = 0, ret = 0; > 12a9801a7301f1 Nitesh Shetty 2022-02-07 197 > 12a9801a7301f1 Nitesh Shetty 2022-02-07 198 cio = kzalloc(sizeof(struct cio), GFP_KERNEL); > 12a9801a7301f1 Nitesh Shetty 2022-02-07 199 if (!cio) > 12a9801a7301f1 Nitesh Shetty 2022-02-07 200 return -ENOMEM; > 12a9801a7301f1 Nitesh Shetty 2022-02-07 201 atomic_set(&cio->refcount, 0); > 12a9801a7301f1 Nitesh Shetty 2022-02-07 202 cio->rlist = rlist; > 12a9801a7301f1 Nitesh Shetty 2022-02-07 203 > 12a9801a7301f1 Nitesh Shetty 2022-02-07 204 max_copy_len = min3(max_copy_len, (sector_t)sq->limits.max_copy_sectors, > 12a9801a7301f1 Nitesh Shetty 2022-02-07 205 (sector_t)dq->limits.max_copy_sectors); > 12a9801a7301f1 Nitesh Shetty 2022-02-07 206 max_copy_len = min3(max_copy_len, (sector_t)sq->limits.max_copy_range_sectors, > 12a9801a7301f1 Nitesh Shetty 2022-02-07 207 (sector_t)dq->limits.max_copy_range_sectors) << SECTOR_SHIFT; > 12a9801a7301f1 Nitesh Shetty 2022-02-07 208 > 12a9801a7301f1 Nitesh Shetty 2022-02-07 209 for (ri = 0; ri < nr_srcs; ri++) { > 12a9801a7301f1 Nitesh Shetty 2022-02-07 210 cio->rlist[ri].comp_len = rlist[ri].len; > 12a9801a7301f1 Nitesh Shetty 2022-02-07 211 for (remaining = rlist[ri].len, src_blk = rlist[ri].src, dst_blk = rlist[ri].dst; > 12a9801a7301f1 Nitesh Shetty 2022-02-07 212 remaining > 0; > 12a9801a7301f1 Nitesh Shetty 2022-02-07 213 remaining -= copy_len, src_blk += copy_len, dst_blk += copy_len) { > 12a9801a7301f1 Nitesh Shetty 2022-02-07 214 copy_len = min(remaining, max_copy_len); > 12a9801a7301f1 Nitesh Shetty 2022-02-07 215 > 12a9801a7301f1 Nitesh Shetty 2022-02-07 216 token = alloc_page(gfp_mask); > 12a9801a7301f1 Nitesh Shetty 2022-02-07 217 if (unlikely(!token)) { > 12a9801a7301f1 Nitesh Shetty 2022-02-07 218 ret = -ENOMEM; > 12a9801a7301f1 Nitesh Shetty 2022-02-07 219 goto err_token; > 12a9801a7301f1 Nitesh Shetty 2022-02-07 220 } > 12a9801a7301f1 Nitesh Shetty 2022-02-07 221 > 12a9801a7301f1 Nitesh 
Shetty 2022-02-07 222 read_bio = bio_alloc(src_bdev, 1, REQ_OP_READ | REQ_COPY | REQ_NOMERGE, > 12a9801a7301f1 Nitesh Shetty 2022-02-07 223 gfp_mask); > 12a9801a7301f1 Nitesh Shetty 2022-02-07 224 if (!read_bio) { > 12a9801a7301f1 Nitesh Shetty 2022-02-07 225 ret = -ENOMEM; > 12a9801a7301f1 Nitesh Shetty 2022-02-07 226 goto err_read_bio; > 12a9801a7301f1 Nitesh Shetty 2022-02-07 227 } > 12a9801a7301f1 Nitesh Shetty 2022-02-07 228 read_bio->bi_iter.bi_sector = src_blk >> SECTOR_SHIFT; > 12a9801a7301f1 Nitesh Shetty 2022-02-07 229 read_bio->bi_iter.bi_size = copy_len; > 12a9801a7301f1 Nitesh Shetty 2022-02-07 230 __bio_add_page(read_bio, token, PAGE_SIZE, 0); > 12a9801a7301f1 Nitesh Shetty 2022-02-07 231 ret = submit_bio_wait(read_bio); > 12a9801a7301f1 Nitesh Shetty 2022-02-07 232 if (ret) { > 12a9801a7301f1 Nitesh Shetty 2022-02-07 233 bio_put(read_bio); > 12a9801a7301f1 Nitesh Shetty 2022-02-07 234 goto err_read_bio; > 12a9801a7301f1 Nitesh Shetty 2022-02-07 235 } > 12a9801a7301f1 Nitesh Shetty 2022-02-07 236 bio_put(read_bio); > 12a9801a7301f1 Nitesh Shetty 2022-02-07 237 ctx = kzalloc(sizeof(struct copy_ctx), gfp_mask); > 12a9801a7301f1 Nitesh Shetty 2022-02-07 238 if (!ctx) { > 12a9801a7301f1 Nitesh Shetty 2022-02-07 239 ret = -ENOMEM; > 12a9801a7301f1 Nitesh Shetty 2022-02-07 240 goto err_read_bio; > 12a9801a7301f1 Nitesh Shetty 2022-02-07 241 } > 12a9801a7301f1 Nitesh Shetty 2022-02-07 242 ctx->cio = cio; > 12a9801a7301f1 Nitesh Shetty 2022-02-07 243 ctx->range_idx = ri; > 12a9801a7301f1 Nitesh Shetty 2022-02-07 244 ctx->start_sec = rlist[ri].src; > 12a9801a7301f1 Nitesh Shetty 2022-02-07 245 > 12a9801a7301f1 Nitesh Shetty 2022-02-07 246 write_bio = bio_alloc(dst_bdev, 1, REQ_OP_WRITE | REQ_COPY | REQ_NOMERGE, > 12a9801a7301f1 Nitesh Shetty 2022-02-07 247 gfp_mask); > 12a9801a7301f1 Nitesh Shetty 2022-02-07 248 if (!write_bio) { > > Please call kfree(ctx) before the goto. 
> > 12a9801a7301f1 Nitesh Shetty 2022-02-07 249 ret = -ENOMEM; > 12a9801a7301f1 Nitesh Shetty 2022-02-07 250 goto err_read_bio; > 12a9801a7301f1 Nitesh Shetty 2022-02-07 251 } > 12a9801a7301f1 Nitesh Shetty 2022-02-07 252 > 12a9801a7301f1 Nitesh Shetty 2022-02-07 253 write_bio->bi_iter.bi_sector = dst_blk >> SECTOR_SHIFT; > 12a9801a7301f1 Nitesh Shetty 2022-02-07 254 write_bio->bi_iter.bi_size = copy_len; > 12a9801a7301f1 Nitesh Shetty 2022-02-07 255 __bio_add_page(write_bio, token, PAGE_SIZE, 0); > 12a9801a7301f1 Nitesh Shetty 2022-02-07 256 write_bio->bi_end_io = bio_copy_end_io; > 12a9801a7301f1 Nitesh Shetty 2022-02-07 257 write_bio->bi_private = ctx; > 12a9801a7301f1 Nitesh Shetty 2022-02-07 258 atomic_inc(&cio->refcount); > 12a9801a7301f1 Nitesh Shetty 2022-02-07 259 submit_bio(write_bio); > 12a9801a7301f1 Nitesh Shetty 2022-02-07 260 } > 12a9801a7301f1 Nitesh Shetty 2022-02-07 261 } > 12a9801a7301f1 Nitesh Shetty 2022-02-07 262 > 12a9801a7301f1 Nitesh Shetty 2022-02-07 263 /* Wait for completion of all IO's*/ > 12a9801a7301f1 Nitesh Shetty 2022-02-07 264 return cio_await_completion(cio); > 12a9801a7301f1 Nitesh Shetty 2022-02-07 265 > 12a9801a7301f1 Nitesh Shetty 2022-02-07 266 err_read_bio: > 12a9801a7301f1 Nitesh Shetty 2022-02-07 267 __free_page(token); > 12a9801a7301f1 Nitesh Shetty 2022-02-07 268 err_token: > 12a9801a7301f1 Nitesh Shetty 2022-02-07 269 rlist[ri].comp_len = min_t(sector_t, rlist[ri].comp_len, (rlist[ri].len - remaining)); > 12a9801a7301f1 Nitesh Shetty 2022-02-07 270 > 12a9801a7301f1 Nitesh Shetty 2022-02-07 271 cio->io_err = ret; > 12a9801a7301f1 Nitesh Shetty 2022-02-07 @272 return cio_await_completion(cio); > 12a9801a7301f1 Nitesh Shetty 2022-02-07 273 } > > --- > 0-DAY CI Kernel Test Service, Intel Corporation > https://protect2.fireeye.com/v1/url?k=4cd82b59-13431248-4cd9a016-0cc47a31cdbc-7ef30a0abcb321a3&q=1&e=e5a0c082-878d-4bbf-be36-3c8e34773475&u=https%3A%2F%2Flists.01.org%2Fhyperkitty%2Flist%2Fkbuild-all%40lists.01.org > > > -- 
Thank you Nitesh
diff --git a/block/blk-lib.c b/block/blk-lib.c index 1b8ced45e4e5..3ae2c27b566e 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -135,6 +135,222 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, } EXPORT_SYMBOL(blkdev_issue_discard); +/* + * Wait on and process all in-flight BIOs. This must only be called once + * all bios have been issued so that the refcount can only decrease. + * This just waits for all bios to make it through bio_copy_end_io. IO + * errors are propagated through cio->io_error. + */ +static int cio_await_completion(struct cio *cio) +{ + int ret = 0; + + while (atomic_read(&cio->refcount)) { + cio->waiter = current; + __set_current_state(TASK_UNINTERRUPTIBLE); + blk_io_schedule(); + /* wake up sets us TASK_RUNNING */ + cio->waiter = NULL; + ret = cio->io_err; + } + kvfree(cio); + + return ret; +} + +static void bio_copy_end_io(struct bio *bio) +{ + struct copy_ctx *ctx = bio->bi_private; + struct cio *cio = ctx->cio; + sector_t clen; + int ri = ctx->range_idx; + + if (bio->bi_status) { + cio->io_err = bio->bi_status; + clen = (bio->bi_iter.bi_sector - ctx->start_sec) << SECTOR_SHIFT; + cio->rlist[ri].comp_len = min_t(sector_t, clen, cio->rlist[ri].comp_len); + } + __free_page(bio->bi_io_vec[0].bv_page); + kfree(ctx); + bio_put(bio); + + if (atomic_dec_and_test(&cio->refcount) && cio->waiter) + wake_up_process(cio->waiter); +} + +/* + * blk_copy_offload - Use device's native copy offload feature + * Go through user provide payload, prepare new payload based on device's copy offload limits. 
+ */ +int blk_copy_offload(struct block_device *src_bdev, int nr_srcs, + struct range_entry *rlist, struct block_device *dst_bdev, gfp_t gfp_mask) +{ + struct request_queue *sq = bdev_get_queue(src_bdev); + struct request_queue *dq = bdev_get_queue(dst_bdev); + struct bio *read_bio, *write_bio; + struct copy_ctx *ctx; + struct cio *cio; + struct page *token; + sector_t src_blk, copy_len, dst_blk; + sector_t remaining, max_copy_len = LONG_MAX; + int ri = 0, ret = 0; + + cio = kzalloc(sizeof(struct cio), GFP_KERNEL); + if (!cio) + return -ENOMEM; + atomic_set(&cio->refcount, 0); + cio->rlist = rlist; + + max_copy_len = min3(max_copy_len, (sector_t)sq->limits.max_copy_sectors, + (sector_t)dq->limits.max_copy_sectors); + max_copy_len = min3(max_copy_len, (sector_t)sq->limits.max_copy_range_sectors, + (sector_t)dq->limits.max_copy_range_sectors) << SECTOR_SHIFT; + + for (ri = 0; ri < nr_srcs; ri++) { + cio->rlist[ri].comp_len = rlist[ri].len; + for (remaining = rlist[ri].len, src_blk = rlist[ri].src, dst_blk = rlist[ri].dst; + remaining > 0; + remaining -= copy_len, src_blk += copy_len, dst_blk += copy_len) { + copy_len = min(remaining, max_copy_len); + + token = alloc_page(gfp_mask); + if (unlikely(!token)) { + ret = -ENOMEM; + goto err_token; + } + + read_bio = bio_alloc(src_bdev, 1, REQ_OP_READ | REQ_COPY | REQ_NOMERGE, + gfp_mask); + if (!read_bio) { + ret = -ENOMEM; + goto err_read_bio; + } + read_bio->bi_iter.bi_sector = src_blk >> SECTOR_SHIFT; + read_bio->bi_iter.bi_size = copy_len; + __bio_add_page(read_bio, token, PAGE_SIZE, 0); + ret = submit_bio_wait(read_bio); + if (ret) { + bio_put(read_bio); + goto err_read_bio; + } + bio_put(read_bio); + ctx = kzalloc(sizeof(struct copy_ctx), gfp_mask); + if (!ctx) { + ret = -ENOMEM; + goto err_read_bio; + } + ctx->cio = cio; + ctx->range_idx = ri; + ctx->start_sec = rlist[ri].src; + + write_bio = bio_alloc(dst_bdev, 1, REQ_OP_WRITE | REQ_COPY | REQ_NOMERGE, + gfp_mask); + if (!write_bio) { + ret = -ENOMEM; + goto 
err_read_bio; + } + + write_bio->bi_iter.bi_sector = dst_blk >> SECTOR_SHIFT; + write_bio->bi_iter.bi_size = copy_len; + __bio_add_page(write_bio, token, PAGE_SIZE, 0); + write_bio->bi_end_io = bio_copy_end_io; + write_bio->bi_private = ctx; + atomic_inc(&cio->refcount); + submit_bio(write_bio); + } + } + + /* Wait for completion of all IO's*/ + return cio_await_completion(cio); + +err_read_bio: + __free_page(token); +err_token: + rlist[ri].comp_len = min_t(sector_t, rlist[ri].comp_len, (rlist[ri].len - remaining)); + + cio->io_err = ret; + return cio_await_completion(cio); +} + +static inline int blk_copy_sanity_check(struct block_device *src_bdev, + struct block_device *dst_bdev, struct range_entry *rlist, int nr) +{ + unsigned int align_mask = max( + bdev_logical_block_size(dst_bdev), bdev_logical_block_size(src_bdev)) - 1; + sector_t len = 0; + int i; + + for (i = 0; i < nr; i++) { + if (rlist[i].len) + len += rlist[i].len; + else + return -EINVAL; + if ((rlist[i].dst & align_mask) || (rlist[i].src & align_mask) || + (rlist[i].len & align_mask)) + return -EINVAL; + rlist[i].comp_len = 0; + } + + if (!len && len >= MAX_COPY_TOTAL_LENGTH) + return -EINVAL; + + return 0; +} + +static inline bool blk_check_copy_offload(struct request_queue *src_q, + struct request_queue *dest_q) +{ + if (dest_q->limits.copy_offload == BLK_COPY_OFFLOAD && + src_q->limits.copy_offload == BLK_COPY_OFFLOAD) + return true; + + return false; +} + +/* + * blkdev_issue_copy - queue a copy + * @src_bdev: source block device + * @nr_srcs: number of source ranges to copy + * @src_rlist: array of source ranges + * @dest_bdev: destination block device + * @gfp_mask: memory allocation flags (for bio_alloc) + * @flags: BLKDEV_COPY_* flags to control behaviour + * + * Description: + * Copy source ranges from source block device to destination block device. + * length of a source range cannot be zero. 
+ */ +int blkdev_issue_copy(struct block_device *src_bdev, int nr, + struct range_entry *rlist, struct block_device *dest_bdev, + gfp_t gfp_mask, int flags) +{ + struct request_queue *src_q = bdev_get_queue(src_bdev); + struct request_queue *dest_q = bdev_get_queue(dest_bdev); + int ret = -EINVAL; + + if (!src_q || !dest_q) + return -ENXIO; + + if (!nr) + return -EINVAL; + + if (nr >= MAX_COPY_NR_RANGE) + return -EINVAL; + + if (bdev_read_only(dest_bdev)) + return -EPERM; + + ret = blk_copy_sanity_check(src_bdev, dest_bdev, rlist, nr); + if (ret) + return ret; + + if (blk_check_copy_offload(src_q, dest_q)) + ret = blk_copy_offload(src_bdev, nr, rlist, dest_bdev, gfp_mask); + + return ret; +} +EXPORT_SYMBOL(blkdev_issue_copy); + /** * __blkdev_issue_write_same - generate number of bios with same page * @bdev: target blockdev diff --git a/block/blk-settings.c b/block/blk-settings.c index 818454552cf8..4c8d48b8af25 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -545,6 +545,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->max_segment_size = min_not_zero(t->max_segment_size, b->max_segment_size); + t->max_copy_sectors = min_not_zero(t->max_copy_sectors, b->max_copy_sectors); + t->misaligned |= b->misaligned; alignment = queue_limit_alignment_offset(b, start); diff --git a/block/blk.h b/block/blk.h index abb663a2a147..94d2b055750b 100644 --- a/block/blk.h +++ b/block/blk.h @@ -292,6 +292,8 @@ static inline bool blk_may_split(struct request_queue *q, struct bio *bio) break; } + if (unlikely(op_is_copy(bio->bi_opf))) + return false; /* * All drivers must accept single-segments bios that are <= PAGE_SIZE. 
* This is a quick and dirty check that relies on the fact that diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 5561e58d158a..0a3fee8ad61c 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -418,6 +418,7 @@ enum req_flag_bits { /* for driver use */ __REQ_DRV, __REQ_SWAP, /* swapping request. */ + __REQ_COPY, /* copy request*/ __REQ_NR_BITS, /* stops here */ }; @@ -442,6 +443,7 @@ enum req_flag_bits { #define REQ_DRV (1ULL << __REQ_DRV) #define REQ_SWAP (1ULL << __REQ_SWAP) +#define REQ_COPY (1ULL << __REQ_COPY) #define REQ_FAILFAST_MASK \ (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) @@ -498,6 +500,11 @@ static inline bool op_is_discard(unsigned int op) return (op & REQ_OP_MASK) == REQ_OP_DISCARD; } +static inline bool op_is_copy(unsigned int op) +{ + return (op & REQ_COPY); +} + /* * Check if a bio or request operation is a zone management operation, with * the exception of REQ_OP_ZONE_RESET_ALL which is treated as a special case @@ -532,4 +539,17 @@ struct blk_rq_stat { u64 batch; }; +struct cio { + atomic_t refcount; + blk_status_t io_err; + struct range_entry *rlist; + struct task_struct *waiter; /* waiting task (NULL if none) */ +}; + +struct copy_ctx { + int range_idx; + sector_t start_sec; + struct cio *cio; +}; + #endif /* __LINUX_BLK_TYPES_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f63ae50f1de3..15597488040c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1120,6 +1120,9 @@ extern int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, struct bio **biop); struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len, gfp_t gfp_mask); +int blkdev_issue_copy(struct block_device *src_bdev, int nr_srcs, + struct range_entry *src_rlist, struct block_device *dest_bdev, + gfp_t gfp_mask, int flags); #define BLKDEV_ZERO_NOUNMAP (1 << 0) /* do not free blocks */ #define BLKDEV_ZERO_NOFALLBACK (1 << 1) /* don't write 
explicit zeroes */ diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index bdf7b404b3e7..55bca8f6e8ed 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -64,6 +64,20 @@ struct fstrim_range { __u64 minlen; }; +/* Maximum no of entries supported */ +#define MAX_COPY_NR_RANGE (1 << 12) + +/* maximum total copy length */ +#define MAX_COPY_TOTAL_LENGTH (1 << 21) + +/* Source range entry for copy */ +struct range_entry { + __u64 src; + __u64 dst; + __u64 len; + __u64 comp_len; +}; + /* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */ #define FILE_DEDUPE_RANGE_SAME 0 #define FILE_DEDUPE_RANGE_DIFFERS 1