@@ -252,6 +252,20 @@ Description:
write_zeroes_max_bytes is 0, write zeroes is not supported
by the device.
+What: /sys/block/<disk>/queue/verify_max_bytes
+Date: Nov 2021
+Contact: Chaitanya Kulkarni <kch@nvidia.com>
+Description:
+ Devices that support the verify operation can accept a single
+ request that asks the device to verify a range of contiguous
+ blocks on the storage without any data payload in the request.
+ This can be used to check LBAs on the device without reading
+ them back, by offloading the work to the device.
+ verify_max_bytes indicates how many bytes can be verified in a
+ single verify command. If verify_max_bytes is 0, the verify
+ operation is not supported by the device.
+
+
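A userspace consumer can check for the offload before issuing verify requests by reading this attribute; a minimal sketch, assuming an nvme0n1 device (the path is only an example):

#include <stdio.h>

int main(void)
{
	unsigned long long max_bytes = 0;
	FILE *f = fopen("/sys/block/nvme0n1/queue/verify_max_bytes", "r");

	if (!f)
		return 1;
	if (fscanf(f, "%llu", &max_bytes) != 1)
		max_bytes = 0;
	fclose(f);

	if (max_bytes)
		printf("device verifies up to %llu bytes per command\n", max_bytes);
	else
		printf("no device offload; the block layer emulates verify via reads\n");
	return 0;
}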
What: /sys/block/<disk>/queue/zoned
Date: September 2016
Contact: Damien Le Moal <damien.lemoal@wdc.com>
@@ -141,6 +141,7 @@ static const char *const blk_op_name[] = {
REQ_OP_NAME(ZONE_APPEND),
REQ_OP_NAME(WRITE_SAME),
REQ_OP_NAME(WRITE_ZEROES),
+ REQ_OP_NAME(VERIFY),
REQ_OP_NAME(SCSI_IN),
REQ_OP_NAME(SCSI_OUT),
REQ_OP_NAME(DRV_IN),
@@ -851,6 +852,10 @@ static noinline_for_stack bool submit_bio_checks(struct bio *bio)
if (!q->limits.max_write_same_sectors)
goto not_supported;
break;
+ case REQ_OP_VERIFY:
+ if (!q->limits.max_verify_sectors)
+ goto not_supported;
+ break;
case REQ_OP_ZONE_APPEND:
status = blk_check_zone_append(q, bio);
if (status != BLK_STS_OK)
@@ -439,3 +439,195 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
return ret;
}
EXPORT_SYMBOL(blkdev_issue_zeroout);
+
+/**
+ * __blkdev_emulate_verify - emulate a verify operation asynchronously
+ * @bdev: blockdev to issue the verify for
+ * @sector: start sector
+ * @nr_sects: number of sectors to verify
+ * @gfp_mask: memory allocation flags (for bio_alloc)
+ * @biop: pointer to anchor bio
+ * @buf: data buffer the read bios are mapped onto
+ *
+ * Description:
+ * Verify a block range by reading it back into @buf, emulating
+ * REQ_OP_VERIFY for devices without a hardware offload. This is the
+ * asynchronous variant; the caller is responsible for submitting and
+ * completing the anchored bio.
+ */
+int __blkdev_emulate_verify(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp_mask, struct bio **biop, char *buf)
+{
+ struct request_queue *q = bdev_get_queue(bdev);
+ struct bio *bio = *biop;
+ unsigned int sz;
+ int bi_size;
+
+ if (!q)
+ return -ENXIO;
+
+ if (bdev_read_only(bdev))
+ return -EPERM;
+
+ while (nr_sects != 0) {
+ bio = blk_next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects),
+ gfp_mask);
+ bio->bi_iter.bi_sector = sector;
+ bio_set_dev(bio, bdev);
+ bio_set_op_attrs(bio, REQ_OP_READ, 0);
+
+ while (nr_sects != 0) {
+ bool is_vaddr = is_vmalloc_addr(buf);
+ struct page *p;
+
+ p = is_vaddr ? vmalloc_to_page(buf) : virt_to_page(buf);
+ sz = min((sector_t) PAGE_SIZE, nr_sects << 9);
+ bi_size = bio_add_page(bio, p, sz, offset_in_page(buf));
+ nr_sects -= bi_size >> 9;
+ sector += bi_size >> 9;
+ buf += bi_size;
+
+ if (bi_size < sz)
+ break;
+ }
+ cond_resched();
+ }
+
+ *biop = bio;
+ return 0;
+}
+EXPORT_SYMBOL(__blkdev_emulate_verify);
+
+/**
+ * blkdev_emulate_verify - emulate a verify operation synchronously
+ * @bdev: blockdev to issue the verify for
+ * @sector: start sector
+ * @nr_sects: number of sectors to verify
+ * @gfp_mask: memory allocation flags (for bio_alloc)
+ *
+ * Description:
+ * Verify a block range by reading it back, emulating REQ_OP_VERIFY for
+ * devices without a hardware offload, and wait for completion.
+ */
+int blkdev_emulate_verify(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp_mask)
+{
+ sector_t min_io_sect = (BIO_MAX_VECS << PAGE_SHIFT) >> 9;
+ int ret = 0;
+ char *buf;
+
+ /* bounce buffer sized so a single bio can map at most BIO_MAX_VECS pages */
+ buf = kzalloc(min_io_sect << 9, GFP_KERNEL);
+ if (!buf) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ while (nr_sects > 0) {
+ sector_t curr_sects = min_t(sector_t, nr_sects, min_io_sect);
+ struct bio *bio = NULL;
+
+ ret = __blkdev_emulate_verify(bdev, sector, curr_sects,
+ GFP_KERNEL, &bio, buf);
+
+ if (!(ret == 0 && bio))
+ break;
+
+ ret = submit_bio_wait(bio);
+ bio_put(bio);
+
+ nr_sects -= curr_sects;
+ sector += curr_sects;
+ }
+out:
+ kfree(buf);
+ return ret;
+}
+EXPORT_SYMBOL(blkdev_emulate_verify);
+
+/**
+ * __blkdev_issue_verify - generate the bios needed to verify a block range
+ * @bdev: blockdev to issue the verify for
+ * @sector: start sector
+ * @nr_sects: number of sectors to verify
+ * @gfp_mask: memory allocation flags (for bio_alloc)
+ * @biop: pointer to anchor bio
+ *
+ * Description:
+ * Verify a block range using hardware offload.
+ *
+ * The function falls back to read-based emulation if the device does not
+ * provide a hardware verify offload.
+ */
+int __blkdev_issue_verify(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp_mask, struct bio **biop)
+{
+ struct request_queue *q = bdev_get_queue(bdev);
+ unsigned int max_verify_sectors;
+ struct bio *bio = *biop;
+
+ if (!q)
+ return -ENXIO;
+
+ if (bdev_read_only(bdev))
+ return -EPERM;
+
+ max_verify_sectors = bdev_verify_sectors(bdev);
+
+ if (max_verify_sectors == 0)
+ return blkdev_emulate_verify(bdev, sector, nr_sects, gfp_mask);
+
+ while (nr_sects) {
+ bio = blk_next_bio(bio, 0, gfp_mask);
+ bio->bi_iter.bi_sector = sector;
+ bio_set_dev(bio, bdev);
+ bio->bi_opf = REQ_OP_VERIFY;
+ if (nr_sects > max_verify_sectors) {
+ bio->bi_iter.bi_size = max_verify_sectors << 9;
+ nr_sects -= max_verify_sectors;
+ sector += max_verify_sectors;
+ } else {
+ bio->bi_iter.bi_size = nr_sects << 9;
+ nr_sects = 0;
+ }
+ cond_resched();
+ }
+
+ *biop = bio;
+ return 0;
+}
+EXPORT_SYMBOL(__blkdev_issue_verify);
+
+/**
+ * blkdev_issue_verify - verify a block range
+ * @bdev: blockdev to verify
+ * @sector: start sector
+ * @nr_sects: number of sectors to verify
+ * @gfp_mask: memory allocation flags (for bio_alloc)
+ *
+ * Description:
+ * Verify a block range using hardware offload if available, else emulate.
+ */
+int blkdev_issue_verify(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp_mask)
+{
+ int ret = 0;
+ sector_t bs_mask;
+ struct bio *bio = NULL;
+ struct blk_plug plug;
+
+ bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
+ if ((sector | nr_sects) & bs_mask)
+ return -EINVAL;
+
+ blk_start_plug(&plug);
+ ret = __blkdev_issue_verify(bdev, sector, nr_sects, gfp_mask, &bio);
+ if (ret == 0 && bio) {
+ ret = submit_bio_wait(bio);
+ bio_put(bio);
+ }
+ blk_finish_plug(&plug);
+
+ return ret;
+}
+EXPORT_SYMBOL(blkdev_issue_verify);
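As a usage sketch for the new exports, a hypothetical in-kernel caller (not part of this patch) could verify an extent it has just written and rely on the automatic fallback to read-based emulation:

/* Hypothetical helper, for illustration only. */
static int example_verify_extent(struct block_device *bdev, sector_t sector,
				 sector_t nr_sects)
{
	int ret;

	/* Uses the hardware offload when available, emulation otherwise. */
	ret = blkdev_issue_verify(bdev, sector, nr_sects, GFP_KERNEL);
	if (ret)
		pr_err("verify of %llu sectors at %llu failed: %d\n",
		       (unsigned long long)nr_sects,
		       (unsigned long long)sector, ret);
	return ret;
}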
@@ -117,6 +117,20 @@ static struct bio *blk_bio_write_zeroes_split(struct request_queue *q,
return bio_split(bio, q->limits.max_write_zeroes_sectors, GFP_NOIO, bs);
}
+static struct bio *blk_bio_verify_split(struct request_queue *q,
+ struct bio *bio, struct bio_set *bs, unsigned *nsegs)
+{
+ *nsegs = 0;
+
+ if (!q->limits.max_verify_sectors)
+ return NULL;
+
+ if (bio_sectors(bio) <= q->limits.max_verify_sectors)
+ return NULL;
+
+ return bio_split(bio, q->limits.max_verify_sectors, GFP_NOIO, bs);
+}
+
static struct bio *blk_bio_write_same_split(struct request_queue *q,
struct bio *bio,
struct bio_set *bs,
@@ -316,6 +330,10 @@ void __blk_queue_split(struct bio **bio, unsigned int *nr_segs)
split = blk_bio_write_zeroes_split(q, *bio, &q->bio_split,
nr_segs);
break;
+ case REQ_OP_VERIFY:
+ split = blk_bio_verify_split(q, *bio, &q->bio_split,
+ nr_segs);
+ break;
case REQ_OP_WRITE_SAME:
split = blk_bio_write_same_split(q, *bio, &q->bio_split,
nr_segs);
@@ -383,6 +401,7 @@ unsigned int blk_recalc_rq_segments(struct request *rq)
case REQ_OP_DISCARD:
case REQ_OP_SECURE_ERASE:
case REQ_OP_WRITE_ZEROES:
+ case REQ_OP_VERIFY:
return 0;
case REQ_OP_WRITE_SAME:
return 1;
@@ -48,6 +48,7 @@ void blk_set_default_limits(struct queue_limits *lim)
lim->chunk_sectors = 0;
lim->max_write_same_sectors = 0;
lim->max_write_zeroes_sectors = 0;
+ lim->max_verify_sectors = 0;
lim->max_zone_append_sectors = 0;
lim->max_discard_sectors = 0;
lim->max_hw_discard_sectors = 0;
@@ -84,6 +85,7 @@ void blk_set_stacking_limits(struct queue_limits *lim)
lim->max_dev_sectors = UINT_MAX;
lim->max_write_same_sectors = UINT_MAX;
lim->max_write_zeroes_sectors = UINT_MAX;
+ lim->max_verify_sectors = UINT_MAX;
lim->max_zone_append_sectors = UINT_MAX;
}
EXPORT_SYMBOL(blk_set_stacking_limits);
@@ -227,6 +229,19 @@ void blk_queue_max_write_zeroes_sectors(struct request_queue *q,
}
EXPORT_SYMBOL(blk_queue_max_write_zeroes_sectors);
+/**
+ * blk_queue_max_verify_sectors - set max sectors for a single
+ *                                verify operation
+ * @q: the request queue for the device
+ * @max_verify_sectors: maximum number of sectors to verify per command
+ **/
+void blk_queue_max_verify_sectors(struct request_queue *q,
+ unsigned int max_verify_sectors)
+{
+ q->limits.max_verify_sectors = max_verify_sectors;
+}
+EXPORT_SYMBOL(blk_queue_max_verify_sectors);
+
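For reference, a low-level driver that implements the offload would be expected to advertise it from its queue setup path; a hedged sketch with an illustrative 8 MB per-command limit (not taken from any real device):

/* Hypothetical driver snippet, for illustration only. */
static void example_setup_verify(struct request_queue *q)
{
	blk_queue_flag_set(QUEUE_FLAG_VERIFY, q);
	/* 8 MB per verify command, expressed in 512-byte sectors */
	blk_queue_max_verify_sectors(q, (8 * 1024 * 1024) >> 9);
}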
/**
* blk_queue_max_zone_append_sectors - set max sectors for a single zone append
* @q: the request queue for the device
@@ -514,6 +529,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
b->max_write_same_sectors);
t->max_write_zeroes_sectors = min(t->max_write_zeroes_sectors,
b->max_write_zeroes_sectors);
+ t->max_verify_sectors = min(t->max_verify_sectors,
+ b->max_verify_sectors);
t->max_zone_append_sectors = min(t->max_zone_append_sectors,
b->max_zone_append_sectors);
t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn);
@@ -108,6 +108,12 @@ queue_ra_store(struct request_queue *q, const char *page, size_t count)
return ret;
}
+static ssize_t queue_verify_max_show(struct request_queue *q, char *page)
+{
+ return sprintf(page, "%llu\n",
+ (unsigned long long)q->limits.max_verify_sectors << 9);
+}
+
static ssize_t queue_max_sectors_show(struct request_queue *q, char *page)
{
int max_sectors_kb = queue_max_sectors(q) >> 1;
@@ -584,6 +590,7 @@ QUEUE_RO_ENTRY(queue_discard_zeroes_data, "discard_zeroes_data");
QUEUE_RO_ENTRY(queue_write_same_max, "write_same_max_bytes");
QUEUE_RO_ENTRY(queue_write_zeroes_max, "write_zeroes_max_bytes");
+QUEUE_RO_ENTRY(queue_verify_max, "verify_max_bytes");
QUEUE_RO_ENTRY(queue_zone_append_max, "zone_append_max_bytes");
QUEUE_RO_ENTRY(queue_zoned, "zoned");
@@ -638,6 +645,7 @@ static struct attribute *queue_attrs[] = {
&queue_discard_zeroes_data_entry.attr,
&queue_write_same_max_entry.attr,
&queue_write_zeroes_max_entry.attr,
+ &queue_verify_max_entry.attr,
&queue_zone_append_max_entry.attr,
&queue_nonrot_entry.attr,
&queue_zoned_entry.attr,
@@ -73,6 +73,7 @@ bool blk_req_needs_zone_write_lock(struct request *rq)
switch (req_op(rq)) {
case REQ_OP_WRITE_ZEROES:
+ case REQ_OP_VERIFY:
case REQ_OP_WRITE_SAME:
case REQ_OP_WRITE:
return blk_rq_zone_is_seq(rq);
@@ -259,6 +259,7 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
case REQ_OP_DISCARD:
case REQ_OP_SECURE_ERASE:
case REQ_OP_WRITE_ZEROES:
+ case REQ_OP_VERIFY:
break;
case REQ_OP_WRITE_SAME:
bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0];
@@ -168,6 +168,39 @@ static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
BLKDEV_ZERO_NOUNMAP);
}
+static int blk_ioctl_verify(struct block_device *bdev, fmode_t mode,
+ unsigned long arg)
+{
+ uint64_t range[2];
+ struct address_space *mapping;
+ uint64_t start, end, len;
+
+ if (!(mode & FMODE_WRITE))
+ return -EBADF;
+
+ if (copy_from_user(range, (void __user *)arg, sizeof(range)))
+ return -EFAULT;
+
+ start = range[0];
+ len = range[1];
+ end = start + len - 1;
+
+ if (start & 511)
+ return -EINVAL;
+ if (len & 511)
+ return -EINVAL;
+ if (end >= (uint64_t)i_size_read(bdev->bd_inode))
+ return -EINVAL;
+ if (end < start)
+ return -EINVAL;
+
+ /* Invalidate the page cache, including dirty pages */
+ mapping = bdev->bd_inode->i_mapping;
+ truncate_inode_pages_range(mapping, start, end);
+
+ return blkdev_issue_verify(bdev, start >> 9, len >> 9, GFP_KERNEL);
+}
+
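From userspace, BLKVERIFY takes the same byte-granular {start, length} pair as BLKZEROOUT; a minimal sketch, assuming /dev/nvme0n1 and a 512-byte-aligned 1 MB range:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>	/* BLKVERIFY, once this patch is applied */

int main(void)
{
	uint64_t range[2] = { 0, 1024 * 1024 };	/* start and length in bytes */
	int fd = open("/dev/nvme0n1", O_WRONLY);	/* FMODE_WRITE is required */

	if (fd < 0)
		return 1;
	if (ioctl(fd, BLKVERIFY, range))
		perror("BLKVERIFY");
	close(fd);
	return 0;
}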
static int put_ushort(unsigned short __user *argp, unsigned short val)
{
return put_user(val, argp);
@@ -460,6 +493,8 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
BLKDEV_DISCARD_SECURE);
case BLKZEROOUT:
return blk_ioctl_zeroout(bdev, mode, arg);
+ case BLKVERIFY:
+ return blk_ioctl_verify(bdev, mode, arg);
case BLKREPORTZONE:
return blkdev_report_zones_ioctl(bdev, mode, cmd, arg);
case BLKRESETZONE:
@@ -63,7 +63,8 @@ static inline bool bio_has_data(struct bio *bio)
bio->bi_iter.bi_size &&
bio_op(bio) != REQ_OP_DISCARD &&
bio_op(bio) != REQ_OP_SECURE_ERASE &&
- bio_op(bio) != REQ_OP_WRITE_ZEROES)
+ bio_op(bio) != REQ_OP_WRITE_ZEROES &&
+ bio_op(bio) != REQ_OP_VERIFY)
return true;
return false;
@@ -73,8 +74,9 @@ static inline bool bio_no_advance_iter(const struct bio *bio)
{
return bio_op(bio) == REQ_OP_DISCARD ||
bio_op(bio) == REQ_OP_SECURE_ERASE ||
bio_op(bio) == REQ_OP_WRITE_SAME ||
- bio_op(bio) == REQ_OP_WRITE_ZEROES;
+ bio_op(bio) == REQ_OP_WRITE_ZEROES ||
+ bio_op(bio) == REQ_OP_VERIFY;
}
static inline bool bio_mergeable(struct bio *bio)
@@ -198,7 +199,7 @@ static inline unsigned bio_segments(struct bio *bio)
struct bvec_iter iter;
/*
- * We special case discard/write same/write zeroes, because they
+ * We special case discard/write same/write zeroes/verify, because they
* interpret bi_size differently:
*/
@@ -206,6 +207,7 @@ static inline unsigned bio_segments(struct bio *bio)
case REQ_OP_DISCARD:
case REQ_OP_SECURE_ERASE:
case REQ_OP_WRITE_ZEROES:
+ case REQ_OP_VERIFY:
return 0;
case REQ_OP_WRITE_SAME:
return 1;
@@ -366,6 +366,8 @@ enum req_opf {
REQ_OP_SECURE_ERASE = 5,
/* write the same sector many times */
REQ_OP_WRITE_SAME = 7,
+ /* verify the sectors */
+ REQ_OP_VERIFY = 8,
/* write the zero filled sector many times */
REQ_OP_WRITE_ZEROES = 9,
/* Open a zone */
@@ -334,6 +334,7 @@ struct queue_limits {
unsigned int max_hw_discard_sectors;
unsigned int max_write_same_sectors;
unsigned int max_write_zeroes_sectors;
+ unsigned int max_verify_sectors;
unsigned int max_zone_append_sectors;
unsigned int discard_granularity;
unsigned int discard_alignment;
@@ -621,6 +622,7 @@ struct request_queue {
#define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */
#define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */
#define QUEUE_FLAG_NOWAIT 29 /* device supports NOWAIT */
+#define QUEUE_FLAG_VERIFY 30 /* device supports REQ_OP_VERIFY */
#define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
(1 << QUEUE_FLAG_SAME_COMP) | \
@@ -667,6 +669,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
#define blk_queue_fua(q) test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)
#define blk_queue_registered(q) test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags)
#define blk_queue_nowait(q) test_bit(QUEUE_FLAG_NOWAIT, &(q)->queue_flags)
+#define blk_queue_verify(q) test_bit(QUEUE_FLAG_VERIFY, &(q)->queue_flags)
extern void blk_set_pm_only(struct request_queue *q);
extern void blk_clear_pm_only(struct request_queue *q);
@@ -814,6 +817,9 @@ static inline bool rq_mergeable(struct request *rq)
if (req_op(rq) == REQ_OP_WRITE_ZEROES)
return false;
+ if (req_op(rq) == REQ_OP_VERIFY)
+ return false;
+
if (req_op(rq) == REQ_OP_ZONE_APPEND)
return false;
@@ -1072,6 +1078,9 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
if (unlikely(op == REQ_OP_WRITE_ZEROES))
return q->limits.max_write_zeroes_sectors;
+ if (unlikely(op == REQ_OP_VERIFY))
+ return q->limits.max_verify_sectors;
+
return q->limits.max_sectors;
}
@@ -1154,6 +1163,8 @@ extern void blk_queue_max_discard_sectors(struct request_queue *q,
unsigned int max_discard_sectors);
extern void blk_queue_max_write_same_sectors(struct request_queue *q,
unsigned int max_write_same_sectors);
+extern void blk_queue_max_verify_sectors(struct request_queue *q,
+ unsigned int max_verify_sectors);
extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q,
unsigned int max_write_same_sectors);
extern void blk_queue_logical_block_size(struct request_queue *, unsigned int);
@@ -1348,6 +1359,16 @@ extern int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
unsigned flags);
extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, unsigned flags);
+extern int __blkdev_emulate_verify(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
+ char *buf);
+extern int blkdev_emulate_verify(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp_mask);
+extern int __blkdev_issue_verify(struct block_device *bdev,
+ sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
+ struct bio **biop);
+extern int blkdev_issue_verify(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp_mask);
static inline int sb_issue_discard(struct super_block *sb, sector_t block,
sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags)
@@ -1553,6 +1574,16 @@ static inline unsigned int bdev_write_same(struct block_device *bdev)
return 0;
}
+static inline unsigned int bdev_verify_sectors(struct block_device *bdev)
+{
+ struct request_queue *q = bdev_get_queue(bdev);
+
+ if (q)
+ return q->limits.max_verify_sectors;
+
+ return 0;
+}
+
static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev)
{
struct request_queue *q = bdev_get_queue(bdev);
@@ -184,6 +184,7 @@ struct fsxattr {
#define BLKSECDISCARD _IO(0x12,125)
#define BLKROTATIONAL _IO(0x12,126)
#define BLKZEROOUT _IO(0x12,127)
+#define BLKVERIFY _IO(0x12,128)
/*
* A jump here: 130-131 are reserved for zoned block devices
* (see uapi/linux/blkzoned.h)