@@ -670,6 +670,7 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
switch (bio_op(bio)) {
case REQ_OP_DISCARD:
case REQ_OP_SECURE_ERASE:
+ case REQ_OP_WRITE_ZEROES:
break;
case REQ_OP_WRITE_SAME:
bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0];
@@ -1945,6 +1945,10 @@ static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
if (!bdev_is_zoned(bio->bi_bdev))
goto not_supported;
break;
+ case REQ_OP_WRITE_ZEROES:
+ if (!bdev_write_zeroes(bio->bi_bdev))
+ goto not_supported;
+ break;
default:
break;
}
@@ -227,6 +227,55 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
EXPORT_SYMBOL(blkdev_issue_write_same);
/**
+ * __blkdev_issue_write_zeroes - generate number of bios with WRITE ZEROES
+ * @bdev: blockdev to issue
+ * @sector: start sector
+ * @nr_sects: number of sectors to write
+ * @gfp_mask: memory allocation flags (for bio_alloc)
+ * @biop: pointer to anchor bio
+ *
+ * Description:
+ * Generate and issue number of bios(REQ_OP_WRITE_ZEROES) with zerofiled pages.
+ */
+static int __blkdev_issue_write_zeroes(struct block_device *bdev,
+ sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
+ struct bio **biop)
+{
+ struct bio *bio = *biop;
+ unsigned int max_write_zeroes_sectors;
+ struct request_queue *q = bdev_get_queue(bdev);
+
+ if (!q)
+ return -ENXIO;
+
+ /* Ensure that max_write_zeroes_sectors doesn't overflow bi_size */
+ max_write_zeroes_sectors = bdev_write_zeroes(bdev);
+
+ if (max_write_zeroes_sectors == 0)
+ return -EOPNOTSUPP;
+
+ while (nr_sects) {
+ bio = next_bio(bio, 0, gfp_mask);
+ bio->bi_iter.bi_sector = sector;
+ bio->bi_bdev = bdev;
+ bio_set_op_attrs(bio, REQ_OP_WRITE_ZEROES, 0);
+
+ if (nr_sects > max_write_zeroes_sectors) {
+ bio->bi_iter.bi_size = max_write_zeroes_sectors << 9;
+ nr_sects -= max_write_zeroes_sectors;
+ sector += max_write_zeroes_sectors;
+ } else {
+ bio->bi_iter.bi_size = nr_sects << 9;
+ nr_sects = 0;
+ }
+ cond_resched();
+ }
+
+ *biop = bio;
+ return 0;
+}
+
+/**
* __blkdev_issue_zeroout - generate number of zero filed write bios
* @bdev: blockdev to issue
* @sector: start sector
@@ -259,6 +308,11 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
goto out;
}
+ ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp_mask,
+ biop);
+ if (ret == 0 || (ret && ret != -EOPNOTSUPP))
+ goto out;
+
ret = __blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask,
ZERO_PAGE(0), biop);
if (ret == 0 || (ret && ret != -EOPNOTSUPP))
@@ -304,8 +358,8 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
* the discard request fail, if the discard flag is not set, or if
* discard_zeroes_data is not supported, this function will resort to
* zeroing the blocks manually, thus provisioning (allocating,
- * anchoring) them. If the block device supports the WRITE SAME command
- * blkdev_issue_zeroout() will use it to optimize the process of
+ * anchoring) them. If the block device supports WRITE ZEROES or WRITE SAME
+ * command(s), blkdev_issue_zeroout() will use it to optimize the process of
* clearing the block range. Otherwise the zeroing will be performed
* using regular WRITE calls.
*/
@@ -199,6 +199,10 @@ void blk_queue_split(struct request_queue *q, struct bio **bio,
case REQ_OP_SECURE_ERASE:
split = blk_bio_discard_split(q, *bio, bs, &nsegs);
break;
+ case REQ_OP_WRITE_ZEROES:
+ split = NULL;
+ nsegs = (*bio)->bi_phys_segments;
+ break;
case REQ_OP_WRITE_SAME:
split = blk_bio_write_same_split(q, *bio, bs, &nsegs);
break;
@@ -241,11 +245,15 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
* This should probably be returning 0, but blk_add_request_payload()
* (Christoph!!!!)
*/
- if (bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_SECURE_ERASE)
- return 1;
-
- if (bio_op(bio) == REQ_OP_WRITE_SAME)
+ switch (bio_op(bio)) {
+ case REQ_OP_DISCARD:
+ case REQ_OP_SECURE_ERASE:
+ case REQ_OP_WRITE_SAME:
+ case REQ_OP_WRITE_ZEROES:
return 1;
+ default:
+ break;
+ }
fbio = bio;
cluster = blk_queue_cluster(q);
@@ -416,6 +424,7 @@ static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
switch (bio_op(bio)) {
case REQ_OP_DISCARD:
case REQ_OP_SECURE_ERASE:
+ case REQ_OP_WRITE_ZEROES:
/*
* This is a hack - drivers should be neither modifying the
* biovec, nor relying on bi_vcnt - but because of
@@ -96,6 +96,7 @@ void blk_set_default_limits(struct queue_limits *lim)
lim->max_dev_sectors = 0;
lim->chunk_sectors = 0;
lim->max_write_same_sectors = 0;
+ lim->max_write_zeroes_sectors = 0;
lim->max_discard_sectors = 0;
lim->max_hw_discard_sectors = 0;
lim->discard_granularity = 0;
@@ -132,6 +133,7 @@ void blk_set_stacking_limits(struct queue_limits *lim)
lim->max_sectors = UINT_MAX;
lim->max_dev_sectors = UINT_MAX;
lim->max_write_same_sectors = UINT_MAX;
+ lim->max_write_zeroes_sectors = UINT_MAX;
}
EXPORT_SYMBOL(blk_set_stacking_limits);
@@ -300,6 +302,19 @@ void blk_queue_max_write_same_sectors(struct request_queue *q,
EXPORT_SYMBOL(blk_queue_max_write_same_sectors);
/**
+ * blk_queue_max_write_zeroes_sectors - set max sectors for a single
+ * write zeroes
+ * @q: the request queue for the device
+ * @max_write_zeroes_sectors: maximum number of sectors to write per command
+ **/
+void blk_queue_max_write_zeroes_sectors(struct request_queue *q,
+ unsigned int max_write_zeroes_sectors)
+{
+ q->limits.max_write_zeroes_sectors = max_write_zeroes_sectors;
+}
+EXPORT_SYMBOL(blk_queue_max_write_zeroes_sectors);
+
+/**
* blk_queue_max_segments - set max hw segments for a request for this queue
* @q: the request queue for the device
* @max_segments: max number of segments
@@ -575,9 +575,10 @@ static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio)
const int op = bio_op(bio);
/*
- * If not a WRITE (or a discard), do nothing
+ * If not a WRITE (or a discard or write zeroes), do nothing
*/
- if (!(op == REQ_OP_WRITE || op == REQ_OP_DISCARD))
+ if (!(op == REQ_OP_WRITE || op == REQ_OP_DISCARD ||
+ op == REQ_OP_WRITE_ZEROES))
return false;
/*
@@ -76,7 +76,8 @@ static inline bool bio_has_data(struct bio *bio)
if (bio &&
bio->bi_iter.bi_size &&
bio_op(bio) != REQ_OP_DISCARD &&
- bio_op(bio) != REQ_OP_SECURE_ERASE)
+ bio_op(bio) != REQ_OP_SECURE_ERASE &&
+ bio_op(bio) != REQ_OP_WRITE_ZEROES)
return true;
return false;
@@ -86,7 +87,8 @@ static inline bool bio_no_advance_iter(struct bio *bio)
{
return bio_op(bio) == REQ_OP_DISCARD ||
bio_op(bio) == REQ_OP_SECURE_ERASE ||
- bio_op(bio) == REQ_OP_WRITE_SAME;
+ bio_op(bio) == REQ_OP_WRITE_SAME ||
+ bio_op(bio) == REQ_OP_WRITE_ZEROES;
}
static inline bool bio_mergeable(struct bio *bio)
@@ -188,18 +190,19 @@ static inline unsigned bio_segments(struct bio *bio)
struct bvec_iter iter;
/*
- * We special case discard/write same, because they interpret bi_size
- * differently:
+ * We special case discard/write same/write zeroes, because they
+ * interpret bi_size differently:
*/
- if (bio_op(bio) == REQ_OP_DISCARD)
- return 1;
-
- if (bio_op(bio) == REQ_OP_SECURE_ERASE)
- return 1;
-
- if (bio_op(bio) == REQ_OP_WRITE_SAME)
+ switch (bio_op(bio)) {
+ case REQ_OP_DISCARD:
+ case REQ_OP_SECURE_ERASE:
+ case REQ_OP_WRITE_SAME:
+ case REQ_OP_WRITE_ZEROES:
return 1;
+ default:
+ break;
+ }
bio_for_each_segment(bv, bio, iter)
segs++;
@@ -159,6 +159,8 @@ enum req_opf {
REQ_OP_ZONE_RESET = 6,
/* write the same sector many times */
REQ_OP_WRITE_SAME = 7,
+ /* write the zero filled sector many times */
+ REQ_OP_WRITE_ZEROES = 8,
REQ_OP_LAST,
};
@@ -323,6 +323,7 @@ struct queue_limits {
unsigned int max_discard_sectors;
unsigned int max_hw_discard_sectors;
unsigned int max_write_same_sectors;
+ unsigned int max_write_zeroes_sectors;
unsigned int discard_granularity;
unsigned int discard_alignment;
@@ -773,6 +774,9 @@ static inline bool rq_mergeable(struct request *rq)
if (req_op(rq) == REQ_OP_FLUSH)
return false;
+ if (req_op(rq) == REQ_OP_WRITE_ZEROES)
+ return false;
+
if (rq->cmd_flags & REQ_NOMERGE_FLAGS)
return false;
if (rq->rq_flags & RQF_NOMERGE_FLAGS)
@@ -1003,6 +1007,9 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
if (unlikely(op == REQ_OP_WRITE_SAME))
return q->limits.max_write_same_sectors;
+ if (unlikely(op == REQ_OP_WRITE_ZEROES))
+ return q->limits.max_write_zeroes_sectors;
+
return q->limits.max_sectors;
}
@@ -1106,6 +1113,8 @@ extern void blk_queue_max_discard_sectors(struct request_queue *q,
unsigned int max_discard_sectors);
extern void blk_queue_max_write_same_sectors(struct request_queue *q,
unsigned int max_write_same_sectors);
+extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q,
+ unsigned int max_write_same_sectors);
extern void blk_queue_logical_block_size(struct request_queue *, unsigned short);
extern void blk_queue_physical_block_size(struct request_queue *, unsigned int);
extern void blk_queue_alignment_offset(struct request_queue *q,
@@ -1474,6 +1483,16 @@ static inline unsigned int bdev_write_same(struct block_device *bdev)
return 0;
}
+static inline unsigned int bdev_write_zeroes(struct block_device *bdev)
+{
+ struct request_queue *q = bdev_get_queue(bdev);
+
+ if (q)
+ return q->limits.max_write_zeroes_sectors;
+
+ return 0;
+}
+
static inline enum blk_zoned_model bdev_zoned_model(struct block_device *bdev)
{
struct request_queue *q = bdev_get_queue(bdev);