@@ -1895,6 +1895,10 @@ struct bio *bio_split(struct bio *bio, int sectors,
BUG_ON(sectors <= 0);
BUG_ON(sectors >= bio_sectors(bio));
+ /* Zone append commands cannot be split */
+ if (WARN_ON_ONCE(bio_op(bio) == REQ_OP_ZONE_APPEND))
+ return NULL;
+
split = bio_clone_fast(bio, gfp, bs);
if (!split)
return NULL;
@@ -135,6 +135,7 @@ static const char *const blk_op_name[] = {
REQ_OP_NAME(ZONE_OPEN),
REQ_OP_NAME(ZONE_CLOSE),
REQ_OP_NAME(ZONE_FINISH),
+ REQ_OP_NAME(ZONE_APPEND),
REQ_OP_NAME(WRITE_SAME),
REQ_OP_NAME(WRITE_ZEROES),
REQ_OP_NAME(SCSI_IN),
@@ -239,6 +240,16 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
bio_set_flag(bio, BIO_QUIET);
bio_advance(bio, nbytes);
+ if (req_op(rq) == REQ_OP_ZONE_APPEND && error == BLK_STS_OK) {
+ /*
+ * Partial completions cannot be supported as the BIO
+ * fragments may end up not being written sequentially.
+ */
+ if (bio->bi_iter.bi_size)
+ bio->bi_status = BLK_STS_IOERR;
+ else
+ bio->bi_iter.bi_sector = rq->__sector;
+ }
/* don't actually finish bio if it's part of flush sequence */
if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ))
@@ -865,6 +876,39 @@ static inline int blk_partition_remap(struct bio *bio)
return ret;
}
+/*
+ * Validate a REQ_OP_ZONE_APPEND bio submitted to queue @q.
+ *
+ * Returns BLK_STS_NOTSUPP if @q is not a zoned block device, BLK_STS_IOERR
+ * if @bio does not start on the first sector of a sequential zone, is larger
+ * than one zone, or is larger than the zone append limit of @q, and
+ * BLK_STS_OK otherwise.
+ */
+static inline blk_status_t blk_check_zone_append(struct request_queue *q,
+						 struct bio *bio)
+{
+	sector_t pos = bio->bi_iter.bi_sector;
+	unsigned int nr_sectors = bio_sectors(bio);
+
+	/* Only applicable to zoned block devices */
+	if (!blk_queue_is_zoned(q))
+		return BLK_STS_NOTSUPP;
+
+	/* The bio sector must point to the start of a sequential zone */
+	if (pos & (blk_queue_zone_sectors(q) - 1) ||
+	    !blk_queue_zone_is_seq(q, pos))
+		return BLK_STS_IOERR;
+
+	/*
+	 * Not allowed to cross zone boundaries. Otherwise, the BIO will be
+	 * split and could result in non-contiguous sectors being written in
+	 * different zones. pos is the first sector of its zone at this point,
+	 * so the BIO stays inside the zone exactly when it is not larger than
+	 * one zone. Comparing the zone number of pos + nr_sectors instead
+	 * would reject a BIO that ends right on the zone boundary.
+	 */
+	if (nr_sectors > blk_queue_zone_sectors(q))
+		return BLK_STS_IOERR;
+
+	/* Make sure the BIO is small enough and will not get split */
+	if (nr_sectors > q->limits.max_zone_append_sectors)
+		return BLK_STS_IOERR;
+
+	return BLK_STS_OK;
+}
+
static noinline_for_stack bool
generic_make_request_checks(struct bio *bio)
{
@@ -937,6 +981,11 @@ generic_make_request_checks(struct bio *bio)
if (!q->limits.max_write_same_sectors)
goto not_supported;
break;
+ case REQ_OP_ZONE_APPEND:
+ status = blk_check_zone_append(q, bio);
+ if (status != BLK_STS_OK)
+ goto end_io;
+ break;
case REQ_OP_ZONE_RESET:
case REQ_OP_ZONE_OPEN:
case REQ_OP_ZONE_CLOSE:
@@ -48,6 +48,7 @@ void blk_set_default_limits(struct queue_limits *lim)
lim->chunk_sectors = 0;
lim->max_write_same_sectors = 0;
lim->max_write_zeroes_sectors = 0;
+ lim->max_zone_append_sectors = 0;
lim->max_discard_sectors = 0;
lim->max_hw_discard_sectors = 0;
lim->discard_granularity = 0;
@@ -83,6 +84,7 @@ void blk_set_stacking_limits(struct queue_limits *lim)
lim->max_dev_sectors = UINT_MAX;
lim->max_write_same_sectors = UINT_MAX;
lim->max_write_zeroes_sectors = UINT_MAX;
+ lim->max_zone_append_sectors = UINT_MAX;
}
EXPORT_SYMBOL(blk_set_stacking_limits);
@@ -257,6 +259,18 @@ void blk_queue_max_write_zeroes_sectors(struct request_queue *q,
}
EXPORT_SYMBOL(blk_queue_max_write_zeroes_sectors);
+/**
+ * blk_queue_max_zone_append_sectors - set max sectors for a single zone append
+ * @q:  the request queue for the device
+ * @max_zone_append_sectors: maximum number of sectors to write per command
+ *
+ * Description:
+ *    Records @max_zone_append_sectors in the limits of @q. The value is
+ *    stored as given; it is not checked against any other limit of the
+ *    queue here.
+ **/
+void blk_queue_max_zone_append_sectors(struct request_queue *q,
+		unsigned int max_zone_append_sectors)
+{
+	struct queue_limits *lim = &q->limits;
+
+	lim->max_zone_append_sectors = max_zone_append_sectors;
+}
+EXPORT_SYMBOL_GPL(blk_queue_max_zone_append_sectors);
+
/**
* blk_queue_max_segments - set max hw segments for a request for this queue
* @q: the request queue for the device
@@ -506,6 +520,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
b->max_write_same_sectors);
t->max_write_zeroes_sectors = min(t->max_write_zeroes_sectors,
b->max_write_zeroes_sectors);
+ t->max_zone_append_sectors = min(t->max_zone_append_sectors,
+ b->max_zone_append_sectors);
t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn);
t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
@@ -218,6 +218,13 @@ static ssize_t queue_write_zeroes_max_show(struct request_queue *q, char *page)
(unsigned long long)q->limits.max_write_zeroes_sectors << 9);
}
+/*
+ * Show handler for the zone append limit: prints the limit of @q converted
+ * from sectors to bytes, followed by a newline, into @page.
+ */
+static ssize_t queue_zone_append_max_show(struct request_queue *q, char *page)
+{
+	unsigned long long max_bytes;
+
+	/* Widen to 64 bits before shifting so the byte count cannot wrap. */
+	max_bytes = (unsigned long long)q->limits.max_zone_append_sectors <<
+		    SECTOR_SHIFT;
+
+	return sprintf(page, "%llu\n", max_bytes);
+}
+
static ssize_t
queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
{
@@ -639,6 +646,11 @@ static struct queue_sysfs_entry queue_write_zeroes_max_entry = {
.show = queue_write_zeroes_max_show,
};
+/* Read-only (0444) queue attribute "zone_append_max_bytes"; no store handler. */
+static struct queue_sysfs_entry queue_zone_append_max_entry = {
+	.attr = {
+		.name = "zone_append_max_bytes",
+		.mode = 0444,
+	},
+	.show = queue_zone_append_max_show,
+};
+
static struct queue_sysfs_entry queue_nonrot_entry = {
.attr = {.name = "rotational", .mode = 0644 },
.show = queue_show_nonrot,
@@ -749,6 +761,7 @@ static struct attribute *queue_attrs[] = {
&queue_discard_zeroes_data_entry.attr,
&queue_write_same_max_entry.attr,
&queue_write_zeroes_max_entry.attr,
+ &queue_zone_append_max_entry.attr,
&queue_nonrot_entry.attr,
&queue_zoned_entry.attr,
&queue_nr_zones_entry.attr,
@@ -74,6 +74,7 @@ static inline bool bio_no_advance_iter(struct bio *bio)
{
return bio_op(bio) == REQ_OP_DISCARD ||
bio_op(bio) == REQ_OP_SECURE_ERASE ||
+ bio_op(bio) == REQ_OP_ZONE_APPEND ||
bio_op(bio) == REQ_OP_WRITE_SAME ||
bio_op(bio) == REQ_OP_WRITE_ZEROES;
}
@@ -296,6 +296,8 @@ enum req_opf {
REQ_OP_ZONE_CLOSE = 11,
/* Transition a zone to full */
REQ_OP_ZONE_FINISH = 12,
+ /* write data at the current zone write pointer */
+ REQ_OP_ZONE_APPEND = 13,
/* SCSI passthrough using struct scsi_request */
REQ_OP_SCSI_IN = 32,
@@ -336,6 +336,7 @@ struct queue_limits {
unsigned int max_hw_discard_sectors;
unsigned int max_write_same_sectors;
unsigned int max_write_zeroes_sectors;
+ unsigned int max_zone_append_sectors;
unsigned int discard_granularity;
unsigned int discard_alignment;
@@ -757,6 +758,9 @@ static inline bool rq_mergeable(struct request *rq)
if (req_op(rq) == REQ_OP_WRITE_ZEROES)
return false;
+ if (req_op(rq) == REQ_OP_ZONE_APPEND)
+ return false;
+
if (rq->cmd_flags & REQ_NOMERGE_FLAGS)
return false;
if (rq->rq_flags & RQF_NOMERGE_FLAGS)
@@ -1088,6 +1092,8 @@ extern void blk_queue_max_write_same_sectors(struct request_queue *q,
extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q,
unsigned int max_write_same_sectors);
extern void blk_queue_logical_block_size(struct request_queue *, unsigned int);
+extern void blk_queue_max_zone_append_sectors(struct request_queue *q,
+ unsigned int max_zone_append_sectors);
extern void blk_queue_physical_block_size(struct request_queue *, unsigned int);
extern void blk_queue_alignment_offset(struct request_queue *q,
unsigned int alignment);
@@ -1301,6 +1307,11 @@ static inline unsigned int queue_max_segment_size(const struct request_queue *q)
return q->limits.max_segment_size;
}
+/* Return the zone append limit, in sectors, recorded in the limits of @q. */
+static inline unsigned int queue_max_zone_append_sectors(const struct request_queue *q)
+{
+	const struct queue_limits *lim = &q->limits;
+
+	return lim->max_zone_append_sectors;
+}
+
static inline unsigned queue_logical_block_size(const struct request_queue *q)
{
int retval = 512;