@@ -906,8 +906,7 @@ bool blk_update_request(struct request *req, blk_status_t error,
if (bio_bytes == bio->bi_iter.bi_size) {
req->bio = bio->bi_next;
- } else if (req_op(req) == REQ_OP_ZONE_APPEND &&
- error == BLK_STS_OK) {
+ } else if (bio_is_zone_append(bio) && error == BLK_STS_OK) {
/*
* Partial zone append completions cannot be supported
* as the BIO fragments may end up not being written
@@ -689,7 +689,8 @@ static void disk_zone_wplug_abort_unaligned(struct gendisk *disk,
while ((bio = bio_list_pop(&zwplug->bio_list))) {
if (wp_offset >= zone_capacity ||
- bio_offset_from_zone_start(bio) != wp_offset) {
+ (bio_op(bio) != REQ_OP_ZONE_APPEND &&
+ bio_offset_from_zone_start(bio) != wp_offset)) {
blk_zone_wplug_bio_io_error(bio);
disk_put_zone_wplug(zwplug);
continue;
@@ -951,7 +952,8 @@ static inline void disk_zone_wplug_set_error(struct gendisk *disk,
/*
* Check and prepare a BIO for submission by incrementing the write pointer
- * offset of its zone write plug.
+ * offset of its zone write plug and changing zone append operations into
+ * regular write when zone append emulation is needed.
*/
static bool blk_zone_wplug_prepare_bio(struct blk_zone_wplug *zwplug,
struct bio *bio)
@@ -966,13 +968,30 @@ static bool blk_zone_wplug_prepare_bio(struct blk_zone_wplug *zwplug,
if (zwplug->wp_offset >= disk->zone_capacity)
goto err;
- /*
- * Check for non-sequential writes early because we avoid a
- * whole lot of error handling trouble if we don't send it off
- * to the driver.
- */
- if (bio_offset_from_zone_start(bio) != zwplug->wp_offset)
- goto err;
+ if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
+ /*
+ * Use a regular write starting at the current write pointer.
+ * Similarly to native zone append operations, do not allow
+ * merging.
+ */
+ bio->bi_opf &= ~REQ_OP_MASK;
+ bio->bi_opf |= REQ_OP_WRITE | REQ_NOMERGE;
+ bio->bi_iter.bi_sector += zwplug->wp_offset;
+
+ /*
+ * Remember that this BIO is in fact a zone append operation
+ * so that we can restore its operation code on completion.
+ */
+ bio_set_flag(bio, BIO_EMULATES_ZONE_APPEND);
+ } else {
+ /*
+ * Check for non-sequential writes early because we avoid a
+ * whole lot of error handling trouble if we don't send it off
+ * to the driver.
+ */
+ if (bio_offset_from_zone_start(bio) != zwplug->wp_offset)
+ goto err;
+ }
/* Advance the zone write pointer offset. */
zwplug->wp_offset += bio_sectors(bio);
@@ -1008,8 +1027,14 @@ static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs)
}
/* Conventional zones do not need write plugging. */
- if (disk_zone_is_conv(disk, sector))
+ if (disk_zone_is_conv(disk, sector)) {
+ /* Zone append to conventional zones is not allowed. */
+ if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
+ bio_io_error(bio);
+ return true;
+ }
return false;
+ }
if (bio->bi_opf & REQ_NOWAIT)
gfp_mask = GFP_NOWAIT;
@@ -1057,10 +1082,10 @@ static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs)
* @bio: The BIO being submitted
* @nr_segs: The number of physical segments of @bio
*
- * Handle write and write zeroes operations using zone write plugging.
- * Return true whenever @bio execution needs to be delayed through the zone
- * write plug. Otherwise, return false to let the submission path process
- * @bio normally.
+ * Handle write, write zeroes and zone append operations requiring emulation
+ * using zone write plugging. Return true whenever @bio execution needs to be
+ * delayed through the zone write plug. Otherwise, return false to let the
+ * submission path process @bio normally.
*/
bool blk_zone_write_plug_bio(struct bio *bio, unsigned int nr_segs)
{
@@ -1095,6 +1120,9 @@ bool blk_zone_write_plug_bio(struct bio *bio, unsigned int nr_segs)
* machinery operates at the request level, below the plug, and
* completion of the flush sequence will go through the regular BIO
* completion, which will handle zone write plugging.
+ * Zone append operations for devices that requested emulation must
+ * also be plugged so that these BIOs can be changed into regular
+ * write BIOs.
* Zone reset, reset all and finish commands need special treatment
* to correctly track the write pointer offset of zones. These commands
* are not plugged as we do not need serialization with write
@@ -1102,6 +1130,10 @@ bool blk_zone_write_plug_bio(struct bio *bio, unsigned int nr_segs)
* and finish commands when write operations are in flight.
*/
switch (bio_op(bio)) {
+ case REQ_OP_ZONE_APPEND:
+ if (!bdev_emulates_zone_append(bdev))
+ return false;
+ fallthrough;
case REQ_OP_WRITE:
case REQ_OP_WRITE_ZEROES:
return blk_zone_wplug_handle_write(bio, nr_segs);
@@ -1170,6 +1202,15 @@ void blk_zone_write_plug_bio_endio(struct bio *bio)
/* Make sure we do not see this BIO again by clearing the plug flag. */
bio_clear_flag(bio, BIO_ZONE_WRITE_PLUGGING);
+ /*
+ * If this is a regular write emulating a zone append operation,
+ * restore the original operation code.
+ */
+ if (bio_flagged(bio, BIO_EMULATES_ZONE_APPEND)) {
+ bio->bi_opf &= ~REQ_OP_MASK;
+ bio->bi_opf |= REQ_OP_ZONE_APPEND;
+ }
+
/*
* If the BIO failed, mark the plug as having an error to trigger
* recovery.
@@ -421,6 +421,11 @@ static inline bool bio_zone_write_plugging(struct bio *bio)
{
return bio_flagged(bio, BIO_ZONE_WRITE_PLUGGING);
}
+static inline bool bio_is_zone_append(struct bio *bio)
+{
+ return bio_op(bio) == REQ_OP_ZONE_APPEND ||
+ bio_flagged(bio, BIO_EMULATES_ZONE_APPEND);
+}
void blk_zone_write_plug_bio_merged(struct bio *bio);
void blk_zone_write_plug_attempt_merge(struct request *rq);
static inline void blk_zone_update_request_bio(struct request *rq,
@@ -430,8 +435,9 @@ static inline void blk_zone_update_request_bio(struct request *rq,
* For zone append requests, the request sector indicates the location
* at which the BIO data was written. Return this value to the BIO
* issuer through the BIO iter sector.
- * For plugged zone writes, we need the original BIO sector so
- * that blk_zone_write_plug_bio_endio() can lookup the zone write plug.
+ * For plugged zone writes, which include emulated zone append, we need
+ * the original BIO sector so that blk_zone_write_plug_bio_endio() can
+ * lookup the zone write plug.
*/
if (req_op(rq) == REQ_OP_ZONE_APPEND || bio_zone_write_plugging(bio))
bio->bi_iter.bi_sector = rq->__sector;
@@ -464,6 +470,10 @@ static inline bool bio_zone_write_plugging(struct bio *bio)
{
return false;
}
+static inline bool bio_is_zone_append(struct bio *bio)
+{
+ return false;
+}
static inline void blk_zone_write_plug_bio_merged(struct bio *bio)
{
}
@@ -311,6 +311,7 @@ enum {
BIO_REMAPPED,
BIO_ZONE_WRITE_LOCKED, /* Owns a zoned device zone write lock */
BIO_ZONE_WRITE_PLUGGING, /* bio handled through zone write plugging */
+ BIO_EMULATES_ZONE_APPEND, /* bio emulates a zone append operation */
BIO_FLAG_LAST
};