@@ -658,6 +658,53 @@ static void disk_zone_wplug_abort_unaligned(struct gendisk *disk,
bio_list_merge(&zwplug->bio_list, &bl);
}
+static inline void disk_zone_wplug_set_error(struct gendisk *disk,
+ struct blk_zone_wplug *zwplug)
+{
+ if (!(zwplug->flags & BLK_ZONE_WPLUG_ERROR)) {
+ unsigned long flags;
+
+ /*
+ * At this point, we already have a reference on the zone write
+ * plug. However, since we are going to add the plug to the disk
+ * zone write plugs work list, increase its reference count.
+ * This reference will be dropped in disk_zone_wplugs_work()
+ * once the error state is handled, or
+ * in disk_zone_wplug_clear_error() if the zone is reset or
+ * finished.
+ */
+ zwplug->flags |= BLK_ZONE_WPLUG_ERROR;
+ atomic_inc(&zwplug->ref);
+
+ spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
+ list_add_tail(&zwplug->link, &disk->zone_wplugs_err_list);
+ spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
+ }
+}
+
+static inline void disk_zone_wplug_clear_error(struct gendisk *disk,
+ struct blk_zone_wplug *zwplug)
+{
+ if (zwplug->flags & BLK_ZONE_WPLUG_ERROR) {
+ unsigned long flags;
+
+ /*
+ * We are racing with the error handling work which drops
+ * the reference on the zone write plug after handling the error
+ * state. So remove the plug from the error list and drop its
+ * reference count only if the error handling has not yet
+ * started, that is, if the zone write plug is still listed.
+ */
+ spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
+ if (!list_empty(&zwplug->link)) {
+ list_del_init(&zwplug->link);
+ zwplug->flags &= ~BLK_ZONE_WPLUG_ERROR;
+ disk_put_zone_wplug(zwplug);
+ }
+ spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
+ }
+}
+
/*
* Set a zone write plug write pointer offset to either 0 (zone reset case)
* or to the zone size (zone finish case). This aborts all plugged BIOs, which
@@ -691,12 +738,7 @@ static void disk_zone_wplug_set_wp_offset(struct gendisk *disk,
* in a good state. So clear the error flag and decrement the
* error count if we were in error state.
*/
- if (zwplug->flags & BLK_ZONE_WPLUG_ERROR) {
- zwplug->flags &= ~BLK_ZONE_WPLUG_ERROR;
- spin_lock(&disk->zone_wplugs_lock);
- list_del_init(&zwplug->link);
- spin_unlock(&disk->zone_wplugs_lock);
- }
+ disk_zone_wplug_clear_error(disk, zwplug);
/*
* The zone write plug now has no BIO plugged: remove it from the
@@ -885,26 +927,6 @@ void blk_zone_write_plug_attempt_merge(struct request *req)
spin_unlock_irqrestore(&zwplug->lock, flags);
}
-static inline void disk_zone_wplug_set_error(struct gendisk *disk,
- struct blk_zone_wplug *zwplug)
-{
- if (!(zwplug->flags & BLK_ZONE_WPLUG_ERROR)) {
- unsigned long flags;
-
- /*
- * Increase the plug reference count. The reference will be
- * dropped in disk_zone_wplugs_work() once the error state
- * is handled.
- */
- zwplug->flags |= BLK_ZONE_WPLUG_ERROR;
- atomic_inc(&zwplug->ref);
-
- spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
- list_add_tail(&zwplug->link, &disk->zone_wplugs_err_list);
- spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
- }
-}
-
/*
* Check and prepare a BIO for submission by incrementing the write pointer
* offset of its zone write plug and changing zone append operations into
When zone is reset or finished, disk_zone_wplug_set_wp_offset() is called to update the zone write plug write pointer offset and to clear the zone error state (BLK_ZONE_WPLUG_ERROR flag) if it is set. However, this processing is missing dropping the reference to the zone write plug that was taken in disk_zone_wplug_set_error() when the error flag was first set. Furthermore, the error state handling must release the zone write plug lock to first execute a report zones command. When the report zone races with a reset or finish operation that clears the error, we can end up decrementing the zone write plug reference count twice: once in disk_zone_wplug_set_wp_offset() for the reset/finish operation and one more time in disk_zone_wplugs_work() once disk_zone_wplug_handle_error() completes. Fix this by introducing disk_zone_wplug_clear_error() as the symmetric function of disk_zone_wplug_set_error(). disk_zone_wplug_clear_error() decrements the zone write plug reference count obtained in disk_zone_wplug_set_error() only if the error handling has not started yet, that is, only if disk_zone_wplugs_work() has not yet taken the zone write plug off the error list. This ensure that either disk_zone_wplug_clear_error() or disk_zone_wplugs_work() drop the zone write plug reference count. Fixes: dd291d77cc90 ("block: Introduce zone write plugging") Signed-off-by: Damien Le Moal <dlemoal@kernel.org> --- block/blk-zoned.c | 74 ++++++++++++++++++++++++++++++----------------- 1 file changed, 48 insertions(+), 26 deletions(-)