diff mbox series

[06/13] block: Unhash a zone write plug only if needed

Message ID 20240430125131.668482-7-dlemoal@kernel.org (mailing list archive)
State Superseded, archived
Delegated to: Mike Snitzer
Headers show
Series Zone write plugging fixes and cleanup | expand

Commit Message

Damien Le Moal April 30, 2024, 12:51 p.m. UTC
Fix disk_remove_zone_wplug() to ensure that a zone write plug already
removed from a disk hash table of zone write plugs is not removed
again. Do this by checking the BLK_ZONE_WPLUG_UNHASHED flag of the plug
and calling hlist_del_init_rcu() only if the flag is not set.

Furthermore, since BIO completions can happen at any time, that is,
decrementing of the zone write plug reference count can happen at any
time, make sure to use disk_put_zone_wplug() instead of atomic_dec() to
ensure that the zone write plug is freed when its last reference is
dropped. In order to do this, disk_remove_zone_wplug() is moved after
the definition of disk_put_zone_wplug(). disk_should_remove_zone_wplug()
is moved as well to keep it together with disk_remove_zone_wplug().

To be consistent with this change, add a check in disk_put_zone_wplug()
to ensure that a zone write plug being freed was already removed from
the disk hash table.

Fixes: dd291d77cc90 ("block: Introduce zone write plugging")
Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
---
 block/blk-zoned.c | 54 +++++++++++++++++++++++++++--------------------
 1 file changed, 31 insertions(+), 23 deletions(-)

Comments

Christoph Hellwig April 30, 2024, 3:31 p.m. UTC | #1
Looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>
diff mbox series

Patch

diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 03555ea64774..82e540dad900 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -476,29 +476,6 @@  static bool disk_insert_zone_wplug(struct gendisk *disk,
 	return true;
 }
 
-static void disk_remove_zone_wplug(struct gendisk *disk,
-				   struct blk_zone_wplug *zwplug)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
-	zwplug->flags |= BLK_ZONE_WPLUG_UNHASHED;
-	atomic_dec(&zwplug->ref);
-	hlist_del_init_rcu(&zwplug->node);
-	spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
-}
-
-static inline bool disk_should_remove_zone_wplug(struct gendisk *disk,
-						 struct blk_zone_wplug *zwplug)
-{
-	/* If the zone is still busy, the plug cannot be removed. */
-	if (zwplug->flags & BLK_ZONE_WPLUG_BUSY)
-		return false;
-
-	/* We can remove zone write plugs for zones that are empty or full. */
-	return !zwplug->wp_offset || zwplug->wp_offset >= disk->zone_capacity;
-}
-
 static struct blk_zone_wplug *disk_get_zone_wplug(struct gendisk *disk,
 						  sector_t sector)
 {
@@ -534,11 +511,42 @@  static inline void disk_put_zone_wplug(struct blk_zone_wplug *zwplug)
 	if (atomic_dec_and_test(&zwplug->ref)) {
 		WARN_ON_ONCE(!bio_list_empty(&zwplug->bio_list));
 		WARN_ON_ONCE(!list_empty(&zwplug->link));
+		WARN_ON_ONCE(!(zwplug->flags & BLK_ZONE_WPLUG_UNHASHED));
 
 		call_rcu(&zwplug->rcu_head, disk_free_zone_wplug_rcu);
 	}
 }
 
+static inline bool disk_should_remove_zone_wplug(struct gendisk *disk,
+						 struct blk_zone_wplug *zwplug)
+{
+	/* If the zone is still busy, the plug cannot be removed. */
+	if (zwplug->flags & BLK_ZONE_WPLUG_BUSY)
+		return false;
+
+	/* We can remove zone write plugs for zones that are empty or full. */
+	return !zwplug->wp_offset || zwplug->wp_offset >= disk->zone_capacity;
+}
+
+static void disk_remove_zone_wplug(struct gendisk *disk,
+				   struct blk_zone_wplug *zwplug)
+{
+	if (!(zwplug->flags & BLK_ZONE_WPLUG_UNHASHED)) {
+		unsigned long flags;
+
+		/*
+		 * Mark the zone write plug as unhashed and drop the extra
+		 * reference we took when the plug was inserted in the hash
+		 * table.
+		 */
+		zwplug->flags |= BLK_ZONE_WPLUG_UNHASHED;
+		spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
+		hlist_del_init_rcu(&zwplug->node);
+		spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
+		disk_put_zone_wplug(zwplug);
+	}
+}
+
 static void blk_zone_wplug_bio_work(struct work_struct *work);
 
 /*