diff mbox series

[RFC,v2,05/10] bdev: allow to switch between bdev aops

Message ID 20230915213254.2724586-6-mcgrof@kernel.org (mailing list archive)
State Deferred, archived
Headers show
Series bdev: LBS devices support to coexist with buffer-heads | expand

Commit Message

Luis Chamberlain Sept. 15, 2023, 9:32 p.m. UTC
Now that we have annotations for filesystems which require buffer-heads we
can use that flag to verify if we can use the filesystem on the target
block devices which require higher order folios. A filesystem which requires
buffer-heads cannot be used on block devices which have a logical block size
greater than PAGE_SIZE. We also want to allow using buffer-head filesystems
on block devices and, at a later time, unmounting and switching to a filesystem
which supports bs > PAGE_SIZE, even if the logical block size of the block
device is PAGE_SIZE, and this requires iomap. Provide helpers to do all these
checks and reset the aops to iomap when needed.

Leaving iomap in place after an unmount would leave such block devices unusable
for buffer-head filesystems, so we must also reset the aops to buffer-heads
on unmount.

Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
---
 block/bdev.c           | 55 ++++++++++++++++++++++++++++++++++++++++++
 fs/super.c             |  3 ++-
 include/linux/blkdev.h |  7 ++++++
 3 files changed, 64 insertions(+), 1 deletion(-)
diff mbox series

Patch

diff --git a/block/bdev.c b/block/bdev.c
index 0d685270cd34..bf3cfc02aaf9 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -150,6 +150,59 @@  static int bdev_bsize_limit(struct block_device *bdev)
 	return PAGE_SIZE;
 }
 
+#ifdef CONFIG_BUFFER_HEAD
+static void bdev_aops_set(struct block_device *bdev,
+			  const struct address_space_operations *aops)
+{
+	kill_bdev(bdev);	/* done before the a_ops pointer is replaced */
+	bdev->bd_inode->i_data.a_ops = aops;
+}
+
+static void bdev_aops_sync(struct super_block *sb, struct block_device *bdev,
+			   const struct address_space_operations *aops)
+{
+	/* @sb is unused. sync_blockdev() first, then swap the aops. */
+	sync_blockdev(bdev);
+	/* bdev_aops_set() already does kill_bdev() and installs @aops. */
+	bdev_aops_set(bdev, aops);
+}
+
+void bdev_aops_reset(struct block_device *bdev)
+{
+	bdev_aops_set(bdev, &def_blk_aops); /* called from kill_block_super() */
+}
+
+static int sb_bdev_aops_set(struct super_block *sb)
+{
+	struct block_device *bdev = sb->s_bdev;
+
+	if (mapping_min_folio_order(bdev->bd_inode->i_mapping) != 0 &&
+	    (sb->s_type->fs_flags & FS_BUFFER_HEADS)) {
+		pr_warn_ratelimited(
+"block device logical block size > PAGE_SIZE, buffer-head filesystem cannot be used.\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Always install the buffer-head aops first and only then move to
+	 * iomap for filesystems without FS_BUFFER_HEADS. Otherwise a
+	 * filesystem which only uses iomap would leave the iomap aops sticky
+	 * and we could not detect buffer-head filesystems on mount.
+	 */
+	bdev_aops_sync(sb, bdev, &def_blk_aops);
+	if (sb->s_type->fs_flags & FS_BUFFER_HEADS)
+		return 0;
+
+	bdev_aops_sync(sb, bdev, &def_blk_aops_iomap);
+	return 0;
+}
+#else /* !CONFIG_BUFFER_HEAD */
+static int sb_bdev_aops_set(struct super_block *sb)
+{
+	return 0;
+}
+#endif /* CONFIG_BUFFER_HEAD */
+
 int set_blocksize(struct block_device *bdev, int size)
 {
 	/* Size must be a power of two, and between 512 and supported order */
@@ -173,6 +226,8 @@  EXPORT_SYMBOL(set_blocksize);
 
 int sb_set_blocksize(struct super_block *sb, int size)
 {
+	if (sb_bdev_aops_set(sb))
+		return 0;
 	if (set_blocksize(sb->s_bdev, size))
 		return 0;
 	/* If we get here, we know size is power of two
diff --git a/fs/super.c b/fs/super.c
index 816a22a5cad1..eb269c9489cb 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1649,12 +1649,13 @@  void kill_block_super(struct super_block *sb)
 	generic_shutdown_super(sb);
 	if (bdev) {
 		sync_blockdev(bdev);
+		bdev_aops_reset(bdev);
 		blkdev_put(bdev, sb);
 	}
 }
 
 EXPORT_SYMBOL(kill_block_super);
-#endif
+#endif /* CONFIG_BLOCK */
 
 struct dentry *mount_nodev(struct file_system_type *fs_type,
 	int flags, void *data,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index eef450f25982..738a879a0786 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1503,6 +1503,13 @@  void sync_bdevs(bool wait);
 void bdev_statx_dioalign(struct inode *inode, struct kstat *stat);
 void printk_all_partitions(void);
 int __init early_lookup_bdev(const char *pathname, dev_t *dev);
+#ifdef CONFIG_BUFFER_HEAD
+void bdev_aops_reset(struct block_device *bdev);
+#else /* !CONFIG_BUFFER_HEAD */
+static inline void bdev_aops_reset(struct block_device *bdev)
+{
+}
+#endif /* CONFIG_BUFFER_HEAD */
 #else
 static inline void invalidate_bdev(struct block_device *bdev)
 {