diff mbox series

[v3] btrfs: introduce offload_csum_mode to tweak checksum offloading behavior

Message ID 8dc4a312da70bb93f042f32a75efb7ec848cc08b.1707122589.git.naohiro.aota@wdc.com (mailing list archive)
State New, archived
Headers show
Series [v3] btrfs: introduce offload_csum_mode to tweak checksum offloading behavior | expand

Commit Message

Naohiro Aota Feb. 5, 2024, 1:01 p.m. UTC
We disable offloading checksum to workqueues and do it synchronously when
the checksum algorithm is fast. However, as reported in the link below,
RAID0 with multiple devices may suffer from the sync checksum, because
"fast checksum" is still not fast enough to catch up RAID0 writing.

To measure the effectiveness of sync checksum and checksum offloading for
developers, it would be better to have a switch for the checksum offloading
under CONFIG_BTRFS_DEBUG hood.

This commit introduces fs_devices->offload_csum_mode for
CONFIG_BTRFS_DEBUG, so that a btrfs developer can change the behavior by
writing to /sys/fs/btrfs/<uuid>/offload_csum. The default is "auto" which
is the same as the previous behavior. Or, you can set "on" or "off" (or "y"
or "n" whatever kstrtobool() accepts) to always/never offload checksum.

More benchmark should be collected with this knob to implement a proper
criteria to enable/disable checksum offloading.

Link: https://lore.kernel.org/linux-btrfs/20230731152223.4EFB.409509F4@e16-tech.com/
Link: https://lore.kernel.org/linux-btrfs/p3vo3g7pqn664mhmdhlotu5dzcna6vjtcoc2hb2lsgo2fwct7k@xzaxclba5tae/
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
---
 fs/btrfs/bio.c     | 14 +++++++++++++-
 fs/btrfs/sysfs.c   | 44 ++++++++++++++++++++++++++++++++++++++++++++
 fs/btrfs/volumes.h | 24 ++++++++++++++++++++++++
 3 files changed, 81 insertions(+), 1 deletion(-)

Comments

David Sterba Feb. 5, 2024, 6:44 p.m. UTC | #1
On Mon, Feb 05, 2024 at 10:01:16PM +0900, Naohiro Aota wrote:
> We disable offloading checksum to workqueues and do it synchronously when
> the checksum algorithm is fast. However, as reported in the link below,
> RAID0 with multiple devices may suffer from the sync checksum, because
> "fast checksum" is still not fast enough to catch up RAID0 writing.
> 
> To measure the effectiveness of sync checksum and checksum offloading for
> developers, it would be better to have a switch for the checksum offloading
> under CONFIG_BTRFS_DEBUG hood.
> 
> This commit introduces fs_devices->offload_csum_mode for
> CONFIG_BTRFS_DEBUG, so that a btrfs developer can change the behavior by
> writing to /sys/fs/btrfs/<uuid>/offload_csum. The default is "auto" which
> is the same as the previous behavior. Or, you can set "on" or "off" (or "y"
> or "n" whatever kstrtobool() accepts) to always/never offload checksum.
> 
> More benchmark should be collected with this knob to implement a proper
> criteria to enable/disable checksum offloading.
> 
> Link: https://lore.kernel.org/linux-btrfs/20230731152223.4EFB.409509F4@e16-tech.com/
> Link: https://lore.kernel.org/linux-btrfs/p3vo3g7pqn664mhmdhlotu5dzcna6vjtcoc2hb2lsgo2fwct7k@xzaxclba5tae/
> Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>

Reviewed-by: David Sterba <dsterba@suse.com>

Thanks. Now to find a good heuristic for the auto mode.
Johannes Thumshirn Feb. 6, 2024, 8:10 a.m. UTC | #2
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
David Sterba Feb. 20, 2024, 12:22 p.m. UTC | #3
On Mon, Feb 05, 2024 at 10:01:16PM +0900, Naohiro Aota wrote:
> We disable offloading checksum to workqueues and do it synchronously when
> the checksum algorithm is fast. However, as reported in the link below,
> RAID0 with multiple devices may suffer from the sync checksum, because
> "fast checksum" is still not fast enough to catch up RAID0 writing.
> 
> To measure the effectiveness of sync checksum and checksum offloading for
> developers, it would be better to have a switch for the checksum offloading
> under CONFIG_BTRFS_DEBUG hood.
> 
> This commit introduces fs_devices->offload_csum_mode for
> CONFIG_BTRFS_DEBUG, so that a btrfs developer can change the behavior by
> writing to /sys/fs/btrfs/<uuid>/offload_csum. The default is "auto" which
> is the same as the previous behavior. Or, you can set "on" or "off" (or "y"
> or "n" whatever kstrtobool() accepts) to always/never offload checksum.
> 
> More benchmark should be collected with this knob to implement a proper
> criteria to enable/disable checksum offloading.
> 
> Link: https://lore.kernel.org/linux-btrfs/20230731152223.4EFB.409509F4@e16-tech.com/
> Link: https://lore.kernel.org/linux-btrfs/p3vo3g7pqn664mhmdhlotu5dzcna6vjtcoc2hb2lsgo2fwct7k@xzaxclba5tae/
> Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>

I'm adding this to for-next.
diff mbox series

Patch

diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index 960b81718e29..477f350a8bd0 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -608,8 +608,20 @@  static void run_one_async_done(struct btrfs_work *work, bool do_free)
 
 static bool should_async_write(struct btrfs_bio *bbio)
 {
+	bool auto_csum_mode = true;
+
+#ifdef CONFIG_BTRFS_DEBUG
+	struct btrfs_fs_devices *fs_devices = bbio->fs_info->fs_devices;
+	enum btrfs_offload_csum_mode csum_mode = READ_ONCE(fs_devices->offload_csum_mode);
+
+	if (csum_mode == BTRFS_OFFLOAD_CSUM_FORCE_OFF)
+		return false;
+
+	auto_csum_mode = (csum_mode == BTRFS_OFFLOAD_CSUM_AUTO);
+#endif
+
 	/* Submit synchronously if the checksum implementation is fast. */
-	if (test_bit(BTRFS_FS_CSUM_IMPL_FAST, &bbio->fs_info->flags))
+	if (auto_csum_mode && test_bit(BTRFS_FS_CSUM_IMPL_FAST, &bbio->fs_info->flags))
 		return false;
 
 	/*
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 84c05246ffd8..d2d485f103bd 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -1306,6 +1306,47 @@  static ssize_t btrfs_bg_reclaim_threshold_store(struct kobject *kobj,
 BTRFS_ATTR_RW(, bg_reclaim_threshold, btrfs_bg_reclaim_threshold_show,
 	      btrfs_bg_reclaim_threshold_store);
 
+#ifdef CONFIG_BTRFS_DEBUG
+static ssize_t btrfs_offload_csum_show(struct kobject *kobj,
+				       struct kobj_attribute *a, char *buf)
+{
+	struct btrfs_fs_devices *fs_devices = to_fs_devs(kobj);
+
+	switch (READ_ONCE(fs_devices->offload_csum_mode)) {
+	case BTRFS_OFFLOAD_CSUM_AUTO:
+		return sysfs_emit(buf, "auto\n");
+	case BTRFS_OFFLOAD_CSUM_FORCE_ON:
+		return sysfs_emit(buf, "1\n");
+	case BTRFS_OFFLOAD_CSUM_FORCE_OFF:
+		return sysfs_emit(buf, "0\n");
+	default:
+		WARN_ON(1);
+		return -EINVAL;
+	}
+}
+
+static ssize_t btrfs_offload_csum_store(struct kobject *kobj,
+					struct kobj_attribute *a, const char *buf,
+					size_t len)
+{
+	struct btrfs_fs_devices *fs_devices = to_fs_devs(kobj);
+	int ret;
+	bool val;
+
+	ret = kstrtobool(buf, &val);
+	if (ret == 0)
+		WRITE_ONCE(fs_devices->offload_csum_mode,
+			   val ? BTRFS_OFFLOAD_CSUM_FORCE_ON : BTRFS_OFFLOAD_CSUM_FORCE_OFF);
+	else if (ret == -EINVAL && sysfs_streq(buf, "auto"))
+		WRITE_ONCE(fs_devices->offload_csum_mode, BTRFS_OFFLOAD_CSUM_AUTO);
+	else
+		return -EINVAL;
+
+	return len;
+}
+BTRFS_ATTR_RW(, offload_csum, btrfs_offload_csum_show, btrfs_offload_csum_store);
+#endif
+
 /*
  * Per-filesystem information and stats.
  *
@@ -1325,6 +1366,9 @@  static const struct attribute *btrfs_attrs[] = {
 	BTRFS_ATTR_PTR(, bg_reclaim_threshold),
 	BTRFS_ATTR_PTR(, commit_stats),
 	BTRFS_ATTR_PTR(, temp_fsid),
+#ifdef CONFIG_BTRFS_DEBUG
+	BTRFS_ATTR_PTR(, offload_csum),
+#endif
 	NULL,
 };
 
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 53f87f398da7..1f6f73ce48f9 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -276,6 +276,25 @@  enum btrfs_read_policy {
 	BTRFS_NR_READ_POLICY,
 };
 
+#ifdef CONFIG_BTRFS_DEBUG
+/*
+ * Checksum mode - offload it to workqueues or do it synchronously in
+ * btrfs_submit_chunk().
+ */
+enum btrfs_offload_csum_mode {
+	/*
+	 * Choose offloading checksum or do it synchronously automatically.
+	 * Do it synchronously if the checksum is fast, or offload to workqueues
+	 * otherwise.
+	 */
+	BTRFS_OFFLOAD_CSUM_AUTO,
+	/* Always offload checksum to workqueues. */
+	BTRFS_OFFLOAD_CSUM_FORCE_ON,
+	/* Never offload checksum to workqueues. */
+	BTRFS_OFFLOAD_CSUM_FORCE_OFF,
+};
+#endif
+
 struct btrfs_fs_devices {
 	u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
 
@@ -380,6 +399,11 @@  struct btrfs_fs_devices {
 
 	/* Policy used to read the mirrored stripes. */
 	enum btrfs_read_policy read_policy;
+
+#ifdef CONFIG_BTRFS_DEBUG
+	/* Checksum mode - offload it or do it synchronously. */
+	enum btrfs_offload_csum_mode offload_csum_mode;
+#endif
 };
 
 #define BTRFS_MAX_DEVS(info) ((BTRFS_MAX_ITEM_SIZE(info)	\