diff mbox series

[v2,2/3] btrfs: use the path with the lowest latency for RAID1 reads

Message ID 5dc93c29ea3b5c8f6049d38bd968c8caf143ca7c.1728608421.git.anand.jain@oracle.com (mailing list archive)
State New, archived
Headers show
Series raid1 balancing methods | expand

Commit Message

Anand Jain Oct. 11, 2024, 2:49 a.m. UTC
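This patch adds a "latency" read policy that directs RAID1 read I/O to the
mirror device with the lowest known average read latency, computed from the
block device's read statistics. The policy is only available when
CONFIG_BTRFS_EXPERIMENTAL is enabled, and is selected with: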

echo "latency" > /sys/fs/btrfs/<UUID>/read_policy

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
 fs/btrfs/sysfs.c   |  2 +-
 fs/btrfs/volumes.c | 36 ++++++++++++++++++++++++++++++++++++
 fs/btrfs/volumes.h |  2 ++
 3 files changed, 39 insertions(+), 1 deletion(-)
diff mbox series

Patch

diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index bacb2871109b..9f506d46a94c 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -1306,7 +1306,7 @@  static ssize_t btrfs_temp_fsid_show(struct kobject *kobj,
 BTRFS_ATTR(, temp_fsid, btrfs_temp_fsid_show);
 
 #ifdef CONFIG_BTRFS_EXPERIMENTAL
-static const char * const btrfs_read_policy_name[] = { "pid", "rotation" };
+static const char * const btrfs_read_policy_name[] = { "pid", "rotation", "latency" };
 #else
 static const char * const btrfs_read_policy_name[] = { "pid" };
 #endif
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index ec5dbe69ba2c..8912ee1d8b54 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -12,6 +12,9 @@ 
 #include <linux/uuid.h>
 #include <linux/list_sort.h>
 #include <linux/namei.h>
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
+#include <linux/part_stat.h>
+#endif
 #include "misc.h"
 #include "ctree.h"
 #include "disk-io.h"
@@ -5963,6 +5966,35 @@  unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
 }
 
 #ifdef CONFIG_BTRFS_EXPERIMENTAL
+/*
+ * Pick the stripe whose device shows the lowest average read latency.
+ *
+ * For every stripe in [first, first + num_stripe) the average is computed as
+ * part_stat_read(bdev, nsecs[READ]) / part_stat_read(bdev, ios[READ]).  A
+ * device without usable statistics gets an average of 0, so it is preferred
+ * over devices that already have a measured latency.  On a tie the stripe
+ * with the lowest index wins.
+ *
+ * @fs_info:    not used here; kept so the caller's interface is unchanged
+ * @map:        chunk map whose stripes[] array is scanned
+ * @first:      index of the first candidate stripe
+ * @num_stripe: number of candidate stripes starting at @first
+ *
+ * Return: index into map->stripes[] of the chosen stripe, or @first when no
+ * candidate could be evaluated.
+ */
+static int btrfs_best_stripe(struct btrfs_fs_info *fs_info,
+			     struct btrfs_chunk_map *map, int first,
+			     int num_stripe)
+{
+	u64 best_wait = U64_MAX;
+	/* Default to the first candidate so the result stays inside the range. */
+	int best_stripe = first;
+	int index;
+
+	for (index = first; index < first + num_stripe; index++) {
+		u64 read_wait;
+		u64 avg_wait = 0;
+		unsigned long read_ios;
+		struct btrfs_device *device = map->stripes[index].dev;
+
+		/*
+		 * NOTE(review): presumably a stripe's device can have no bdev
+		 * (e.g. a missing device) - confirm against the caller.  Skip
+		 * it rather than hand a NULL bdev to part_stat_read().
+		 */
+		if (!device->bdev)
+			continue;
+
+		read_wait = part_stat_read(device->bdev, nsecs[READ]);
+		read_ios = part_stat_read(device->bdev, ios[READ]);
+
+		/*
+		 * read_ios is an unsigned long, but div_u64() takes a 32-bit
+		 * divisor: the count would be truncated and could even become
+		 * zero.  div64_ul() divides by the full-width value.
+		 */
+		if (read_wait && read_ios && read_wait >= read_ios)
+			avg_wait = div64_ul(read_wait, read_ios);
+
+		if (best_wait > avg_wait) {
+			best_wait = avg_wait;
+			best_stripe = index;
+		}
+	}
+
+	return best_stripe;
+}
+
 struct stripe_mirror {
 	u64 devid;
 	int num;
@@ -6043,6 +6075,10 @@  static int find_live_mirror(struct btrfs_fs_info *fs_info,
 	case BTRFS_READ_POLICY_ROTATION:
 		preferred_mirror = btrfs_read_rotation(map, first, num_stripes);
 		break;
+	case BTRFS_READ_POLICY_LATENCY:
+		preferred_mirror = btrfs_best_stripe(fs_info, map, first,
+								num_stripes);
+		break;
 #endif
 	}
 
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 0db754a4b13d..f9c744b87b61 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -306,6 +306,8 @@  enum btrfs_read_policy {
 #ifdef CONFIG_BTRFS_EXPERIMENTAL
 	/* Balancing raid1 reads across all striped devices */
 	BTRFS_READ_POLICY_ROTATION,
+	/* Use the lowest-latency device dynamically */
+	BTRFS_READ_POLICY_LATENCY,
 #endif
 	BTRFS_NR_READ_POLICY,
 };