diff mbox series

[1/3] btrfs: introduce RAID1 round-robin read balancing

Message ID 63676f15fe9b1ca6c10eb9021829b4666db6d021.1727368214.git.anand.jain@oracle.com (mailing list archive)
State New, archived
Headers show
Series raid1 balancing methods | expand

Commit Message

Anand Jain Sept. 27, 2024, 9:55 a.m. UTC
This feature balances I/O across the striped devices when reading from
RAID1 blocks.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
 fs/btrfs/sysfs.c   |  4 ++++
 fs/btrfs/volumes.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++
 fs/btrfs/volumes.h |  7 ++++++
 3 files changed, 64 insertions(+)

Comments

Qu Wenruo Sept. 27, 2024, 10:10 a.m. UTC | #1
在 2024/9/27 19:25, Anand Jain 写道:
> This feature balances I/O across the striped devices when reading from
> RAID1 blocks.
>
> Signed-off-by: Anand Jain <anand.jain@oracle.com>
> ---
>   fs/btrfs/sysfs.c   |  4 ++++
>   fs/btrfs/volumes.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++
>   fs/btrfs/volumes.h |  7 ++++++
>   3 files changed, 64 insertions(+)
>
> diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
> index 03926ad467c9..18fb35a887c6 100644
> --- a/fs/btrfs/sysfs.c
> +++ b/fs/btrfs/sysfs.c
> @@ -1305,7 +1305,11 @@ static ssize_t btrfs_temp_fsid_show(struct kobject *kobj,
>   }
>   BTRFS_ATTR(, temp_fsid, btrfs_temp_fsid_show);
>
> +#ifdef CONFIG_BTRFS_DEBUG
> +static const char * const btrfs_read_policy_name[] = { "pid", "rotation" };
> +#else
>   static const char * const btrfs_read_policy_name[] = { "pid" };
> +#endif
>
>   static ssize_t btrfs_read_policy_show(struct kobject *kobj,
>   				      struct kobj_attribute *a, char *buf)
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 995b0647f538..c130a27386a7 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -5859,6 +5859,54 @@ int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
>   	return ret;
>   }
>
> +#ifdef CONFIG_BTRFS_DEBUG

It would be much better to utilize CONFIG_BTRFS_EXPERIMENTAL.
CONFIG_BTRFS_DEBUG is now for pure debug purposes.

Thanks,
Qu

> +struct stripe_mirror {
> +	u64 devid;
> +	int map;
> +};
> +
> +static int btrfs_cmp_devid(const void *a, const void *b)
> +{
> +	struct stripe_mirror *s1 = (struct stripe_mirror *)a;
> +	struct stripe_mirror *s2 = (struct stripe_mirror *)b;
> +
> +	if (s1->devid < s2->devid)
> +		return -1;
> +	if (s1->devid > s2->devid)
> +		return 1;
> +	return 0;
> +}
> +
> +static int btrfs_read_rotation(struct btrfs_chunk_map *map, int first,
> +			       int num_stripe)
> +{
> +	struct stripe_mirror stripes[4] = {0}; //4: for testing, works for now.
> +	struct btrfs_fs_devices *fs_devices;
> +	u64 devid;
> +	int index, j, cnt;
> +	int next_stripe;
> +
> +	index = 0;
> +	for (j = first; j < first + num_stripe; j++) {
> +		devid = map->stripes[j].dev->devid;
> +
> +		stripes[index].devid = devid;
> +		stripes[index].map = j;
> +
> +		index++;
> +	}
> +
> +	sort(stripes, num_stripe, sizeof(struct stripe_mirror),
> +	     btrfs_cmp_devid, NULL);
> +
> +	fs_devices = map->stripes[first].dev->fs_devices;
> +	cnt = atomic_inc_return(&fs_devices->total_reads);
> +	next_stripe = stripes[cnt % num_stripe].map;
> +
> +	return next_stripe;
> +}
> +#endif
> +
>   static int find_live_mirror(struct btrfs_fs_info *fs_info,
>   			    struct btrfs_chunk_map *map, int first,
>   			    int dev_replace_is_ongoing)
> @@ -5888,6 +5936,11 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
>   	case BTRFS_READ_POLICY_PID:
>   		preferred_mirror = first + (current->pid % num_stripes);
>   		break;
> +#ifdef CONFIG_BTRFS_DEBUG
> +	case BTRFS_READ_POLICY_ROTATION:
> +		preferred_mirror = btrfs_read_rotation(map, first, num_stripes);
> +		break;
> +#endif
>   	}
>
>   	if (dev_replace_is_ongoing &&
> diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
> index 4481575dd70f..81701217dbb9 100644
> --- a/fs/btrfs/volumes.h
> +++ b/fs/btrfs/volumes.h
> @@ -303,6 +303,10 @@ enum btrfs_chunk_allocation_policy {
>   enum btrfs_read_policy {
>   	/* Use process PID to choose the stripe */
>   	BTRFS_READ_POLICY_PID,
> +#ifdef CONFIG_BTRFS_DEBUG
> +	/* Balancing raid1 reads across all striped devices */
> +	BTRFS_READ_POLICY_ROTATION,
> +#endif
>   	BTRFS_NR_READ_POLICY,
>   };
>
> @@ -431,6 +435,9 @@ struct btrfs_fs_devices {
>   	enum btrfs_read_policy read_policy;
>
>   #ifdef CONFIG_BTRFS_DEBUG
> +	/* read counter for the filesystem */
> +	atomic_t total_reads;
> +
>   	/* Checksum mode - offload it or do it synchronously. */
>   	enum btrfs_offload_csum_mode offload_csum_mode;
>   #endif
Anand Jain Oct. 11, 2024, 1:21 a.m. UTC | #2
>> +#ifdef CONFIG_BTRFS_DEBUG
> 
> It would be much better to utilize CONFIG_BTRFS_EXPERIMENTAL.
> CONFIG_BTRFS_DEBUG is now for pure debug purposes.

Yes, I noticed the recent patch that changed that.
Fixed in v2.

Thanks, Anand
diff mbox series

Patch

diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 03926ad467c9..18fb35a887c6 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -1305,7 +1305,11 @@  static ssize_t btrfs_temp_fsid_show(struct kobject *kobj,
 }
 BTRFS_ATTR(, temp_fsid, btrfs_temp_fsid_show);
 
+#ifdef CONFIG_BTRFS_DEBUG
+static const char * const btrfs_read_policy_name[] = { "pid", "rotation" };
+#else
 static const char * const btrfs_read_policy_name[] = { "pid" };
+#endif
 
 static ssize_t btrfs_read_policy_show(struct kobject *kobj,
 				      struct kobj_attribute *a, char *buf)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 995b0647f538..c130a27386a7 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5859,6 +5859,54 @@  int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
 	return ret;
 }
 
+#ifdef CONFIG_BTRFS_DEBUG
+struct stripe_mirror {
+	u64 devid;	/* devid of the device backing this stripe; the sort key */
+	int map;	/* index of this stripe within the chunk map's stripes[] */
+};
+
+/* sort() comparator: orders struct stripe_mirror entries by ascending devid. */
+static int btrfs_cmp_devid(const void *a, const void *b)
+{
+	const struct stripe_mirror *lhs = a;
+	const struct stripe_mirror *rhs = b;
+
+	if (lhs->devid == rhs->devid)
+		return 0;
+
+	return lhs->devid < rhs->devid ? -1 : 1;
+}
+
+/*
+ * Rotation read policy: order the mirrors by devid and pick the one selected
+ * by fs_devices->total_reads, which is incremented on every call. Returns an
+ * index into map->stripes[] in the range [first, first + num_stripe).
+ * NOTE(review): stripes[] holds 4 entries; assumes num_stripe <= 4 - confirm.
+ */
+static int btrfs_read_rotation(struct btrfs_chunk_map *map, int first,
+			       int num_stripe)
+{
+	struct stripe_mirror stripes[4] = {0};
+	struct btrfs_fs_devices *fs_devices;
+	unsigned int cnt;
+	int index = 0;
+	int j;
+
+	for (j = first; j < first + num_stripe; j++) {
+		stripes[index].devid = map->stripes[j].dev->devid;
+		stripes[index].map = j;
+		index++;
+	}
+
+	sort(stripes, num_stripe, sizeof(struct stripe_mirror),
+	     btrfs_cmp_devid, NULL);
+	fs_devices = map->stripes[first].dev->fs_devices;
+	/* Unsigned on purpose: a wrapped counter must not give a negative index. */
+	cnt = atomic_inc_return(&fs_devices->total_reads);
+	return stripes[cnt % num_stripe].map;
+}
+#endif
+
 static int find_live_mirror(struct btrfs_fs_info *fs_info,
 			    struct btrfs_chunk_map *map, int first,
 			    int dev_replace_is_ongoing)
@@ -5888,6 +5936,11 @@  static int find_live_mirror(struct btrfs_fs_info *fs_info,
 	case BTRFS_READ_POLICY_PID:
 		preferred_mirror = first + (current->pid % num_stripes);
 		break;
+#ifdef CONFIG_BTRFS_DEBUG
+	case BTRFS_READ_POLICY_ROTATION:
+		preferred_mirror = btrfs_read_rotation(map, first, num_stripes);
+		break;
+#endif
 	}
 
 	if (dev_replace_is_ongoing &&
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 4481575dd70f..81701217dbb9 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -303,6 +303,10 @@  enum btrfs_chunk_allocation_policy {
 enum btrfs_read_policy {
 	/* Use process PID to choose the stripe */
 	BTRFS_READ_POLICY_PID,
+#ifdef CONFIG_BTRFS_DEBUG
+	/* Balancing raid1 reads across all striped devices */
+	BTRFS_READ_POLICY_ROTATION,
+#endif
 	BTRFS_NR_READ_POLICY,
 };
 
@@ -431,6 +435,9 @@  struct btrfs_fs_devices {
 	enum btrfs_read_policy read_policy;
 
 #ifdef CONFIG_BTRFS_DEBUG
+	/* Filesystem-wide read counter driving the rotation read policy. */
+	atomic_t total_reads;
+
 	/* Checksum mode - offload it or do it synchronously. */
 	enum btrfs_offload_csum_mode offload_csum_mode;
 #endif