Message ID | f284d932a61c293ac1c350ac11730b11b651300c.1727368214.git.anand.jain@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | raid1 balancing methods | expand |
在 2024/9/27 19:25, Anand Jain 写道: > This feature aims to direct the read I/O to the device with the lowest > known latency for reading RAID1 blocks. > > echo "latency" > /sys/fs/btrfs/<UUID>/read_policy > > Signed-off-by: Anand Jain <anand.jain@oracle.com> > --- > fs/btrfs/sysfs.c | 2 +- > fs/btrfs/volumes.c | 40 ++++++++++++++++++++++++++++++++++++++++ > fs/btrfs/volumes.h | 2 ++ > 3 files changed, 43 insertions(+), 1 deletion(-) > > diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c > index 18fb35a887c6..15abf931726c 100644 > --- a/fs/btrfs/sysfs.c > +++ b/fs/btrfs/sysfs.c > @@ -1306,7 +1306,7 @@ static ssize_t btrfs_temp_fsid_show(struct kobject *kobj, > BTRFS_ATTR(, temp_fsid, btrfs_temp_fsid_show); > > #ifdef CONFIG_BTRFS_DEBUG > -static const char * const btrfs_read_policy_name[] = { "pid", "rotation" }; > +static const char * const btrfs_read_policy_name[] = { "pid", "rotation", "latency" }; > #else > static const char * const btrfs_read_policy_name[] = { "pid" }; > #endif > diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c > index c130a27386a7..20bc62d85b3b 100644 > --- a/fs/btrfs/volumes.c > +++ b/fs/btrfs/volumes.c > @@ -12,6 +12,9 @@ > #include <linux/uuid.h> > #include <linux/list_sort.h> > #include <linux/namei.h> > +#ifdef CONFIG_BTRFS_DEBUG > +#include <linux/part_stat.h> > +#endif > #include "misc.h" > #include "ctree.h" > #include "disk-io.h" > @@ -5860,6 +5863,39 @@ int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info, u64 logical, u64 len) > } > > #ifdef CONFIG_BTRFS_DEBUG > +static int btrfs_best_stripe(struct btrfs_fs_info *fs_info, > + struct btrfs_chunk_map *map, int first, > + int num_stripe) > +{ > + u64 est_wait = 0; Is this a typo of best_wait? Or do you mean estimated wait? > + int best_stripe = 0; > + int index; > + > + for (index = first; index < first + num_stripe; index++) { > + u64 read_wait; > + u64 avg_wait = 0; > + unsigned long read_ios; > + struct btrfs_device *device = map->stripes[index].dev; > + > + read_wait = part_stat_read(device->bdev, nsecs[READ]); > + read_ios = part_stat_read(device->bdev, ios[READ]); > + > + if (read_wait && read_ios && read_wait >= read_ios) > + avg_wait = div_u64(read_wait, read_ios); > + else > + btrfs_debug_rl(fs_info, > + "devid: %llu avg_wait ZERO read_wait %llu read_ios %lu", > + device->devid, read_wait, read_ios); I do not think we need this debug messages. The device can have no read so far. > + > + if (est_wait == 0 || est_wait > avg_wait) { You can give @est_wait a default value of U64_MAX, so that you do not need to check for 0, just est_wait > avg_wait is enough. Thanks, Qu > + est_wait = avg_wait; > + best_stripe = index; > + } > + } > + > + return best_stripe; > +} > + > struct stripe_mirror { > u64 devid; > int map; > @@ -5940,6 +5976,10 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info, > case BTRFS_READ_POLICY_ROTATION: > preferred_mirror = btrfs_read_rotation(map, first, num_stripes); > break; > + case BTRFS_READ_POLICY_LATENCY: > + preferred_mirror = btrfs_best_stripe(fs_info, map, first, > + num_stripes); > + break; > #endif > } > > diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h > index 81701217dbb9..09920ef76a9b 100644 > --- a/fs/btrfs/volumes.h > +++ b/fs/btrfs/volumes.h > @@ -306,6 +306,8 @@ enum btrfs_read_policy { > #ifdef CONFIG_BTRFS_DEBUG > /* Balancing raid1 reads across all striped devices */ > BTRFS_READ_POLICY_ROTATION, > + /* Use the lowest-latency device dynamically */ > + BTRFS_READ_POLICY_LATENCY, > #endif > BTRFS_NR_READ_POLICY, > };
>> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c >> index c130a27386a7..20bc62d85b3b 100644 >> --- a/fs/btrfs/volumes.c >> +++ b/fs/btrfs/volumes.c >> @@ -12,6 +12,9 @@ >> #include <linux/uuid.h> >> #include <linux/list_sort.h> >> #include <linux/namei.h> >> +#ifdef CONFIG_BTRFS_DEBUG >> +#include <linux/part_stat.h> >> +#endif >> #include "misc.h" >> #include "ctree.h" >> #include "disk-io.h" >> @@ -5860,6 +5863,39 @@ int btrfs_is_parity_mirror(struct btrfs_fs_info >> *fs_info, u64 logical, u64 len) >> } >> >> #ifdef CONFIG_BTRFS_DEBUG >> +static int btrfs_best_stripe(struct btrfs_fs_info *fs_info, >> + struct btrfs_chunk_map *map, int first, >> + int num_stripe) >> +{ >> + u64 est_wait = 0; > > Is this a typo of best_wait? Or do you mean estimated wait? > It is best_wait. Fixed in v2. >> + int best_stripe = 0; >> + int index; >> + >> + for (index = first; index < first + num_stripe; index++) { >> + u64 read_wait; >> + u64 avg_wait = 0; >> + unsigned long read_ios; >> + struct btrfs_device *device = map->stripes[index].dev; >> + >> + read_wait = part_stat_read(device->bdev, nsecs[READ]); >> + read_ios = part_stat_read(device->bdev, ios[READ]); >> + >> + if (read_wait && read_ios && read_wait >= read_ios) >> + avg_wait = div_u64(read_wait, read_ios); >> + else >> + btrfs_debug_rl(fs_info, >> + "devid: %llu avg_wait ZERO read_wait %llu read_ios %lu", >> + device->devid, read_wait, read_ios); > > I do not think we need this debug messages. > > The device can have no read so far. > Um. Yeah, we can remove it. >> + >> + if (est_wait == 0 || est_wait > avg_wait) { > > You can give @est_wait a default value of U64_MAX, so that you do not > need to check for 0, just est_wait > avg_wait is enough. > Fixed in v2. Thanks for the review. - Anand > Thanks, > Qu
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 18fb35a887c6..15abf931726c 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -1306,7 +1306,7 @@ static ssize_t btrfs_temp_fsid_show(struct kobject *kobj, BTRFS_ATTR(, temp_fsid, btrfs_temp_fsid_show); #ifdef CONFIG_BTRFS_DEBUG -static const char * const btrfs_read_policy_name[] = { "pid", "rotation" }; +static const char * const btrfs_read_policy_name[] = { "pid", "rotation", "latency" }; #else static const char * const btrfs_read_policy_name[] = { "pid" }; #endif diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c130a27386a7..20bc62d85b3b 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -12,6 +12,9 @@ #include <linux/uuid.h> #include <linux/list_sort.h> #include <linux/namei.h> +#ifdef CONFIG_BTRFS_DEBUG +#include <linux/part_stat.h> +#endif #include "misc.h" #include "ctree.h" #include "disk-io.h" @@ -5860,6 +5863,39 @@ int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info, u64 logical, u64 len) } #ifdef CONFIG_BTRFS_DEBUG +static int btrfs_best_stripe(struct btrfs_fs_info *fs_info, + struct btrfs_chunk_map *map, int first, + int num_stripe) +{ + u64 est_wait = 0; + int best_stripe = 0; + int index; + + for (index = first; index < first + num_stripe; index++) { + u64 read_wait; + u64 avg_wait = 0; + unsigned long read_ios; + struct btrfs_device *device = map->stripes[index].dev; + + read_wait = part_stat_read(device->bdev, nsecs[READ]); + read_ios = part_stat_read(device->bdev, ios[READ]); + + if (read_wait && read_ios && read_wait >= read_ios) + avg_wait = div_u64(read_wait, read_ios); + else + btrfs_debug_rl(fs_info, + "devid: %llu avg_wait ZERO read_wait %llu read_ios %lu", + device->devid, read_wait, read_ios); + + if (est_wait == 0 || est_wait > avg_wait) { + est_wait = avg_wait; + best_stripe = index; + } + } + + return best_stripe; +} + struct stripe_mirror { u64 devid; int map; @@ -5940,6 +5976,10 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info, case BTRFS_READ_POLICY_ROTATION: preferred_mirror = btrfs_read_rotation(map, first, num_stripes); break; + case BTRFS_READ_POLICY_LATENCY: + preferred_mirror = btrfs_best_stripe(fs_info, map, first, + num_stripes); + break; #endif } diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 81701217dbb9..09920ef76a9b 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -306,6 +306,8 @@ enum btrfs_read_policy { #ifdef CONFIG_BTRFS_DEBUG /* Balancing raid1 reads across all striped devices */ BTRFS_READ_POLICY_ROTATION, + /* Use the lowest-latency device dynamically */ + BTRFS_READ_POLICY_LATENCY, #endif BTRFS_NR_READ_POLICY, };
This feature aims to direct the read I/O to the device with the lowest known latency for reading RAID1 blocks. echo "latency" > /sys/fs/btrfs/<UUID>/read_policy Signed-off-by: Anand Jain <anand.jain@oracle.com> --- fs/btrfs/sysfs.c | 2 +- fs/btrfs/volumes.c | 40 ++++++++++++++++++++++++++++++++++++++++ fs/btrfs/volumes.h | 2 ++ 3 files changed, 43 insertions(+), 1 deletion(-)