@@ -3320,6 +3320,9 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
fs_info->nodesize = nodesize;
fs_info->sectorsize = sectorsize;
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
+ fs_info->fs_devices->min_contiguous_read = sectorsize;
+#endif
fs_info->sectorsize_bits = ilog2(sectorsize);
fs_info->sectors_per_page = (PAGE_SIZE >> fs_info->sectorsize_bits);
fs_info->csums_per_leaf = BTRFS_MAX_ITEM_SIZE(fs_info) / fs_info->csum_size;
@@ -1305,7 +1305,11 @@ static ssize_t btrfs_temp_fsid_show(struct kobject *kobj,
}
BTRFS_ATTR(, temp_fsid, btrfs_temp_fsid_show);
+#ifdef CONFIG_BTRFS_EXPERIMENTAL /* entries are indexed by enum btrfs_read_policy */
+static const char * const btrfs_read_policy_name[] = { "pid", "rotation" };
+#else /* !CONFIG_BTRFS_EXPERIMENTAL */
static const char * const btrfs_read_policy_name[] = { "pid" };
+#endif /* CONFIG_BTRFS_EXPERIMENTAL */
static ssize_t btrfs_read_policy_show(struct kobject *kobj,
struct kobj_attribute *a, char *buf)
@@ -1316,14 +1320,22 @@ static ssize_t btrfs_read_policy_show(struct kobject *kobj,
int i;
for (i = 0; i < BTRFS_NR_READ_POLICY; i++) {
- if (policy == i)
- ret += sysfs_emit_at(buf, ret, "%s[%s]",
- (ret == 0 ? "" : " "),
- btrfs_read_policy_name[i]);
- else
- ret += sysfs_emit_at(buf, ret, "%s%s",
- (ret == 0 ? "" : " "),
- btrfs_read_policy_name[i]);
+ if (ret != 0)
+ ret += sysfs_emit_at(buf, ret, " ");
+
+ if (i == policy)
+ ret += sysfs_emit_at(buf, ret, "[");
+
+ ret += sysfs_emit_at(buf, ret, "%s", btrfs_read_policy_name[i]);
+
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
+ if (i == BTRFS_READ_POLICY_ROTATION)
+ ret += sysfs_emit_at(buf, ret, ":%d",
+ fs_devices->min_contiguous_read);
+#endif
+
+ if (i == policy)
+ ret += sysfs_emit_at(buf, ret, "]");
}
ret += sysfs_emit_at(buf, ret, "\n");
@@ -1336,21 +1348,67 @@ static ssize_t btrfs_read_policy_store(struct kobject *kobj,
const char *buf, size_t len)
{
struct btrfs_fs_devices *fs_devices = to_fs_devs(kobj);
+ int index = -1;
int i;
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
+ char *value = strchr(buf, ':');
+
+ /* Separate value from input in policy:value format. */
+ if (value) {
+ *value = '\0';
+ value++;
+ }
+#endif
+
for (i = 0; i < BTRFS_NR_READ_POLICY; i++) {
if (sysfs_streq(buf, btrfs_read_policy_name[i])) {
- if (i != READ_ONCE(fs_devices->read_policy)) {
- WRITE_ONCE(fs_devices->read_policy, i);
- btrfs_info(fs_devices->fs_info,
- "read policy set to '%s'",
- btrfs_read_policy_name[i]);
+ index = i;
+ break;
+ }
+ }
+
+ if (index == -1)
+ return -EINVAL;
+
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
+ if (index == BTRFS_READ_POLICY_ROTATION) {
+ int value_rota = fs_devices->fs_info->sectorsize;
+
+ if (value) {
+ if (kstrtoint(value, 10, &value_rota))
+ return -EINVAL;
+
+ if (value_rota % fs_devices->fs_info->sectorsize != 0) {
+ btrfs_err(fs_devices->fs_info,
+"read_policy: min_contiguous_read %d should be multiples of the sectorsize %u",
+ value_rota,
+ fs_devices->fs_info->sectorsize);
+ return -EINVAL;
}
- return len;
}
+
+ if (index != READ_ONCE(fs_devices->read_policy) ||
+ value_rota != READ_ONCE(fs_devices->min_contiguous_read)) {
+ WRITE_ONCE(fs_devices->read_policy, index);
+ WRITE_ONCE(fs_devices->min_contiguous_read, value_rota);
+ atomic_set(&fs_devices->total_reads, 0);
+
+ btrfs_info(fs_devices->fs_info, "read policy set to '%s:%d'",
+ btrfs_read_policy_name[index], value_rota);
+
+ }
+
+ return len;
+ }
+#endif
+ if (index != READ_ONCE(fs_devices->read_policy)) {
+ WRITE_ONCE(fs_devices->read_policy, index);
+ btrfs_info(fs_devices->fs_info, "read policy set to '%s'",
+ btrfs_read_policy_name[index]);
}
- return -EINVAL;
+ return len;
}
BTRFS_ATTR_RW(, read_policy, btrfs_read_policy_show, btrfs_read_policy_store);
@@ -5962,6 +5962,54 @@ unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
return len;
}
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
+struct stripe_mirror { /* devid/stripe-index pair that btrfs_read_rotation() sorts by devid */
+ u64 devid; /* map->stripes[num].dev->devid */
+ int num; /* index into map->stripes[] */
+};
+
+/*
+ * sort() comparator: orders struct stripe_mirror entries by ascending devid.
+ *
+ * Returns -1, 0 or 1 when the devid of @a is lower than, equal to or higher
+ * than the devid of @b.
+ */
+static int btrfs_cmp_devid(const void *a, const void *b)
+{
+	/* Keep the const qualifier; the comparator only reads the entries. */
+	const struct stripe_mirror *s1 = a;
+	const struct stripe_mirror *s2 = b;
+
+	if (s1->devid < s2->devid)
+		return -1;
+	if (s1->devid > s2->devid)
+		return 1;
+	return 0;
+}
+
+/*
+ * Pick the stripe to read from under the rotation read policy.
+ *
+ * The candidates map->stripes[first .. first + num_stripe - 1] are ordered by
+ * devid, and the fs_devices-wide read counter steps through them, staying on
+ * one device for min_contiguous_read / sectorsize consecutive reads before
+ * moving on to the next one.
+ *
+ * Returns an index into map->stripes[].
+ */
+static int btrfs_read_rotation(struct btrfs_chunk_map *map, int first,
+			       int num_stripe)
+{
+	/* 4: max possible mirrors. */
+	struct stripe_mirror stripes[4] = { 0 };
+	struct btrfs_fs_devices *fs_devices = map->stripes[first].dev->fs_devices;
+	const unsigned int sectorsize = fs_devices->fs_info->sectorsize;
+	unsigned int reads_per_dev = 1;
+	unsigned int total_reads;
+	int min_contiguous_read;
+	int i;
+
+	/* Never index past stripes[]; fall back to the first candidate. */
+	if (num_stripe < 1 || num_stripe > (int)ARRAY_SIZE(stripes))
+		return first;
+
+	/*
+	 * The sysfs store only checks value % sectorsize, which 0 and negative
+	 * multiples also pass. Anything that does not amount to more than one
+	 * sector means one read per device, so the division below can neither
+	 * fault nor produce a negative slot. READ_ONCE() pairs with the
+	 * WRITE_ONCE() in the store.
+	 */
+	min_contiguous_read = READ_ONCE(fs_devices->min_contiguous_read);
+	if (min_contiguous_read > 0 &&
+	    (unsigned int)min_contiguous_read > sectorsize)
+		reads_per_dev = (unsigned int)min_contiguous_read / sectorsize;
+
+	for (i = 0; i < num_stripe; i++) {
+		stripes[i].devid = map->stripes[first + i].dev->devid;
+		stripes[i].num = first + i;
+	}
+	sort(stripes, num_stripe, sizeof(struct stripe_mirror),
+	     btrfs_cmp_devid, NULL);
+
+	/*
+	 * Unsigned arithmetic keeps the slot non-negative once the counter
+	 * wraps past INT_MAX.
+	 */
+	total_reads = (unsigned int)atomic_inc_return(&fs_devices->total_reads);
+
+	return stripes[(total_reads / reads_per_dev) % num_stripe].num;
+}
+#endif
+
static int find_live_mirror(struct btrfs_fs_info *fs_info,
struct btrfs_chunk_map *map, int first,
int dev_replace_is_ongoing)
@@ -5991,6 +6039,11 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
case BTRFS_READ_POLICY_PID:
preferred_mirror = first + (current->pid % num_stripes);
break;
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
+ case BTRFS_READ_POLICY_ROTATION:
+ preferred_mirror = btrfs_read_rotation(map, first, num_stripes);
+ break;
+#endif
}
if (dev_replace_is_ongoing &&
@@ -303,6 +303,10 @@ enum btrfs_chunk_allocation_policy {
enum btrfs_read_policy {
/* Use process PID to choose the stripe */
BTRFS_READ_POLICY_PID,
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
+ /* Rotate RAID1 reads across the candidate devices, in devid order */
+ BTRFS_READ_POLICY_ROTATION,
+#endif
BTRFS_NR_READ_POLICY,
};
@@ -431,6 +435,11 @@ struct btrfs_fs_devices {
enum btrfs_read_policy read_policy;
#ifdef CONFIG_BTRFS_EXPERIMENTAL
+ /* IO stat, read counter. */
+ atomic_t total_reads;
+ /* Min contiguous reads before switching to next device. */
+ int min_contiguous_read;
+
/* Checksum mode - offload it or do it synchronously. */
enum btrfs_offload_csum_mode offload_csum_mode;
#endif
This feature balances read I/O across the striped devices when reading from
RAID1 blocks.

  echo rotation:[min_contiguous_read] > /sys/fs/btrfs/<uuid>/read_policy

min_contiguous_read must be a multiple of the sectorsize; its default value
is equal to the sectorsize.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
 fs/btrfs/disk-io.c |  3 ++
 fs/btrfs/sysfs.c   | 88 ++++++++++++++++++++++++++++++++++++++--------
 fs/btrfs/volumes.c | 53 ++++++++++++++++++++++++++++
 fs/btrfs/volumes.h |  9 +++++
 4 files changed, 138 insertions(+), 15 deletions(-)