@@ -4824,6 +4824,56 @@ static int btrfs_cmp_device_info(const void *a, const void *b)
return 0;
}
+/*
+ * Comparator passed to sort() for metadata and system block groups.
+ * Devices flagged preferred_metadata sort first; within the same kind,
+ * the order is descending by max_avail and then by total_avail.
+ */
+static int btrfs_cmp_device_info_metadata(const void *a, const void *b)
+{
+	const struct btrfs_device_info *lhs = a;
+	const struct btrfs_device_info *rhs = b;
+
+	/* primary key: a preferred_metadata device goes ahead of a plain one */
+	if (!lhs->preferred_metadata != !rhs->preferred_metadata)
+		return lhs->preferred_metadata ? -1 : 1;
+
+	/* secondary key: the larger max_avail goes first */
+	if (lhs->max_avail != rhs->max_avail)
+		return lhs->max_avail > rhs->max_avail ? -1 : 1;
+
+	/* last key: the larger total_avail goes first */
+	if (lhs->total_avail != rhs->total_avail)
+		return lhs->total_avail > rhs->total_avail ? -1 : 1;
+
+	return 0;
+}
+
+/*
+ * Comparator passed to sort() for data block groups.
+ * Devices flagged preferred_metadata sort last; within the same kind,
+ * the order is descending by max_avail and then by total_avail.
+ */
+static int btrfs_cmp_device_info_data(const void *a, const void *b)
+{
+	const struct btrfs_device_info *lhs = a;
+	const struct btrfs_device_info *rhs = b;
+
+	/* primary key: a preferred_metadata device goes behind a plain one */
+	if (!lhs->preferred_metadata != !rhs->preferred_metadata)
+		return lhs->preferred_metadata ? 1 : -1;
+
+	/* secondary key: the larger max_avail goes first */
+	if (lhs->max_avail != rhs->max_avail)
+		return lhs->max_avail > rhs->max_avail ? -1 : 1;
+
+	/* last key: the larger total_avail goes first */
+	if (lhs->total_avail != rhs->total_avail)
+		return lhs->total_avail > rhs->total_avail ? -1 : 1;
+
+	return 0;
+}
+
static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
{
if (!(type & BTRFS_BLOCK_GROUP_RAID56_MASK))
@@ -4939,6 +4989,7 @@ static int gather_device_info(struct btrfs_fs_devices *fs_devices,
int ndevs = 0;
u64 max_avail;
u64 dev_offset;
+ int nr_preferred_metadata = 0;
/*
* in the first pass through the devices list, we gather information
@@ -4991,15 +5042,65 @@ static int gather_device_info(struct btrfs_fs_devices *fs_devices,
devices_info[ndevs].max_avail = max_avail;
devices_info[ndevs].total_avail = total_avail;
devices_info[ndevs].dev = device;
+ devices_info[ndevs].preferred_metadata = !!(device->type &
+ BTRFS_DEV_PREFERRED_METADATA);
+ if (devices_info[ndevs].preferred_metadata)
+ nr_preferred_metadata++;
++ndevs;
}
ctl->ndevs = ndevs;
+ BUG_ON(nr_preferred_metadata > ndevs);
/*
* now sort the devices by hole size / available space
*/
- sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
- btrfs_cmp_device_info, NULL);
+ if (((ctl->type & BTRFS_BLOCK_GROUP_DATA) &&
+ (ctl->type & BTRFS_BLOCK_GROUP_METADATA)) ||
+ info->preferred_metadata_mode == BTRFS_PM_DISABLED) {
+ /* mixed bg or PREFERRED_METADATA not set */
+ sort(devices_info, ctl->ndevs, sizeof(struct btrfs_device_info),
+ btrfs_cmp_device_info, NULL);
+ } else {
+ /*
+ * if PREFERRED_METADATA is set, sort the device considering
+ * also the kind (preferred_metadata or not). Limit the
+ * available devices to the ones of the same kind, to avoid
+ * that a striped profile, like raid5, spreads to all kind of
+ * devices.
+ * It is allowed to use different kinds of devices (if the ones
+ * of the same kind are not enough alone) in the following
+ * case:
+ * - preferred_metadata_mode == BTRFS_PM_SOFT:
+ * use the device of the same kind until these
+ * are enough. Otherwise it is allowed to
+ * use all the devices
+ * - preferred_metadata_mode == BTRFS_PM_HARD
+ * use the device of the same kind; if these are
+ * not enough, then an error will be raised
+ * - preferred_metadata_mode == BTRFS_PM_METADATA
+ * metadata/system -> as BTRFS_PM_SOFT
+ * data -> as BTRFS_PM_HARD
+ */
+ if (ctl->type & BTRFS_BLOCK_GROUP_DATA) {
+ int nr_data = ctl->ndevs - nr_preferred_metadata;
+ sort(devices_info, ctl->ndevs,
+ sizeof(struct btrfs_device_info),
+ btrfs_cmp_device_info_data, NULL);
+ if (info->preferred_metadata_mode == BTRFS_PM_HARD ||
+ info->preferred_metadata_mode == BTRFS_PM_METADATA)
+ ctl->ndevs = nr_data;
+ else if (nr_data >= ctl->devs_min)
+ ctl->ndevs = nr_data;
+ } else { /* non data -> metadata and system */
+ sort(devices_info, ctl->ndevs,
+ sizeof(struct btrfs_device_info),
+ btrfs_cmp_device_info_metadata, NULL);
+ if (info->preferred_metadata_mode == BTRFS_PM_HARD)
+ ctl->ndevs = nr_preferred_metadata;
+ else if (nr_preferred_metadata >= ctl->devs_min)
+ ctl->ndevs = nr_preferred_metadata;
+ }
+ }
return 0;
}
@@ -364,6 +364,7 @@ struct btrfs_device_info {
u64 dev_offset;
u64 max_avail;
u64 total_avail;
+ int preferred_metadata:1;
};
struct btrfs_raid_attr {