Message ID | 20200528183451.16654-5-kreijack@libero.it (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [1/4] Add an ioctl to set/retrieve the device properties | expand |
Hi, On 5/28/20 8:34 PM, Goffredo Baroncelli wrote: > From: Goffredo Baroncelli <kreijack@inwind.it> > > When this mode is enabled, The commit message does not mention whether this is only a convenience during development and testing of the feature, to be able to quickly turn it on/off, or whether you intend to keep it in the final change set. > the allocation policy of the chunk > is so modified: > - allocation of metadata chunk: priority is given to preferred_metadata > disks. > - allocation of data chunk: priority is given to a non preferred_metadata > disk. > > When a striped profile is involved (like RAID0,5,6), the logic > is a bit more complex. If there are enough disks, the data profiles > are stored on the non preferred_metadata disks; instead the metadata > profiles are stored on the preferred_metadata disk. > If the disks are not enough, then the profile is allocated on all > the disks. > > Example: assuming that sda, sdb, sdc are ssd disks, and sde, sdf are > non preferred_metadata ones. > A data profile raid6, will be stored on sda, sdb, sdc, sde, sdf (sde > and sdf are not enough to host a raid5 profile). > A metadata profile raid6, will be stored on sda, sdb, sdc (these > are enough to host a raid6 profile). > > To enable this mode pass -o dedicated_metadata at mount time. Is it dedicated_metadata or preferred_metadata? 
> Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it> > --- > fs/btrfs/ctree.h | 1 + > fs/btrfs/super.c | 8 +++++ > fs/btrfs/volumes.c | 89 ++++++++++++++++++++++++++++++++++++++++++++-- > fs/btrfs/volumes.h | 1 + > 4 files changed, 97 insertions(+), 2 deletions(-) > > diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h > index 03ea7370aea7..779760fd27b1 100644 > --- a/fs/btrfs/ctree.h > +++ b/fs/btrfs/ctree.h > @@ -1239,6 +1239,7 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info) > #define BTRFS_MOUNT_NOLOGREPLAY (1 << 27) > #define BTRFS_MOUNT_REF_VERIFY (1 << 28) > #define BTRFS_MOUNT_DISCARD_ASYNC (1 << 29) > +#define BTRFS_MOUNT_PREFERRED_METADATA (1 << 30) > > #define BTRFS_DEFAULT_COMMIT_INTERVAL (30) > #define BTRFS_DEFAULT_MAX_INLINE (2048) > diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c > index 438ecba26557..80700dc9dcf8 100644 > --- a/fs/btrfs/super.c > +++ b/fs/btrfs/super.c > @@ -359,6 +359,7 @@ enum { > #ifdef CONFIG_BTRFS_FS_REF_VERIFY > Opt_ref_verify, > #endif > + Opt_preferred_metadata, > Opt_err, > }; > > @@ -430,6 +431,7 @@ static const match_table_t tokens = { > #ifdef CONFIG_BTRFS_FS_REF_VERIFY > {Opt_ref_verify, "ref_verify"}, > #endif > + {Opt_preferred_metadata, "preferred_metadata"}, > {Opt_err, NULL}, > }; > > @@ -881,6 +883,10 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, > btrfs_set_opt(info->mount_opt, REF_VERIFY); > break; > #endif > + case Opt_preferred_metadata: > + btrfs_set_and_info(info, PREFERRED_METADATA, > + "enabling preferred_metadata"); > + break; > case Opt_err: > btrfs_err(info, "unrecognized mount option '%s'", p); > ret = -EINVAL; > @@ -1403,6 +1409,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) > #endif > if (btrfs_test_opt(info, REF_VERIFY)) > seq_puts(seq, ",ref_verify"); > + if (btrfs_test_opt(info, PREFERRED_METADATA)) > + seq_puts(seq, ",preferred_metadata"); > seq_printf(seq, ",subvolid=%llu", > 
BTRFS_I(d_inode(dentry))->root->root_key.objectid); > seq_puts(seq, ",subvol="); > diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c > index 5265f54c2931..c68efb15e473 100644 > --- a/fs/btrfs/volumes.c > +++ b/fs/btrfs/volumes.c > @@ -4770,6 +4770,56 @@ static int btrfs_cmp_device_info(const void *a, const void *b) > return 0; > } > > +/* > + * sort the devices in descending order by preferred_metadata, > + * max_avail, total_avail > + */ > +static int btrfs_cmp_device_info_metadata(const void *a, const void *b) > +{ > + const struct btrfs_device_info *di_a = a; > + const struct btrfs_device_info *di_b = b; > + > + /* metadata -> preferred_metadata first */ > + if (di_a->preferred_metadata && !di_b->preferred_metadata) > + return -1; > + if (!di_a->preferred_metadata && di_b->preferred_metadata) > + return 1; > + if (di_a->max_avail > di_b->max_avail) > + return -1; > + if (di_a->max_avail < di_b->max_avail) > + return 1; > + if (di_a->total_avail > di_b->total_avail) > + return -1; > + if (di_a->total_avail < di_b->total_avail) > + return 1; > + return 0; > +} > + > +/* > + * sort the devices in descending order by !preferred_metadata, > + * max_avail, total_avail > + */ > +static int btrfs_cmp_device_info_data(const void *a, const void *b) > +{ > + const struct btrfs_device_info *di_a = a; > + const struct btrfs_device_info *di_b = b; > + > + /* data -> preferred_metadata last */ > + if (di_a->preferred_metadata && !di_b->preferred_metadata) > + return 1; > + if (!di_a->preferred_metadata && di_b->preferred_metadata) > + return -1; > + if (di_a->max_avail > di_b->max_avail) > + return -1; > + if (di_a->max_avail < di_b->max_avail) > + return 1; > + if (di_a->total_avail > di_b->total_avail) > + return -1; > + if (di_a->total_avail < di_b->total_avail) > + return 1; > + return 0; > +} > + > static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type) > { > if (!(type & BTRFS_BLOCK_GROUP_RAID56_MASK)) > @@ -4885,6 +4935,7 @@ static int 
gather_device_info(struct btrfs_fs_devices *fs_devices, > int ndevs = 0; > u64 max_avail; > u64 dev_offset; > + int nr_preferred_metadata = 0; > > /* > * in the first pass through the devices list, we gather information > @@ -4937,15 +4988,49 @@ static int gather_device_info(struct btrfs_fs_devices *fs_devices, > devices_info[ndevs].max_avail = max_avail; > devices_info[ndevs].total_avail = total_avail; > devices_info[ndevs].dev = device; > + devices_info[ndevs].preferred_metadata = !!(device->type & > + BTRFS_DEV_PREFERRED_METADATA); > + if (devices_info[ndevs].preferred_metadata) > + nr_preferred_metadata++; > ++ndevs; > } > ctl->ndevs = ndevs; > > + BUG_ON(nr_preferred_metadata > ndevs); > /* > * now sort the devices by hole size / available space > */ > - sort(devices_info, ndevs, sizeof(struct btrfs_device_info), > - btrfs_cmp_device_info, NULL); > + if (((ctl->type & BTRFS_BLOCK_GROUP_DATA) && > + (ctl->type & BTRFS_BLOCK_GROUP_METADATA)) || > + !btrfs_test_opt(info, PREFERRED_METADATA)) { > + /* mixed bg or PREFERRED_METADATA not set */ > + sort(devices_info, ctl->ndevs, sizeof(struct btrfs_device_info), > + btrfs_cmp_device_info, NULL); > + } else { > + /* > + * if PREFERRED_METADATA is set, sort the device considering > + * also the kind (preferred_metadata or not). Limit the > + * availables devices to the ones of the same kind, to avoid > + * that a striped profile, like raid5, spreads to all kind of > + * devices. > + * It is allowed to use different kinds of devices if the ones > + * of the same kind are not enough alone. 
> + */ > + if (ctl->type & BTRFS_BLOCK_GROUP_DATA) { > + int nr_data = ctl->ndevs - nr_preferred_metadata; > + sort(devices_info, ctl->ndevs, > + sizeof(struct btrfs_device_info), > + btrfs_cmp_device_info_data, NULL); > + if (nr_data >= ctl->devs_min) > + ctl->ndevs = nr_data; > + } else { /* non data -> metadata and system */ > + sort(devices_info, ctl->ndevs, > + sizeof(struct btrfs_device_info), > + btrfs_cmp_device_info_metadata, NULL); > + if (nr_preferred_metadata >= ctl->devs_min) > + ctl->ndevs = nr_preferred_metadata; > + } > + } > > return 0; > } > diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h > index 0ac5bf2b95e6..d39c3b0e7569 100644 > --- a/fs/btrfs/volumes.h > +++ b/fs/btrfs/volumes.h > @@ -347,6 +347,7 @@ struct btrfs_device_info { > u64 dev_offset; > u64 max_avail; > u64 total_avail; > + int preferred_metadata:1; > }; > > struct btrfs_raid_attr { >
On 5/29/20 12:02 AM, Hans van Kranenburg wrote: > Hi, > > On 5/28/20 8:34 PM, Goffredo Baroncelli wrote: >> From: Goffredo Baroncelli <kreijack@inwind.it> >> >> When this mode is enabled, > > The commit message does not mention if this is either only a convenience > during development and testing of the feature to be able to quickly turn > it on/off, or if you intend to have this into the final change set. Good question. IMHO, for the initial devel phase it is useful to have a preferred_metadata disk (opt-in). Then we could reverse the logic and default to preferred_metadata. Of course, then we will have a no-preferred_metadata flag (opt-out). > >> the allocation policy of the chunk >> is so modified: >> - allocation of metadata chunk: priority is given to preferred_metadata >> disks. >> - allocation of data chunk: priority is given to a non preferred_metadata >> disk. >> >> When a striped profile is involved (like RAID0,5,6), the logic >> is a bit more complex. If there are enough disks, the data profiles >> are stored on the non preferred_metadata disks; instead the metadata >> profiles are stored on the preferred_metadata disk. >> If the disks are not enough, then the profile is allocated on all >> the disks. >> >> Example: assuming that sda, sdb, sdc are ssd disks, and sde, sdf are >> non preferred_metadata ones. >> A data profile raid6, will be stored on sda, sdb, sdc, sde, sdf (sde >> and sdf are not enough to host a raid5 profile). >> A metadata profile raid6, will be stored on sda, sdb, sdc (these >> are enough to host a raid6 profile). >> >> To enable this mode pass -o dedicated_metadata at mount time. > > Is it dedicated_metadata or preferred_metadata? It was a copy&paste error. 
It should be preferred_metadata > >> Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it> >> --- >> fs/btrfs/ctree.h | 1 + >> fs/btrfs/super.c | 8 +++++ >> fs/btrfs/volumes.c | 89 ++++++++++++++++++++++++++++++++++++++++++++-- >> fs/btrfs/volumes.h | 1 + >> 4 files changed, 97 insertions(+), 2 deletions(-) >> >> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h >> index 03ea7370aea7..779760fd27b1 100644 >> --- a/fs/btrfs/ctree.h >> +++ b/fs/btrfs/ctree.h >> @@ -1239,6 +1239,7 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info) >> #define BTRFS_MOUNT_NOLOGREPLAY (1 << 27) >> #define BTRFS_MOUNT_REF_VERIFY (1 << 28) >> #define BTRFS_MOUNT_DISCARD_ASYNC (1 << 29) >> +#define BTRFS_MOUNT_PREFERRED_METADATA (1 << 30) >> >> #define BTRFS_DEFAULT_COMMIT_INTERVAL (30) >> #define BTRFS_DEFAULT_MAX_INLINE (2048) >> diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c >> index 438ecba26557..80700dc9dcf8 100644 >> --- a/fs/btrfs/super.c >> +++ b/fs/btrfs/super.c >> @@ -359,6 +359,7 @@ enum { >> #ifdef CONFIG_BTRFS_FS_REF_VERIFY >> Opt_ref_verify, >> #endif >> + Opt_preferred_metadata, >> Opt_err, >> }; >> >> @@ -430,6 +431,7 @@ static const match_table_t tokens = { >> #ifdef CONFIG_BTRFS_FS_REF_VERIFY >> {Opt_ref_verify, "ref_verify"}, >> #endif >> + {Opt_preferred_metadata, "preferred_metadata"}, >> {Opt_err, NULL}, >> }; >> >> @@ -881,6 +883,10 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, >> btrfs_set_opt(info->mount_opt, REF_VERIFY); >> break; >> #endif >> + case Opt_preferred_metadata: >> + btrfs_set_and_info(info, PREFERRED_METADATA, >> + "enabling preferred_metadata"); >> + break; >> case Opt_err: >> btrfs_err(info, "unrecognized mount option '%s'", p); >> ret = -EINVAL; >> @@ -1403,6 +1409,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) >> #endif >> if (btrfs_test_opt(info, REF_VERIFY)) >> seq_puts(seq, ",ref_verify"); >> + if (btrfs_test_opt(info, PREFERRED_METADATA)) >> + seq_puts(seq, 
",preferred_metadata"); >> seq_printf(seq, ",subvolid=%llu", >> BTRFS_I(d_inode(dentry))->root->root_key.objectid); >> seq_puts(seq, ",subvol="); >> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c >> index 5265f54c2931..c68efb15e473 100644 >> --- a/fs/btrfs/volumes.c >> +++ b/fs/btrfs/volumes.c >> @@ -4770,6 +4770,56 @@ static int btrfs_cmp_device_info(const void *a, const void *b) >> return 0; >> } >> >> +/* >> + * sort the devices in descending order by preferred_metadata, >> + * max_avail, total_avail >> + */ >> +static int btrfs_cmp_device_info_metadata(const void *a, const void *b) >> +{ >> + const struct btrfs_device_info *di_a = a; >> + const struct btrfs_device_info *di_b = b; >> + >> + /* metadata -> preferred_metadata first */ >> + if (di_a->preferred_metadata && !di_b->preferred_metadata) >> + return -1; >> + if (!di_a->preferred_metadata && di_b->preferred_metadata) >> + return 1; >> + if (di_a->max_avail > di_b->max_avail) >> + return -1; >> + if (di_a->max_avail < di_b->max_avail) >> + return 1; >> + if (di_a->total_avail > di_b->total_avail) >> + return -1; >> + if (di_a->total_avail < di_b->total_avail) >> + return 1; >> + return 0; >> +} >> + >> +/* >> + * sort the devices in descending order by !preferred_metadata, >> + * max_avail, total_avail >> + */ >> +static int btrfs_cmp_device_info_data(const void *a, const void *b) >> +{ >> + const struct btrfs_device_info *di_a = a; >> + const struct btrfs_device_info *di_b = b; >> + >> + /* data -> preferred_metadata last */ >> + if (di_a->preferred_metadata && !di_b->preferred_metadata) >> + return 1; >> + if (!di_a->preferred_metadata && di_b->preferred_metadata) >> + return -1; >> + if (di_a->max_avail > di_b->max_avail) >> + return -1; >> + if (di_a->max_avail < di_b->max_avail) >> + return 1; >> + if (di_a->total_avail > di_b->total_avail) >> + return -1; >> + if (di_a->total_avail < di_b->total_avail) >> + return 1; >> + return 0; >> +} >> + >> static void check_raid56_incompat_flag(struct 
btrfs_fs_info *info, u64 type) >> { >> if (!(type & BTRFS_BLOCK_GROUP_RAID56_MASK)) >> @@ -4885,6 +4935,7 @@ static int gather_device_info(struct btrfs_fs_devices *fs_devices, >> int ndevs = 0; >> u64 max_avail; >> u64 dev_offset; >> + int nr_preferred_metadata = 0; >> >> /* >> * in the first pass through the devices list, we gather information >> @@ -4937,15 +4988,49 @@ static int gather_device_info(struct btrfs_fs_devices *fs_devices, >> devices_info[ndevs].max_avail = max_avail; >> devices_info[ndevs].total_avail = total_avail; >> devices_info[ndevs].dev = device; >> + devices_info[ndevs].preferred_metadata = !!(device->type & >> + BTRFS_DEV_PREFERRED_METADATA); >> + if (devices_info[ndevs].preferred_metadata) >> + nr_preferred_metadata++; >> ++ndevs; >> } >> ctl->ndevs = ndevs; >> >> + BUG_ON(nr_preferred_metadata > ndevs); >> /* >> * now sort the devices by hole size / available space >> */ >> - sort(devices_info, ndevs, sizeof(struct btrfs_device_info), >> - btrfs_cmp_device_info, NULL); >> + if (((ctl->type & BTRFS_BLOCK_GROUP_DATA) && >> + (ctl->type & BTRFS_BLOCK_GROUP_METADATA)) || >> + !btrfs_test_opt(info, PREFERRED_METADATA)) { >> + /* mixed bg or PREFERRED_METADATA not set */ >> + sort(devices_info, ctl->ndevs, sizeof(struct btrfs_device_info), >> + btrfs_cmp_device_info, NULL); >> + } else { >> + /* >> + * if PREFERRED_METADATA is set, sort the device considering >> + * also the kind (preferred_metadata or not). Limit the >> + * availables devices to the ones of the same kind, to avoid >> + * that a striped profile, like raid5, spreads to all kind of >> + * devices. >> + * It is allowed to use different kinds of devices if the ones >> + * of the same kind are not enough alone. 
>> + */ >> + if (ctl->type & BTRFS_BLOCK_GROUP_DATA) { >> + int nr_data = ctl->ndevs - nr_preferred_metadata; >> + sort(devices_info, ctl->ndevs, >> + sizeof(struct btrfs_device_info), >> + btrfs_cmp_device_info_data, NULL); >> + if (nr_data >= ctl->devs_min) >> + ctl->ndevs = nr_data; >> + } else { /* non data -> metadata and system */ >> + sort(devices_info, ctl->ndevs, >> + sizeof(struct btrfs_device_info), >> + btrfs_cmp_device_info_metadata, NULL); >> + if (nr_preferred_metadata >= ctl->devs_min) >> + ctl->ndevs = nr_preferred_metadata; >> + } >> + } >> >> return 0; >> } >> diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h >> index 0ac5bf2b95e6..d39c3b0e7569 100644 >> --- a/fs/btrfs/volumes.h >> +++ b/fs/btrfs/volumes.h >> @@ -347,6 +347,7 @@ struct btrfs_device_info { >> u64 dev_offset; >> u64 max_avail; >> u64 total_avail; >> + int preferred_metadata:1; >> }; >> >> struct btrfs_raid_attr { >> >
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 03ea7370aea7..779760fd27b1 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1239,6 +1239,7 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info) #define BTRFS_MOUNT_NOLOGREPLAY (1 << 27) #define BTRFS_MOUNT_REF_VERIFY (1 << 28) #define BTRFS_MOUNT_DISCARD_ASYNC (1 << 29) +#define BTRFS_MOUNT_PREFERRED_METADATA (1 << 30) #define BTRFS_DEFAULT_COMMIT_INTERVAL (30) #define BTRFS_DEFAULT_MAX_INLINE (2048) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 438ecba26557..80700dc9dcf8 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -359,6 +359,7 @@ enum { #ifdef CONFIG_BTRFS_FS_REF_VERIFY Opt_ref_verify, #endif + Opt_preferred_metadata, Opt_err, }; @@ -430,6 +431,7 @@ static const match_table_t tokens = { #ifdef CONFIG_BTRFS_FS_REF_VERIFY {Opt_ref_verify, "ref_verify"}, #endif + {Opt_preferred_metadata, "preferred_metadata"}, {Opt_err, NULL}, }; @@ -881,6 +883,10 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, btrfs_set_opt(info->mount_opt, REF_VERIFY); break; #endif + case Opt_preferred_metadata: + btrfs_set_and_info(info, PREFERRED_METADATA, + "enabling preferred_metadata"); + break; case Opt_err: btrfs_err(info, "unrecognized mount option '%s'", p); ret = -EINVAL; @@ -1403,6 +1409,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) #endif if (btrfs_test_opt(info, REF_VERIFY)) seq_puts(seq, ",ref_verify"); + if (btrfs_test_opt(info, PREFERRED_METADATA)) + seq_puts(seq, ",preferred_metadata"); seq_printf(seq, ",subvolid=%llu", BTRFS_I(d_inode(dentry))->root->root_key.objectid); seq_puts(seq, ",subvol="); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 5265f54c2931..c68efb15e473 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4770,6 +4770,56 @@ static int btrfs_cmp_device_info(const void *a, const void *b) return 0; } +/* + * sort the devices in descending order by preferred_metadata, + * max_avail, 
total_avail + */ +static int btrfs_cmp_device_info_metadata(const void *a, const void *b) +{ + const struct btrfs_device_info *di_a = a; + const struct btrfs_device_info *di_b = b; + + /* metadata -> preferred_metadata first */ + if (di_a->preferred_metadata && !di_b->preferred_metadata) + return -1; + if (!di_a->preferred_metadata && di_b->preferred_metadata) + return 1; + if (di_a->max_avail > di_b->max_avail) + return -1; + if (di_a->max_avail < di_b->max_avail) + return 1; + if (di_a->total_avail > di_b->total_avail) + return -1; + if (di_a->total_avail < di_b->total_avail) + return 1; + return 0; +} + +/* + * sort the devices in descending order by !preferred_metadata, + * max_avail, total_avail + */ +static int btrfs_cmp_device_info_data(const void *a, const void *b) +{ + const struct btrfs_device_info *di_a = a; + const struct btrfs_device_info *di_b = b; + + /* data -> preferred_metadata last */ + if (di_a->preferred_metadata && !di_b->preferred_metadata) + return 1; + if (!di_a->preferred_metadata && di_b->preferred_metadata) + return -1; + if (di_a->max_avail > di_b->max_avail) + return -1; + if (di_a->max_avail < di_b->max_avail) + return 1; + if (di_a->total_avail > di_b->total_avail) + return -1; + if (di_a->total_avail < di_b->total_avail) + return 1; + return 0; +} + static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type) { if (!(type & BTRFS_BLOCK_GROUP_RAID56_MASK)) @@ -4885,6 +4935,7 @@ static int gather_device_info(struct btrfs_fs_devices *fs_devices, int ndevs = 0; u64 max_avail; u64 dev_offset; + int nr_preferred_metadata = 0; /* * in the first pass through the devices list, we gather information @@ -4937,15 +4988,49 @@ static int gather_device_info(struct btrfs_fs_devices *fs_devices, devices_info[ndevs].max_avail = max_avail; devices_info[ndevs].total_avail = total_avail; devices_info[ndevs].dev = device; + devices_info[ndevs].preferred_metadata = !!(device->type & + BTRFS_DEV_PREFERRED_METADATA); + if 
(devices_info[ndevs].preferred_metadata) + nr_preferred_metadata++; ++ndevs; } ctl->ndevs = ndevs; + BUG_ON(nr_preferred_metadata > ndevs); /* * now sort the devices by hole size / available space */ - sort(devices_info, ndevs, sizeof(struct btrfs_device_info), - btrfs_cmp_device_info, NULL); + if (((ctl->type & BTRFS_BLOCK_GROUP_DATA) && + (ctl->type & BTRFS_BLOCK_GROUP_METADATA)) || + !btrfs_test_opt(info, PREFERRED_METADATA)) { + /* mixed bg or PREFERRED_METADATA not set */ + sort(devices_info, ctl->ndevs, sizeof(struct btrfs_device_info), + btrfs_cmp_device_info, NULL); + } else { + /* + * if PREFERRED_METADATA is set, sort the device considering + * also the kind (preferred_metadata or not). Limit the + * availables devices to the ones of the same kind, to avoid + * that a striped profile, like raid5, spreads to all kind of + * devices. + * It is allowed to use different kinds of devices if the ones + * of the same kind are not enough alone. + */ + if (ctl->type & BTRFS_BLOCK_GROUP_DATA) { + int nr_data = ctl->ndevs - nr_preferred_metadata; + sort(devices_info, ctl->ndevs, + sizeof(struct btrfs_device_info), + btrfs_cmp_device_info_data, NULL); + if (nr_data >= ctl->devs_min) + ctl->ndevs = nr_data; + } else { /* non data -> metadata and system */ + sort(devices_info, ctl->ndevs, + sizeof(struct btrfs_device_info), + btrfs_cmp_device_info_metadata, NULL); + if (nr_preferred_metadata >= ctl->devs_min) + ctl->ndevs = nr_preferred_metadata; + } + } return 0; } diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 0ac5bf2b95e6..d39c3b0e7569 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -347,6 +347,7 @@ struct btrfs_device_info { u64 dev_offset; u64 max_avail; u64 total_avail; + int preferred_metadata:1; }; struct btrfs_raid_attr {