Message ID | 35832c0abf72cfd1e118d484f6a9ebdd689f8757.1445013000.git.dsterba@suse.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
On Fri, Oct 16, 2015 at 06:50:08PM +0200, David Sterba wrote: > The 'limit' filter is underdesigned, it should have been a range for > [min,max], with some relaxed semantics when one of the bounds is > missing. Besides that, using a full u64 for a single value is a waste of > bytes. What is min for? If we have more than 'max' matching chunks, we'd process 'max' and stop. If we have fewer than 'min' chunks, we'd process what we have and run out. If we have more than 'min' chunks but fewer than 'max' chunks, why would we stop before we reach 'max' chunks? > Let's fix both by extending the use of the u64 bytes for the [min,max] > range. This can be done in a backward compatible way, the range will be > interpreted only if the appropriate flag is set > (BTRFS_BALANCE_ARGS_LIMITS). > > Signed-off-by: David Sterba <dsterba@suse.com> > --- > fs/btrfs/ctree.h | 14 ++++++++++++-- > fs/btrfs/volumes.c | 42 ++++++++++++++++++++++++++++++++++++++++++ > fs/btrfs/volumes.h | 1 + > include/uapi/linux/btrfs.h | 13 ++++++++++++- > 4 files changed, 67 insertions(+), 3 deletions(-) > > diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h > index 938efe33be80..7d2e1b6d0ac1 100644 > --- a/fs/btrfs/ctree.h > +++ b/fs/btrfs/ctree.h > @@ -846,8 +846,18 @@ struct btrfs_disk_balance_args { > /* BTRFS_BALANCE_ARGS_* */ > __le64 flags; > > - /* BTRFS_BALANCE_ARGS_LIMIT value */ > - __le64 limit; > + /* > + * BTRFS_BALANCE_ARGS_LIMIT with value 'limit' > + * BTRFS_BALANCE_ARGS_LIMITS - the extend version can use minimum and > + * maximum > + */ > + union { > + __le64 limit; > + struct { > + __le32 limit_min; > + __le32 limit_max; > + }; > + }; > > __le64 unused[7]; > } __attribute__ ((__packed__)); > diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c > index 6fc735869c18..0fbe6855ea05 100644 > --- a/fs/btrfs/volumes.c > +++ b/fs/btrfs/volumes.c > @@ -3250,6 +3250,16 @@ static int should_balance_chunk(struct btrfs_root *root, > return 0; > else > bargs->limit--; > + } else if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMITS)) { > + /* > + * Same logic as the 'limit' filter; the minimum cannot be > + * determined here because we do not have the global informatoin > + * about the count of all chunks that satisfy the filters. > + */ > + if (bargs->limit_max == 0) > + return 0; > + else > + bargs->limit_max--; > } > > return 1; > @@ -3264,6 +3274,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) > struct btrfs_device *device; > u64 old_size; > u64 size_to_free; > + u64 chunk_type; > struct btrfs_chunk *chunk; > struct btrfs_path *path; > struct btrfs_key key; > @@ -3274,9 +3285,13 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) > int ret; > int enospc_errors = 0; > bool counting = true; > + /* The single value limit and min/max limits use the same bytes in the */ > u64 limit_data = bctl->data.limit; > u64 limit_meta = bctl->meta.limit; > u64 limit_sys = bctl->sys.limit; > + u32 count_data = 0; > + u32 count_meta = 0; > + u32 count_sys = 0; > > /* step one make some room on all the devices */ > devices = &fs_info->fs_devices->devices; > @@ -3317,6 +3332,10 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) > spin_unlock(&fs_info->balance_lock); > again: > if (!counting) { > + /* > + * The single value limit and min/max limits use the same bytes > + * in the > + */ > bctl->data.limit = limit_data; > bctl->meta.limit = limit_meta; > bctl->sys.limit = limit_sys; > @@ -3364,6 +3383,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) > } > > chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); > + chunk_type = btrfs_chunk_type(leaf, chunk); > > if (!counting) { > spin_lock(&fs_info->balance_lock); > @@ -3384,6 +3404,28 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) > spin_lock(&fs_info->balance_lock); > bctl->stat.expected++; > spin_unlock(&fs_info->balance_lock); > + > + if (chunk_type & BTRFS_BLOCK_GROUP_DATA) > + count_data++; > + else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) > + count_sys++; > + else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA) > + count_meta++; > + > + goto loop; > + } > + > + /* > + * Apply limit_min filter, no need to check if the LIMITS > + * filter is used, limit_min is 0 by default > + */ > + if (((chunk_type & BTRFS_BLOCK_GROUP_DATA) && > + count_data < bctl->data.limit_min) > + || ((chunk_type & BTRFS_BLOCK_GROUP_METADATA) && > + count_meta < bctl->meta.limit_min) > + || ((chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) && > + count_sys < bctl->sys.limit_min)) { > + mutex_unlock(&fs_info->delete_unused_bgs_mutex); > goto loop; > } > > diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h > index 2ca784a14e84..1c9d8edd7d57 100644 > --- a/fs/btrfs/volumes.h > +++ b/fs/btrfs/volumes.h > @@ -375,6 +375,7 @@ struct map_lookup { > #define BTRFS_BALANCE_ARGS_DRANGE (1ULL << 3) > #define BTRFS_BALANCE_ARGS_VRANGE (1ULL << 4) > #define BTRFS_BALANCE_ARGS_LIMIT (1ULL << 5) > +#define BTRFS_BALANCE_ARGS_LIMITS (1ULL << 6) > > /* > * Profile changing flags. When SOFT is set we won't relocate chunk if > diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h > index b6dec05c7196..264ecea5ecfc 100644 > --- a/include/uapi/linux/btrfs.h > +++ b/include/uapi/linux/btrfs.h > @@ -217,7 +217,18 @@ struct btrfs_balance_args { > > __u64 flags; > > - __u64 limit; /* limit number of processed chunks */ > + /* > + * BTRFS_BALANCE_ARGS_LIMIT with value 'limit' > + * BTRFS_BALANCE_ARGS_LIMITS - the extend version can use minimum and > + * maximum > + */ > + union { > + __u64 limit; /* limit number of processed chunks */ > + struct { > + __u32 limit_min; > + __u32 limit_max; > + }; > + }; > __u64 unused[7]; > } __attribute__ ((__packed__)); > > -- > 2.6.1 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Oct 16, 2015 at 07:08:37PM -0400, Zygo Blaxell wrote: > On Fri, Oct 16, 2015 at 06:50:08PM +0200, David Sterba wrote: > > The 'limit' filter is underdesigned, it should have been a range for > > [min,max], with some relaxed semantics when one of the bounds is > > missing. Besides that, using a full u64 for a single value is a waste of > > bytes. > > What is min for? > > If we have more than 'max' matching chunks, we'd process 'max' and stop. Right. > If we have fewer than 'min' chunks, we'd process what we have and run out. If we have fewer than min, we'll do nothing. > If we have more than 'min' chunks but fewer than 'max' chunks, why would > we stop before we reach 'max' chunks? I must be missing something here. If there are less than 'max' chunks (with all filters applied), how are we supposed to reach 'max'? The 'limit' filter is applied after all the other filters. It can be used standalone, or with eg. usage. The usecase where I find it useful: * we want to make metadata chunks more compact * let's set usage=30 * avoid unnecessary work (which is a bigger performance hit in case of metadata chunks), ie. if there's a single chunk with usage < 30%, let's skip it The command: $ btrfs balance start -musage=30,limit=2.. /path So in case there's only one such chunk, balance would just move it without any gain. Avoiding the unnecessary work here is IMO a win, though it might seem limited. But I'm going to utilize this in the btrfsmaintenance package that among others does periodic balancing on the background, I believe the effects will be noticeable. -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Oct 19, 2015 at 11:14:16AM +0200, David Sterba wrote: > On Fri, Oct 16, 2015 at 07:08:37PM -0400, Zygo Blaxell wrote: > > On Fri, Oct 16, 2015 at 06:50:08PM +0200, David Sterba wrote: > > > The 'limit' filter is underdesigned, it should have been a range for > > > [min,max], with some relaxed semantics when one of the bounds is > > > missing. Besides that, using a full u64 for a single value is a waste of > > > bytes. > > > > What is min for? > > > > If we have more than 'max' matching chunks, we'd process 'max' and stop. > > Right. > > > If we have fewer than 'min' chunks, we'd process what we have and run out. > > If we have fewer than min, we'll do nothing. Aha. My mental model of balance was missing the step where chunks are enumerated and filtered before any real work starts. > > If we have more than 'min' chunks but fewer than 'max' chunks, why would > > we stop before we reach 'max' chunks? > > I must be missing something here. If there are less than 'max' chunks > (with all filters applied), how are we supposed to reach 'max'? > > The 'limit' filter is applied after all the other filters. It can be > used standalone, or with eg. usage. The usecase where I find it useful: > > * we want to make metadata chunks more compact > * let's set usage=30 > * avoid unnecessary work (which is a bigger performance hit in case > of metadata chunks), ie. if there's a single chunk with usage < 30%, > let's skip it > > The command: > > $ btrfs balance start -musage=30,limit=2.. /path > > So in case there's only one such chunk, balance would just move it > without any gain. ...and if there's two chunks with <50% space used, then we can guarantee that the data from one fits in the unused space of the other. With a bit of math we can figure out what limit-min has to be for any particular value of usage to guarantee one chunk is always deallocated. That makes sense. > Avoiding the unnecessary work here is IMO a win, > though it might seem limited. But I'm going to utilize this in the > btrfsmaintenance package that among others does periodic balancing > on the background, I believe the effects will be noticeable. ...and this use case did not occur to me because I try to make sure I never do anything remotely similar to this, even accidentally. I've learned the hard way not to balance metadata except when reshaping a filesystem array. Apart from being egregiously slow and hard to interrupt, balancing metadata deallocates space that was previously allocated to metadata. If you keep doing it on a nearly-full filesystem, the risk that you'll run out of metadata space in the future increases to certainty. The penalty for running out of metadata space is currently much, much worse than the cost of unused metadata space could ever be. :-/ "-dusage=49,limit=2.." is something I might use, though, especially for small or mostly static filesystems where there might be a shortage of work for balance (a large filesystem with a moderate rate of modification doesn't need a min-limit). > -- > To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 938efe33be80..7d2e1b6d0ac1 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -846,8 +846,18 @@ struct btrfs_disk_balance_args { /* BTRFS_BALANCE_ARGS_* */ __le64 flags; - /* BTRFS_BALANCE_ARGS_LIMIT value */ - __le64 limit; + /* + * BTRFS_BALANCE_ARGS_LIMIT with value 'limit' + * BTRFS_BALANCE_ARGS_LIMITS - the extend version can use minimum and + * maximum + */ + union { + __le64 limit; + struct { + __le32 limit_min; + __le32 limit_max; + }; + }; __le64 unused[7]; } __attribute__ ((__packed__)); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 6fc735869c18..0fbe6855ea05 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3250,6 +3250,16 @@ static int should_balance_chunk(struct btrfs_root *root, return 0; else bargs->limit--; + } else if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMITS)) { + /* + * Same logic as the 'limit' filter; the minimum cannot be + * determined here because we do not have the global informatoin + * about the count of all chunks that satisfy the filters. + */ + if (bargs->limit_max == 0) + return 0; + else + bargs->limit_max--; } return 1; @@ -3264,6 +3274,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) struct btrfs_device *device; u64 old_size; u64 size_to_free; + u64 chunk_type; struct btrfs_chunk *chunk; struct btrfs_path *path; struct btrfs_key key; @@ -3274,9 +3285,13 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) int ret; int enospc_errors = 0; bool counting = true; + /* The single value limit and min/max limits use the same bytes in the */ u64 limit_data = bctl->data.limit; u64 limit_meta = bctl->meta.limit; u64 limit_sys = bctl->sys.limit; + u32 count_data = 0; + u32 count_meta = 0; + u32 count_sys = 0; /* step one make some room on all the devices */ devices = &fs_info->fs_devices->devices; @@ -3317,6 +3332,10 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) spin_unlock(&fs_info->balance_lock); again: if (!counting) { + /* + * The single value limit and min/max limits use the same bytes + * in the + */ bctl->data.limit = limit_data; bctl->meta.limit = limit_meta; bctl->sys.limit = limit_sys; @@ -3364,6 +3383,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) } chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); + chunk_type = btrfs_chunk_type(leaf, chunk); if (!counting) { spin_lock(&fs_info->balance_lock); @@ -3384,6 +3404,28 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) spin_lock(&fs_info->balance_lock); bctl->stat.expected++; spin_unlock(&fs_info->balance_lock); + + if (chunk_type & BTRFS_BLOCK_GROUP_DATA) + count_data++; + else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) + count_sys++; + else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA) + count_meta++; + + goto loop; + } + + /* + * Apply limit_min filter, no need to check if the LIMITS + * filter is used, limit_min is 0 by default + */ + if (((chunk_type & BTRFS_BLOCK_GROUP_DATA) && + count_data < bctl->data.limit_min) + || ((chunk_type & BTRFS_BLOCK_GROUP_METADATA) && + count_meta < bctl->meta.limit_min) + || ((chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) && + count_sys < bctl->sys.limit_min)) { + mutex_unlock(&fs_info->delete_unused_bgs_mutex); goto loop; } diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 2ca784a14e84..1c9d8edd7d57 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -375,6 +375,7 @@ struct map_lookup { #define BTRFS_BALANCE_ARGS_DRANGE (1ULL << 3) #define BTRFS_BALANCE_ARGS_VRANGE (1ULL << 4) #define BTRFS_BALANCE_ARGS_LIMIT (1ULL << 5) +#define BTRFS_BALANCE_ARGS_LIMITS (1ULL << 6) /* * Profile changing flags. When SOFT is set we won't relocate chunk if diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index b6dec05c7196..264ecea5ecfc 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -217,7 +217,18 @@ struct btrfs_balance_args { __u64 flags; - __u64 limit; /* limit number of processed chunks */ + /* + * BTRFS_BALANCE_ARGS_LIMIT with value 'limit' + * BTRFS_BALANCE_ARGS_LIMITS - the extend version can use minimum and + * maximum + */ + union { + __u64 limit; /* limit number of processed chunks */ + struct { + __u32 limit_min; + __u32 limit_max; + }; + }; __u64 unused[7]; } __attribute__ ((__packed__));
The 'limit' filter is underdesigned, it should have been a range for [min,max], with some relaxed semantics when one of the bounds is missing. Besides that, using a full u64 for a single value is a waste of bytes. Let's fix both by extending the use of the u64 bytes for the [min,max] range. This can be done in a backward compatible way, the range will be interpreted only if the appropriate flag is set (BTRFS_BALANCE_ARGS_LIMITS). Signed-off-by: David Sterba <dsterba@suse.com> --- fs/btrfs/ctree.h | 14 ++++++++++++-- fs/btrfs/volumes.c | 42 ++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/volumes.h | 1 + include/uapi/linux/btrfs.h | 13 ++++++++++++- 4 files changed, 67 insertions(+), 3 deletions(-)