diff mbox series

[v3] btrfs: round down stripe size and chunk size to pow of 2

Message ID 20220817145800.36175-1-wangyugui@e16-tech.com (mailing list archive)
State New, archived
Headers show
Series [v3] btrfs: round down stripe size and chunk size to pow of 2 | expand

Commit Message

Wang Yugui Aug. 17, 2022, 2:58 p.m. UTC
In decide_stripe_size_regular(), when new disk is added to RAID0/RAID10/RAID56,
it is better to free-then-reuse the free space if stripe size is kept or
changed to 1/2. so stripe size of pow of 2 will be more friendly. Although
roundup_pow_of_two() match better with orig round_up(), but
rounddown_pow_of_two() is better to make sure <=ctl->max_chunk_size here.

In another rare case that file system is quite small, we calc max chunk size
in pow of 2 too, so that max chunk size / chunk size /stripe size are same or
match easy in some case.

Signed-off-by: Wang Yugui <wangyugui@e16-tech.com>
---
changes since v2:
	restore to rounddown_pow_of_two() from roundup_pow_of_two()
changes since v1:
 - change rounddown_pow_of_two() to roundup_pow_of_two() to match better with
   orig roundup().

 fs/btrfs/volumes.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

Comments

Qu Wenruo Aug. 18, 2022, 5:30 a.m. UTC | #1
On 2022/8/17 22:58, Wang Yugui wrote:
> In decide_stripe_size_regular(), when new disk is added to RAID0/RAID10/RAID56,
> it is better to free-then-reuse the free space if stripe size is kept or
> changed to 1/2. so stripe size of pow of 2 will be more friendly. Although
> roundup_pow_of_two() match better with orig round_up(), but
> rounddown_pow_of_two() is better to make sure <=ctl->max_chunk_size here.

Why insist on round*_pow_of_two()?

I see no reason that a power of 2 sized chunk has any benefit to btrfs.

>
> In another rare case that file system is quite small, we calc max chunk size
> in pow of 2 too, so that max chunk size / chunk size /stripe size are same or
> match easy in some case.
>
> Signed-off-by: Wang Yugui <wangyugui@e16-tech.com>
> ---
> changes since v2:
> 	restore to rounddown_pow_of_two() from roundup_pow_of_two()
> changes since v1:
>   - change rounddown_pow_of_two() to roundup_pow_of_two() to match better with
>     orig roundup().
>
>   fs/btrfs/volumes.c | 20 +++++++++-----------
>   1 file changed, 9 insertions(+), 11 deletions(-)
>
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 6595755..fab9765 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -5083,9 +5083,9 @@ static void init_alloc_chunk_ctl_policy_regular(
>   	if (ctl->type & BTRFS_BLOCK_GROUP_SYSTEM)
>   		ctl->devs_max = min_t(int, ctl->devs_max, BTRFS_MAX_DEVS_SYS_CHUNK);
>
> -	/* We don't want a chunk larger than 10% of writable space */
> -	ctl->max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
> -				  ctl->max_chunk_size);
> +	/* We don't want a chunk larger than 1/8 of writable space */
> +	ctl->max_chunk_size = min_t(u64, ctl->max_chunk_size,
> +			rounddown_pow_of_two(fs_devices->total_rw_bytes >> 3));
>   	ctl->dev_extent_min = BTRFS_STRIPE_LEN * ctl->dev_stripes;
>   }
>
> @@ -5143,10 +5143,9 @@ static void init_alloc_chunk_ctl_policy_zoned(
>   		BUG();
>   	}
>
> -	/* We don't want a chunk larger than 10% of writable space */
> -	limit = max(round_down(div_factor(fs_devices->total_rw_bytes, 1),
> -			       zone_size),
> -		    min_chunk_size);
> +	/* We don't want a chunk larger than 1/8 of writable space */
> +	limit = max_t(u64, min_chunk_size,
> +		rounddown_pow_of_two(fs_devices->total_rw_bytes >> 3));
>   	ctl->max_chunk_size = min(limit, ctl->max_chunk_size);
>   	ctl->dev_extent_min = zone_size * ctl->dev_stripes;
>   }
> @@ -5284,13 +5283,12 @@ static int decide_stripe_size_regular(struct alloc_chunk_ctl *ctl,
>   	 */
>   	if (ctl->stripe_size * data_stripes > ctl->max_chunk_size) {
>   		/*
> -		 * Reduce stripe_size, round it up to a 16MB boundary again and
> +		 * Reduce stripe_size, round it down to pow of 2 boundary again and
>   		 * then use it, unless it ends up being even bigger than the
>   		 * previous value we had already.
>   		 */
> -		ctl->stripe_size = min(round_up(div_u64(ctl->max_chunk_size,
> -							data_stripes), SZ_16M),
> -				       ctl->stripe_size);
> +		ctl->stripe_size = min_t(u64, ctl->stripe_size,
> +			rounddown_pow_of_two(div_u64(ctl->max_chunk_size, data_stripes)));
>   	}
>
>   	/* Align to BTRFS_STRIPE_LEN */
Wang Yugui Aug. 18, 2022, 5:53 a.m. UTC | #2
Hi,

> On 2022/8/17 22:58, Wang Yugui wrote:
> > In decide_stripe_size_regular(), when new disk is added to RAID0/RAID10/RAID56,
> > it is better to free-then-reuse the free space if stripe size is kept or
> > changed to 1/2. so stripe size of pow of 2 will be more friendly. Although
> > roundup_pow_of_two() match better with orig round_up(), but
> > rounddown_pow_of_two() is better to make sure <=ctl->max_chunk_size here.
> 
> Why insist on round*_pow_of_two()?
> 
> I see no reason that a power of 2 sized chunk has any benefit to btrfs.

For stripe size, in some case, 
decide_stripe_size_regular()
    if (ctl->stripe_size * data_stripes > ctl->max_chunk_size) {
        /*
         * Reduce stripe_size, round it up to a 16MB boundary again and
         * then use it, unless it ends up being even bigger than the
         * previous value we had already.
         */
        ctl->stripe_size = min(round_up(div_u64(ctl->max_chunk_size,
                            data_stripes), SZ_16M),
                       ctl->stripe_size);
    }

For example, RAID0/3 disk/max_chunk_size=1G,
then stripe_size = about 1/3G

If another disk is added, RAID0/4 disk/max_chunk_size=1G,
then stripe_size = 1/4G

the mix of 1/3G and 1/4G stripe_size is difficult to manage alloc/free
the space than the mix of 1/2G and 1/4G .

For chunk size, it is more complex because of raid profile.
decide_stripe_size_regular()
    ctl->chunk_size = ctl->stripe_size * data_stripes;
'
for some raid proflie  such as single/RAID1,
because stripe size is already set to a power of 2,
if we set max_chunk_size to  a power of 2, then max_chunk_size will have
a value same to chunk size/ stripe size in some case. it will be more
easy to understand.

Best Regards
Wang Yugui (wangyugui@e16-tech.com)
2022/08/18


> >
> > In another rare case that file system is quite small, we calc max chunk size
> > in pow of 2 too, so that max chunk size / chunk size /stripe size are same or
> > match easy in some case.
> >
> > Signed-off-by: Wang Yugui <wangyugui@e16-tech.com>
> > ---
> > changes since v2:
> > 	restore to rounddown_pow_of_two() from roundup_pow_of_two()
> > changes since v1:
> >   - change rounddown_pow_of_two() to roundup_pow_of_two() to match better with
> >     orig roundup().
> >
> >   fs/btrfs/volumes.c | 20 +++++++++-----------
> >   1 file changed, 9 insertions(+), 11 deletions(-)
> >
> > diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> > index 6595755..fab9765 100644
> > --- a/fs/btrfs/volumes.c
> > +++ b/fs/btrfs/volumes.c
> > @@ -5083,9 +5083,9 @@ static void init_alloc_chunk_ctl_policy_regular(
> >   	if (ctl->type & BTRFS_BLOCK_GROUP_SYSTEM)
> >   		ctl->devs_max = min_t(int, ctl->devs_max, BTRFS_MAX_DEVS_SYS_CHUNK);
> >
> > -	/* We don't want a chunk larger than 10% of writable space */
> > -	ctl->max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
> > -				  ctl->max_chunk_size);
> > +	/* We don't want a chunk larger than 1/8 of writable space */
> > +	ctl->max_chunk_size = min_t(u64, ctl->max_chunk_size,
> > +			rounddown_pow_of_two(fs_devices->total_rw_bytes >> 3));
> >   	ctl->dev_extent_min = BTRFS_STRIPE_LEN * ctl->dev_stripes;
> >   }
> >
> > @@ -5143,10 +5143,9 @@ static void init_alloc_chunk_ctl_policy_zoned(
> >   		BUG();
> >   	}
> >
> > -	/* We don't want a chunk larger than 10% of writable space */
> > -	limit = max(round_down(div_factor(fs_devices->total_rw_bytes, 1),
> > -			       zone_size),
> > -		    min_chunk_size);
> > +	/* We don't want a chunk larger than 1/8 of writable space */
> > +	limit = max_t(u64, min_chunk_size,
> > +		rounddown_pow_of_two(fs_devices->total_rw_bytes >> 3));
> >   	ctl->max_chunk_size = min(limit, ctl->max_chunk_size);
> >   	ctl->dev_extent_min = zone_size * ctl->dev_stripes;
> >   }
> > @@ -5284,13 +5283,12 @@ static int decide_stripe_size_regular(struct alloc_chunk_ctl *ctl,
> >   	 */
> >   	if (ctl->stripe_size * data_stripes > ctl->max_chunk_size) {
> >   		/*
> > -		 * Reduce stripe_size, round it up to a 16MB boundary again and
> > +		 * Reduce stripe_size, round it down to pow of 2 boundary again and
> >   		 * then use it, unless it ends up being even bigger than the
> >   		 * previous value we had already.
> >   		 */
> > -		ctl->stripe_size = min(round_up(div_u64(ctl->max_chunk_size,
> > -							data_stripes), SZ_16M),
> > -				       ctl->stripe_size);
> > +		ctl->stripe_size = min_t(u64, ctl->stripe_size,
> > +			rounddown_pow_of_two(div_u64(ctl->max_chunk_size, data_stripes)));
> >   	}
> >
> >   	/* Align to BTRFS_STRIPE_LEN */
Qu Wenruo Aug. 18, 2022, 6:25 a.m. UTC | #3
On 2022/8/18 13:53, Wang Yugui wrote:
> Hi,
>
>> On 2022/8/17 22:58, Wang Yugui wrote:
>>> In decide_stripe_size_regular(), when new disk is added to RAID0/RAID10/RAID56,
>>> it is better to free-then-reuse the free space if stripe size is kept or
>>> changed to 1/2. so stripe size of pow of 2 will be more friendly. Although
>>> roundup_pow_of_two() match better with orig round_up(), but
>>> rounddown_pow_of_two() is better to make sure <=ctl->max_chunk_size here.
>>
>> Why insist on round*_pow_of_two()?
>>
>> I see no reason that a power of 2 sized chunk has any benefit to btrfs.
>
> For stripe size, in some case,
> decide_stripe_size_regular()
>      if (ctl->stripe_size * data_stripes > ctl->max_chunk_size) {
>          /*
>           * Reduce stripe_size, round it up to a 16MB boundary again and
>           * then use it, unless it ends up being even bigger than the
>           * previous value we had already.
>           */
>          ctl->stripe_size = min(round_up(div_u64(ctl->max_chunk_size,
>                              data_stripes), SZ_16M),
>                         ctl->stripe_size);
>      }
>
> For example, RAID0/3 disk/max_chunk_size=1G,
> then stripe_size = about 1/3G

It's not correct already.

Firstly, @data_stripes for 3 disks RAID0 we should not have
max_chunk_size as 1G.

This is already a behavior change and should be investigated first.

>
> If another disk is added, RAID0/4 disk/max_chunk_size=1G,
> then stripe_size = 1/4G

Then your assumption on reduced stripe_size is already based on the
incorrect max chunk size behavior.

You're fixing a problem with wrong root cause.
>
> the mix of 1/3G and 1/4G stripe_size is difficult to manage alloc/free
> the space than the mix of 1/2G and 1/4G .
>
> For chunk size, it is more complex because of raid profile.
> decide_stripe_size_regular()
>      ctl->chunk_size = ctl->stripe_size * data_stripes;
> '
> for some raid proflie  such as single/RAID1,
> because stripe size is already set to a power of 2,
> if we set max_chunk_size to  a power of 2, then max_chunk_size will have
> a value same to chunk size/ stripe size in some case. it will be more
> easy to understand.
>
> Best Regards
> Wang Yugui (wangyugui@e16-tech.com)
> 2022/08/18
>
>
>>>
>>> In another rare case that file system is quite small, we calc max chunk size
>>> in pow of 2 too, so that max chunk size / chunk size /stripe size are same or
>>> match easy in some case.
>>>
>>> Signed-off-by: Wang Yugui <wangyugui@e16-tech.com>
>>> ---
>>> changes since v2:
>>> 	restore to rounddown_pow_of_two() from roundup_pow_of_two()
>>> changes since v1:
>>>    - change rounddown_pow_of_two() to roundup_pow_of_two() to match better with
>>>      orig roundup().
>>>
>>>    fs/btrfs/volumes.c | 20 +++++++++-----------
>>>    1 file changed, 9 insertions(+), 11 deletions(-)
>>>
>>> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
>>> index 6595755..fab9765 100644
>>> --- a/fs/btrfs/volumes.c
>>> +++ b/fs/btrfs/volumes.c
>>> @@ -5083,9 +5083,9 @@ static void init_alloc_chunk_ctl_policy_regular(
>>>    	if (ctl->type & BTRFS_BLOCK_GROUP_SYSTEM)
>>>    		ctl->devs_max = min_t(int, ctl->devs_max, BTRFS_MAX_DEVS_SYS_CHUNK);
>>>
>>> -	/* We don't want a chunk larger than 10% of writable space */
>>> -	ctl->max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
>>> -				  ctl->max_chunk_size);
>>> +	/* We don't want a chunk larger than 1/8 of writable space */
>>> +	ctl->max_chunk_size = min_t(u64, ctl->max_chunk_size,
>>> +			rounddown_pow_of_two(fs_devices->total_rw_bytes >> 3));
>>>    	ctl->dev_extent_min = BTRFS_STRIPE_LEN * ctl->dev_stripes;
>>>    }
>>>
>>> @@ -5143,10 +5143,9 @@ static void init_alloc_chunk_ctl_policy_zoned(
>>>    		BUG();
>>>    	}
>>>
>>> -	/* We don't want a chunk larger than 10% of writable space */
>>> -	limit = max(round_down(div_factor(fs_devices->total_rw_bytes, 1),
>>> -			       zone_size),
>>> -		    min_chunk_size);
>>> +	/* We don't want a chunk larger than 1/8 of writable space */
>>> +	limit = max_t(u64, min_chunk_size,
>>> +		rounddown_pow_of_two(fs_devices->total_rw_bytes >> 3));
>>>    	ctl->max_chunk_size = min(limit, ctl->max_chunk_size);
>>>    	ctl->dev_extent_min = zone_size * ctl->dev_stripes;
>>>    }
>>> @@ -5284,13 +5283,12 @@ static int decide_stripe_size_regular(struct alloc_chunk_ctl *ctl,
>>>    	 */
>>>    	if (ctl->stripe_size * data_stripes > ctl->max_chunk_size) {
>>>    		/*
>>> -		 * Reduce stripe_size, round it up to a 16MB boundary again and
>>> +		 * Reduce stripe_size, round it down to pow of 2 boundary again and
>>>    		 * then use it, unless it ends up being even bigger than the
>>>    		 * previous value we had already.
>>>    		 */
>>> -		ctl->stripe_size = min(round_up(div_u64(ctl->max_chunk_size,
>>> -							data_stripes), SZ_16M),
>>> -				       ctl->stripe_size);
>>> +		ctl->stripe_size = min_t(u64, ctl->stripe_size,
>>> +			rounddown_pow_of_two(div_u64(ctl->max_chunk_size, data_stripes)));
>>>    	}
>>>
>>>    	/* Align to BTRFS_STRIPE_LEN */
>
>
diff mbox series

Patch

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 6595755..fab9765 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5083,9 +5083,9 @@  static void init_alloc_chunk_ctl_policy_regular(
 	if (ctl->type & BTRFS_BLOCK_GROUP_SYSTEM)
 		ctl->devs_max = min_t(int, ctl->devs_max, BTRFS_MAX_DEVS_SYS_CHUNK);
 
-	/* We don't want a chunk larger than 10% of writable space */
-	ctl->max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
-				  ctl->max_chunk_size);
+	/* We don't want a chunk larger than 1/8 of writable space */
+	ctl->max_chunk_size = min_t(u64, ctl->max_chunk_size,
+			rounddown_pow_of_two(fs_devices->total_rw_bytes >> 3));
 	ctl->dev_extent_min = BTRFS_STRIPE_LEN * ctl->dev_stripes;
 }
 
@@ -5143,10 +5143,9 @@  static void init_alloc_chunk_ctl_policy_zoned(
 		BUG();
 	}
 
-	/* We don't want a chunk larger than 10% of writable space */
-	limit = max(round_down(div_factor(fs_devices->total_rw_bytes, 1),
-			       zone_size),
-		    min_chunk_size);
+	/* We don't want a chunk larger than 1/8 of writable space */
+	limit = max_t(u64, min_chunk_size,
+		rounddown_pow_of_two(fs_devices->total_rw_bytes >> 3));
 	ctl->max_chunk_size = min(limit, ctl->max_chunk_size);
 	ctl->dev_extent_min = zone_size * ctl->dev_stripes;
 }
@@ -5284,13 +5283,12 @@  static int decide_stripe_size_regular(struct alloc_chunk_ctl *ctl,
 	 */
 	if (ctl->stripe_size * data_stripes > ctl->max_chunk_size) {
 		/*
-		 * Reduce stripe_size, round it up to a 16MB boundary again and
+		 * Reduce stripe_size, round it down to pow of 2 boundary again and
 		 * then use it, unless it ends up being even bigger than the
 		 * previous value we had already.
 		 */
-		ctl->stripe_size = min(round_up(div_u64(ctl->max_chunk_size,
-							data_stripes), SZ_16M),
-				       ctl->stripe_size);
+		ctl->stripe_size = min_t(u64, ctl->stripe_size,
+			rounddown_pow_of_two(div_u64(ctl->max_chunk_size, data_stripes)));
 	}
 
 	/* Align to BTRFS_STRIPE_LEN */