diff mbox series

[v2,1/3] btrfs: introduce BTRFS_EXCLOP_BALANCE_PAUSED exclusive state

Message ID 20211108142820.1003187-2-nborisov@suse.com (mailing list archive)
State New, archived
Headers show
Series x Balance vs device add fixes | expand

Commit Message

Nikolay Borisov Nov. 8, 2021, 2:28 p.m. UTC
Current set of exclusive operation states is not sufficient to handle all
practical use cases. In particular there is a need to be able to add a
device to a filesystem that have paused balance. Currently there is no
way to distinguish between a running and a paused balance. Fix this by
introducing BTRFS_EXCLOP_BALANCE_PAUSED which is going to be set in 2
occasions:

1. When a filesystem is mounted with skip_balance and there is an
   unfinished balance it will now be into BALANCE_PAUSED instead of
   simply BALANCE state.

2. When a running balance is paused.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
---
 fs/btrfs/ctree.h   |  3 +++
 fs/btrfs/ioctl.c   | 13 +++++++++++++
 fs/btrfs/volumes.c | 11 +++++++++--
 3 files changed, 25 insertions(+), 2 deletions(-)

Comments

Josef Bacik Nov. 8, 2021, 2:57 p.m. UTC | #1
On Mon, Nov 08, 2021 at 04:28:18PM +0200, Nikolay Borisov wrote:
> Current set of exclusive operation states is not sufficient to handle all
> practical use cases. In particular there is a need to be able to add a
> device to a filesystem that have paused balance. Currently there is no
> way to distinguish between a running and a paused balance. Fix this by
> introducing BTRFS_EXCLOP_BALANCE_PAUSED which is going to be set in 2
> occasions:
> 
> 1. When a filesystem is mounted with skip_balance and there is an
>    unfinished balance it will now be into BALANCE_PAUSED instead of
>    simply BALANCE state.
> 
> 2. When a running balance is paused.
> 
> Signed-off-by: Nikolay Borisov <nborisov@suse.com>
> ---
>  fs/btrfs/ctree.h   |  3 +++
>  fs/btrfs/ioctl.c   | 13 +++++++++++++
>  fs/btrfs/volumes.c | 11 +++++++++--
>  3 files changed, 25 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index 7553e9dc5f93..8376271bfed1 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -613,6 +613,7 @@ enum {
>   */
>  enum btrfs_exclusive_operation {
>  	BTRFS_EXCLOP_NONE,
> +	BTRFS_EXCLOP_BALANCE_PAUSED,
>  	BTRFS_EXCLOP_BALANCE,
>  	BTRFS_EXCLOP_DEV_ADD,
>  	BTRFS_EXCLOP_DEV_REMOVE,
> @@ -3305,6 +3306,8 @@ bool btrfs_exclop_start_try_lock(struct btrfs_fs_info *fs_info,
>  				 enum btrfs_exclusive_operation type);
>  void btrfs_exclop_start_unlock(struct btrfs_fs_info *fs_info);
>  void btrfs_exclop_finish(struct btrfs_fs_info *fs_info);
> +void btrfs_exclop_pause_balance(struct btrfs_fs_info *fs_info);
> +
>  
>  /* file.c */
>  int __init btrfs_auto_defrag_init(void);
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index 92424a22d8d6..f4c05a9aab84 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -414,6 +414,15 @@ void btrfs_exclop_finish(struct btrfs_fs_info *fs_info)
>  	sysfs_notify(&fs_info->fs_devices->fsid_kobj, NULL, "exclusive_operation");
>  }
>  
> +void btrfs_exclop_pause_balance(struct btrfs_fs_info *fs_info)
> +{
> +	spin_lock(&fs_info->super_lock);
> +	ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE ||
> +	       fs_info->exclusive_operation == BTRFS_EXCLOP_DEV_ADD);
> +	fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE_PAUSED;
> +	spin_unlock(&fs_info->super_lock);
> +}
> +
>  static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
>  {
>  	struct inode *inode = file_inode(file);
> @@ -4020,6 +4029,10 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
>  			if (fs_info->balance_ctl &&
>  			    !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
>  				/* this is (3) */
> +				spin_lock(&fs_info->super_lock);
> +				ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED);
> +				fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE;
> +				spin_unlock(&fs_info->super_lock);
>  				need_unlock = false;
>  				goto locked;
>  			}
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index cc80f2a97a0b..e87f9ac440c2 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -4355,8 +4355,10 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
>  	ret = __btrfs_balance(fs_info);
>  
>  	mutex_lock(&fs_info->balance_mutex);
> -	if (ret == -ECANCELED && atomic_read(&fs_info->balance_pause_req))
> +	if (ret == -ECANCELED && atomic_read(&fs_info->balance_pause_req)) {
>  		btrfs_info(fs_info, "balance: paused");
> +		btrfs_exclop_pause_balance(fs_info);
> +	}
>  	/*
>  	 * Balance can be canceled by:
>  	 *
> @@ -4406,6 +4408,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
>  static int balance_kthread(void *data)
>  {
>  	struct btrfs_fs_info *fs_info = data;
> +	bool paused = false;

Unused variable.  Thanks,

Josef
Nikolay Borisov Nov. 8, 2021, 2:58 p.m. UTC | #2
On 8.11.21 г. 16:57, Josef Bacik wrote:
> +	bool paused = false;


Argh, I thought I removed it .... David unless there are more changes
requested can you remove it during merge or I can resend as well.
Anand Jain Nov. 9, 2021, 11:35 a.m. UTC | #3
On 8/11/21 10:28 pm, Nikolay Borisov wrote:
> Current set of exclusive operation states is not sufficient to handle all
> practical use cases. In particular there is a need to be able to add a
> device to a filesystem that have paused balance. Currently there is no
> way to distinguish between a running and a paused balance. Fix this by
> introducing BTRFS_EXCLOP_BALANCE_PAUSED which is going to be set in 2
> occasions:
> 
> 1. When a filesystem is mounted with skip_balance and there is an
>     unfinished balance it will now be into BALANCE_PAUSED instead of
>     simply BALANCE state.
> 
> 2. When a running balance is paused.
> 
> Signed-off-by: Nikolay Borisov <nborisov@suse.com>
> ---
>   fs/btrfs/ctree.h   |  3 +++
>   fs/btrfs/ioctl.c   | 13 +++++++++++++
>   fs/btrfs/volumes.c | 11 +++++++++--
>   3 files changed, 25 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index 7553e9dc5f93..8376271bfed1 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -613,6 +613,7 @@ enum {
>    */
>   enum btrfs_exclusive_operation {
>   	BTRFS_EXCLOP_NONE,
> +	BTRFS_EXCLOP_BALANCE_PAUSED,
>   	BTRFS_EXCLOP_BALANCE,
>   	BTRFS_EXCLOP_DEV_ADD,
>   	BTRFS_EXCLOP_DEV_REMOVE,
> @@ -3305,6 +3306,8 @@ bool btrfs_exclop_start_try_lock(struct btrfs_fs_info *fs_info,
>   				 enum btrfs_exclusive_operation type);
>   void btrfs_exclop_start_unlock(struct btrfs_fs_info *fs_info);
>   void btrfs_exclop_finish(struct btrfs_fs_info *fs_info);
> +void btrfs_exclop_pause_balance(struct btrfs_fs_info *fs_info);
> +
>   
>   /* file.c */
>   int __init btrfs_auto_defrag_init(void);
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index 92424a22d8d6..f4c05a9aab84 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -414,6 +414,15 @@ void btrfs_exclop_finish(struct btrfs_fs_info *fs_info)
>   	sysfs_notify(&fs_info->fs_devices->fsid_kobj, NULL, "exclusive_operation");
>   }
> 



> +void btrfs_exclop_pause_balance(struct btrfs_fs_info *fs_info)
> +{
> +	spin_lock(&fs_info->super_lock);
> +	ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE ||
> +	       fs_info->exclusive_operation == BTRFS_EXCLOP_DEV_ADD);
> +	fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE_PAUSED;
> +	spin_unlock(&fs_info->super_lock);
> +}
> +

This function can be more generic and replace its open coded version
in a few places.

  btrfs_exclop_balance(fs_info, exclop)
  {
::
	switch(exclop)
	{
		case BTRFS_EXCLOP_BALANCE_PAUSED:
	  		ASSERT(fs_info->exclusive_operation ==
				BTRFS_EXCLOP_BALANCE ||
                  		fs_info->exclusive_operation ==
				BTRFS_EXCLOP_DEV_ADD);
			break;
		case BTRFS_EXCLOP_BALANCE:
			ASSERT(fs_info->exclusive_operation ==
				BTRFS_EXCLOP_BALANCE_PAUSED);
			break;
	}
::
  }


>   static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
>   {
>   	struct inode *inode = file_inode(file);
> @@ -4020,6 +4029,10 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
>   			if (fs_info->balance_ctl &&
>   			    !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {


>   				/* this is (3) */
> +				spin_lock(&fs_info->super_lock);
> +				ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED);
> +				fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE;

Here you set the status to BALANCE running. Why do we do it so early
without even checking if the user cmd is a resume? Like a few lines
below?

     4064 if (bargs->flags & BTRFS_BALANCE_RESUME) {

I guess it is because of the legacy balance ioctl.

     4927 case BTRFS_IOC_BALANCE:
     4928 return btrfs_ioctl_balance(file, NULL);

Could you confirm?

-Anand

> +				spin_unlock(&fs_info->super_lock);
>   				need_unlock = false;
>   				goto locked;
>   			}
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index cc80f2a97a0b..e87f9ac440c2 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -4355,8 +4355,10 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
>   	ret = __btrfs_balance(fs_info);
>   
>   	mutex_lock(&fs_info->balance_mutex);
> -	if (ret == -ECANCELED && atomic_read(&fs_info->balance_pause_req))
> +	if (ret == -ECANCELED && atomic_read(&fs_info->balance_pause_req)) {
>   		btrfs_info(fs_info, "balance: paused");
> +		btrfs_exclop_pause_balance(fs_info);
> +	}
>   	/*
>   	 * Balance can be canceled by:
>   	 *
> @@ -4406,6 +4408,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
>   static int balance_kthread(void *data)
>   {
>   	struct btrfs_fs_info *fs_info = data;
> +	bool paused = false;
>   	int ret = 0;
>   
>   	mutex_lock(&fs_info->balance_mutex);
> @@ -4432,6 +4435,10 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
>   		return 0;
>   	}
>   
> +	spin_lock(&fs_info->super_lock);
> +	ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED);
> +	fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE;
> +	spin_unlock(&fs_info->super_lock);
>   	/*
>   	 * A ro->rw remount sequence should continue with the paused balance
>   	 * regardless of who pauses it, system or the user as of now, so set
> @@ -4500,7 +4507,7 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
>   	 * is in a paused state and must have fs_info::balance_ctl properly
>   	 * set up.
>   	 */
> -	if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE))
> +	if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE_PAUSED))
>   		btrfs_warn(fs_info,
>   	"balance: cannot set exclusive op status, resume manually");
>   
>
Nikolay Borisov Nov. 9, 2021, 3:33 p.m. UTC | #4
<snip>

> 
>> +void btrfs_exclop_pause_balance(struct btrfs_fs_info *fs_info)
>> +{
>> +    spin_lock(&fs_info->super_lock);
>> +    ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE ||
>> +           fs_info->exclusive_operation == BTRFS_EXCLOP_DEV_ADD);
>> +    fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE_PAUSED;
>> +    spin_unlock(&fs_info->super_lock);
>> +}
>> +
> 
> This function can be more generic and replace its open coded version
> in a few places.
> 
>  btrfs_exclop_balance(fs_info, exclop)
>  {
> ::
>     switch(exclop)
>     {
>         case BTRFS_EXCLOP_BALANCE_PAUSED:
>               ASSERT(fs_info->exclusive_operation ==
>                 BTRFS_EXCLOP_BALANCE ||
>                          fs_info->exclusive_operation ==
>                 BTRFS_EXCLOP_DEV_ADD);
>             break;
>         case BTRFS_EXCLOP_BALANCE:
>             ASSERT(fs_info->exclusive_operation ==
>                 BTRFS_EXCLOP_BALANCE_PAUSED);
>             break;
>     }
> ::
>  }
> 
> 
>>   static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
>>   {
>>       struct inode *inode = file_inode(file);
>> @@ -4020,6 +4029,10 @@ static long btrfs_ioctl_balance(struct file
>> *file, void __user *arg)
>>               if (fs_info->balance_ctl &&
>>                   !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
> 
> 
>>                   /* this is (3) */
>> +                spin_lock(&fs_info->super_lock);
>> +                ASSERT(fs_info->exclusive_operation ==
>> BTRFS_EXCLOP_BALANCE_PAUSED);
>> +                fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE;
> 
> Here you set the status to BALANCE running. Why do we do it so early
> without even checking if the user cmd is a resume? Like a few lines
> below?
> 
>     4064 if (bargs->flags & BTRFS_BALANCE_RESUME) {
> 
> I guess it is because of the legacy balance ioctl.
> 
>     4927 case BTRFS_IOC_BALANCE:
>     4928 return btrfs_ioctl_balance(file, NULL);
> 
> Could you confirm?


Actually no, I thought that just because we are in (3) (based on the
comments) the right thing would be done. However, that's clearly not the
case...

I wonder whether putting the code under the & BALANCE_RESUME branch is
sufficient because as you pointed out the v1 ioctl doesn't handle args
at all. If I'm reading the code correctly balance ioctl v1 can't really
resume balance because it will always return with :

    20         if (fs_info->balance_ctl) {

    19                 ret = -EINPROGRESS;

    18                 goto out_bargs;

    17         }

OTOH if I put the code right before we call btrfs_balance then there's
no way to distinguish we are starting from paused state

<snip>
Anand Jain Nov. 10, 2021, 8:56 a.m. UTC | #5
On 9/11/21 11:33 pm, Nikolay Borisov wrote:
> <snip>
> 
>>
>>> +void btrfs_exclop_pause_balance(struct btrfs_fs_info *fs_info)
>>> +{
>>> +    spin_lock(&fs_info->super_lock);
>>> +    ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE ||
>>> +           fs_info->exclusive_operation == BTRFS_EXCLOP_DEV_ADD);
>>> +    fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE_PAUSED;
>>> +    spin_unlock(&fs_info->super_lock);
>>> +}
>>> +
>>
>> This function can be more generic and replace its open coded version
>> in a few places.
>>
>>   btrfs_exclop_balance(fs_info, exclop)
>>   {
>> ::
>>      switch(exclop)
>>      {
>>          case BTRFS_EXCLOP_BALANCE_PAUSED:
>>                ASSERT(fs_info->exclusive_operation ==
>>                  BTRFS_EXCLOP_BALANCE ||
>>                           fs_info->exclusive_operation ==
>>                  BTRFS_EXCLOP_DEV_ADD);
>>              break;
>>          case BTRFS_EXCLOP_BALANCE:
>>              ASSERT(fs_info->exclusive_operation ==
>>                  BTRFS_EXCLOP_BALANCE_PAUSED);
>>              break;
>>      }
>> ::
>>   }
>>
>>
>>>    static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
>>>    {
>>>        struct inode *inode = file_inode(file);
>>> @@ -4020,6 +4029,10 @@ static long btrfs_ioctl_balance(struct file
>>> *file, void __user *arg)
>>>                if (fs_info->balance_ctl &&
>>>                    !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
>>
>>
>>>                    /* this is (3) */
>>> +                spin_lock(&fs_info->super_lock);
>>> +                ASSERT(fs_info->exclusive_operation ==
>>> BTRFS_EXCLOP_BALANCE_PAUSED);
>>> +                fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE;
>>
>> Here you set the status to BALANCE running. Why do we do it so early
>> without even checking if the user cmd is a resume? Like a few lines
>> below?
>>
>>      4064 if (bargs->flags & BTRFS_BALANCE_RESUME) {
>>
>> I guess it is because of the legacy balance ioctl.
>>
>>      4927 case BTRFS_IOC_BALANCE:
>>      4928 return btrfs_ioctl_balance(file, NULL);
>>
>> Could you confirm?
> 
> 
> Actually no, I thought that just because we are in (3) (based on the
> comments) the right thing would be done. However, that's clearly not the
> case...
> 
> I wonder whether putting the code under the & BALANCE_RESUME branch is
> sufficient because as you pointed out the v1 ioctl doesn't handle args
> at all. If I'm reading the code correctly balance ioctl v1 can't really
> resume balance because it will always return with :
> 


>      20         if (fs_info->balance_ctl) {
> 
>      19                 ret = -EINPROGRESS;
> 
>      18                 goto out_bargs;
> 
>      17         }
> 
> OTOH if I put the code right before we call btrfs_balance then there's
> no way to distinguish we are starting from paused state
> 
> <snip>

Yeah looks like the legacy code did not resume the balance, it supported
the pause though or may be the trick was to remount to resume the
balance?

As this part of the code is very confusing I think it is better to split
the balance v1 and v2 codes into separate functions.
Nikolay Borisov Nov. 10, 2021, 9:31 a.m. UTC | #6
On 10.11.21 г. 10:56, Anand Jain wrote:
> 
> 
> On 9/11/21 11:33 pm, Nikolay Borisov wrote:
>> <snip>
>>
>>>
>>>> +void btrfs_exclop_pause_balance(struct btrfs_fs_info *fs_info)
>>>> +{
>>>> +    spin_lock(&fs_info->super_lock);
>>>> +    ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE ||
>>>> +           fs_info->exclusive_operation == BTRFS_EXCLOP_DEV_ADD);
>>>> +    fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE_PAUSED;
>>>> +    spin_unlock(&fs_info->super_lock);
>>>> +}
>>>> +
>>>
>>> This function can be more generic and replace its open coded version
>>> in a few places.
>>>
>>>   btrfs_exclop_balance(fs_info, exclop)
>>>   {
>>> ::
>>>      switch(exclop)
>>>      {
>>>          case BTRFS_EXCLOP_BALANCE_PAUSED:
>>>                ASSERT(fs_info->exclusive_operation ==
>>>                  BTRFS_EXCLOP_BALANCE ||
>>>                           fs_info->exclusive_operation ==
>>>                  BTRFS_EXCLOP_DEV_ADD);
>>>              break;
>>>          case BTRFS_EXCLOP_BALANCE:
>>>              ASSERT(fs_info->exclusive_operation ==
>>>                  BTRFS_EXCLOP_BALANCE_PAUSED);
>>>              break;
>>>      }
>>> ::
>>>   }
>>>
>>>
>>>>    static int btrfs_ioctl_getversion(struct file *file, int __user
>>>> *arg)
>>>>    {
>>>>        struct inode *inode = file_inode(file);
>>>> @@ -4020,6 +4029,10 @@ static long btrfs_ioctl_balance(struct file
>>>> *file, void __user *arg)
>>>>                if (fs_info->balance_ctl &&
>>>>                    !test_bit(BTRFS_FS_BALANCE_RUNNING,
>>>> &fs_info->flags)) {
>>>
>>>
>>>>                    /* this is (3) */
>>>> +                spin_lock(&fs_info->super_lock);
>>>> +                ASSERT(fs_info->exclusive_operation ==
>>>> BTRFS_EXCLOP_BALANCE_PAUSED);
>>>> +                fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE;
>>>
>>> Here you set the status to BALANCE running. Why do we do it so early
>>> without even checking if the user cmd is a resume? Like a few lines
>>> below?
>>>
>>>      4064 if (bargs->flags & BTRFS_BALANCE_RESUME) {
>>>
>>> I guess it is because of the legacy balance ioctl.
>>>
>>>      4927 case BTRFS_IOC_BALANCE:
>>>      4928 return btrfs_ioctl_balance(file, NULL);
>>>
>>> Could you confirm?
>>
>>
>> Actually no, I thought that just because we are in (3) (based on the
>> comments) the right thing would be done. However, that's clearly not the
>> case...
>>
>> I wonder whether putting the code under the & BALANCE_RESUME branch is
>> sufficient because as you pointed out the v1 ioctl doesn't handle args
>> at all. If I'm reading the code correctly balance ioctl v1 can't really
>> resume balance because it will always return with :
>>
> 
> 
>>      20         if (fs_info->balance_ctl) {
As this part of the code is very confusing I think it is better to split
the balance v1 and v2 codes into separate functions.
>>
>>      19                 ret = -EINPROGRESS;
>>
>>      18                 goto out_bargs;
>>
>>      17         }
>>
>> OTOH if I put the code right before we call btrfs_balance then there's
>> no way to distinguish we are starting from paused state
>>
>> <snip>
> 
> Yeah looks like the legacy code did not resume the balance, it supported
> the pause though or may be the trick was to remount to resume the
> balance?
> 
> As this part of the code is very confusing I think it is better to split
> the balance v1 and v2 codes into separate functions.


Actually V1 is going to be deprecated so I think the way forward is to
move the resume under the & BALANCE_RESUME branch.
David Sterba Nov. 11, 2021, 2:48 p.m. UTC | #7
On Tue, Nov 09, 2021 at 07:35:40PM +0800, Anand Jain wrote:
> On 8/11/21 10:28 pm, Nikolay Borisov wrote:
> > Current set of exclusive operation states is not sufficient to handle all
> > practical use cases. In particular there is a need to be able to add a
> > device to a filesystem that have paused balance. Currently there is no
> > way to distinguish between a running and a paused balance. Fix this by
> > introducing BTRFS_EXCLOP_BALANCE_PAUSED which is going to be set in 2
> > occasions:
> > 
> > 1. When a filesystem is mounted with skip_balance and there is an
> >     unfinished balance it will now be into BALANCE_PAUSED instead of
> >     simply BALANCE state.
> > 
> > 2. When a running balance is paused.
> > 
> > Signed-off-by: Nikolay Borisov <nborisov@suse.com>
> > ---
> >   fs/btrfs/ctree.h   |  3 +++
> >   fs/btrfs/ioctl.c   | 13 +++++++++++++
> >   fs/btrfs/volumes.c | 11 +++++++++--
> >   3 files changed, 25 insertions(+), 2 deletions(-)
> > 
> > diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> > index 7553e9dc5f93..8376271bfed1 100644
> > --- a/fs/btrfs/ctree.h
> > +++ b/fs/btrfs/ctree.h
> > @@ -613,6 +613,7 @@ enum {
> >    */
> >   enum btrfs_exclusive_operation {
> >   	BTRFS_EXCLOP_NONE,
> > +	BTRFS_EXCLOP_BALANCE_PAUSED,
> >   	BTRFS_EXCLOP_BALANCE,
> >   	BTRFS_EXCLOP_DEV_ADD,
> >   	BTRFS_EXCLOP_DEV_REMOVE,
> > @@ -3305,6 +3306,8 @@ bool btrfs_exclop_start_try_lock(struct btrfs_fs_info *fs_info,
> >   				 enum btrfs_exclusive_operation type);
> >   void btrfs_exclop_start_unlock(struct btrfs_fs_info *fs_info);
> >   void btrfs_exclop_finish(struct btrfs_fs_info *fs_info);
> > +void btrfs_exclop_pause_balance(struct btrfs_fs_info *fs_info);
> > +
> >   
> >   /* file.c */
> >   int __init btrfs_auto_defrag_init(void);
> > diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> > index 92424a22d8d6..f4c05a9aab84 100644
> > --- a/fs/btrfs/ioctl.c
> > +++ b/fs/btrfs/ioctl.c
> > @@ -414,6 +414,15 @@ void btrfs_exclop_finish(struct btrfs_fs_info *fs_info)
> >   	sysfs_notify(&fs_info->fs_devices->fsid_kobj, NULL, "exclusive_operation");
> >   }
> > 
> 
> 
> 
> > +void btrfs_exclop_pause_balance(struct btrfs_fs_info *fs_info)
> > +{
> > +	spin_lock(&fs_info->super_lock);
> > +	ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE ||
> > +	       fs_info->exclusive_operation == BTRFS_EXCLOP_DEV_ADD);
> > +	fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE_PAUSED;
> > +	spin_unlock(&fs_info->super_lock);
> > +}
> > +
> 
> This function can be more generic and replace its open coded version
> in a few places.
> 
>   btrfs_exclop_balance(fs_info, exclop)
>   {
> ::
> 	switch(exclop)
> 	{
> 		case BTRFS_EXCLOP_BALANCE_PAUSED:
> 	  		ASSERT(fs_info->exclusive_operation ==
> 				BTRFS_EXCLOP_BALANCE ||
>                   		fs_info->exclusive_operation ==
> 				BTRFS_EXCLOP_DEV_ADD);
> 			break;
> 		case BTRFS_EXCLOP_BALANCE:
> 			ASSERT(fs_info->exclusive_operation ==
> 				BTRFS_EXCLOP_BALANCE_PAUSED);
> 			break;
> 	}
> ::
>   }

I don't see this comment answered, I think it's a good point to do the
exclusive state change and assertions in the helper, there's too much
code repetition.
David Sterba Nov. 11, 2021, 3 p.m. UTC | #8
On Wed, Nov 10, 2021 at 11:31:25AM +0200, Nikolay Borisov wrote:
> 
> 
> On 10.11.21 г. 10:56, Anand Jain wrote:
> > 
> > 
> > On 9/11/21 11:33 pm, Nikolay Borisov wrote:
> >> <snip>
> >>
> >>>
> >>>> +void btrfs_exclop_pause_balance(struct btrfs_fs_info *fs_info)
> >>>> +{
> >>>> +    spin_lock(&fs_info->super_lock);
> >>>> +    ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE ||
> >>>> +           fs_info->exclusive_operation == BTRFS_EXCLOP_DEV_ADD);
> >>>> +    fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE_PAUSED;
> >>>> +    spin_unlock(&fs_info->super_lock);
> >>>> +}
> >>>> +
> >>>
> >>> This function can be more generic and replace its open coded version
> >>> in a few places.
> >>>
> >>>   btrfs_exclop_balance(fs_info, exclop)
> >>>   {
> >>> ::
> >>>      switch(exclop)
> >>>      {
> >>>          case BTRFS_EXCLOP_BALANCE_PAUSED:
> >>>                ASSERT(fs_info->exclusive_operation ==
> >>>                  BTRFS_EXCLOP_BALANCE ||
> >>>                           fs_info->exclusive_operation ==
> >>>                  BTRFS_EXCLOP_DEV_ADD);
> >>>              break;
> >>>          case BTRFS_EXCLOP_BALANCE:
> >>>              ASSERT(fs_info->exclusive_operation ==
> >>>                  BTRFS_EXCLOP_BALANCE_PAUSED);
> >>>              break;
> >>>      }
> >>> ::
> >>>   }
> >>>
> >>>
> >>>>    static int btrfs_ioctl_getversion(struct file *file, int __user
> >>>> *arg)
> >>>>    {
> >>>>        struct inode *inode = file_inode(file);
> >>>> @@ -4020,6 +4029,10 @@ static long btrfs_ioctl_balance(struct file
> >>>> *file, void __user *arg)
> >>>>                if (fs_info->balance_ctl &&
> >>>>                    !test_bit(BTRFS_FS_BALANCE_RUNNING,
> >>>> &fs_info->flags)) {
> >>>
> >>>
> >>>>                    /* this is (3) */
> >>>> +                spin_lock(&fs_info->super_lock);
> >>>> +                ASSERT(fs_info->exclusive_operation ==
> >>>> BTRFS_EXCLOP_BALANCE_PAUSED);
> >>>> +                fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE;
> >>>
> >>> Here you set the status to BALANCE running. Why do we do it so early
> >>> without even checking if the user cmd is a resume? Like a few lines
> >>> below?
> >>>
> >>>      4064 if (bargs->flags & BTRFS_BALANCE_RESUME) {
> >>>
> >>> I guess it is because of the legacy balance ioctl.
> >>>
> >>>      4927 case BTRFS_IOC_BALANCE:
> >>>      4928 return btrfs_ioctl_balance(file, NULL);
> >>>
> >>> Could you confirm?
> >>
> >>
> >> Actually no, I thought that just because we are in (3) (based on the
> >> comments) the right thing would be done. However, that's clearly not the
> >> case...
> >>
> >> I wonder whether putting the code under the & BALANCE_RESUME branch is
> >> sufficient because as you pointed out the v1 ioctl doesn't handle args
> >> at all. If I'm reading the code correctly balance ioctl v1 can't really
> >> resume balance because it will always return with :
> >>
> > 
> > 
> >>      20         if (fs_info->balance_ctl) {
> As this part of the code is very confusing I think it is better to split
> the balance v1 and v2 codes into separate functions.
> >>
> >>      19                 ret = -EINPROGRESS;
> >>
> >>      18                 goto out_bargs;
> >>
> >>      17         }
> >>
> >> OTOH if I put the code right before we call btrfs_balance then there's
> >> no way to distinguish we are starting from paused state
> >>
> >> <snip>
> > 
> > Yeah looks like the legacy code did not resume the balance, it supported
> > the pause though or may be the trick was to remount to resume the
> > balance?
> > 
> > As this part of the code is very confusing I think it is better to split
> > the balance v1 and v2 codes into separate functions.
> 
> 
> Actually V1 is going to be deprecated so I think the way forward is to
> move the resume under the & BALANCE_RESUME branch.

I think we don't need to take v1 into account anymore. If the
deprecation goes to 5.16 and the device add / balance pause
compatibility into 5.17, we can actually remove v1 in the same release
so there's not even a chance to get to some weird state.
diff mbox series

Patch

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 7553e9dc5f93..8376271bfed1 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -613,6 +613,7 @@  enum {
  */
 enum btrfs_exclusive_operation {
 	BTRFS_EXCLOP_NONE,
+	BTRFS_EXCLOP_BALANCE_PAUSED,
 	BTRFS_EXCLOP_BALANCE,
 	BTRFS_EXCLOP_DEV_ADD,
 	BTRFS_EXCLOP_DEV_REMOVE,
@@ -3305,6 +3306,8 @@  bool btrfs_exclop_start_try_lock(struct btrfs_fs_info *fs_info,
 				 enum btrfs_exclusive_operation type);
 void btrfs_exclop_start_unlock(struct btrfs_fs_info *fs_info);
 void btrfs_exclop_finish(struct btrfs_fs_info *fs_info);
+void btrfs_exclop_pause_balance(struct btrfs_fs_info *fs_info);
+
 
 /* file.c */
 int __init btrfs_auto_defrag_init(void);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 92424a22d8d6..f4c05a9aab84 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -414,6 +414,15 @@  void btrfs_exclop_finish(struct btrfs_fs_info *fs_info)
 	sysfs_notify(&fs_info->fs_devices->fsid_kobj, NULL, "exclusive_operation");
 }
 
+void btrfs_exclop_pause_balance(struct btrfs_fs_info *fs_info)
+{
+	spin_lock(&fs_info->super_lock);
+	ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE ||
+	       fs_info->exclusive_operation == BTRFS_EXCLOP_DEV_ADD);
+	fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE_PAUSED;
+	spin_unlock(&fs_info->super_lock);
+}
+
 static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
 {
 	struct inode *inode = file_inode(file);
@@ -4020,6 +4029,10 @@  static long btrfs_ioctl_balance(struct file *file, void __user *arg)
 			if (fs_info->balance_ctl &&
 			    !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
 				/* this is (3) */
+				spin_lock(&fs_info->super_lock);
+				ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED);
+				fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE;
+				spin_unlock(&fs_info->super_lock);
 				need_unlock = false;
 				goto locked;
 			}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index cc80f2a97a0b..e87f9ac440c2 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4355,8 +4355,10 @@  int btrfs_balance(struct btrfs_fs_info *fs_info,
 	ret = __btrfs_balance(fs_info);
 
 	mutex_lock(&fs_info->balance_mutex);
-	if (ret == -ECANCELED && atomic_read(&fs_info->balance_pause_req))
+	if (ret == -ECANCELED && atomic_read(&fs_info->balance_pause_req)) {
 		btrfs_info(fs_info, "balance: paused");
+		btrfs_exclop_pause_balance(fs_info);
+	}
 	/*
 	 * Balance can be canceled by:
 	 *
@@ -4406,6 +4408,7 @@  int btrfs_balance(struct btrfs_fs_info *fs_info,
 static int balance_kthread(void *data)
 {
 	struct btrfs_fs_info *fs_info = data;
+	bool paused = false;
 	int ret = 0;
 
 	mutex_lock(&fs_info->balance_mutex);
@@ -4432,6 +4435,10 @@  int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
 		return 0;
 	}
 
+	spin_lock(&fs_info->super_lock);
+	ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED);
+	fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE;
+	spin_unlock(&fs_info->super_lock);
 	/*
 	 * A ro->rw remount sequence should continue with the paused balance
 	 * regardless of who pauses it, system or the user as of now, so set
@@ -4500,7 +4507,7 @@  int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
 	 * is in a paused state and must have fs_info::balance_ctl properly
 	 * set up.
 	 */
-	if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE))
+	if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE_PAUSED))
 		btrfs_warn(fs_info,
 	"balance: cannot set exclusive op status, resume manually");