diff mbox series

[v2,2/7] btrfs: handle device allocation failure in btrfs_close_one_device()

Message ID 20191113102728.8835-3-jthumshirn@suse.de (mailing list archive)
State New, archived
Headers show
Series [v2,1/7] btrfs: decrement number of open devices after closing the device not before | expand

Commit Message

Johannes Thumshirn Nov. 13, 2019, 10:27 a.m. UTC
In btrfs_close_one_device() we're allocating a new device and if this
fails we BUG().

Move the allocation to the top of the function and return an error in case
it failed.

The BUG_ON() is temporarily moved to close_fs_devices(), the caller of
btrfs_close_one_device() as further work is pending to untangle this.

Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>
---
 fs/btrfs/volumes.c | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

Comments

David Sterba Nov. 13, 2019, 2:58 p.m. UTC | #1
On Wed, Nov 13, 2019 at 11:27:23AM +0100, Johannes Thumshirn wrote:
> In btrfs_close_one_device() we're allocating a new device and if this
> fails we BUG().
> 
> Move the allocation to the top of the function and return an error in case
> it failed.
> 
> The BUG_ON() is temporarily moved to close_fs_devices(), the caller of
> btrfs_close_one_device() as further work is pending to untangle this.
> 
> Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>
> ---
>  fs/btrfs/volumes.c | 27 +++++++++++++++++++++------
>  1 file changed, 21 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 5ee26e7fca32..0a2a73907563 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -1061,12 +1061,17 @@ static void btrfs_close_bdev(struct btrfs_device *device)
>  	blkdev_put(device->bdev, device->mode);
>  }
>  
> -static void btrfs_close_one_device(struct btrfs_device *device)
> +static int btrfs_close_one_device(struct btrfs_device *device)
>  {
>  	struct btrfs_fs_devices *fs_devices = device->fs_devices;
>  	struct btrfs_device *new_device;
>  	struct rcu_string *name;
>  
> +	new_device = btrfs_alloc_device(NULL, &device->devid,
> +					device->uuid);
> +	if (IS_ERR(new_device))
> +		goto err_close_device;
> +
>  	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
>  	    device->devid != BTRFS_DEV_REPLACE_DEVID) {
>  		list_del_init(&device->dev_alloc_list);
> @@ -1080,10 +1085,6 @@ static void btrfs_close_one_device(struct btrfs_device *device)
>  	if (device->bdev)
>  		fs_devices->open_devices--;
>  
> -	new_device = btrfs_alloc_device(NULL, &device->devid,
> -					device->uuid);
> -	BUG_ON(IS_ERR(new_device)); /* -ENOMEM */
> -
>  	/* Safe because we are under uuid_mutex */
>  	if (device->name) {
>  		name = rcu_string_strdup(device->name->str, GFP_NOFS);
> @@ -1096,18 +1097,32 @@ static void btrfs_close_one_device(struct btrfs_device *device)
>  
>  	synchronize_rcu();
>  	btrfs_free_device(device);
> +
> +	return 0;
> +
> +err_close_device:
> +	btrfs_close_bdev(device);
> +	if (device->bdev) {
> +		fs_devices->open_devices--;
> +		btrfs_sysfs_rm_device_link(fs_devices, device);
> +		device->bdev = NULL;
> +	}

I don't understand this part: the 'device' pointer comes from the argument,
so it is the device we want to delete from the list, and for that all the
state bit tests, bdev close, list replace rcu and synchronize_rcu should
happen -- in case we have a newly allocated new_device.

What I don't understand is how the short version after the label
err_close_device: is correct. The device is still left in the list but
with a NULL bdev, while rw_devices and missing_devices are untouched.

That a device closing needs to allocate memory for a new device instead
of reinitializing it again is stupid but with the simplified device
closing I'm not sure the state is well defined.
Johannes Thumshirn Nov. 14, 2019, 8:48 a.m. UTC | #2
On 13/11/2019 15:58, David Sterba wrote:
> On Wed, Nov 13, 2019 at 11:27:23AM +0100, Johannes Thumshirn wrote:
>> In btrfs_close_one_device() we're allocating a new device and if this
>> fails we BUG().
>>
>> Move the allocation to the top of the function and return an error in case
>> it failed.
>>
>> The BUG_ON() is temporarily moved to close_fs_devices(), the caller of
>> btrfs_close_one_device() as further work is pending to untangle this.
>>
>> Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>
>> ---
>>  fs/btrfs/volumes.c | 27 +++++++++++++++++++++------
>>  1 file changed, 21 insertions(+), 6 deletions(-)
>>
>> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
>> index 5ee26e7fca32..0a2a73907563 100644
>> --- a/fs/btrfs/volumes.c
>> +++ b/fs/btrfs/volumes.c
>> @@ -1061,12 +1061,17 @@ static void btrfs_close_bdev(struct btrfs_device *device)
>>  	blkdev_put(device->bdev, device->mode);
>>  }
>>  
>> -static void btrfs_close_one_device(struct btrfs_device *device)
>> +static int btrfs_close_one_device(struct btrfs_device *device)
>>  {
>>  	struct btrfs_fs_devices *fs_devices = device->fs_devices;
>>  	struct btrfs_device *new_device;
>>  	struct rcu_string *name;
>>  
>> +	new_device = btrfs_alloc_device(NULL, &device->devid,
>> +					device->uuid);
>> +	if (IS_ERR(new_device))
>> +		goto err_close_device;
>> +
>>  	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
>>  	    device->devid != BTRFS_DEV_REPLACE_DEVID) {
>>  		list_del_init(&device->dev_alloc_list);
>> @@ -1080,10 +1085,6 @@ static void btrfs_close_one_device(struct btrfs_device *device)
>>  	if (device->bdev)
>>  		fs_devices->open_devices--;
>>  
>> -	new_device = btrfs_alloc_device(NULL, &device->devid,
>> -					device->uuid);
>> -	BUG_ON(IS_ERR(new_device)); /* -ENOMEM */
>> -
>>  	/* Safe because we are under uuid_mutex */
>>  	if (device->name) {
>>  		name = rcu_string_strdup(device->name->str, GFP_NOFS);
>> @@ -1096,18 +1097,32 @@ static void btrfs_close_one_device(struct btrfs_device *device)
>>  
>>  	synchronize_rcu();
>>  	btrfs_free_device(device);
>> +
>> +	return 0;
>> +
>> +err_close_device:
>> +	btrfs_close_bdev(device);
>> +	if (device->bdev) {
>> +		fs_devices->open_devices--;
>> +		btrfs_sysfs_rm_device_link(fs_devices, device);
>> +		device->bdev = NULL;
>> +	}
> 
> I don't understand this part: the 'device' pointer is from the argument,
> so the device we want to delete from the list and for that all the state
> bit tests, bdev close, list replace rcu and synchronize_rcu should
> happen -- in case we have a newly allocated new_device.
> 
> What I don't understand how the short version after label
> err_close_device: is correct. The device is still left in the list but
> with NULL bdev but rw_devices, missing_devices is untouched.
> 
> That a device closing needs to allocate memory for a new device instead
> of reinitializing it again is stupid but with the simplified device
> closing I'm not sure the state is well defined.

As we couldn't allocate memory to remove the device from the list, we
have to keep it in the list (technically even leaking some memory here).

What we definitely need to do is clear the ->bdev pointer, otherwise
we'll trip over a NULL-pointer in open_fs_devices().

open_fs_devices() will traverse the list and call
btrfs_open_one_device(); this will fail as device->bdev is (still) set,
thus latest_dev is NULL, and then 'fs_devices->latest_bdev =
latest_dev->bdev;' will blow up.

If you have a better solution I'm all ears. This is what I came up with
to tackle the problem of half initialized devices.

One thing we could do though is call btrfs_free_stale_devices() in the
error case.

Byte,
	Johannes
Anand Jain Nov. 14, 2019, 10:56 a.m. UTC | #3
On 14/11/19 4:48 PM, Johannes Thumshirn wrote:
> On 13/11/2019 15:58, David Sterba wrote:
>> On Wed, Nov 13, 2019 at 11:27:23AM +0100, Johannes Thumshirn wrote:
>>> In btrfs_close_one_device() we're allocating a new device and if this
>>> fails we BUG().
>>>
>>> Move the allocation to the top of the function and return an error in case
>>> it failed.
>>>
>>> The BUG_ON() is temporarily moved to close_fs_devices(), the caller of
>>> btrfs_close_one_device() as further work is pending to untangle this.
>>>
>>> Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>
>>> ---
>>>   fs/btrfs/volumes.c | 27 +++++++++++++++++++++------
>>>   1 file changed, 21 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
>>> index 5ee26e7fca32..0a2a73907563 100644
>>> --- a/fs/btrfs/volumes.c
>>> +++ b/fs/btrfs/volumes.c
>>> @@ -1061,12 +1061,17 @@ static void btrfs_close_bdev(struct btrfs_device *device)
>>>   	blkdev_put(device->bdev, device->mode);
>>>   }
>>>   
>>> -static void btrfs_close_one_device(struct btrfs_device *device)
>>> +static int btrfs_close_one_device(struct btrfs_device *device)
>>>   {
>>>   	struct btrfs_fs_devices *fs_devices = device->fs_devices;
>>>   	struct btrfs_device *new_device;
>>>   	struct rcu_string *name;
>>>   
>>> +	new_device = btrfs_alloc_device(NULL, &device->devid,
>>> +					device->uuid);
>>> +	if (IS_ERR(new_device))
>>> +		goto err_close_device;
>>> +
>>>   	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
>>>   	    device->devid != BTRFS_DEV_REPLACE_DEVID) {
>>>   		list_del_init(&device->dev_alloc_list);
>>> @@ -1080,10 +1085,6 @@ static void btrfs_close_one_device(struct btrfs_device *device)
>>>   	if (device->bdev)
>>>   		fs_devices->open_devices--;
>>>   
>>> -	new_device = btrfs_alloc_device(NULL, &device->devid,
>>> -					device->uuid);
>>> -	BUG_ON(IS_ERR(new_device)); /* -ENOMEM */
>>> -
>>>   	/* Safe because we are under uuid_mutex */
>>>   	if (device->name) {
>>>   		name = rcu_string_strdup(device->name->str, GFP_NOFS);
>>> @@ -1096,18 +1097,32 @@ static void btrfs_close_one_device(struct btrfs_device *device)
>>>   
>>>   	synchronize_rcu();
>>>   	btrfs_free_device(device);
>>> +
>>> +	return 0;
>>> +
>>> +err_close_device:
>>> +	btrfs_close_bdev(device);
>>> +	if (device->bdev) {
>>> +		fs_devices->open_devices--;
>>> +		btrfs_sysfs_rm_device_link(fs_devices, device);
>>> +		device->bdev = NULL;
>>> +	}
>>
>> I don't understand this part: the 'device' pointer is from the argument,
>> so the device we want to delete from the list and for that all the state
>> bit tests, bdev close, list replace rcu and synchronize_rcu should
>> happen -- in case we have a newly allocated new_device.
>>
>> What I don't understand how the short version after label
>> err_close_device: is correct. The device is still left in the list but
>> with NULL bdev but rw_devices, missing_devices is untouched.
>>
>> That a device closing needs to allocate memory for a new device instead
>> of reinitializing it again is stupid but with the simplified device
>> closing I'm not sure the state is well defined.
> 
> As we couldn't allocate memory to remove the device from the list, we
> have to keep it in the list (technically even leaking some memory here).
> 
> What we definitively need to do is clear the ->bdev pointer, otherwise
> we'll trip over a NULL-pointer in open_fs_devices().
> 
> open_fs_devices() will traverse the list and call
> btrfs_open_one_device() this will fail as device->bdev is (still) set
> thus latest_dev is NULL and then this 'fs_devices->latest_bdev =
> latest_dev->bdev;' will blow up.
> 
> If you have a better solution I'm all ears. This is what I came up with
> to tackle the problem of half initialized devices.
> 
> One thing we could do though is call btrfs_free_stale_devices() in the
> error case.
> 
> Byte,
> 	Johannes
> 

Johannes,

   Thanks for attempting to fix this.

   I wrote comments about this unoptimized code here [1]

   [1]
    ML email thread
     'invalid opcode in close_fs_devices'

 
https://groups.google.com/forum/#!msg/syzkaller-bugs/eSgcqygYaXE/6wuz-0jMCwAJ

   You may want to review.

   Yes, David is correct to ask why a closed device will still remain in
   the dev_alloc_list even after the close here in this patch.

Thanks, Anand
Johannes Thumshirn Nov. 14, 2019, 12:03 p.m. UTC | #4
On 14/11/2019 11:56, Anand Jain wrote:
> On 14/11/19 4:48 PM, Johannes Thumshirn wrote:
>> On 13/11/2019 15:58, David Sterba wrote:
>>> On Wed, Nov 13, 2019 at 11:27:23AM +0100, Johannes Thumshirn wrote:
>>>> In btrfs_close_one_device() we're allocating a new device and if this
>>>> fails we BUG().
>>>>
>>>> Move the allocation to the top of the function and return an error
>>>> in case
>>>> it failed.
>>>>
>>>> The BUG_ON() is temporarily moved to close_fs_devices(), the caller of
>>>> btrfs_close_one_device() as further work is pending to untangle this.
>>>>
>>>> Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>
>>>> ---
>>>>   fs/btrfs/volumes.c | 27 +++++++++++++++++++++------
>>>>   1 file changed, 21 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
>>>> index 5ee26e7fca32..0a2a73907563 100644
>>>> --- a/fs/btrfs/volumes.c
>>>> +++ b/fs/btrfs/volumes.c
>>>> @@ -1061,12 +1061,17 @@ static void btrfs_close_bdev(struct
>>>> btrfs_device *device)
>>>>       blkdev_put(device->bdev, device->mode);
>>>>   }
>>>>   -static void btrfs_close_one_device(struct btrfs_device *device)
>>>> +static int btrfs_close_one_device(struct btrfs_device *device)
>>>>   {
>>>>       struct btrfs_fs_devices *fs_devices = device->fs_devices;
>>>>       struct btrfs_device *new_device;
>>>>       struct rcu_string *name;
>>>>   +    new_device = btrfs_alloc_device(NULL, &device->devid,
>>>> +                    device->uuid);
>>>> +    if (IS_ERR(new_device))
>>>> +        goto err_close_device;
>>>> +
>>>>       if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
>>>>           device->devid != BTRFS_DEV_REPLACE_DEVID) {
>>>>           list_del_init(&device->dev_alloc_list);
>>>> @@ -1080,10 +1085,6 @@ static void btrfs_close_one_device(struct
>>>> btrfs_device *device)
>>>>       if (device->bdev)
>>>>           fs_devices->open_devices--;
>>>>   -    new_device = btrfs_alloc_device(NULL, &device->devid,
>>>> -                    device->uuid);
>>>> -    BUG_ON(IS_ERR(new_device)); /* -ENOMEM */
>>>> -
>>>>       /* Safe because we are under uuid_mutex */
>>>>       if (device->name) {
>>>>           name = rcu_string_strdup(device->name->str, GFP_NOFS);
>>>> @@ -1096,18 +1097,32 @@ static void btrfs_close_one_device(struct
>>>> btrfs_device *device)
>>>>         synchronize_rcu();
>>>>       btrfs_free_device(device);
>>>> +
>>>> +    return 0;
>>>> +
>>>> +err_close_device:
>>>> +    btrfs_close_bdev(device);
>>>> +    if (device->bdev) {
>>>> +        fs_devices->open_devices--;
>>>> +        btrfs_sysfs_rm_device_link(fs_devices, device);
>>>> +        device->bdev = NULL;
>>>> +    }
>>>
>>> I don't understand this part: the 'device' pointer is from the argument,
>>> so the device we want to delete from the list and for that all the state
>>> bit tests, bdev close, list replace rcu and synchronize_rcu should
>>> happen -- in case we have a newly allocated new_device.
>>>
>>> What I don't understand how the short version after label
>>> err_close_device: is correct. The device is still left in the list but
>>> with NULL bdev but rw_devices, missing_devices is untouched.
>>>
>>> That a device closing needs to allocate memory for a new device instead
>>> of reinitializing it again is stupid but with the simplified device
>>> closing I'm not sure the state is well defined.
>>
>> As we couldn't allocate memory to remove the device from the list, we
>> have to keep it in the list (technically even leaking some memory here).
>>
>> What we definitively need to do is clear the ->bdev pointer, otherwise
>> we'll trip over a NULL-pointer in open_fs_devices().
>>
>> open_fs_devices() will traverse the list and call
>> btrfs_open_one_device() this will fail as device->bdev is (still) set
>> thus latest_dev is NULL and then this 'fs_devices->latest_bdev =
>> latest_dev->bdev;' will blow up.
>>
>> If you have a better solution I'm all ears. This is what I came up with
>> to tackle the problem of half initialized devices.
>>
>> One thing we could do though is call btrfs_free_stale_devices() in the
>> error case.
>>
>> Byte,
>>     Johannes
>>
> 
> Johannes,
> 
>   Thanks for attempting to fix this.
> 
>   I wrote comments about this unoptimized code here [1]
> 
>   [1]
>    ML email thread
>     'invalid opcode in close_fs_devices'
> 
> 
> https://groups.google.com/forum/#!msg/syzkaller-bugs/eSgcqygYaXE/6wuz-0jMCwAJ
> 
> 
>   You may want to review.
> 
>   Yes David is correct why a closed device will still remain in the
>   dev_alloc_list even after the close here in this patch.

Yes, I know; this is why I did this dance. One thing I thought of is
having a temporary list of the devices to delete and then doing the
list_for_each_entry_safe() btrfs_close_one_device() loop on this list.

But this will only work if we really want to remove all devices.
Johannes Thumshirn Nov. 14, 2019, 1:02 p.m. UTC | #5
On 14/11/2019 11:56, Anand Jain wrote:
> Yes David is correct why a closed device will still remain in the
>   dev_alloc_list even after the close here in this patch.

OK, I revisited the code, and I think you're right. I've moved this
hunk quite a bit:

        if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
            device->devid != BTRFS_DEV_REPLACE_DEVID) {
                list_del_init(&device->dev_alloc_list);
                fs_devices->rw_devices--;
        }


My initial intention was to first have the allocations done so I don't
have to undo anything in case of a failure.

I'm back to the drawing board here.
diff mbox series

Patch

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5ee26e7fca32..0a2a73907563 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1061,12 +1061,17 @@  static void btrfs_close_bdev(struct btrfs_device *device)
 	blkdev_put(device->bdev, device->mode);
 }
 
-static void btrfs_close_one_device(struct btrfs_device *device)
+static int btrfs_close_one_device(struct btrfs_device *device)
 {
 	struct btrfs_fs_devices *fs_devices = device->fs_devices;
 	struct btrfs_device *new_device;
 	struct rcu_string *name;
 
+	new_device = btrfs_alloc_device(NULL, &device->devid,
+					device->uuid);
+	if (IS_ERR(new_device))
+		goto err_close_device;
+
 	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
 	    device->devid != BTRFS_DEV_REPLACE_DEVID) {
 		list_del_init(&device->dev_alloc_list);
@@ -1080,10 +1085,6 @@  static void btrfs_close_one_device(struct btrfs_device *device)
 	if (device->bdev)
 		fs_devices->open_devices--;
 
-	new_device = btrfs_alloc_device(NULL, &device->devid,
-					device->uuid);
-	BUG_ON(IS_ERR(new_device)); /* -ENOMEM */
-
 	/* Safe because we are under uuid_mutex */
 	if (device->name) {
 		name = rcu_string_strdup(device->name->str, GFP_NOFS);
@@ -1096,18 +1097,32 @@  static void btrfs_close_one_device(struct btrfs_device *device)
 
 	synchronize_rcu();
 	btrfs_free_device(device);
+
+	return 0;
+
+err_close_device:
+	btrfs_close_bdev(device);
+	if (device->bdev) {
+		fs_devices->open_devices--;
+		btrfs_sysfs_rm_device_link(fs_devices, device);
+		device->bdev = NULL;
+	}
+
+	return -ENOMEM;
 }
 
 static int close_fs_devices(struct btrfs_fs_devices *fs_devices)
 {
 	struct btrfs_device *device, *tmp;
+	int ret;
 
 	if (--fs_devices->opened > 0)
 		return 0;
 
 	mutex_lock(&fs_devices->device_list_mutex);
 	list_for_each_entry_safe(device, tmp, &fs_devices->devices, dev_list) {
-		btrfs_close_one_device(device);
+		ret = btrfs_close_one_device(device);
+		BUG_ON(ret); /* -ENOMEM */
 	}
 	mutex_unlock(&fs_devices->device_list_mutex);