diff mbox series

brd: defer automatic disk creation until module initialization succeeds

Message ID 20241028090726.2958921-1-yangerkun@huaweicloud.com (mailing list archive)
State New
Headers show
Series brd: defer automatic disk creation until module initialization succeeds | expand

Commit Message

yangerkun Oct. 28, 2024, 9:07 a.m. UTC
From: Yang Erkun <yangerkun@huawei.com>

My colleague Wupeng found the following problems during fault injection:

BUG: unable to handle page fault for address: fffffbfff809d073
PGD 6e648067 P4D 123ec8067 PUD 123ec4067 PMD 100e38067 PTE 0
Oops: Oops: 0000 [#1] PREEMPT SMP KASAN NOPTI
CPU: 5 UID: 0 PID: 755 Comm: modprobe Not tainted 6.12.0-rc3+ #17
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
1.16.1-2.fc37 04/01/2014
RIP: 0010:__asan_load8+0x4c/0xa0
...
Call Trace:
 <TASK>
 blkdev_put_whole+0x41/0x70
 bdev_release+0x1a3/0x250
 blkdev_release+0x11/0x20
 __fput+0x1d7/0x4a0
 task_work_run+0xfc/0x180
 syscall_exit_to_user_mode+0x1de/0x1f0
 do_syscall_64+0x6b/0x170
 entry_SYSCALL_64_after_hwframe+0x76/0x7e

loop_init() is calling loop_add() after __register_blkdev() succeeds and
is ignoring disk_add() failure from loop_add(), for loop_add() failure
is not fatal and successfully created disks are already visible to
bdev_open().

brd_init() is currently calling brd_alloc() before __register_blkdev()
succeeds and is releasing successfully created disks when brd_init()
returns an error. This can cause UAF for the latter two case:

case 1:
    T1:
modprobe brd
  brd_init
    brd_alloc(0) // success
      add_disk
        disk_scan_partitions
          bdev_file_open_by_dev // alloc file
          fput // won't free until back to userspace
    brd_alloc(1) // failed since mem alloc error inject
  // error path for modprobe will release code segment
  // back to userspace
  __fput
    blkdev_release
      bdev_release
        blkdev_put_whole
          bdev->bd_disk->fops->release // fops is freed now, UAF!

case 2:
    T1:                            T2:
modprobe brd
  brd_init
    brd_alloc(0) // success
                                   open(/dev/ram0)
    brd_alloc(1) // fail
  // error path for modprobe

                                   close(/dev/ram0)
                                   ...
                                   /* UAF! */
                                   bdev->bd_disk->fops->release

Fix this problem by following what loop_init() does. Besides,
reintroduce brd_devices_mutex to help serialize modifications to
brd_list.

Fixes: 7f9b348cb5e9 ("brd: convert to blk_alloc_disk/blk_cleanup_disk")
Reported-by: Wupeng Ma <mawupeng1@huawei.com>
Signed-off-by: Yang Erkun <yangerkun@huawei.com>
---
 drivers/block/brd.c | 57 +++++++++++++++++++++++++--------------------
 1 file changed, 32 insertions(+), 25 deletions(-)

Comments

Christoph Hellwig Oct. 28, 2024, 9:44 a.m. UTC | #1
On Mon, Oct 28, 2024 at 05:07:26PM +0800, Yang Erkun wrote:
> Fix this problem by following what loop_init() does. Besides,
> reintroduce brd_devices_mutex to help serialize modifications to
> brd_list.

This looks generally good.  Minor comments below:

> +	snprintf(buf, DISK_NAME_LEN, "ram%d", i);
> +	mutex_lock(&brd_devices_mutex);
> +	list_for_each_entry(brd, &brd_devices, brd_list) {
> +		if (brd->brd_number == i) {
> +			mutex_unlock(&brd_devices_mutex);
> +			err = -EEXIST;
> +			goto out;

This now prints an error message for an already existing
device, which should not happen for the module_init case,
but will happen all the time for the probe callback, and
is really annoying.  Please drop that part of the change.

> +		}
> +	}
>  	brd = kzalloc(sizeof(*brd), GFP_KERNEL);
> +	if (!brd) {
> +		mutex_unlock(&brd_devices_mutex);
> +		err = -ENOMEM;
> +		goto out;
> +	}
>  	brd->brd_number		= i;
>  	list_add_tail(&brd->brd_list, &brd_devices);
> +	mutex_unlock(&brd_devices_mutex);

And maybe just split the whole locked section into a
brd_find_or_alloc_device helper to make verifying the locking easier?

> +	mutex_lock(&brd_devices_mutex);
>  	list_del(&brd->brd_list);
> +	mutex_unlock(&brd_devices_mutex);
>  	kfree(brd);

> +		mutex_lock(&brd_devices_mutex);
>  		list_del(&brd->brd_list);
> +		mutex_unlock(&brd_devices_mutex);
>  		kfree(brd);

Maybe a brd_free_device helper for this pattern would be nice as well.

> +	brd_check_and_reset_par();
> +	brd_debugfs_dir = debugfs_create_dir("ramdisk_pages", NULL);
>  
>  	if (__register_blkdev(RAMDISK_MAJOR, "ramdisk", brd_probe)) {
> -		err = -EIO;
> -		goto out_free;
> +		pr_info("brd: module NOT loaded !!!\n");
> +		debugfs_remove_recursive(brd_debugfs_dir);
> +		return -EIO;

I'd keep an error handling goto for this to keep the main path
straight.
yangerkun Oct. 28, 2024, 11:14 a.m. UTC | #2
在 2024/10/28 17:44, Christoph Hellwig 写道:
> On Mon, Oct 28, 2024 at 05:07:26PM +0800, Yang Erkun wrote:
>> Fix this problem by following what loop_init() does. Besides,
>> reintroduce brd_devices_mutex to help serialize modifications to
>> brd_list.
> 
> This looks generally good.  Minor comments below:

Hi,

Thanks for your review!

> 
>> +	snprintf(buf, DISK_NAME_LEN, "ram%d", i);
>> +	mutex_lock(&brd_devices_mutex);
>> +	list_for_each_entry(brd, &brd_devices, brd_list) {
>> +		if (brd->brd_number == i) {
>> +			mutex_unlock(&brd_devices_mutex);
>> +			err = -EEXIST;
>> +			goto out;
> 
> This now prints an error message for an already existing
> device, which should not happen for the module_init case,
> but will happen all the time for the probe callback, and
> is really annoying.  Please drop that part of the change.

OK, maybe print nothing is better like loop_add in loop_init has did to
leave code clean? mknod can help to create /dev/ram%d... Hi, Kuai, what
do you think?

> 
>> +		}
>> +	}
>>   	brd = kzalloc(sizeof(*brd), GFP_KERNEL);
>> +	if (!brd) {
>> +		mutex_unlock(&brd_devices_mutex);
>> +		err = -ENOMEM;
>> +		goto out;
>> +	}
>>   	brd->brd_number		= i;
>>   	list_add_tail(&brd->brd_list, &brd_devices);
>> +	mutex_unlock(&brd_devices_mutex);
> 
> And maybe just split the whole locked section into a
> brd_find_or_alloc_device helper to make verifying the locking easier?

Ok.

> 
>> +	mutex_lock(&brd_devices_mutex);
>>   	list_del(&brd->brd_list);
>> +	mutex_unlock(&brd_devices_mutex);
>>   	kfree(brd);
> 
>> +		mutex_lock(&brd_devices_mutex);
>>   		list_del(&brd->brd_list);
>> +		mutex_unlock(&brd_devices_mutex);
>>   		kfree(brd);
> 
> Maybe a brd_free_device helper for this pattern would be nice as well.

Ok.

> 
>> +	brd_check_and_reset_par();
>> +	brd_debugfs_dir = debugfs_create_dir("ramdisk_pages", NULL);
>>   
>>   	if (__register_blkdev(RAMDISK_MAJOR, "ramdisk", brd_probe)) {
>> -		err = -EIO;
>> -		goto out_free;
>> +		pr_info("brd: module NOT loaded !!!\n");
>> +		debugfs_remove_recursive(brd_debugfs_dir);
>> +		return -EIO;
> 
> I'd keep an error handling goto for this to keep the main path
> straight.

Ok.
Yu Kuai Oct. 28, 2024, 11:29 a.m. UTC | #3
Hi,

在 2024/10/28 17:44, Christoph Hellwig 写道:
> On Mon, Oct 28, 2024 at 05:07:26PM +0800, Yang Erkun wrote:
>> Fix this problem by following what loop_init() does. Besides,
>> reintroduce brd_devices_mutex to help serialize modifications to
>> brd_list.
> 
> This looks generally good.  Minor comments below:
> 
>> +	snprintf(buf, DISK_NAME_LEN, "ram%d", i);
>> +	mutex_lock(&brd_devices_mutex);
>> +	list_for_each_entry(brd, &brd_devices, brd_list) {
>> +		if (brd->brd_number == i) {
>> +			mutex_unlock(&brd_devices_mutex);
>> +			err = -EEXIST;
>> +			goto out;
> 
> This now prints an error message for an already existing
> device, which should not happen for the module_init case,
> but will happen all the time for the probe callback, and
> is really annoying.  Please drop that part of the change.

I don't quite understand this, if the gendisk already exists,
the probe callback won't be called from the open path, because
ilookup() from blkdev_get_no_open() will found the bdev inode.
Hence there will only be a small race windown for concurrent
create on open callers to return -EEXIST here.

Thanks,
Kuai
Christoph Hellwig Oct. 28, 2024, 4:13 p.m. UTC | #4
On Mon, Oct 28, 2024 at 07:29:54PM +0800, Yu Kuai wrote:
> I don't quite understand this, if the gendisk already exists,
> the probe callback won't be called from the open path, because
> ilookup() from blkdev_get_no_open() will found the bdev inode.
> Hence there will only be a small race windown for concurrent
> create on open callers to return -EEXIST here.

True.  I'd still avoid the noice printk for that corner case, though.
Yu Kuai Oct. 29, 2024, 3:12 a.m. UTC | #5
Hi,

在 2024/10/29 0:13, Christoph Hellwig 写道:
> On Mon, Oct 28, 2024 at 07:29:54PM +0800, Yu Kuai wrote:
>> I don't quite understand this, if the gendisk already exists,
>> the probe callback won't be called from the open path, because
>> ilookup() from blkdev_get_no_open() will found the bdev inode.
>> Hence there will only be a small race windown for concurrent
>> create on open callers to return -EEXIST here.
> 
> True.  I'd still avoid the noice printk for that corner case, though.
> 

ok, then I'm fine with remove this printk or move it to brd_init(), I
don't have strong preference.

Thanks,
Kuai

> .
>
diff mbox series

Patch

diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 2fd1ed101748..a8615256fc57 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -316,6 +316,7 @@  __setup("ramdisk_size=", ramdisk_size);
  * (should share code eventually).
  */
 static LIST_HEAD(brd_devices);
+static DEFINE_MUTEX(brd_devices_mutex);
 static struct dentry *brd_debugfs_dir;
 
 static int brd_alloc(int i)
@@ -340,18 +341,27 @@  static int brd_alloc(int i)
 					  BLK_FEAT_NOWAIT,
 	};
 
-	list_for_each_entry(brd, &brd_devices, brd_list)
-		if (brd->brd_number == i)
-			return -EEXIST;
+	snprintf(buf, DISK_NAME_LEN, "ram%d", i);
+	mutex_lock(&brd_devices_mutex);
+	list_for_each_entry(brd, &brd_devices, brd_list) {
+		if (brd->brd_number == i) {
+			mutex_unlock(&brd_devices_mutex);
+			err = -EEXIST;
+			goto out;
+		}
+	}
 	brd = kzalloc(sizeof(*brd), GFP_KERNEL);
-	if (!brd)
-		return -ENOMEM;
+	if (!brd) {
+		mutex_unlock(&brd_devices_mutex);
+		err = -ENOMEM;
+		goto out;
+	}
 	brd->brd_number		= i;
 	list_add_tail(&brd->brd_list, &brd_devices);
+	mutex_unlock(&brd_devices_mutex);
 
 	xa_init(&brd->brd_pages);
 
-	snprintf(buf, DISK_NAME_LEN, "ram%d", i);
 	if (!IS_ERR_OR_NULL(brd_debugfs_dir))
 		debugfs_create_u64(buf, 0444, brd_debugfs_dir,
 				&brd->brd_nr_pages);
@@ -378,8 +388,13 @@  static int brd_alloc(int i)
 out_cleanup_disk:
 	put_disk(disk);
 out_free_dev:
+	mutex_lock(&brd_devices_mutex);
 	list_del(&brd->brd_list);
+	mutex_unlock(&brd_devices_mutex);
 	kfree(brd);
+out:
+	pr_info("brd: %s for %s failed with %d.\n",
+		__func__, buf, err);
 	return err;
 }
 
@@ -398,7 +413,9 @@  static void brd_cleanup(void)
 		del_gendisk(brd->brd_disk);
 		put_disk(brd->brd_disk);
 		brd_free_pages(brd);
+		mutex_lock(&brd_devices_mutex);
 		list_del(&brd->brd_list);
+		mutex_unlock(&brd_devices_mutex);
 		kfree(brd);
 	}
 }
@@ -424,17 +441,7 @@  static inline void brd_check_and_reset_par(void)
 
 static int __init brd_init(void)
 {
-	int err, i;
-
-	brd_check_and_reset_par();
-
-	brd_debugfs_dir = debugfs_create_dir("ramdisk_pages", NULL);
-
-	for (i = 0; i < rd_nr; i++) {
-		err = brd_alloc(i);
-		if (err)
-			goto out_free;
-	}
+	int i;
 
 	/*
 	 * brd module now has a feature to instantiate underlying device
@@ -450,20 +457,20 @@  static int __init brd_init(void)
 	 *	If (X / max_part) was not already created it will be created
 	 *	dynamically.
 	 */
+	brd_check_and_reset_par();
+	brd_debugfs_dir = debugfs_create_dir("ramdisk_pages", NULL);
 
 	if (__register_blkdev(RAMDISK_MAJOR, "ramdisk", brd_probe)) {
-		err = -EIO;
-		goto out_free;
+		pr_info("brd: module NOT loaded !!!\n");
+		debugfs_remove_recursive(brd_debugfs_dir);
+		return -EIO;
 	}
 
+	for (i = 0; i < rd_nr; i++)
+		brd_alloc(i);
+
 	pr_info("brd: module loaded\n");
 	return 0;
-
-out_free:
-	brd_cleanup();
-
-	pr_info("brd: module NOT loaded !!!\n");
-	return err;
 }
 
 static void __exit brd_exit(void)