diff mbox series

[v2] Input: uinput: Avoid Object-Already-Free with a global lock

Message ID 1554883176-24318-1-git-send-email-mojha@codeaurora.org (mailing list archive)
State New, archived
Headers show
Series [v2] Input: uinput: Avoid Object-Already-Free with a global lock | expand

Commit Message

Mukesh Ojha April 10, 2019, 7:59 a.m. UTC
uinput_destroy_device() gets called from two places. In one place,
uinput_ioctl_handler() where it is protected under a lock
udev->mutex but there is no protection on udev device from freeing
inside uinput_release().

This can result in Object-Already-Free case where uinput parent
device already got freed while a child being inserted inside it.
That result in a double free case for parent while kernfs_put()
being done for child in a failure path of adding a node.

[  160.093398] Call trace:
[  160.093417]  kernfs_get+0x64/0x88
[  160.093438]  kernfs_new_node+0x94/0xc8
[  160.093450]  kernfs_create_dir_ns+0x44/0xfc
[  160.093463]  sysfs_create_dir_ns+0xa8/0x130
[  160.093479]  kobject_add_internal+0x278/0x650
[  160.093491]  kobject_add_varg+0xe0/0x130
[  160.093502]  kobject_add+0x15c/0x1d0
[  160.093518]  device_add+0x2bc/0xde0
[  160.093533]  input_register_device+0x5f4/0xa0c
[  160.093547]  uinput_ioctl_handler+0x1184/0x2198
[  160.093560]  uinput_ioctl+0x38/0x48
[  160.093573]  vfs_ioctl+0x7c/0xb4
[  160.093585]  do_vfs_ioctl+0x9ec/0x2350
[  160.093597]  SyS_ioctl+0x6c/0xa4
[  160.093610]  el0_svc_naked+0x34/0x38
[  160.093621] ---[ end trace bccf0093cda2c538 ]---
[  160.099041] =============================================================================
[  160.107459] BUG kernfs_node_cache (Tainted: G S      W  O   ): Object already free
[  160.115235] -----------------------------------------------------------------------------
[  160.115235]
[  160.125151] Disabling lock debugging due to kernel taint
[  160.130626] INFO: Allocated in __kernfs_new_node+0x8c/0x3c0 age=11 cpu=2 pid=7098
[  160.138314] 	kmem_cache_alloc+0x358/0x388
[  160.142445] 	__kernfs_new_node+0x8c/0x3c0
[  160.146590] 	kernfs_new_node+0x80/0xc8
[  160.150462] 	kernfs_create_dir_ns+0x44/0xfc
[  160.154777] 	sysfs_create_dir_ns+0xa8/0x130
[  160.158416] CPU5: update max cpu_capacity 1024
[  160.159085] 	kobject_add_internal+0x278/0x650
[  160.163567] 	kobject_add_varg+0xe0/0x130
[  160.167606] 	kobject_add+0x15c/0x1d0
[  160.168452] CPU5: update max cpu_capacity 780
[  160.171287] 	get_device_parent+0x2d0/0x34c
[  160.175510] 	device_add+0x240/0xde0
[  160.178371] CPU6: update max cpu_capacity 916
[  160.179108] 	input_register_device+0x5f4/0xa0c
[  160.183686] 	uinput_ioctl_handler+0x1184/0x2198
[  160.188346] 	uinput_ioctl+0x38/0x48
[  160.191941] 	vfs_ioctl+0x7c/0xb4
[  160.195261] 	do_vfs_ioctl+0x9ec/0x2350
[  160.199111] 	SyS_ioctl+0x6c/0xa4
[  160.202436] INFO: Freed in kernfs_put+0x2c8/0x434 age=14 cpu=0 pid=7096
[  160.209230] 	kernfs_put+0x2c8/0x434
[  160.212825] 	kobject_del+0x50/0xcc
[  160.216332] 	cleanup_glue_dir+0x124/0x16c
[  160.220456] 	device_del+0x55c/0x5c8
[  160.224047] 	__input_unregister_device+0x274/0x2a8
[  160.228974] 	input_unregister_device+0x90/0xd0
[  160.233553] 	uinput_destroy_device+0x15c/0x1dc
[  160.238131] 	uinput_release+0x44/0x5c
[  160.241898] 	__fput+0x1f4/0x4e4
[  160.245127] 	____fput+0x20/0x2c
[  160.248358] 	task_work_run+0x9c/0x174
[  160.252127] 	do_notify_resume+0x104/0x6bc
[  160.256253] 	work_pending+0x8/0x14
[  160.259751] INFO: Slab 0xffffffbf0215ff00 objects=33 used=11 fp=0xffffffc0857ffd08 flags=0x8101
[  160.268693] INFO: Object 0xffffffc0857ffd08 @offset=15624 fp=0xffffffc0857fefb0
[  160.268693]
[  160.277721] Redzone ffffffc0857ffd00: bb bb bb bb bb bb bb bb                          ........
[  160.286656] Object ffffffc0857ffd08: 00 00 00 00 01 00 00 80 58 a2 37 45 c1 ff ff ff  ........X.7E....
[  160.296207] Object ffffffc0857ffd18: ae 21 10 0b 90 ff ff ff 20 fd 7f 85 c0 ff ff ff  .!...... .......
[  160.305780] Object ffffffc0857ffd28: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
[  160.315342] Object ffffffc0857ffd38: 00 00 00 00 00 00 00 00 7d a3 25 69 00 00 00 00  ........}.%i....
[  160.324896] Object ffffffc0857ffd48: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
[  160.334446] Object ffffffc0857ffd58: 80 c0 28 47 c1 ff ff ff 00 00 00 00 00 00 00 00  ..(G............
[  160.344000] Object ffffffc0857ffd68: 80 4a ce d1 c0 ff ff ff dc 32 01 00 01 00 00 00  .J.......2......
[  160.353554] Object ffffffc0857ffd78: 11 00 ed 41 00 00 00 00 00 00 00 00 00 00 00 00  ...A............
[  160.363099] Redzone ffffffc0857ffd88: bb bb bb bb bb bb bb bb                          ........
[  160.372032] Padding ffffffc0857ffee0: 5a 5a 5a 5a 5a 5a 5a 5a                          ZZZZZZZZ
[  160.378299] CPU6: update max cpu_capacity 780
[  160.380971] CPU: 4 PID: 7098 Comm: syz-executor Tainted: G S  B   W  O    4.14.98+ #1

So, avoid the race by taking a global lock inside uinput_release().

Signed-off-by: Mukesh Ojha <mojha@codeaurora.org>
Cc: Gaurav Kohli <gkohli@codeaurora.org>
Cc: Peter Hutterer <peter.hutterer@who-t.net>
Cc: Martin Kepplinger <martink@posteo.de>
Cc: "Paul E. McKenney" <paulmck@linux.ibm.com>
---
Also, if this looks good we can further use this global lock inside
read/write in separate patches and release can happen at any time.

Changes from v1->v2:
 - Mistakenly added udev lock replaced by global lock.




 drivers/input/misc/uinput.c | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

Comments

Mukesh Ojha April 15, 2019, 10:05 a.m. UTC | #1
Hi Dmitry,

Can you please have a look at this patch ? as this seems to reproducing 
quite frequently

Thanks,
Mukesh

On 4/10/2019 1:29 PM, Mukesh Ojha wrote:
> uinput_destroy_device() gets called from two places. In one place,
> uinput_ioctl_handler() where it is protected under a lock
> udev->mutex but there is no protection on udev device from freeing
> inside uinput_release().
>
> This can result in Object-Already-Free case where uinput parent
> device already got freed while a child being inserted inside it.
> That result in a double free case for parent while kernfs_put()
> being done for child in a failure path of adding a node.
>
> [  160.093398] Call trace:
> [  160.093417]  kernfs_get+0x64/0x88
> [  160.093438]  kernfs_new_node+0x94/0xc8
> [  160.093450]  kernfs_create_dir_ns+0x44/0xfc
> [  160.093463]  sysfs_create_dir_ns+0xa8/0x130
> [  160.093479]  kobject_add_internal+0x278/0x650
> [  160.093491]  kobject_add_varg+0xe0/0x130
> [  160.093502]  kobject_add+0x15c/0x1d0
> [  160.093518]  device_add+0x2bc/0xde0
> [  160.093533]  input_register_device+0x5f4/0xa0c
> [  160.093547]  uinput_ioctl_handler+0x1184/0x2198
> [  160.093560]  uinput_ioctl+0x38/0x48
> [  160.093573]  vfs_ioctl+0x7c/0xb4
> [  160.093585]  do_vfs_ioctl+0x9ec/0x2350
> [  160.093597]  SyS_ioctl+0x6c/0xa4
> [  160.093610]  el0_svc_naked+0x34/0x38
> [  160.093621] ---[ end trace bccf0093cda2c538 ]---
> [  160.099041] =============================================================================
> [  160.107459] BUG kernfs_node_cache (Tainted: G S      W  O   ): Object already free
> [  160.115235] -----------------------------------------------------------------------------
> [  160.115235]
> [  160.125151] Disabling lock debugging due to kernel taint
> [  160.130626] INFO: Allocated in __kernfs_new_node+0x8c/0x3c0 age=11 cpu=2 pid=7098
> [  160.138314] 	kmem_cache_alloc+0x358/0x388
> [  160.142445] 	__kernfs_new_node+0x8c/0x3c0
> [  160.146590] 	kernfs_new_node+0x80/0xc8
> [  160.150462] 	kernfs_create_dir_ns+0x44/0xfc
> [  160.154777] 	sysfs_create_dir_ns+0xa8/0x130
> [  160.158416] CPU5: update max cpu_capacity 1024
> [  160.159085] 	kobject_add_internal+0x278/0x650
> [  160.163567] 	kobject_add_varg+0xe0/0x130
> [  160.167606] 	kobject_add+0x15c/0x1d0
> [  160.168452] CPU5: update max cpu_capacity 780
> [  160.171287] 	get_device_parent+0x2d0/0x34c
> [  160.175510] 	device_add+0x240/0xde0
> [  160.178371] CPU6: update max cpu_capacity 916
> [  160.179108] 	input_register_device+0x5f4/0xa0c
> [  160.183686] 	uinput_ioctl_handler+0x1184/0x2198
> [  160.188346] 	uinput_ioctl+0x38/0x48
> [  160.191941] 	vfs_ioctl+0x7c/0xb4
> [  160.195261] 	do_vfs_ioctl+0x9ec/0x2350
> [  160.199111] 	SyS_ioctl+0x6c/0xa4
> [  160.202436] INFO: Freed in kernfs_put+0x2c8/0x434 age=14 cpu=0 pid=7096
> [  160.209230] 	kernfs_put+0x2c8/0x434
> [  160.212825] 	kobject_del+0x50/0xcc
> [  160.216332] 	cleanup_glue_dir+0x124/0x16c
> [  160.220456] 	device_del+0x55c/0x5c8
> [  160.224047] 	__input_unregister_device+0x274/0x2a8
> [  160.228974] 	input_unregister_device+0x90/0xd0
> [  160.233553] 	uinput_destroy_device+0x15c/0x1dc
> [  160.238131] 	uinput_release+0x44/0x5c
> [  160.241898] 	__fput+0x1f4/0x4e4
> [  160.245127] 	____fput+0x20/0x2c
> [  160.248358] 	task_work_run+0x9c/0x174
> [  160.252127] 	do_notify_resume+0x104/0x6bc
> [  160.256253] 	work_pending+0x8/0x14
> [  160.259751] INFO: Slab 0xffffffbf0215ff00 objects=33 used=11 fp=0xffffffc0857ffd08 flags=0x8101
> [  160.268693] INFO: Object 0xffffffc0857ffd08 @offset=15624 fp=0xffffffc0857fefb0
> [  160.268693]
> [  160.277721] Redzone ffffffc0857ffd00: bb bb bb bb bb bb bb bb                          ........
> [  160.286656] Object ffffffc0857ffd08: 00 00 00 00 01 00 00 80 58 a2 37 45 c1 ff ff ff  ........X.7E....
> [  160.296207] Object ffffffc0857ffd18: ae 21 10 0b 90 ff ff ff 20 fd 7f 85 c0 ff ff ff  .!...... .......
> [  160.305780] Object ffffffc0857ffd28: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
> [  160.315342] Object ffffffc0857ffd38: 00 00 00 00 00 00 00 00 7d a3 25 69 00 00 00 00  ........}.%i....
> [  160.324896] Object ffffffc0857ffd48: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
> [  160.334446] Object ffffffc0857ffd58: 80 c0 28 47 c1 ff ff ff 00 00 00 00 00 00 00 00  ..(G............
> [  160.344000] Object ffffffc0857ffd68: 80 4a ce d1 c0 ff ff ff dc 32 01 00 01 00 00 00  .J.......2......
> [  160.353554] Object ffffffc0857ffd78: 11 00 ed 41 00 00 00 00 00 00 00 00 00 00 00 00  ...A............
> [  160.363099] Redzone ffffffc0857ffd88: bb bb bb bb bb bb bb bb                          ........
> [  160.372032] Padding ffffffc0857ffee0: 5a 5a 5a 5a 5a 5a 5a 5a                          ZZZZZZZZ
> [  160.378299] CPU6: update max cpu_capacity 780
> [  160.380971] CPU: 4 PID: 7098 Comm: syz-executor Tainted: G S  B   W  O    4.14.98+ #1
>
> So, avoid the race by taking a global lock inside uinput_release().
>
> Signed-off-by: Mukesh Ojha <mojha@codeaurora.org>
> Cc: Gaurav Kohli <gkohli@codeaurora.org>
> Cc: Peter Hutterer <peter.hutterer@who-t.net>
> Cc: Martin Kepplinger <martink@posteo.de>
> Cc: "Paul E. McKenney" <paulmck@linux.ibm.com>
> ---
> Also, if this looks good we can further use this global lock inside
> read/write in separate patches and release can happen at any time.
>
> Changes from v1->v2:
>   - Mistakenly added udev lock replaced by global lock.
>
>
>
>
>   drivers/input/misc/uinput.c | 28 ++++++++++++++++++++++++++--
>   1 file changed, 26 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c
> index 26ec603f..2e7e096 100644
> --- a/drivers/input/misc/uinput.c
> +++ b/drivers/input/misc/uinput.c
> @@ -81,6 +81,8 @@ struct uinput_device {
>   	spinlock_t		requests_lock;
>   };
>   
> +static DEFINE_MUTEX(uinput_glb_mutex);
> +
>   static int uinput_dev_event(struct input_dev *dev,
>   			    unsigned int type, unsigned int code, int value)
>   {
> @@ -714,10 +716,18 @@ static __poll_t uinput_poll(struct file *file, poll_table *wait)
>   static int uinput_release(struct inode *inode, struct file *file)
>   {
>   	struct uinput_device *udev = file->private_data;
> +	ssize_t retval;
> +
> +	retval = mutex_lock_interruptible(&uinput_glb_mutex);
> +	if (retval)
> +		return retval;
>   
>   	uinput_destroy_device(udev);
> +	file->private_data = NULL;
>   	kfree(udev);
>   
> +	mutex_unlock(&uinput_glb_mutex);
> +
>   	return 0;
>   }
>   
> @@ -848,7 +858,7 @@ static long uinput_ioctl_handler(struct file *file, unsigned int cmd,
>   				 unsigned long arg, void __user *p)
>   {
>   	int			retval;
> -	struct uinput_device	*udev = file->private_data;
> +	struct uinput_device	*udev;
>   	struct uinput_ff_upload ff_up;
>   	struct uinput_ff_erase  ff_erase;
>   	struct uinput_request   *req;
> @@ -856,10 +866,20 @@ static long uinput_ioctl_handler(struct file *file, unsigned int cmd,
>   	const char		*name;
>   	unsigned int		size;
>   
> -	retval = mutex_lock_interruptible(&udev->mutex);
> +	retval = mutex_lock_interruptible(&uinput_glb_mutex);
>   	if (retval)
>   		return retval;
>   
> +	udev = file->private_data;
> +	if (!udev) {
> +		retval = -EINVAL;
> +		goto unlock_glb_mutex;
> +	}
> +
> +	retval = mutex_lock_interruptible(&udev->mutex);
> +	if (retval)
> +		goto unlock_glb_mutex;
> +
>   	if (!udev->dev) {
>   		udev->dev = input_allocate_device();
>   		if (!udev->dev) {
> @@ -1039,8 +1059,12 @@ static long uinput_ioctl_handler(struct file *file, unsigned int cmd,
>   	}
>   
>   	retval = -EINVAL;
> +
>    out:
>   	mutex_unlock(&udev->mutex);
> +
> + unlock_glb_mutex:
> +	mutex_unlock(&uinput_glb_mutex);
>   	return retval;
>   }
>
Dmitry Torokhov April 18, 2019, 1:43 a.m. UTC | #2
Hi Mukesh,

On Mon, Apr 15, 2019 at 03:35:51PM +0530, Mukesh Ojha wrote:
> 
> Hi Dmitry,
> 
> Can you please have a look at this patch ? as this seems to reproducing
> quite frequently
> 
> Thanks,
> Mukesh
> 
> On 4/10/2019 1:29 PM, Mukesh Ojha wrote:
> > uinput_destroy_device() gets called from two places. In one place,
> > uinput_ioctl_handler() where it is protected under a lock
> > udev->mutex but there is no protection on udev device from freeing
> > inside uinput_release().

uinput_release() should be called when last file handle to the uinput
instance is being dropped, so there should be no other users and thus we
can't be racing with anyone.

> > 
> > This can result in Object-Already-Free case where uinput parent
> > device already got freed while a child being inserted inside it.
> > That result in a double free case for parent while kernfs_put()
> > being done for child in a failure path of adding a node.

Can you please describe scenario in more detail? How do you free the
parent device while child input device is being registered?

Thanks.

- 
Dmitry
Dmitry Torokhov April 19, 2019, 7:11 a.m. UTC | #3
Hi Mukesh,

On Fri, Apr 19, 2019 at 12:17:44PM +0530, Mukesh Ojha wrote:
> For some reason my last mail did not get delivered,  sending it again.
> 
> 
> On 4/18/2019 11:55 AM, Mukesh Ojha wrote:
> > 
> > 
> > On 4/18/2019 7:13 AM, dmitry.torokhov@gmail.com wrote:
> > > Hi Mukesh,
> > > 
> > > On Mon, Apr 15, 2019 at 03:35:51PM +0530, Mukesh Ojha wrote:
> > > > Hi Dmitry,
> > > > 
> > > > Can you please have a look at this patch ? as this seems to reproducing
> > > > quite frequently
> > > > 
> > > > Thanks,
> > > > Mukesh
> > > > 
> > > > On 4/10/2019 1:29 PM, Mukesh Ojha wrote:
> > > > > uinput_destroy_device() gets called from two places. In one place,
> > > > > uinput_ioctl_handler() where it is protected under a lock
> > > > > udev->mutex but there is no protection on udev device from freeing
> > > > > inside uinput_release().
> > > uinput_release() should be called when last file handle to the uinput
> > > instance is being dropped, so there should be no other users and thus we
> > > can't be racing with anyone.
> > 
> > Lets say an example where i am creating input device quite frequently
> > 
> > [   97.836603] input: syz0 as /devices/virtual/input/input262
> > [   97.845589] input: syz0 as /devices/virtual/input/input261
> > [   97.849415] input: syz0 as /devices/virtual/input/input263
> > [   97.856479] input: syz0 as /devices/virtual/input/input264
> > [   97.936128] input: syz0 as /devices/virtual/input/input265
> > 
> > e.g input265
> > 
> > while input265 gets created [1] and handlers are getting registered on
> > that device*fput* gets called on
> > that device as user space got to know that input265 is created and its
> > reference is still 1(rare but possible).

Are you saying that there are 2 threads sharing the same file
descriptor, one issuing the registration ioctl while the other closing
the same fd?

Thanks.
Mukesh Ojha April 19, 2019, 8:43 a.m. UTC | #4
On 4/19/2019 12:41 PM, dmitry.torokhov@gmail.com wrote:
> Hi Mukesh,
>
> On Fri, Apr 19, 2019 at 12:17:44PM +0530, Mukesh Ojha wrote:
>> For some reason my last mail did not get delivered,  sending it again.
>>
>>
>> On 4/18/2019 11:55 AM, Mukesh Ojha wrote:
>>>
>>> On 4/18/2019 7:13 AM, dmitry.torokhov@gmail.com wrote:
>>>> Hi Mukesh,
>>>>
>>>> On Mon, Apr 15, 2019 at 03:35:51PM +0530, Mukesh Ojha wrote:
>>>>> Hi Dmitry,
>>>>>
>>>>> Can you please have a look at this patch ? as this seems to reproducing
>>>>> quite frequently
>>>>>
>>>>> Thanks,
>>>>> Mukesh
>>>>>
>>>>> On 4/10/2019 1:29 PM, Mukesh Ojha wrote:
>>>>>> uinput_destroy_device() gets called from two places. In one place,
>>>>>> uinput_ioctl_handler() where it is protected under a lock
>>>>>> udev->mutex but there is no protection on udev device from freeing
>>>>>> inside uinput_release().
>>>> uinput_release() should be called when last file handle to the uinput
>>>> instance is being dropped, so there should be no other users and thus we
>>>> can't be racing with anyone.
>>> Lets say an example where i am creating input device quite frequently
>>>
>>> [   97.836603] input: syz0 as /devices/virtual/input/input262
>>> [   97.845589] input: syz0 as /devices/virtual/input/input261
>>> [   97.849415] input: syz0 as /devices/virtual/input/input263
>>> [   97.856479] input: syz0 as /devices/virtual/input/input264
>>> [   97.936128] input: syz0 as /devices/virtual/input/input265
>>>
>>> e.g input265
>>>
>>> while input265 gets created [1] and handlers are getting registered on
>>> that device*fput* gets called on
>>> that device as user space got to know that input265 is created and its
>>> reference is still 1(rare but possible).
> Are you saying that there are 2 threads sharing the same file
> descriptor, one issuing the registration ioctl while the other closing
> the same fd?

Dmitry,

I don't have a the exact look inside the app here, but this looks like 
the same as it is able to do
fput on the uinput device.

FYI
Syskaller app is running in userspace (which is for syscall fuzzing) on 
kernel which is enabled
with various config fault injection, FAULT_INJECTION,FAIL_SLAB, 
FAIL_PAGEALLOC, KASAN etc.

Thanks,
Mukesh

>
> Thanks.
>
Dmitry Torokhov April 23, 2019, 3:28 a.m. UTC | #5
On Fri, Apr 19, 2019 at 02:13:48PM +0530, Mukesh Ojha wrote:
> 
> On 4/19/2019 12:41 PM, dmitry.torokhov@gmail.com wrote:
> > Hi Mukesh,
> > 
> > On Fri, Apr 19, 2019 at 12:17:44PM +0530, Mukesh Ojha wrote:
> > > For some reason my last mail did not get delivered,  sending it again.
> > > 
> > > 
> > > On 4/18/2019 11:55 AM, Mukesh Ojha wrote:
> > > > 
> > > > On 4/18/2019 7:13 AM, dmitry.torokhov@gmail.com wrote:
> > > > > Hi Mukesh,
> > > > > 
> > > > > On Mon, Apr 15, 2019 at 03:35:51PM +0530, Mukesh Ojha wrote:
> > > > > > Hi Dmitry,
> > > > > > 
> > > > > > Can you please have a look at this patch ? as this seems to reproducing
> > > > > > quite frequently
> > > > > > 
> > > > > > Thanks,
> > > > > > Mukesh
> > > > > > 
> > > > > > On 4/10/2019 1:29 PM, Mukesh Ojha wrote:
> > > > > > > uinput_destroy_device() gets called from two places. In one place,
> > > > > > > uinput_ioctl_handler() where it is protected under a lock
> > > > > > > udev->mutex but there is no protection on udev device from freeing
> > > > > > > inside uinput_release().
> > > > > uinput_release() should be called when last file handle to the uinput
> > > > > instance is being dropped, so there should be no other users and thus we
> > > > > can't be racing with anyone.
> > > > Lets say an example where i am creating input device quite frequently
> > > > 
> > > > [   97.836603] input: syz0 as /devices/virtual/input/input262
> > > > [   97.845589] input: syz0 as /devices/virtual/input/input261
> > > > [   97.849415] input: syz0 as /devices/virtual/input/input263
> > > > [   97.856479] input: syz0 as /devices/virtual/input/input264
> > > > [   97.936128] input: syz0 as /devices/virtual/input/input265
> > > > 
> > > > e.g input265
> > > > 
> > > > while input265 gets created [1] and handlers are getting registered on
> > > > that device*fput* gets called on
> > > > that device as user space got to know that input265 is created and its
> > > > reference is still 1(rare but possible).
> > Are you saying that there are 2 threads sharing the same file
> > descriptor, one issuing the registration ioctl while the other closing
> > the same fd?
> 
> Dmitry,
> 
> I don't have a the exact look inside the app here, but this looks like the
> same as it is able to do
> fput on the uinput device.
> 
> FYI
> Syskaller app is running in userspace (which is for syscall fuzzing) on
> kernel which is enabled
> with various config fault injection, FAULT_INJECTION,FAIL_SLAB,
> FAIL_PAGEALLOC, KASAN etc.

Mukesh,

We need to understand exactly the failure mode. I do not think that
introducing another mutex into uinput actually fixes the issue, as we do
not order mutex acquisition, so I think it is still possible for the
release function to acquire the mutex and run first, and then ioctl
would run with freed object.

My guess that this needs to be fixed in VFS layer.

Thanks.
Dmitry Torokhov April 23, 2019, 8:49 a.m. UTC | #6
On Tue, Apr 23, 2019 at 12:51:13PM +0530, Mukesh Ojha wrote:
> On 4/23/2019 8:58 AM, dmitry.torokhov@gmail.com wrote:
> > On Fri, Apr 19, 2019 at 02:13:48PM +0530, Mukesh Ojha wrote:
> > > On 4/19/2019 12:41 PM, dmitry.torokhov@gmail.com wrote:
> > > > On Fri, Apr 19, 2019 at 12:17:44PM +0530, Mukesh Ojha wrote:
> > > > > On 4/18/2019 11:55 AM, Mukesh Ojha wrote:
> > > > > > On 4/18/2019 7:13 AM, dmitry.torokhov@gmail.com wrote:
> > > > > > > > On 4/10/2019 1:29 PM, Mukesh Ojha wrote:
> > > > > > > > > uinput_destroy_device() gets called from two places. In one place,
> > > > > > > > > uinput_ioctl_handler() where it is protected under a lock
> > > > > > > > > udev->mutex but there is no protection on udev device from freeing
> > > > > > > > > inside uinput_release().
> > > > > > > uinput_release() should be called when last file handle to the uinput
> > > > > > > instance is being dropped, so there should be no other users and thus we
> > > > > > > can't be racing with anyone.
> > > > > > Lets say an example where i am creating input device quite frequently
> > > > > > 
> > > > > > [   97.836603] input: syz0 as /devices/virtual/input/input262
> > > > > > [   97.845589] input: syz0 as /devices/virtual/input/input261
> > > > > > [   97.849415] input: syz0 as /devices/virtual/input/input263
> > > > > > [   97.856479] input: syz0 as /devices/virtual/input/input264
> > > > > > [   97.936128] input: syz0 as /devices/virtual/input/input265
> > > > > > 
> > > > > > e.g input265
> > > > > > 
> > > > > > while input265 gets created [1] and handlers are getting registered on
> > > > > > that device*fput* gets called on
> > > > > > that device as user space got to know that input265 is created and its
> > > > > > reference is still 1(rare but possible).
> > > > Are you saying that there are 2 threads sharing the same file
> > > > descriptor, one issuing the registration ioctl while the other closing
> > > > the same fd?
> > > Dmitry,
> > > 
> > > I don't have a the exact look inside the app here, but this looks like the
> > > same as it is able to do
> > > fput on the uinput device.
> > > 
> > > FYI
> > > Syskaller app is running in userspace (which is for syscall fuzzing) on
> > > kernel which is enabled
> > > with various config fault injection, FAULT_INJECTION,FAIL_SLAB,
> > > FAIL_PAGEALLOC, KASAN etc.
> > Mukesh,
> > 
> > We need to understand exactly the failure mode. I do not think that
> > introducing another mutex into uinput actually fixes the issue, as we do
> > not order mutex acquisition, so I think it is still possible for the
> > release function to acquire the mutex and run first, and then ioctl
> > would run with freed object.
> 
> I have taken care this case from ioctl and release point of view.
> 
> Even if the release gets called first it will make the
> file->private_data=NULL.
> and further call to ioctl will not be a problem as the check is already
> there.

Al, do we have any protections in VFS layer from userspace hanging onto
a file descriptor and calling ioctl() on it even as another thread
calls close() on the same fd?

Should the issue be solved by individual drivers, or more careful
accounting for currently running operations is needed at VFS layer?

Thanks.
Al Viro April 23, 2019, 11:06 a.m. UTC | #7
On Tue, Apr 23, 2019 at 08:49:44AM +0000, dmitry.torokhov@gmail.com wrote:
> On Tue, Apr 23, 2019 at 12:51:13PM +0530, Mukesh Ojha wrote:

> > I have taken care this case from ioctl and release point of view.
> > 
> > Even if the release gets called first it will make the
> > file->private_data=NULL.
> > and further call to ioctl will not be a problem as the check is already
> > there.
> 
> Al, do we have any protections in VFS layer from userspace hanging onto
> a file descriptor and calling ioctl() on it even as another thread
> calls close() on the same fd?
> 
> Should the issue be solved by individual drivers, or more careful
> accounting for currently running operations is needed at VFS layer?

Neither.  An overlap of ->release() and ->ioctl() is possible only
if you've got memory corruption somewhere.

close() overlapping ioctl() is certainly possible, and won't trigger
that at all - sys_ioctl() holds onto reference to struct file, so
its refcount won't reach zero until we are done with it.
Al Viro April 23, 2019, 12:15 p.m. UTC | #8
On Tue, Apr 23, 2019 at 12:06:12PM +0100, Al Viro wrote:
> On Tue, Apr 23, 2019 at 08:49:44AM +0000, dmitry.torokhov@gmail.com wrote:
> > On Tue, Apr 23, 2019 at 12:51:13PM +0530, Mukesh Ojha wrote:
> 
> > > I have taken care this case from ioctl and release point of view.
> > > 
> > > Even if the release gets called first it will make the
> > > file->private_data=NULL.
> > > and further call to ioctl will not be a problem as the check is already
> > > there.
> > 
> > Al, do we have any protections in VFS layer from userspace hanging onto
> > a file descriptor and calling ioctl() on it even as another thread
> > calls close() on the same fd?
> > 
> > Should the issue be solved by individual drivers, or more careful
> > accounting for currently running operations is needed at VFS layer?
> 
> Neither.  An overlap of ->release() and ->ioctl() is possible only
> if you've got memory corruption somewhere.
> 
> close() overlapping ioctl() is certainly possible, and won't trigger
> that at all - sys_ioctl() holds onto reference to struct file, so
> its refcount won't reach zero until we are done with it.

I'd like to see a reproducer; _if_ we are really getting such overlaps,
we have something buggering struct file refcounts (e.g. stray fput()
eventually leading to dangling pointer in descriptor table) or an
outright memory corruption mangling descriptor table/kernel stack/
refcount/->private_data/etc.

Details, please - I tried to google some context for that, but hadn't
found anything useful ;-/
Mukesh Ojha April 24, 2019, 12:10 p.m. UTC | #9
On 4/23/2019 4:36 PM, Al Viro wrote:
> On Tue, Apr 23, 2019 at 08:49:44AM +0000, dmitry.torokhov@gmail.com wrote:
>> On Tue, Apr 23, 2019 at 12:51:13PM +0530, Mukesh Ojha wrote:
>>> I have taken care this case from ioctl and release point of view.
>>>
>>> Even if the release gets called first it will make the
>>> file->private_data=NULL.
>>> and further call to ioctl will not be a problem as the check is already
>>> there.
>> Al, do we have any protections in VFS layer from userspace hanging onto
>> a file descriptor and calling ioctl() on it even as another thread
>> calls close() on the same fd?
>>
>> Should the issue be solved by individual drivers, or more careful
>> accounting for currently running operations is needed at VFS layer?
> Neither.  An overlap of ->release() and ->ioctl() is possible only
> if you've got memory corruption somewhere.
>
> close() overlapping ioctl() is certainly possible, and won't trigger
> that at all - sys_ioctl() holds onto reference to struct file, so
> its refcount won't reach zero until we are done with it.

Al,

i tried to put traceprintk inside ioctl after fdget and fdput on a 
simple call of open  => ioctl => close
on /dev/uinput.

           uinput-532   [002] ....    45.312044: SYSC_ioctl: 2     <= 
f_count >    <After fdget()
           uinput-532   [002] ....    45.312055: SYSC_ioctl: 
2            <After fdput()
           uinput-532   [004] ....    45.313766: uinput_open: uinput: 1
           uinput-532   [004] ....    45.313783: SYSC_ioctl: 1
           uinput-532   [004] ....    45.313788: uinput_ioctl_handler: 
uinput: uinput_ioctl_handler, 1
           uinput-532   [004] ....    45.313835: SYSC_ioctl: 1
           uinput-532   [004] ....    45.313843: uinput_release: uinput:  0


So while a ioctl is running the f_count is 1, so a fput could be run and 
do atomic_long_dec_and_test
this could call release right ?

-Mukesh
Al Viro April 24, 2019, 1:07 p.m. UTC | #10
On Wed, Apr 24, 2019 at 05:40:40PM +0530, Mukesh Ojha wrote:
> 
> Al,
> 
> i tried to put traceprintk inside ioctl after fdget and fdput on a simple
> call of open  => ioctl => close

in a loop, and multithreaded, presumably?

> on /dev/uinput.
> 
>           uinput-532   [002] ....    45.312044: SYSC_ioctl: 2     <= f_count
> >    <After fdget()
>           uinput-532   [002] ....    45.312055: SYSC_ioctl: 2           
> <After fdput()
>           uinput-532   [004] ....    45.313766: uinput_open: uinput: 1
>           uinput-532   [004] ....    45.313783: SYSC_ioctl: 1
>           uinput-532   [004] ....    45.313788: uinput_ioctl_handler:
> uinput: uinput_ioctl_handler, 1
>           uinput-532   [004] ....    45.313835: SYSC_ioctl: 1
>           uinput-532   [004] ....    45.313843: uinput_release: uinput:  0
> 
> 
> So while a ioctl is running the f_count is 1, so a fput could be run and do
> atomic_long_dec_and_test
> this could call release right ?

Look at ksys_ioctl():
int ksys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
{
        int error;
        struct fd f = fdget(fd);
an error or refcount bumped
        if (!f.file)
                return -EBADF;
not an error, then.  We know that ->release() won't be called
until we drop the reference we've just acquired.
        error = security_file_ioctl(f.file, cmd, arg);
        if (!error)
                error = do_vfs_ioctl(f.file, fd, cmd, arg);
... and we are done with calling ->ioctl(), so
        fdput(f);
... we drop the reference we'd acquired.

Seeing refcount 1 inside ->ioctl() is possible, all right:

CPU1: ioctl(2) resolves fd to struct file *, refcount 2
CPU2: close(2) rips struct file * from descriptor table and does fput() to drop it;
	refcount reaches 1 and fput() is done; no call of ->release() yet.
CPU1: we get arouund to ->ioctl(), where your trace sees refcount 1
CPU1: done with ->ioctl(), drop our reference.  *NOW* refcount gets to 0, and
	->release() is called.

IOW, in your trace fput() has already been run by close(2); having somebody else
do that again while we are in ->ioctl() would be a bug (to start with, where
did they get that struct file * and why wasn't that reference contributing to
struct file refcount?)

In all cases we only call ->release() once all references gone - both
the one(s) in descriptor tables and any transient ones acquired by
fdget(), etc.

I would really like to see a reproducer for the original use-after-free report...
Mukesh Ojha April 24, 2019, 2:09 p.m. UTC | #11
On 4/24/2019 6:37 PM, Al Viro wrote:
> On Wed, Apr 24, 2019 at 05:40:40PM +0530, Mukesh Ojha wrote:
>> Al,
>>
>> i tried to put traceprintk inside ioctl after fdget and fdput on a simple
>> call of open  => ioctl => close
> in a loop, and multithreaded, presumably?
>
>> on /dev/uinput.
>>
>>            uinput-532   [002] ....    45.312044: SYSC_ioctl: 2     <= f_count
>>>      <After fdget()
>>            uinput-532   [002] ....    45.312055: SYSC_ioctl: 2
>> <After fdput()
>>            uinput-532   [004] ....    45.313766: uinput_open: uinput: 1
>>            uinput-532   [004] ....    45.313783: SYSC_ioctl: 1
>>            uinput-532   [004] ....    45.313788: uinput_ioctl_handler:
>> uinput: uinput_ioctl_handler, 1
>>            uinput-532   [004] ....    45.313835: SYSC_ioctl: 1
>>            uinput-532   [004] ....    45.313843: uinput_release: uinput:  0
>>
>>
>> So while a ioctl is running the f_count is 1, so a fput could be run and do
>> atomic_long_dec_and_test
>> this could call release right ?
> Look at ksys_ioctl():
> int ksys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
> {
>          int error;
>          struct fd f = fdget(fd);
> an error or refcount bumped
>          if (!f.file)
>                  return -EBADF;
> not an error, then.  We know that ->release() won't be called
> until we drop the reference we've just acquired.
>          error = security_file_ioctl(f.file, cmd, arg);
>          if (!error)
>                  error = do_vfs_ioctl(f.file, fd, cmd, arg);
> ... and we are done with calling ->ioctl(), so
>          fdput(f);
> ... we drop the reference we'd acquired.
>
> Seeing refcount 1 inside ->ioctl() is possible, all right:
>
> CPU1: ioctl(2) resolves fd to struct file *, refcount 2
> CPU2: close(2) rips struct file * from descriptor table and does fput() to drop it;
> 	refcount reaches 1 and fput() is done; no call of ->release() yet.
> CPU1: we get arouund to ->ioctl(), where your trace sees refcount 1
> CPU1: done with ->ioctl(), drop our reference.  *NOW* refcount gets to 0, and
> 	->release() is called.

Thanks for the detail reply, Al

This was my simple program no multithreading just to understand f_counting

int main()
{
         int fd = open("/dev/uinput", O_WRONLY | O_NONBLOCK);
         ioctl(fd, UI_SET_EVBIT, EV_KEY);
         close(fd);
         return 0;
}

            uinput-532   [002] ....    45.312044: SYSC_ioctl: 2   <= 
f_count >    <After fdget()
           uinput-532   [002] ....    45.312055: SYSC_ioctl: 
2            <After fdput()
           uinput-532   [004] ....    45.313766: uinput_open: uinput: 
1   /* This is from the uinput driver uinput_open()*/

   =>>>>                         /* All the above calls happened for the 
open() in userspace*/

           uinput-532   [004] ....    45.313783: SYSC_ioctl: 1 /* This 
print is for the trace, i put after fdget */
           uinput-532   [004] ....    45.313788: uinput_ioctl_handler: 
uinput: uinput_ioctl_handler, 1 /* This print is from the uinput_ioctl 
driver */

           uinput-532   [004] ....    45.313835: SYSC_ioctl: 1 /* This 
print is for the trace, i put after fdput*/
           uinput-532   [004] ....    45.313843: uinput_release: 
uinput:  0 /* And this is from the close()  */


Should fdget not suppose to increment the f_count here, as it is coming 1 ?
This f_count to one is done at the open, but i have no idea how this  
below f_count 2 came before open() for
this simple program.

          uinput-532   [002] ....    45.312044: SYSC_ioctl: 2 <= f_count 
 >    <After fdget()
           uinput-532   [002] ....    45.312055: SYSC_ioctl: 
2            <After fdput()

-Mukesh

> IOW, in your trace fput() has already been run by close(2); having somebody else
> do that again while we are in ->ioctl() would be a bug (to start with, where
> did they get that struct file * and why wasn't that reference contributing to
> struct file refcount?)
>
> In all cases we only call ->release() once all references gone - both
> the one(s) in descriptor tables and any transient ones acquired by
> fdget(), etc.
>
> I would really like to see a reproducer for the original use-after-free report...
Al Viro April 24, 2019, 10:56 p.m. UTC | #12
On Wed, Apr 24, 2019 at 07:39:03PM +0530, Mukesh Ojha wrote:

> This was my simple program no multithreading just to understand f_counting
> 
> int main()
> {
>         int fd = open("/dev/uinput", O_WRONLY | O_NONBLOCK);
>         ioctl(fd, UI_SET_EVBIT, EV_KEY);
>         close(fd);
>         return 0;
> }
> 
>            uinput-532   [002] ....    45.312044: SYSC_ioctl: 2   <= f_count

Er...  So how does it manage to hit ioctl(2) before open(2)?  Confused...

> >    <After fdget()
>           uinput-532   [002] ....    45.312055: SYSC_ioctl: 2           
> <After fdput()
>           uinput-532   [004] ....    45.313766: uinput_open: uinput: 1   /*
> This is from the uinput driver uinput_open()*/
> 
>   =>>>>                         /* All the above calls happened for the
> open() in userspace*/
> 
>           uinput-532   [004] ....    45.313783: SYSC_ioctl: 1 /* This print
> is for the trace, i put after fdget */
>           uinput-532   [004] ....    45.313788: uinput_ioctl_handler:
> uinput: uinput_ioctl_handler, 1 /* This print is from the uinput_ioctl
> driver */
> 
>           uinput-532   [004] ....    45.313835: SYSC_ioctl: 1 /* This print
> is for the trace, i put after fdput*/
>           uinput-532   [004] ....    45.313843: uinput_release: uinput:  0
> /* And this is from the close()  */
> 
> 
> Should fdget not suppose to increment the f_count here, as it is coming 1 ?
> This f_count to one is done at the open, but i have no idea how this  below
> f_count 2 came before open() for
> this simple program.

If descriptor table is not shared, fdget() will simply return you the reference
from there, without bothering to bump the refcount.  _And_ having it marked
"don't drop refcount" in struct fd.

Rationale: since it's not shared, nobody other than our process can modify
it.  So unless we remove (and drop) the reference from it ourselves (which
we certainly have no business doing in ->ioctl() and do not do anywhere
in drivers/input), it will remain there until we return from syscall.

Nobody is allowed to modify descriptor table other than their own.
And if it's not shared, no other owners can appear while the only
existing one is in the middle of syscall other than clone() (with
CLONE_FILES in flags, at that).

For shared descriptor table fdget() bumps file refcount, since there
the reference in descriptor table itself could be removed and dropped
by another thread.

And fdget() marks whether fput() is needed in fd.flags, so that
fdput() does the right thing.
Mukesh Ojha May 1, 2019, 7:50 a.m. UTC | #13
Sorry to come late on this

On 4/25/2019 4:26 AM, Al Viro wrote:
> On Wed, Apr 24, 2019 at 07:39:03PM +0530, Mukesh Ojha wrote:
>
>> This was my simple program no multithreading just to understand f_counting
>>
>> int main()
>> {
>>          int fd = open("/dev/uinput", O_WRONLY | O_NONBLOCK);
>>          ioctl(fd, UI_SET_EVBIT, EV_KEY);
>>          close(fd);
>>          return 0;
>> }
>>
>>             uinput-532   [002] ....    45.312044: SYSC_ioctl: 2   <= f_count
> Er...  So how does it manage to hit ioctl(2) before open(2)?  Confused...

I was confused too about this earlier, but after printing fd got to know 
this is not for the same fd
opening for /dev/uinput, may it is for something while running the 
executable.

>
>>>      <After fdget()
>>            uinput-532   [002] ....    45.312055: SYSC_ioctl: 2
>> <After fdput()
>>            uinput-532   [004] ....    45.313766: uinput_open: uinput: 1   /*
>> This is from the uinput driver uinput_open()*/
>>
>>    =>>>>                         /* All the above calls happened for the
>> open() in userspace*/
>>
>>            uinput-532   [004] ....    45.313783: SYSC_ioctl: 1 /* This print
>> is for the trace, i put after fdget */
>>            uinput-532   [004] ....    45.313788: uinput_ioctl_handler:
>> uinput: uinput_ioctl_handler, 1 /* This print is from the uinput_ioctl
>> driver */
>>
>>            uinput-532   [004] ....    45.313835: SYSC_ioctl: 1 /* This print
>> is for the trace, i put after fdput*/
>>            uinput-532   [004] ....    45.313843: uinput_release: uinput:  0
>> /* And this is from the close()  */
>>
>>
>> Should fdget not suppose to increment the f_count here, as it is coming 1 ?
>> This f_count to one is done at the open, but i have no idea how this  below
>> f_count 2 came before open() for
>> this simple program.
> If descriptor table is not shared, fdget() will simply return you the reference
> from there, without bothering to bump the refcount.  _And_ having it marked
> "don't drop refcount" in struct fd.
>
> Rationale: since it's not shared, nobody other than our process can modify
> it.  So unless we remove (and drop) the reference from it ourselves (which
> we certainly have no business doing in ->ioctl() and do not do anywhere
> in drivers/input), it will remain there until we return from syscall.
>
> Nobody is allowed to modify descriptor table other than their own.
> And if it's not shared, no other owners can appear while the only
> existing one is in the middle of syscall other than clone() (with
> CLONE_FILES in flags, at that).
>
> For shared descriptor table fdget() bumps file refcount, since there
> the reference in descriptor table itself could be removed and dropped
> by another thread.
>
> And fdget() marks whether fput() is needed in fd.flags, so that
> fdput() does the right thing.


Thanks Al, it is quite clear that issue can't happen while a ioctl is in 
progress.
Actually the issue seems to be a race while glue_dir input is removed.

   114.339374] input: syz1 as /devices/virtual/input/input278
[  114.345619] input: syz1 as /devices/virtual/input/input279
[  114.353502] input: syz1 as /devices/virtual/input/input280
[  114.361907] input: syz1 as /devices/virtual/input/input281
[  114.367276] input: syz1 as /devices/virtual/input/input282
[  114.382292] input: syz1 as /devices/virtual/input/input283

in our case it is input which is getting removed while a inputxx is 
trying make node inside input.

Similar issue https://lkml.org/lkml/2019/5/1/3

Thanks,
Mukesh
diff mbox series

Patch

diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c
index 26ec603f..2e7e096 100644
--- a/drivers/input/misc/uinput.c
+++ b/drivers/input/misc/uinput.c
@@ -81,6 +81,8 @@  struct uinput_device {
 	spinlock_t		requests_lock;
 };
 
+static DEFINE_MUTEX(uinput_glb_mutex);
+
 static int uinput_dev_event(struct input_dev *dev,
 			    unsigned int type, unsigned int code, int value)
 {
@@ -714,10 +716,18 @@  static __poll_t uinput_poll(struct file *file, poll_table *wait)
 static int uinput_release(struct inode *inode, struct file *file)
 {
 	struct uinput_device *udev = file->private_data;
+	ssize_t retval;
+
+	retval = mutex_lock_interruptible(&uinput_glb_mutex);
+	if (retval)
+		return retval;
 
 	uinput_destroy_device(udev);
+	file->private_data = NULL;
 	kfree(udev);
 
+	mutex_unlock(&uinput_glb_mutex);
+
 	return 0;
 }
 
@@ -848,7 +858,7 @@  static long uinput_ioctl_handler(struct file *file, unsigned int cmd,
 				 unsigned long arg, void __user *p)
 {
 	int			retval;
-	struct uinput_device	*udev = file->private_data;
+	struct uinput_device	*udev;
 	struct uinput_ff_upload ff_up;
 	struct uinput_ff_erase  ff_erase;
 	struct uinput_request   *req;
@@ -856,10 +866,20 @@  static long uinput_ioctl_handler(struct file *file, unsigned int cmd,
 	const char		*name;
 	unsigned int		size;
 
-	retval = mutex_lock_interruptible(&udev->mutex);
+	retval = mutex_lock_interruptible(&uinput_glb_mutex);
 	if (retval)
 		return retval;
 
+	udev = file->private_data;
+	if (!udev) {
+		retval = -EINVAL;
+		goto unlock_glb_mutex;
+	}
+
+	retval = mutex_lock_interruptible(&udev->mutex);
+	if (retval)
+		goto unlock_glb_mutex;
+
 	if (!udev->dev) {
 		udev->dev = input_allocate_device();
 		if (!udev->dev) {
@@ -1039,8 +1059,12 @@  static long uinput_ioctl_handler(struct file *file, unsigned int cmd,
 	}
 
 	retval = -EINVAL;
+
  out:
 	mutex_unlock(&udev->mutex);
+
+ unlock_glb_mutex:
+	mutex_unlock(&uinput_glb_mutex);
 	return retval;
 }