diff mbox

[PATCH/RFC,1/2] v4l: v4l2-dev: Add infrastructure to protect device unplug race

Message ID 20171116003349.19235-2-laurent.pinchart+renesas@ideasonboard.com (mailing list archive)
State New, archived
Headers show

Commit Message

Laurent Pinchart Nov. 16, 2017, 12:33 a.m. UTC
Device unplug being asynchronous, it naturally races with operations
performed by userspace through ioctls or other file operations on video
device nodes.

This leads to potential access to freed memory or to other resources
during device access if unplug occurs during device access. To solve
this, we need to wait until all device access completes when unplugging
the device, and block all further access when the device is being
unplugged.

Three new functions are introduced. The video_device_enter() and
video_device_exit() functions must be used to mark entry and exit from
all code sections where the device can be accessed. The
video_device_unplug() function is then used in the unplug handler to
mark the device as being unplugged and wait for all access to complete.

As an example mark the ioctl handler as a device access section. Other
file operations need to be protected too, and blocking ioctls (such as
VIDIOC_DQBUF) need to be handled as well.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
---
 drivers/media/v4l2-core/v4l2-dev.c | 57 ++++++++++++++++++++++++++++++++++++++
 include/media/v4l2-dev.h           | 47 +++++++++++++++++++++++++++++++
 2 files changed, 104 insertions(+)

Comments

Sakari Ailus Nov. 16, 2017, 12:32 p.m. UTC | #1
Hi Laurent,

Thank you for the initiative to bring up and address the matter!

On Thu, Nov 16, 2017 at 02:33:48AM +0200, Laurent Pinchart wrote:
> Device unplug being asynchronous, it naturally races with operations
> performed by userspace through ioctls or other file operations on video
> device nodes.
> 
> This leads to potential access to freed memory or to other resources
> during device access if unplug occurs during device access. To solve
> this, we need to wait until all device access completes when unplugging
> the device, and block all further access when the device is being
> unplugged.
> 
> Three new functions are introduced. The video_device_enter() and
> video_device_exit() functions must be used to mark entry and exit from
> all code sections where the device can be accessed. The

I wonder if it'd help splitting this patch into two: one that introduces
the mechanism and the other that uses it. Up to you.

Nevertheless, it'd be better to have other system calls covered as well.

> video_device_unplug() function is then used in the unplug handler to
> mark the device as being unplugged and wait for all access to complete.
> 
> As an example mark the ioctl handler as a device access section. Other
> file operations need to be protected too, and blocking ioctls (such as
> VIDIOC_DQBUF) need to be handled as well.
> 
> Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
> ---
>  drivers/media/v4l2-core/v4l2-dev.c | 57 ++++++++++++++++++++++++++++++++++++++
>  include/media/v4l2-dev.h           | 47 +++++++++++++++++++++++++++++++
>  2 files changed, 104 insertions(+)
> 
> diff --git a/drivers/media/v4l2-core/v4l2-dev.c b/drivers/media/v4l2-core/v4l2-dev.c
> index c647ba648805..c73c6d49e7cf 100644
> --- a/drivers/media/v4l2-core/v4l2-dev.c
> +++ b/drivers/media/v4l2-core/v4l2-dev.c
> @@ -156,6 +156,52 @@ void video_device_release_empty(struct video_device *vdev)
>  }
>  EXPORT_SYMBOL(video_device_release_empty);
>  
> +int video_device_enter(struct video_device *vdev)
> +{
> +	bool unplugged;
> +
> +	spin_lock(&vdev->unplug_lock);
> +	unplugged = vdev->unplugged;
> +	if (!unplugged)
> +		vdev->access_refcount++;
> +	spin_unlock(&vdev->unplug_lock);
> +
> +	return unplugged ? -ENODEV : 0;
> +}
> +EXPORT_SYMBOL_GPL(video_device_enter);
> +
> +void video_device_exit(struct video_device *vdev)
> +{
> +	bool wake_up;
> +
> +	spin_lock(&vdev->unplug_lock);
> +	WARN_ON(--vdev->access_refcount < 0);
> +	wake_up = vdev->access_refcount == 0;
> +	spin_unlock(&vdev->unplug_lock);
> +
> +	if (wake_up)
> +		wake_up(&vdev->unplug_wait);
> +}
> +EXPORT_SYMBOL_GPL(video_device_exit);

Is there a need to export the two, i.e. wouldn't you only call them from
the framework, or the same module?

> +
> +void video_device_unplug(struct video_device *vdev)
> +{
> +	bool unplug_blocked;
> +
> +	spin_lock(&vdev->unplug_lock);
> +	unplug_blocked = vdev->access_refcount > 0;
> +	vdev->unplugged = true;

Shouldn't this be set to false in video_register_device()?

> +	spin_unlock(&vdev->unplug_lock);
> +
> +	if (!unplug_blocked)
> +		return;

Not necessary, wait_event_timeout() handles this already.

> +
> +	if (!wait_event_timeout(vdev->unplug_wait, !vdev->access_refcount,
> +				msecs_to_jiffies(150000)))
> +		WARN(1, "Timeout waiting for device access to complete\n");

Why a timeout? Does this get somehow less problematic over time? :-)

> +}
> +EXPORT_SYMBOL_GPL(video_device_unplug);
> +
>  static inline void video_get(struct video_device *vdev)
>  {
>  	get_device(&vdev->dev);
> @@ -351,6 +397,10 @@ static long v4l2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>  	struct video_device *vdev = video_devdata(filp);
>  	int ret = -ENODEV;

You could leave ret unassigned here.

>  
> +	ret = video_device_enter(vdev);
> +	if (ret < 0)
> +		return ret;
> +
>  	if (vdev->fops->unlocked_ioctl) {
>  		struct mutex *lock = v4l2_ioctl_get_lock(vdev, cmd);
>  
> @@ -358,11 +408,14 @@ static long v4l2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>  			return -ERESTARTSYS;
>  		if (video_is_registered(vdev))
>  			ret = vdev->fops->unlocked_ioctl(filp, cmd, arg);
> +		else
> +			ret = -ENODEV;
>  		if (lock)
>  			mutex_unlock(lock);
>  	} else
>  		ret = -ENOTTY;
>  
> +	video_device_exit(vdev);
>  	return ret;
>  }
>  
> @@ -841,6 +894,10 @@ int __video_register_device(struct video_device *vdev, int type, int nr,
>  	if (WARN_ON(!vdev->v4l2_dev))
>  		return -EINVAL;
>  
> +	/* unplug support */
> +	spin_lock_init(&vdev->unplug_lock);
> +	init_waitqueue_head(&vdev->unplug_wait);
> +
>  	/* v4l2_fh support */
>  	spin_lock_init(&vdev->fh_lock);
>  	INIT_LIST_HEAD(&vdev->fh_list);
> diff --git a/include/media/v4l2-dev.h b/include/media/v4l2-dev.h
> index e657614521e3..365a94f91dc9 100644
> --- a/include/media/v4l2-dev.h
> +++ b/include/media/v4l2-dev.h
> @@ -15,6 +15,7 @@
>  #include <linux/cdev.h>
>  #include <linux/mutex.h>
>  #include <linux/videodev2.h>
> +#include <linux/wait.h>
>  
>  #include <media/media-entity.h>
>  
> @@ -178,6 +179,12 @@ struct v4l2_file_operations {
>   * @pipe: &struct media_pipeline
>   * @fops: pointer to &struct v4l2_file_operations for the video device
>   * @device_caps: device capabilities as used in v4l2_capabilities
> + * @unplugged: when set the device has been unplugged and no device access
> + *	section can be entered
> + * @access_refcount: number of device access section currently running for the
> + *	device
> + * @unplug_lock: protects unplugged and access_refcount
> + * @unplug_wait: wait queue to wait for device access sections to complete
>   * @dev: &struct device for the video device
>   * @cdev: character device
>   * @v4l2_dev: pointer to &struct v4l2_device parent
> @@ -221,6 +228,12 @@ struct video_device
>  
>  	u32 device_caps;
>  
> +	/* unplug handling */
> +	bool unplugged;
> +	int access_refcount;

Could you use refcount_t instead, to avoid integer overflow issues?

> +	spinlock_t unplug_lock;
> +	wait_queue_head_t unplug_wait;
> +
>  	/* sysfs */
>  	struct device dev;
>  	struct cdev *cdev;
> @@ -506,4 +519,38 @@ static inline int video_is_registered(struct video_device *vdev)
>  	return test_bit(V4L2_FL_REGISTERED, &vdev->flags);
>  }
>  
> +/**
> + * video_device_enter - enter a device access section
> + * @vdev: the video device
> + *
> + * This function marks and protects the beginning of a section that should not
> + * be entered after the device has been unplugged. The section end is marked
> + * with a call to video_device_exit(). Calls to this function can be nested.
> + *
> + * Returns:
> + * 0 on success or a negative error code if the device has been unplugged.
> + */
> +int video_device_enter(struct video_device *vdev);
> +
> +/**
> + * video_device_exit - exit a device access section
> + * @vdev: the video device
> + *
> + * This function marks the end of a section entered with video_device_enter().
> + * It wakes up all tasks waiting on video_device_unplug() for device access
> + * sections to be exited.
> + */
> +void video_device_exit(struct video_device *vdev);
> +
> +/**
> + * video_device_unplug - mark a device as unplugged
> + * @vdev: the video device
> + *
> + * Mark a device as unplugged, causing all subsequent calls to
> + * video_device_enter() to return an error. If a device access section is
> + * currently being executed the function waits until the section is exited as
> + * marked by a call to video_device_exit().
> + */
> +void video_device_unplug(struct video_device *vdev);
> +
>  #endif /* _V4L2_DEV_H */
> -- 
> Regards,
> 
> Laurent Pinchart
>
Kieran Bingham Nov. 16, 2017, 2:47 p.m. UTC | #2
Hi Laurent,

On 16/11/17 12:32, Sakari Ailus wrote:
> Hi Laurent,
> 
> Thank you for the initiative to bring up and address the matter!

I concur - this looks like a good start towards managing the issue.

One potential thing spotted on top of Sakari's review inline below, of course I
suspect this was more of a prototype/consideration patch.

> On Thu, Nov 16, 2017 at 02:33:48AM +0200, Laurent Pinchart wrote:
>> Device unplug being asynchronous, it naturally races with operations
>> performed by userspace through ioctls or other file operations on video
>> device nodes.
>>
>> This leads to potential access to freed memory or to other resources
>> during device access if unplug occurs during device access. To solve
>> this, we need to wait until all device access completes when unplugging
>> the device, and block all further access when the device is being
>> unplugged.
>>
>> Three new functions are introduced. The video_device_enter() and
>> video_device_exit() functions must be used to mark entry and exit from
>> all code sections where the device can be accessed. The
> 
> I wonder if it'd help splitting this patch into two: one that introduces
> the mechanism and the other that uses it. Up to you.
> 
> Nevertheless, it'd be better to have other system calls covered as well.
> 
>> video_device_unplug() function is then used in the unplug handler to
>> mark the device as being unplugged and wait for all access to complete.
>>
>> As an example mark the ioctl handler as a device access section. Other
>> file operations need to be protected too, and blocking ioctls (such as
>> VIDIOC_DQBUF) need to be handled as well.
>>
>> Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
>> ---
>>  drivers/media/v4l2-core/v4l2-dev.c | 57 ++++++++++++++++++++++++++++++++++++++
>>  include/media/v4l2-dev.h           | 47 +++++++++++++++++++++++++++++++
>>  2 files changed, 104 insertions(+)
>>
>> diff --git a/drivers/media/v4l2-core/v4l2-dev.c b/drivers/media/v4l2-core/v4l2-dev.c
>> index c647ba648805..c73c6d49e7cf 100644
>> --- a/drivers/media/v4l2-core/v4l2-dev.c
>> +++ b/drivers/media/v4l2-core/v4l2-dev.c
>> @@ -156,6 +156,52 @@ void video_device_release_empty(struct video_device *vdev)
>>  }
>>  EXPORT_SYMBOL(video_device_release_empty);
>>  
>> +int video_device_enter(struct video_device *vdev)
>> +{
>> +	bool unplugged;
>> +
>> +	spin_lock(&vdev->unplug_lock);
>> +	unplugged = vdev->unplugged;
>> +	if (!unplugged)
>> +		vdev->access_refcount++;
>> +	spin_unlock(&vdev->unplug_lock);
>> +
>> +	return unplugged ? -ENODEV : 0;
>> +}
>> +EXPORT_SYMBOL_GPL(video_device_enter);
>> +
>> +void video_device_exit(struct video_device *vdev)
>> +{
>> +	bool wake_up;
>> +
>> +	spin_lock(&vdev->unplug_lock);
>> +	WARN_ON(--vdev->access_refcount < 0);
>> +	wake_up = vdev->access_refcount == 0;
>> +	spin_unlock(&vdev->unplug_lock);
>> +
>> +	if (wake_up)
>> +		wake_up(&vdev->unplug_wait);
>> +}
>> +EXPORT_SYMBOL_GPL(video_device_exit);
> 
> Is there a need to export the two, i.e. wouldn't you only call them from
> the framework, or the same module?
> 
>> +
>> +void video_device_unplug(struct video_device *vdev)
>> +{
>> +	bool unplug_blocked;
>> +
>> +	spin_lock(&vdev->unplug_lock);
>> +	unplug_blocked = vdev->access_refcount > 0;
>> +	vdev->unplugged = true;
> 
> Shouldn't this be set to false in video_register_device()?
> 
>> +	spin_unlock(&vdev->unplug_lock);
>> +
>> +	if (!unplug_blocked)
>> +		return;
> 
> Not necessary, wait_event_timeout() handles this already.
> 
>> +
>> +	if (!wait_event_timeout(vdev->unplug_wait, !vdev->access_refcount,
>> +				msecs_to_jiffies(150000)))
>> +		WARN(1, "Timeout waiting for device access to complete\n");
> 
> Why a timeout? Does this get somehow less problematic over time? :-)
> 
>> +}
>> +EXPORT_SYMBOL_GPL(video_device_unplug);
>> +
>>  static inline void video_get(struct video_device *vdev)
>>  {
>>  	get_device(&vdev->dev);
>> @@ -351,6 +397,10 @@ static long v4l2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>>  	struct video_device *vdev = video_devdata(filp);
>>  	int ret = -ENODEV;
> 
> You could leave ret unassigned here.
> 
>>  
>> +	ret = video_device_enter(vdev);
>> +	if (ret < 0)
>> +		return ret;
>> +
>>  	if (vdev->fops->unlocked_ioctl) {
>>  		struct mutex *lock = v4l2_ioctl_get_lock(vdev, cmd);
>>  
>> @@ -358,11 +408,14 @@ static long v4l2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>>  			return -ERESTARTSYS;


It looks like that return -ERESTARTSYS might need a video_device_exit() too?


>>  		if (video_is_registered(vdev))
>>  			ret = vdev->fops->unlocked_ioctl(filp, cmd, arg);
>> +		else
>> +			ret = -ENODEV;
>>  		if (lock)
>>  			mutex_unlock(lock);
>>  	} else
>>  		ret = -ENOTTY;
>>  
>> +	video_device_exit(vdev);
>>  	return ret;
>>  }
>>  
>> @@ -841,6 +894,10 @@ int __video_register_device(struct video_device *vdev, int type, int nr,
>>  	if (WARN_ON(!vdev->v4l2_dev))
>>  		return -EINVAL;
>>  
>> +	/* unplug support */
>> +	spin_lock_init(&vdev->unplug_lock);
>> +	init_waitqueue_head(&vdev->unplug_wait);
>> +
>>  	/* v4l2_fh support */
>>  	spin_lock_init(&vdev->fh_lock);
>>  	INIT_LIST_HEAD(&vdev->fh_list);
>> diff --git a/include/media/v4l2-dev.h b/include/media/v4l2-dev.h
>> index e657614521e3..365a94f91dc9 100644
>> --- a/include/media/v4l2-dev.h
>> +++ b/include/media/v4l2-dev.h
>> @@ -15,6 +15,7 @@
>>  #include <linux/cdev.h>
>>  #include <linux/mutex.h>
>>  #include <linux/videodev2.h>
>> +#include <linux/wait.h>
>>  
>>  #include <media/media-entity.h>
>>  
>> @@ -178,6 +179,12 @@ struct v4l2_file_operations {
>>   * @pipe: &struct media_pipeline
>>   * @fops: pointer to &struct v4l2_file_operations for the video device
>>   * @device_caps: device capabilities as used in v4l2_capabilities
>> + * @unplugged: when set the device has been unplugged and no device access
>> + *	section can be entered
>> + * @access_refcount: number of device access section currently running for the
>> + *	device
>> + * @unplug_lock: protects unplugged and access_refcount
>> + * @unplug_wait: wait queue to wait for device access sections to complete
>>   * @dev: &struct device for the video device
>>   * @cdev: character device
>>   * @v4l2_dev: pointer to &struct v4l2_device parent
>> @@ -221,6 +228,12 @@ struct video_device
>>  
>>  	u32 device_caps;
>>  
>> +	/* unplug handling */
>> +	bool unplugged;
>> +	int access_refcount;
> 
> Could you use refcount_t instead, to avoid integer overflow issues?
> 
>> +	spinlock_t unplug_lock;
>> +	wait_queue_head_t unplug_wait;
>> +
>>  	/* sysfs */
>>  	struct device dev;
>>  	struct cdev *cdev;
>> @@ -506,4 +519,38 @@ static inline int video_is_registered(struct video_device *vdev)
>>  	return test_bit(V4L2_FL_REGISTERED, &vdev->flags);
>>  }
>>  
>> +/**
>> + * video_device_enter - enter a device access section
>> + * @vdev: the video device
>> + *
>> + * This function marks and protects the beginning of a section that should not
>> + * be entered after the device has been unplugged. The section end is marked
>> + * with a call to video_device_exit(). Calls to this function can be nested.
>> + *
>> + * Returns:
>> + * 0 on success or a negative error code if the device has been unplugged.
>> + */
>> +int video_device_enter(struct video_device *vdev);
>> +
>> +/**
>> + * video_device_exit - exit a device access section
>> + * @vdev: the video device
>> + *
>> + * This function marks the end of a section entered with video_device_enter().
>> + * It wakes up all tasks waiting on video_device_unplug() for device access
>> + * sections to be exited.
>> + */
>> +void video_device_exit(struct video_device *vdev);
>> +
>> +/**
>> + * video_device_unplug - mark a device as unplugged
>> + * @vdev: the video device
>> + *
>> + * Mark a device as unplugged, causing all subsequent calls to
>> + * video_device_enter() to return an error. If a device access section is
>> + * currently being executed the function waits until the section is exited as
>> + * marked by a call to video_device_exit().
>> + */
>> +void video_device_unplug(struct video_device *vdev);
>> +
>>  #endif /* _V4L2_DEV_H */
>> -- 
>> Regards,
>>
>> Laurent Pinchart
>>
>
Hans Verkuil Nov. 17, 2017, 11:09 a.m. UTC | #3
Hi Laurent,

On 16/11/17 01:33, Laurent Pinchart wrote:
> Device unplug being asynchronous, it naturally races with operations
> performed by userspace through ioctls or other file operations on video
> device nodes.
> 
> This leads to potential access to freed memory or to other resources
> during device access if unplug occurs during device access. To solve
> this, we need to wait until all device access completes when unplugging
> the device, and block all further access when the device is being
> unplugged.
> 
> Three new functions are introduced. The video_device_enter() and
> video_device_exit() functions must be used to mark entry and exit from
> all code sections where the device can be accessed. The
> video_device_unplug() function is then used in the unplug handler to
> mark the device as being unplugged and wait for all access to complete.
> 
> As an example mark the ioctl handler as a device access section. Other
> file operations need to be protected too, and blocking ioctls (such as
> VIDIOC_DQBUF) need to be handled as well.

As long as the queue field in struct video_device is filled in properly
this shouldn't be a problem.

This looks pretty good, simple and straightforward.

Do we need something similar for media_device? Other devices?

Regards,

	Hans

> 
> Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
> ---
>  drivers/media/v4l2-core/v4l2-dev.c | 57 ++++++++++++++++++++++++++++++++++++++
>  include/media/v4l2-dev.h           | 47 +++++++++++++++++++++++++++++++
>  2 files changed, 104 insertions(+)
> 
> diff --git a/drivers/media/v4l2-core/v4l2-dev.c b/drivers/media/v4l2-core/v4l2-dev.c
> index c647ba648805..c73c6d49e7cf 100644
> --- a/drivers/media/v4l2-core/v4l2-dev.c
> +++ b/drivers/media/v4l2-core/v4l2-dev.c
> @@ -156,6 +156,52 @@ void video_device_release_empty(struct video_device *vdev)
>  }
>  EXPORT_SYMBOL(video_device_release_empty);
>  
> +int video_device_enter(struct video_device *vdev)
> +{
> +	bool unplugged;
> +
> +	spin_lock(&vdev->unplug_lock);
> +	unplugged = vdev->unplugged;
> +	if (!unplugged)
> +		vdev->access_refcount++;
> +	spin_unlock(&vdev->unplug_lock);
> +
> +	return unplugged ? -ENODEV : 0;
> +}
> +EXPORT_SYMBOL_GPL(video_device_enter);
> +
> +void video_device_exit(struct video_device *vdev)
> +{
> +	bool wake_up;
> +
> +	spin_lock(&vdev->unplug_lock);
> +	WARN_ON(--vdev->access_refcount < 0);
> +	wake_up = vdev->access_refcount == 0;
> +	spin_unlock(&vdev->unplug_lock);
> +
> +	if (wake_up)
> +		wake_up(&vdev->unplug_wait);
> +}
> +EXPORT_SYMBOL_GPL(video_device_exit);
> +
> +void video_device_unplug(struct video_device *vdev)
> +{
> +	bool unplug_blocked;
> +
> +	spin_lock(&vdev->unplug_lock);
> +	unplug_blocked = vdev->access_refcount > 0;
> +	vdev->unplugged = true;
> +	spin_unlock(&vdev->unplug_lock);
> +
> +	if (!unplug_blocked)
> +		return;
> +
> +	if (!wait_event_timeout(vdev->unplug_wait, !vdev->access_refcount,
> +				msecs_to_jiffies(150000)))
> +		WARN(1, "Timeout waiting for device access to complete\n");
> +}
> +EXPORT_SYMBOL_GPL(video_device_unplug);
> +
>  static inline void video_get(struct video_device *vdev)
>  {
>  	get_device(&vdev->dev);
> @@ -351,6 +397,10 @@ static long v4l2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>  	struct video_device *vdev = video_devdata(filp);
>  	int ret = -ENODEV;
>  
> +	ret = video_device_enter(vdev);
> +	if (ret < 0)
> +		return ret;
> +
>  	if (vdev->fops->unlocked_ioctl) {
>  		struct mutex *lock = v4l2_ioctl_get_lock(vdev, cmd);
>  
> @@ -358,11 +408,14 @@ static long v4l2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>  			return -ERESTARTSYS;
>  		if (video_is_registered(vdev))
>  			ret = vdev->fops->unlocked_ioctl(filp, cmd, arg);
> +		else
> +			ret = -ENODEV;
>  		if (lock)
>  			mutex_unlock(lock);
>  	} else
>  		ret = -ENOTTY;
>  
> +	video_device_exit(vdev);
>  	return ret;
>  }
>  
> @@ -841,6 +894,10 @@ int __video_register_device(struct video_device *vdev, int type, int nr,
>  	if (WARN_ON(!vdev->v4l2_dev))
>  		return -EINVAL;
>  
> +	/* unplug support */
> +	spin_lock_init(&vdev->unplug_lock);
> +	init_waitqueue_head(&vdev->unplug_wait);
> +
>  	/* v4l2_fh support */
>  	spin_lock_init(&vdev->fh_lock);
>  	INIT_LIST_HEAD(&vdev->fh_list);
> diff --git a/include/media/v4l2-dev.h b/include/media/v4l2-dev.h
> index e657614521e3..365a94f91dc9 100644
> --- a/include/media/v4l2-dev.h
> +++ b/include/media/v4l2-dev.h
> @@ -15,6 +15,7 @@
>  #include <linux/cdev.h>
>  #include <linux/mutex.h>
>  #include <linux/videodev2.h>
> +#include <linux/wait.h>
>  
>  #include <media/media-entity.h>
>  
> @@ -178,6 +179,12 @@ struct v4l2_file_operations {
>   * @pipe: &struct media_pipeline
>   * @fops: pointer to &struct v4l2_file_operations for the video device
>   * @device_caps: device capabilities as used in v4l2_capabilities
> + * @unplugged: when set the device has been unplugged and no device access
> + *	section can be entered
> + * @access_refcount: number of device access section currently running for the
> + *	device
> + * @unplug_lock: protects unplugged and access_refcount
> + * @unplug_wait: wait queue to wait for device access sections to complete
>   * @dev: &struct device for the video device
>   * @cdev: character device
>   * @v4l2_dev: pointer to &struct v4l2_device parent
> @@ -221,6 +228,12 @@ struct video_device
>  
>  	u32 device_caps;
>  
> +	/* unplug handling */
> +	bool unplugged;
> +	int access_refcount;
> +	spinlock_t unplug_lock;
> +	wait_queue_head_t unplug_wait;
> +
>  	/* sysfs */
>  	struct device dev;
>  	struct cdev *cdev;
> @@ -506,4 +519,38 @@ static inline int video_is_registered(struct video_device *vdev)
>  	return test_bit(V4L2_FL_REGISTERED, &vdev->flags);
>  }
>  
> +/**
> + * video_device_enter - enter a device access section
> + * @vdev: the video device
> + *
> + * This function marks and protects the beginning of a section that should not
> + * be entered after the device has been unplugged. The section end is marked
> + * with a call to video_device_exit(). Calls to this function can be nested.
> + *
> + * Returns:
> + * 0 on success or a negative error code if the device has been unplugged.
> + */
> +int video_device_enter(struct video_device *vdev);
> +
> +/**
> + * video_device_exit - exit a device access section
> + * @vdev: the video device
> + *
> + * This function marks the end of a section entered with video_device_enter().
> + * It wakes up all tasks waiting on video_device_unplug() for device access
> + * sections to be exited.
> + */
> +void video_device_exit(struct video_device *vdev);
> +
> +/**
> + * video_device_unplug - mark a device as unplugged
> + * @vdev: the video device
> + *
> + * Mark a device as unplugged, causing all subsequent calls to
> + * video_device_enter() to return an error. If a device access section is
> + * currently being executed the function waits until the section is exited as
> + * marked by a call to video_device_exit().
> + */
> +void video_device_unplug(struct video_device *vdev);
> +
>  #endif /* _V4L2_DEV_H */
>
Mauro Carvalho Chehab Nov. 23, 2017, 1:07 p.m. UTC | #4
Hi Laurent,

Em Thu, 16 Nov 2017 02:33:48 +0200
Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com> escreveu:

> Device unplug being asynchronous, it naturally races with operations
> performed by userspace through ioctls or other file operations on video
> device nodes.
> 
> This leads to potential access to freed memory or to other resources
> during device access if unplug occurs during device access. To solve
> this, we need to wait until all device access completes when unplugging
> the device, and block all further access when the device is being
> unplugged.
> 
> Three new functions are introduced. The video_device_enter() and
> video_device_exit() functions must be used to mark entry and exit from
> all code sections where the device can be accessed. The
> video_device_unplug() function is then used in the unplug handler to
> mark the device as being unplugged and wait for all access to complete.
> 
> As an example mark the ioctl handler as a device access section. Other
> file operations need to be protected too, and blocking ioctls (such as
> VIDIOC_DQBUF) need to be handled as well.
> 
> Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
> ---
>  drivers/media/v4l2-core/v4l2-dev.c | 57 ++++++++++++++++++++++++++++++++++++++
>  include/media/v4l2-dev.h           | 47 +++++++++++++++++++++++++++++++
>  2 files changed, 104 insertions(+)
> 
> diff --git a/drivers/media/v4l2-core/v4l2-dev.c b/drivers/media/v4l2-core/v4l2-dev.c
> index c647ba648805..c73c6d49e7cf 100644
> --- a/drivers/media/v4l2-core/v4l2-dev.c
> +++ b/drivers/media/v4l2-core/v4l2-dev.c
> @@ -156,6 +156,52 @@ void video_device_release_empty(struct video_device *vdev)
>  }
>  EXPORT_SYMBOL(video_device_release_empty);
>  
> +int video_device_enter(struct video_device *vdev)
> +{
> +	bool unplugged;
> +
> +	spin_lock(&vdev->unplug_lock);
> +	unplugged = vdev->unplugged;
> +	if (!unplugged)
> +		vdev->access_refcount++;
> +	spin_unlock(&vdev->unplug_lock);
> +
> +	return unplugged ? -ENODEV : 0;
> +}
> +EXPORT_SYMBOL_GPL(video_device_enter);
> +
> +void video_device_exit(struct video_device *vdev)
> +{
> +	bool wake_up;
> +
> +	spin_lock(&vdev->unplug_lock);
> +	WARN_ON(--vdev->access_refcount < 0);
> +	wake_up = vdev->access_refcount == 0;
> +	spin_unlock(&vdev->unplug_lock);
> +
> +	if (wake_up)
> +		wake_up(&vdev->unplug_wait);
> +}
> +EXPORT_SYMBOL_GPL(video_device_exit);
> +
> +void video_device_unplug(struct video_device *vdev)
> +{
> +	bool unplug_blocked;
> +
> +	spin_lock(&vdev->unplug_lock);
> +	unplug_blocked = vdev->access_refcount > 0;
> +	vdev->unplugged = true;
> +	spin_unlock(&vdev->unplug_lock);
> +
> +	if (!unplug_blocked)
> +		return;
> +
> +	if (!wait_event_timeout(vdev->unplug_wait, !vdev->access_refcount,
> +				msecs_to_jiffies(150000)))
> +		WARN(1, "Timeout waiting for device access to complete\n");
> +}
> +EXPORT_SYMBOL_GPL(video_device_unplug);
> +
>  static inline void video_get(struct video_device *vdev)
>  {
>  	get_device(&vdev->dev);
> @@ -351,6 +397,10 @@ static long v4l2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>  	struct video_device *vdev = video_devdata(filp);
>  	int ret = -ENODEV;
>  
> +	ret = video_device_enter(vdev);
> +	if (ret < 0)
> +		return ret;
> +
>  	if (vdev->fops->unlocked_ioctl) {
>  		struct mutex *lock = v4l2_ioctl_get_lock(vdev, cmd);
>  
> @@ -358,11 +408,14 @@ static long v4l2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>  			return -ERESTARTSYS;
>  		if (video_is_registered(vdev))
>  			ret = vdev->fops->unlocked_ioctl(filp, cmd, arg);
> +		else
> +			ret = -ENODEV;
>  		if (lock)
>  			mutex_unlock(lock);
>  	} else
>  		ret = -ENOTTY;
>  
> +	video_device_exit(vdev);
>  	return ret;
>  }
>  
> @@ -841,6 +894,10 @@ int __video_register_device(struct video_device *vdev, int type, int nr,
>  	if (WARN_ON(!vdev->v4l2_dev))
>  		return -EINVAL;
>  
> +	/* unplug support */
> +	spin_lock_init(&vdev->unplug_lock);
> +	init_waitqueue_head(&vdev->unplug_wait);
> +

I'm c/c Greg here, as I don't think, that, the way it is, it
belongs at V4L2 core.

I mean: if this is a problem that affects all drivers, it would should, 
instead, be sitting at the driver's core.

If, otherwise, this is specific to rcar-vin (and other platform drivers),
that's likely should be inside the drivers that require it.

That's said, I remember we had to add some things in the past for
USB drivers hot unplug to happen softly. I don't remember the specifics
anymore, but it was solved by both a V4L2 core and changes at USB
drivers.

One of the things that it was added, on that time, was this patch:

	commit ae6cfaace120f4330715b56265ce0e4a710e1276
	Author: Hans Verkuil <hverkuil@xs4all.nl>
	Date:   Sat Mar 14 08:28:45 2009 -0300

	    V4L/DVB (11044): v4l2-device: add v4l2_device_disconnect

So, I would expect that a change at V4L2 core (or at driver core) that
would be applied would also be affecting USB drivers disconnect logic
and v4l2_device_disconnect() function.

>  	/* v4l2_fh support */
>  	spin_lock_init(&vdev->fh_lock);
>  	INIT_LIST_HEAD(&vdev->fh_list);
> diff --git a/include/media/v4l2-dev.h b/include/media/v4l2-dev.h
> index e657614521e3..365a94f91dc9 100644
> --- a/include/media/v4l2-dev.h
> +++ b/include/media/v4l2-dev.h
> @@ -15,6 +15,7 @@
>  #include <linux/cdev.h>
>  #include <linux/mutex.h>
>  #include <linux/videodev2.h>
> +#include <linux/wait.h>
>  
>  #include <media/media-entity.h>
>  
> @@ -178,6 +179,12 @@ struct v4l2_file_operations {
>   * @pipe: &struct media_pipeline
>   * @fops: pointer to &struct v4l2_file_operations for the video device
>   * @device_caps: device capabilities as used in v4l2_capabilities
> + * @unplugged: when set the device has been unplugged and no device access
> + *	section can be entered
> + * @access_refcount: number of device access section currently running for the
> + *	device
> + * @unplug_lock: protects unplugged and access_refcount
> + * @unplug_wait: wait queue to wait for device access sections to complete
>   * @dev: &struct device for the video device
>   * @cdev: character device
>   * @v4l2_dev: pointer to &struct v4l2_device parent
> @@ -221,6 +228,12 @@ struct video_device
>  
>  	u32 device_caps;
>  
> +	/* unplug handling */
> +	bool unplugged;
> +	int access_refcount;
> +	spinlock_t unplug_lock;
> +	wait_queue_head_t unplug_wait;
> +
>  	/* sysfs */
>  	struct device dev;
>  	struct cdev *cdev;
> @@ -506,4 +519,38 @@ static inline int video_is_registered(struct video_device *vdev)
>  	return test_bit(V4L2_FL_REGISTERED, &vdev->flags);
>  }
>  
> +/**
> + * video_device_enter - enter a device access section
> + * @vdev: the video device
> + *
> + * This function marks and protects the beginning of a section that should not
> + * be entered after the device has been unplugged. The section end is marked
> + * with a call to video_device_exit(). Calls to this function can be nested.
> + *
> + * Returns:
> + * 0 on success or a negative error code if the device has been unplugged.
> + */
> +int video_device_enter(struct video_device *vdev);
> +
> +/**
> + * video_device_exit - exit a device access section
> + * @vdev: the video device
> + *
> + * This function marks the end of a section entered with video_device_enter().
> + * It wakes up all tasks waiting on video_device_unplug() for device access
> + * sections to be exited.
> + */
> +void video_device_exit(struct video_device *vdev);
> +
> +/**
> + * video_device_unplug - mark a device as unplugged
> + * @vdev: the video device
> + *
> + * Mark a device as unplugged, causing all subsequent calls to
> + * video_device_enter() to return an error. If a device access section is
> + * currently being executed the function waits until the section is exited as
> + * marked by a call to video_device_exit().
> + */
> +void video_device_unplug(struct video_device *vdev);
> +
>  #endif /* _V4L2_DEV_H */

Thanks,
Mauro
Greg Kroah-Hartman Nov. 23, 2017, 2:21 p.m. UTC | #5
On Thu, Nov 23, 2017 at 11:07:51AM -0200, Mauro Carvalho Chehab wrote:
> Hi Laurent,
> 
> Em Thu, 16 Nov 2017 02:33:48 +0200
> Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com> escreveu:
> 
> > Device unplug being asynchronous, it naturally races with operations
> > performed by userspace through ioctls or other file operations on video
> > device nodes.
> > 
> > This leads to potential access to freed memory or to other resources
> > during device access if unplug occurs during device access. To solve
> > this, we need to wait until all device access completes when unplugging
> > the device, and block all further access when the device is being
> > unplugged.
> > 
> > Three new functions are introduced. The video_device_enter() and
> > video_device_exit() functions must be used to mark entry and exit from
> > all code sections where the device can be accessed. The
> > video_device_unplug() function is then used in the unplug handler to
> > mark the device as being unplugged and wait for all access to complete.
> > 
> > As an example mark the ioctl handler as a device access section. Other
> > file operations need to be protected too, and blocking ioctls (such as
> > VIDIOC_DQBUF) need to be handled as well.
> > 
> > Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
> > ---
> >  drivers/media/v4l2-core/v4l2-dev.c | 57 ++++++++++++++++++++++++++++++++++++++
> >  include/media/v4l2-dev.h           | 47 +++++++++++++++++++++++++++++++
> >  2 files changed, 104 insertions(+)
> > 
> > diff --git a/drivers/media/v4l2-core/v4l2-dev.c b/drivers/media/v4l2-core/v4l2-dev.c
> > index c647ba648805..c73c6d49e7cf 100644
> > --- a/drivers/media/v4l2-core/v4l2-dev.c
> > +++ b/drivers/media/v4l2-core/v4l2-dev.c
> > @@ -156,6 +156,52 @@ void video_device_release_empty(struct video_device *vdev)
> >  }
> >  EXPORT_SYMBOL(video_device_release_empty);
> >  
> > +int video_device_enter(struct video_device *vdev)
> > +{
> > +	bool unplugged;
> > +
> > +	spin_lock(&vdev->unplug_lock);
> > +	unplugged = vdev->unplugged;
> > +	if (!unplugged)
> > +		vdev->access_refcount++;
> > +	spin_unlock(&vdev->unplug_lock);
> > +
> > +	return unplugged ? -ENODEV : 0;
> > +}
> > +EXPORT_SYMBOL_GPL(video_device_enter);
> > +
> > +void video_device_exit(struct video_device *vdev)
> > +{
> > +	bool wake_up;
> > +
> > +	spin_lock(&vdev->unplug_lock);
> > +	WARN_ON(--vdev->access_refcount < 0);
> > +	wake_up = vdev->access_refcount == 0;
> > +	spin_unlock(&vdev->unplug_lock);
> > +
> > +	if (wake_up)
> > +		wake_up(&vdev->unplug_wait);
> > +}
> > +EXPORT_SYMBOL_GPL(video_device_exit);
> > +
> > +void video_device_unplug(struct video_device *vdev)
> > +{
> > +	bool unplug_blocked;
> > +
> > +	spin_lock(&vdev->unplug_lock);
> > +	unplug_blocked = vdev->access_refcount > 0;
> > +	vdev->unplugged = true;
> > +	spin_unlock(&vdev->unplug_lock);
> > +
> > +	if (!unplug_blocked)
> > +		return;
> > +
> > +	if (!wait_event_timeout(vdev->unplug_wait, !vdev->access_refcount,
> > +				msecs_to_jiffies(150000)))
> > +		WARN(1, "Timeout waiting for device access to complete\n");
> > +}
> > +EXPORT_SYMBOL_GPL(video_device_unplug);
> > +
> >  static inline void video_get(struct video_device *vdev)
> >  {
> >  	get_device(&vdev->dev);
> > @@ -351,6 +397,10 @@ static long v4l2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
> >  	struct video_device *vdev = video_devdata(filp);
> >  	int ret = -ENODEV;
> >  
> > +	ret = video_device_enter(vdev);
> > +	if (ret < 0)
> > +		return ret;
> > +
> >  	if (vdev->fops->unlocked_ioctl) {
> >  		struct mutex *lock = v4l2_ioctl_get_lock(vdev, cmd);
> >  
> > @@ -358,11 +408,14 @@ static long v4l2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
> >  			return -ERESTARTSYS;
> >  		if (video_is_registered(vdev))
> >  			ret = vdev->fops->unlocked_ioctl(filp, cmd, arg);
> > +		else
> > +			ret = -ENODEV;
> >  		if (lock)
> >  			mutex_unlock(lock);
> >  	} else
> >  		ret = -ENOTTY;
> >  
> > +	video_device_exit(vdev);
> >  	return ret;
> >  }
> >  
> > @@ -841,6 +894,10 @@ int __video_register_device(struct video_device *vdev, int type, int nr,
> >  	if (WARN_ON(!vdev->v4l2_dev))
> >  		return -EINVAL;
> >  
> > +	/* unplug support */
> > +	spin_lock_init(&vdev->unplug_lock);
> > +	init_waitqueue_head(&vdev->unplug_wait);
> > +
> 
> I'm c/c Greg here, as I don't think, that, the way it is, it
> belongs at V4L2 core.
> 
> I mean: if this is a problem that affects all drivers, it would should, 
> instead, be sitting at the driver's core.

What "problem" is trying to be solved here?  One where your specific
device type races with your specific user api?  Doesn't sound very
driver-core specific to me :)

As an example, what other bus/device type needs this?  If you can see
others that do, then sure, move it into the core.  But for just one, I
don't know if that's really needed here, do you?

thanks,

greg k-h
Mauro Carvalho Chehab Dec. 12, 2017, 12:39 p.m. UTC | #6
Em Thu, 23 Nov 2017 15:21:01 +0100
Greg Kroah-Hartman <gregkh@linuxfoundation.org> escreveu:

> On Thu, Nov 23, 2017 at 11:07:51AM -0200, Mauro Carvalho Chehab wrote:
> > Hi Laurent,
> > 
> > Em Thu, 16 Nov 2017 02:33:48 +0200
> > Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com> escreveu:
> >   
> > > Device unplug being asynchronous, it naturally races with operations
> > > performed by userspace through ioctls or other file operations on video
> > > device nodes.
> > > 
> > > This leads to potential access to freed memory or to other resources
> > > during device access if unplug occurs during device access. To solve
> > > this, we need to wait until all device access completes when unplugging
> > > the device, and block all further access when the device is being
> > > unplugged.
> > > 
> > > Three new functions are introduced. The video_device_enter() and
> > > video_device_exit() functions must be used to mark entry and exit from
> > > all code sections where the device can be accessed. The
> > > video_device_unplug() function is then used in the unplug handler to
> > > mark the device as being unplugged and wait for all access to complete.
> > > 
> > > As an example mark the ioctl handler as a device access section. Other
> > > file operations need to be protected too, and blocking ioctls (such as
> > > VIDIOC_DQBUF) need to be handled as well.
> > > 
> > > Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
> > > ---
> > >  drivers/media/v4l2-core/v4l2-dev.c | 57 ++++++++++++++++++++++++++++++++++++++
> > >  include/media/v4l2-dev.h           | 47 +++++++++++++++++++++++++++++++
> > >  2 files changed, 104 insertions(+)
> > > 
> > > diff --git a/drivers/media/v4l2-core/v4l2-dev.c b/drivers/media/v4l2-core/v4l2-dev.c
> > > index c647ba648805..c73c6d49e7cf 100644
> > > --- a/drivers/media/v4l2-core/v4l2-dev.c
> > > +++ b/drivers/media/v4l2-core/v4l2-dev.c
> > > @@ -156,6 +156,52 @@ void video_device_release_empty(struct video_device *vdev)
> > >  }
> > >  EXPORT_SYMBOL(video_device_release_empty);
> > >  
> > > +int video_device_enter(struct video_device *vdev)
> > > +{
> > > +	bool unplugged;
> > > +
> > > +	spin_lock(&vdev->unplug_lock);
> > > +	unplugged = vdev->unplugged;
> > > +	if (!unplugged)
> > > +		vdev->access_refcount++;
> > > +	spin_unlock(&vdev->unplug_lock);
> > > +
> > > +	return unplugged ? -ENODEV : 0;
> > > +}
> > > +EXPORT_SYMBOL_GPL(video_device_enter);
> > > +
> > > +void video_device_exit(struct video_device *vdev)
> > > +{
> > > +	bool wake_up;
> > > +
> > > +	spin_lock(&vdev->unplug_lock);
> > > +	WARN_ON(--vdev->access_refcount < 0);
> > > +	wake_up = vdev->access_refcount == 0;
> > > +	spin_unlock(&vdev->unplug_lock);
> > > +
> > > +	if (wake_up)
> > > +		wake_up(&vdev->unplug_wait);
> > > +}
> > > +EXPORT_SYMBOL_GPL(video_device_exit);
> > > +
> > > +void video_device_unplug(struct video_device *vdev)
> > > +{
> > > +	bool unplug_blocked;
> > > +
> > > +	spin_lock(&vdev->unplug_lock);
> > > +	unplug_blocked = vdev->access_refcount > 0;
> > > +	vdev->unplugged = true;
> > > +	spin_unlock(&vdev->unplug_lock);
> > > +
> > > +	if (!unplug_blocked)
> > > +		return;
> > > +
> > > +	if (!wait_event_timeout(vdev->unplug_wait, !vdev->access_refcount,
> > > +				msecs_to_jiffies(150000)))
> > > +		WARN(1, "Timeout waiting for device access to complete\n");
> > > +}
> > > +EXPORT_SYMBOL_GPL(video_device_unplug);
> > > +
> > >  static inline void video_get(struct video_device *vdev)
> > >  {
> > >  	get_device(&vdev->dev);
> > > @@ -351,6 +397,10 @@ static long v4l2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
> > >  	struct video_device *vdev = video_devdata(filp);
> > >  	int ret = -ENODEV;
> > >  
> > > +	ret = video_device_enter(vdev);
> > > +	if (ret < 0)
> > > +		return ret;
> > > +
> > >  	if (vdev->fops->unlocked_ioctl) {
> > >  		struct mutex *lock = v4l2_ioctl_get_lock(vdev, cmd);
> > >  
> > > @@ -358,11 +408,14 @@ static long v4l2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
> > >  			return -ERESTARTSYS;
> > >  		if (video_is_registered(vdev))
> > >  			ret = vdev->fops->unlocked_ioctl(filp, cmd, arg);
> > > +		else
> > > +			ret = -ENODEV;
> > >  		if (lock)
> > >  			mutex_unlock(lock);
> > >  	} else
> > >  		ret = -ENOTTY;
> > >  
> > > +	video_device_exit(vdev);
> > >  	return ret;
> > >  }
> > >  
> > > @@ -841,6 +894,10 @@ int __video_register_device(struct video_device *vdev, int type, int nr,
> > >  	if (WARN_ON(!vdev->v4l2_dev))
> > >  		return -EINVAL;
> > >  
> > > +	/* unplug support */
> > > +	spin_lock_init(&vdev->unplug_lock);
> > > +	init_waitqueue_head(&vdev->unplug_wait);
> > > +  
> > 
> > I'm c/c Greg here, as I don't think, that, the way it is, it
> > belongs at V4L2 core.
> > 
> > I mean: if this is a problem that affects all drivers, it would should, 
> > instead, be sitting at the driver's core.  
> 
> What "problem" is trying to be solved here?  One where your specific
> device type races with your specific user api?  Doesn't sound very
> driver-core specific to me :)
> 
> As an example, what other bus/device type needs this?  If you can see
> others that do, then sure, move it into the core.  But for just one, I
> don't know if that's really needed here, do you?

The problem that this patch is trying to solve is related to
hot-unplugging a platform device[1]. Quoting Laurent's comments about
it on IRC:

	"it applies to all platform devices at least"

	"I'm actually considering moving that code to the device core as
	 it applies to all drivers that have device nodes, but I'm not
	 sure that will be feasible it won't hurt other devices
	 it applies to I2C and SPI as well at least and PCI too"

[1] https://linuxtv.org/irc/irclogger_log/media-maint?date=2017-11-23,Thu

For USB drivers, hot-unplug seems to work fine for media drivers,
although keeping it working require tests from time to time, as
it is not hard to break hotplug support. so, currently, I don't see
the need of anything like that for non-platform drivers. 

My point here is that adding a new lock inside the media core that 
would be used for all media drivers, including the ones that don't need
doesn't sound a good idea.

So, if this is something that applies to all platform drivers (including
non-media ones), or if are there anything that can be done at driver's core
that would improve hotplug support for all buses, making it more stable or
easier to implement, then it would make sense to improve the driver's core. 
If not, this sounds a driver-specific issue whose fix doesn't belong to the
media core.

Thanks,
Mauro
Laurent Pinchart Dec. 12, 2017, 2:42 p.m. UTC | #7
Hi Sakari,

On Thursday, 16 November 2017 14:32:37 EET Sakari Ailus wrote:
> On Thu, Nov 16, 2017 at 02:33:48AM +0200, Laurent Pinchart wrote:
> > Device unplug being asynchronous, it naturally races with operations
> > performed by userspace through ioctls or other file operations on video
> > device nodes.
> > 
> > This leads to potential access to freed memory or to other resources
> > during device access if unplug occurs during device access. To solve
> > this, we need to wait until all device access completes when unplugging
> > the device, and block all further access when the device is being
> > unplugged.
> > 
> > Three new functions are introduced. The video_device_enter() and
> > video_device_exit() functions must be used to mark entry and exit from
> > all code sections where the device can be accessed. The
> 
> I wonder if it'd help splitting this patch into two: one that introduces
> the mechanism and the other that uses it. Up to you.

Sure, I'm not opposed to that. The second patch would be a bit small, but that 
will change when I'll add support for more system calls.

> Nevertheless, it'd be better to have other system calls covered as well.
> 
> > video_device_unplug() function is then used in the unplug handler to
> > mark the device as being unplugged and wait for all access to complete.
> > 
> > As an example mark the ioctl handler as a device access section. Other
> > file operations need to be protected too, and blocking ioctls (such as
> > VIDIOC_DQBUF) need to be handled as well.
> > 
> > Signed-off-by: Laurent Pinchart
> > <laurent.pinchart+renesas@ideasonboard.com>
> > ---
> > 
> >  drivers/media/v4l2-core/v4l2-dev.c | 57 +++++++++++++++++++++++++++++++++
> >  include/media/v4l2-dev.h           | 47 +++++++++++++++++++++++++++++++
> >  2 files changed, 104 insertions(+)
> > 
> > diff --git a/drivers/media/v4l2-core/v4l2-dev.c
> > b/drivers/media/v4l2-core/v4l2-dev.c index c647ba648805..c73c6d49e7cf
> > 100644
> > --- a/drivers/media/v4l2-core/v4l2-dev.c
> > +++ b/drivers/media/v4l2-core/v4l2-dev.c
> > @@ -156,6 +156,52 @@ void video_device_release_empty(struct video_device
> > *vdev)> 
> >  }
> >  EXPORT_SYMBOL(video_device_release_empty);
> > 
> > +int video_device_enter(struct video_device *vdev)
> > +{
> > +	bool unplugged;
> > +
> > +	spin_lock(&vdev->unplug_lock);
> > +	unplugged = vdev->unplugged;
> > +	if (!unplugged)
> > +		vdev->access_refcount++;
> > +	spin_unlock(&vdev->unplug_lock);
> > +
> > +	return unplugged ? -ENODEV : 0;
> > +}
> > +EXPORT_SYMBOL_GPL(video_device_enter);
> > +
> > +void video_device_exit(struct video_device *vdev)
> > +{
> > +	bool wake_up;
> > +
> > +	spin_lock(&vdev->unplug_lock);
> > +	WARN_ON(--vdev->access_refcount < 0);
> > +	wake_up = vdev->access_refcount == 0;
> > +	spin_unlock(&vdev->unplug_lock);
> > +
> > +	if (wake_up)
> > +		wake_up(&vdev->unplug_wait);
> > +}
> > +EXPORT_SYMBOL_GPL(video_device_exit);
> 
> Is there a need to export the two, i.e. wouldn't you only call them from
> the framework, or the same module?

There could be a need to call these functions from entry points that are not 
controlled by the V4L2 core, such as sysfs or debugfs. We could keep the 
functions internal for now and only export them when the need arises, but if 
we want to document how drivers need to handle race conditions between device 
access and device unbind, we need to have them exported.

> > +
> > +void video_device_unplug(struct video_device *vdev)
> > +{
> > +	bool unplug_blocked;
> > +
> > +	spin_lock(&vdev->unplug_lock);
> > +	unplug_blocked = vdev->access_refcount > 0;
> > +	vdev->unplugged = true;
> 
> Shouldn't this be set to false in video_register_device()?

Yes it should. I currently rely on the fact that the memory is zeroed when 
allocated, but I shouldn't. I'll fix that.

> > +	spin_unlock(&vdev->unplug_lock);
> > +
> > +	if (!unplug_blocked)
> > +		return;
> 
> Not necessary, wait_event_timeout() handles this already.

I'll fix this as well.

> > +
> > +	if (!wait_event_timeout(vdev->unplug_wait, !vdev->access_refcount,
> > +				msecs_to_jiffies(150000)))
> > +		WARN(1, "Timeout waiting for device access to complete\n");
> 
> Why a timeout? Does this get somehow less problematic over time? :-)

Not quite :-) This should never happen, but driver and/or core bugs could 
cause a timeout. In that case I think proceeding after a timeout is a better 
option than deadlocking forever.

> > +}
> > +EXPORT_SYMBOL_GPL(video_device_unplug);
> > +
> > 
> >  static inline void video_get(struct video_device *vdev)
> >  {
> >  
> >  	get_device(&vdev->dev);
> > 
> > @@ -351,6 +397,10 @@ static long v4l2_ioctl(struct file *filp, unsigned
> > int cmd, unsigned long arg)> 
> >  	struct video_device *vdev = video_devdata(filp);
> >  	int ret = -ENODEV;
> 
> You could leave ret unassigned here.

I'll fix that.

> > +	ret = video_device_enter(vdev);
> > +	if (ret < 0)
> > +		return ret;
> > +
> >  	if (vdev->fops->unlocked_ioctl) {
> >  		struct mutex *lock = v4l2_ioctl_get_lock(vdev, cmd);
> > 
> > @@ -358,11 +408,14 @@ static long v4l2_ioctl(struct file *filp, unsigned
> > int cmd, unsigned long arg)
> >  			return -ERESTARTSYS;
> >  		if (video_is_registered(vdev))
> >  			ret = vdev->fops->unlocked_ioctl(filp, cmd, arg);
> > +		else
> > +			ret = -ENODEV;
> >  		if (lock)
> >  			mutex_unlock(lock);
> >  	} else
> >  		ret = -ENOTTY;
> > 
> > +	video_device_exit(vdev);
> >  	return ret;
> >  }
> > 
> > @@ -841,6 +894,10 @@ int __video_register_device(struct video_device
> > *vdev, int type, int nr,> 
> >  	if (WARN_ON(!vdev->v4l2_dev))
> >  		return -EINVAL;
> > 
> > +	/* unplug support */
> > +	spin_lock_init(&vdev->unplug_lock);
> > +	init_waitqueue_head(&vdev->unplug_wait);
> > +
> >  	/* v4l2_fh support */
> >  	spin_lock_init(&vdev->fh_lock);
> >  	INIT_LIST_HEAD(&vdev->fh_list);
> > diff --git a/include/media/v4l2-dev.h b/include/media/v4l2-dev.h
> > index e657614521e3..365a94f91dc9 100644
> > --- a/include/media/v4l2-dev.h
> > +++ b/include/media/v4l2-dev.h
> > @@ -15,6 +15,7 @@
> >  #include <linux/cdev.h>
> >  #include <linux/mutex.h>
> >  #include <linux/videodev2.h>
> > +#include <linux/wait.h>
> > 
> >  #include <media/media-entity.h>
> > 
> > @@ -178,6 +179,12 @@ struct v4l2_file_operations {
> >   * @pipe: &struct media_pipeline
> >   * @fops: pointer to &struct v4l2_file_operations for the video device
> >   * @device_caps: device capabilities as used in v4l2_capabilities
> > + * @unplugged: when set the device has been unplugged and no device
> > access
> > + *	section can be entered
> > + * @access_refcount: number of device access section currently running
> > for the
> > + *	device
> > + * @unplug_lock: protects unplugged and access_refcount
> > + * @unplug_wait: wait queue to wait for device access sections to
> > complete
> >   * @dev: &struct device for the video device
> >   * @cdev: character device
> >   * @v4l2_dev: pointer to &struct v4l2_device parent
> > @@ -221,6 +228,12 @@ struct video_device
> > 
> >  	u32 device_caps;
> > 
> > +	/* unplug handling */
> > +	bool unplugged;
> > +	int access_refcount;
> 
> Could you use refcount_t instead, to avoid integer overflow issues?

I'd love to, but refcount_t has no provision for refcounts that start at 0.

void refcount_inc(refcount_t *r)
{
        WARN_ONCE(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-
after-free.\n");
}
EXPORT_SYMBOL(refcount_inc);

> > +	spinlock_t unplug_lock;
> > +	wait_queue_head_t unplug_wait;
> > +
> >  	/* sysfs */
> >  	struct device dev;
> >  	struct cdev *cdev;
> > @@ -506,4 +519,38 @@ static inline int video_is_registered(struct
> > video_device *vdev)
> >  	return test_bit(V4L2_FL_REGISTERED, &vdev->flags);
> >  }
> > 
> > +/**
> > + * video_device_enter - enter a device access section
> > + * @vdev: the video device
> > + *
> > + * This function marks and protects the beginning of a section that
> > should not
> > + * be entered after the device has been unplugged. The section end is
> > marked
> > + * with a call to video_device_exit(). Calls to this function can be
> > nested.
> > + *
> > + * Returns:
> > + * 0 on success or a negative error code if the device has been
> > unplugged.
> > + */
> > +int video_device_enter(struct video_device *vdev);
> > +
> > +/**
> > + * video_device_exit - exit a device access section
> > + * @vdev: the video device
> > + *
> > + * This function marks the end of a section entered with
> > video_device_enter().
> > + * It wakes up all tasks waiting on video_device_unplug() for device
> > access
> > + * sections to be exited.
> > + */
> > +void video_device_exit(struct video_device *vdev);
> > +
> > +/**
> > + * video_device_unplug - mark a device as unplugged
> > + * @vdev: the video device
> > + *
> > + * Mark a device as unplugged, causing all subsequent calls to
> > + * video_device_enter() to return an error. If a device access section is
> > + * currently being executed the function waits until the section is
> > exited as
> > + * marked by a call to video_device_exit().
> > + */
> > +void video_device_unplug(struct video_device *vdev);
> > +
> > 
> >  #endif /* _V4L2_DEV_H */
Laurent Pinchart Dec. 12, 2017, 2:44 p.m. UTC | #8
Hi Kieran,

On Thursday, 16 November 2017 16:47:11 EET Kieran Bingham wrote:
> On 16/11/17 12:32, Sakari Ailus wrote:
> > Hi Laurent,
> > 
> > Thank you for the initiative to bring up and address the matter!
> 
> I concur - this looks like a good start towards managing the issue.
> 
> One potential thing spotted on top of Sakari's review inline below, of
> course I suspect this was more of a prototype/consideration patch.
> 
> > On Thu, Nov 16, 2017 at 02:33:48AM +0200, Laurent Pinchart wrote:
> >> Device unplug being asynchronous, it naturally races with operations
> >> performed by userspace through ioctls or other file operations on video
> >> device nodes.
> >> 
> >> This leads to potential access to freed memory or to other resources
> >> during device access if unplug occurs during device access. To solve
> >> this, we need to wait until all device access completes when unplugging
> >> the device, and block all further access when the device is being
> >> unplugged.
> >> 
> >> Three new functions are introduced. The video_device_enter() and
> >> video_device_exit() functions must be used to mark entry and exit from
> >> all code sections where the device can be accessed. The
> > 
> > I wonder if it'd help splitting this patch into two: one that introduces
> > the mechanism and the other that uses it. Up to you.
> > 
> > Nevertheless, it'd be better to have other system calls covered as well.
> > 
> >> video_device_unplug() function is then used in the unplug handler to
> >> mark the device as being unplugged and wait for all access to complete.
> >> 
> >> As an example mark the ioctl handler as a device access section. Other
> >> file operations need to be protected too, and blocking ioctls (such as
> >> VIDIOC_DQBUF) need to be handled as well.
> >> 
> >> Signed-off-by: Laurent Pinchart
> >> <laurent.pinchart+renesas@ideasonboard.com>
> >> ---
> >> 
> >>  drivers/media/v4l2-core/v4l2-dev.c | 57 ++++++++++++++++++++++++++++++++
> >>  include/media/v4l2-dev.h           | 47 +++++++++++++++++++++++++++++++
> >>  2 files changed, 104 insertions(+)
> >> 
> >> diff --git a/drivers/media/v4l2-core/v4l2-dev.c
> >> b/drivers/media/v4l2-core/v4l2-dev.c index c647ba648805..c73c6d49e7cf
> >> 100644
> >> --- a/drivers/media/v4l2-core/v4l2-dev.c
> >> +++ b/drivers/media/v4l2-core/v4l2-dev.c

[snip]

> >> @@ -351,6 +397,10 @@ static long v4l2_ioctl(struct file *filp, unsigned
> >> int cmd, unsigned long arg)
> >>  	struct video_device *vdev = video_devdata(filp);
> >>  	int ret = -ENODEV;
> > 
> > You could leave ret unassigned here.
> > 
> >> +	ret = video_device_enter(vdev);
> >> +	if (ret < 0)
> >> +		return ret;
> >> +
> >>  	if (vdev->fops->unlocked_ioctl) {
> >>  		struct mutex *lock = v4l2_ioctl_get_lock(vdev, cmd);
> >> 
> >> @@ -358,11 +408,14 @@ static long v4l2_ioctl(struct file *filp, unsigned
> >> int cmd, unsigned long arg)
> >>  			return -ERESTARTSYS;
> 
> It looks like that return -ERESTARTSYS might need a video_device_exit() too?

Oops. Of course. I'll fix that. Thanks for catching the issue.

> >>  		if (video_is_registered(vdev))
> >>  			ret = vdev->fops->unlocked_ioctl(filp, cmd, arg);
> >> +		else
> >> +			ret = -ENODEV;
> >>  		if (lock)
> >>  			mutex_unlock(lock);
> >>  	} else
> >>  		ret = -ENOTTY;
> >> 
> >> +	video_device_exit(vdev);
> >>  	return ret;
> >>  }
Laurent Pinchart Dec. 12, 2017, 2:49 p.m. UTC | #9
Hi Hans,

On Friday, 17 November 2017 13:09:20 EET Hans Verkuil wrote:
> On 16/11/17 01:33, Laurent Pinchart wrote:
> > Device unplug being asynchronous, it naturally races with operations
> > performed by userspace through ioctls or other file operations on video
> > device nodes.
> > 
> > This leads to potential access to freed memory or to other resources
> > during device access if unplug occurs during device access. To solve
> > this, we need to wait until all device access completes when unplugging
> > the device, and block all further access when the device is being
> > unplugged.
> > 
> > Three new functions are introduced. The video_device_enter() and
> > video_device_exit() functions must be used to mark entry and exit from
> > all code sections where the device can be accessed. The
> > video_device_unplug() function is then used in the unplug handler to
> > mark the device as being unplugged and wait for all access to complete.
> > 
> > As an example mark the ioctl handler as a device access section. Other
> > file operations need to be protected too, and blocking ioctls (such as
> > VIDIOC_DQBUF) need to be handled as well.
> 
> As long as the queue field in struct video_device is filled in properly
> this shouldn't be a problem.
> 
> This looks pretty good, simple and straightforward.

Thank you.

> Do we need something similar for media_device? Other devices?

I believe so, which is why I'm wondering whether this shouldn't somehow go to 
the device core (and in the cdev core). Not all devices will need such an 
infrastructure as some subsystems already protect against device access after 
unbind (USB is one of them if I'm not mistaken), but it certainly shouldn't 
hurt.

DRM is also considering a similar implementation, but based on srcu to lower 
the performance penalty. I feel that's going a bit overboard but I have no 
numbers yet to confirm or infirm the suspicion.

> > Signed-off-by: Laurent Pinchart
> > <laurent.pinchart+renesas@ideasonboard.com>
> > ---
> > 
> >  drivers/media/v4l2-core/v4l2-dev.c | 57 +++++++++++++++++++++++++++++++++
> >  include/media/v4l2-dev.h           | 47 +++++++++++++++++++++++++++++++
> >  2 files changed, 104 insertions(+)

[snip]
Laurent Pinchart Dec. 12, 2017, 2:54 p.m. UTC | #10
Hi Mauro,

On Thursday, 23 November 2017 15:07:51 EET Mauro Carvalho Chehab wrote:
> Em Thu, 16 Nov 2017 02:33:48 +0200 Laurent Pinchart escreveu:
> > Device unplug being asynchronous, it naturally races with operations
> > performed by userspace through ioctls or other file operations on video
> > device nodes.
> > 
> > This leads to potential access to freed memory or to other resources
> > during device access if unplug occurs during device access. To solve
> > this, we need to wait until all device access completes when unplugging
> > the device, and block all further access when the device is being
> > unplugged.
> > 
> > Three new functions are introduced. The video_device_enter() and
> > video_device_exit() functions must be used to mark entry and exit from
> > all code sections where the device can be accessed. The
> > video_device_unplug() function is then used in the unplug handler to
> > mark the device as being unplugged and wait for all access to complete.
> > 
> > As an example mark the ioctl handler as a device access section. Other
> > file operations need to be protected too, and blocking ioctls (such as
> > VIDIOC_DQBUF) need to be handled as well.
> > 
> > Signed-off-by: Laurent Pinchart
> > <laurent.pinchart+renesas@ideasonboard.com>
> > ---
> > 
> >  drivers/media/v4l2-core/v4l2-dev.c | 57 +++++++++++++++++++++++++++++++++
> >  include/media/v4l2-dev.h           | 47 +++++++++++++++++++++++++++++++
> >  2 files changed, 104 insertions(+)

[snip]

> I'm c/c Greg here, as I don't think, that, the way it is, it
> belongs at V4L2 core.
> 
> I mean: if this is a problem that affects all drivers, it would should,
> instead, be sitting at the driver's core.
> 
> If, otherwise, this is specific to rcar-vin (and other platform drivers),
> that's likely should be inside the drivers that require it.
> 
> That's said, I remember we had to add some things in the past for
> USB drivers hot unplug to happen softly. I don't remember the specifics
> anymore, but it was solved by both a V4L2 core and changes at USB
> drivers.
> 
> One of the things that it was added, on that time, was this patch:
> 
> 	commit ae6cfaace120f4330715b56265ce0e4a710e1276
> 	Author: Hans Verkuil <hverkuil@xs4all.nl>
> 	Date:   Sat Mar 14 08:28:45 2009 -0300
> 
> 	    V4L/DVB (11044): v4l2-device: add v4l2_device_disconnect
> 
> So, I would expect that a change at V4L2 core (or at driver core) that
> would be applied would also be affecting USB drivers disconnect logic
> and v4l2_device_disconnect() function.

I wasn't aware of v4l2_device_disconnect(), thank you for pointing it out.

I don't know what the full history behind that function is, but I don't see 
why it's needed. struct device instances are refcounted, the struct device 
corresponding to a USB device or USB interface doesn't get freed when a device 
is disconnected as long as a reference is present. We take such a reference in 
v4l2_device_register(), so there should be no problem.

[snip]
Laurent Pinchart Dec. 12, 2017, 3:24 p.m. UTC | #11
Hi Greg and Mauro,

On Thursday, 23 November 2017 16:21:01 EET Greg Kroah-Hartman wrote:
> On Thu, Nov 23, 2017 at 11:07:51AM -0200, Mauro Carvalho Chehab wrote:
> > Em Thu, 16 Nov 2017 02:33:48 +0200 Laurent Pinchart escreveu:
> >> Device unplug being asynchronous, it naturally races with operations
> >> performed by userspace through ioctls or other file operations on video
> >> device nodes.
> >> 
> >> This leads to potential access to freed memory or to other resources
> >> during device access if unplug occurs during device access. To solve
> >> this, we need to wait until all device access completes when unplugging
> >> the device, and block all further access when the device is being
> >> unplugged.
> >> 
> >> Three new functions are introduced. The video_device_enter() and
> >> video_device_exit() functions must be used to mark entry and exit from
> >> all code sections where the device can be accessed. The
> >> video_device_unplug() function is then used in the unplug handler to
> >> mark the device as being unplugged and wait for all access to complete.
> >> 
> >> As an example mark the ioctl handler as a device access section. Other
> >> file operations need to be protected too, and blocking ioctls (such as
> >> VIDIOC_DQBUF) need to be handled as well.
> >> 
> >> Signed-off-by: Laurent Pinchart
> >> <laurent.pinchart+renesas@ideasonboard.com>
> >> ---
> >> 
> >>  drivers/media/v4l2-core/v4l2-dev.c | 57 ++++++++++++++++++++++++++++++++
> >>  include/media/v4l2-dev.h           | 47 +++++++++++++++++++++++++++++++
> >>  2 files changed, 104 insertions(+)
> >> 
> >> diff --git a/drivers/media/v4l2-core/v4l2-dev.c
> >> b/drivers/media/v4l2-core/v4l2-dev.c index c647ba648805..c73c6d49e7cf
> >> 100644
> >> --- a/drivers/media/v4l2-core/v4l2-dev.c
> >> +++ b/drivers/media/v4l2-core/v4l2-dev.c
> >> @@ -156,6 +156,52 @@ void video_device_release_empty(struct video_device
> >> *vdev)
> >>  }
> >>  EXPORT_SYMBOL(video_device_release_empty);
> >> 
> >> +int video_device_enter(struct video_device *vdev)
> >> +{
> >> +	bool unplugged;
> >> +
> >> +	spin_lock(&vdev->unplug_lock);
> >> +	unplugged = vdev->unplugged;
> >> +	if (!unplugged)
> >> +		vdev->access_refcount++;
> >> +	spin_unlock(&vdev->unplug_lock);
> >> +
> >> +	return unplugged ? -ENODEV : 0;
> >> +}
> >> +EXPORT_SYMBOL_GPL(video_device_enter);
> >> +
> >> +void video_device_exit(struct video_device *vdev)
> >> +{
> >> +	bool wake_up;
> >> +
> >> +	spin_lock(&vdev->unplug_lock);
> >> +	WARN_ON(--vdev->access_refcount < 0);
> >> +	wake_up = vdev->access_refcount == 0;
> >> +	spin_unlock(&vdev->unplug_lock);
> >> +
> >> +	if (wake_up)
> >> +		wake_up(&vdev->unplug_wait);
> >> +}
> >> +EXPORT_SYMBOL_GPL(video_device_exit);
> >> +
> >> +void video_device_unplug(struct video_device *vdev)
> >> +{
> >> +	bool unplug_blocked;
> >> +
> >> +	spin_lock(&vdev->unplug_lock);
> >> +	unplug_blocked = vdev->access_refcount > 0;
> >> +	vdev->unplugged = true;
> >> +	spin_unlock(&vdev->unplug_lock);
> >> +
> >> +	if (!unplug_blocked)
> >> +		return;
> >> +
> >> +	if (!wait_event_timeout(vdev->unplug_wait, !vdev->access_refcount,
> >> +				msecs_to_jiffies(150000)))
> >> +		WARN(1, "Timeout waiting for device access to complete\n");
> >> +}
> >> +EXPORT_SYMBOL_GPL(video_device_unplug);
> >> +
> >>  static inline void video_get(struct video_device *vdev)
> >>  {
> >>  	get_device(&vdev->dev);
> >> @@ -351,6 +397,10 @@ static long v4l2_ioctl(struct file *filp, unsigned
> >> int cmd, unsigned long arg)
> >>  	struct video_device *vdev = video_devdata(filp);
> >>  	int ret = -ENODEV;
> >> 
> >> +	ret = video_device_enter(vdev);
> >> +	if (ret < 0)
> >> +		return ret;
> >> +
> >>  	if (vdev->fops->unlocked_ioctl) {
> >>  		struct mutex *lock = v4l2_ioctl_get_lock(vdev, cmd);
> >> 
> >> @@ -358,11 +408,14 @@ static long v4l2_ioctl(struct file *filp, unsigned
> >> int cmd, unsigned long arg)
> >>  			return -ERESTARTSYS;
> >>  		if (video_is_registered(vdev))
> >>  			ret = vdev->fops->unlocked_ioctl(filp, cmd, arg);
> >> +		else
> >> +			ret = -ENODEV;
> >>  		if (lock)
> >>  			mutex_unlock(lock);
> >>  	} else
> >>  		ret = -ENOTTY;
> >> 
> >> +	video_device_exit(vdev);
> >>  	return ret;
> >>  }
> >> 
> >> @@ -841,6 +894,10 @@ int __video_register_device(struct video_device
> >> *vdev, int type, int nr,
> >>  	if (WARN_ON(!vdev->v4l2_dev))
> >>  		return -EINVAL;
> >> 
> >> +	/* unplug support */
> >> +	spin_lock_init(&vdev->unplug_lock);
> >> +	init_waitqueue_head(&vdev->unplug_wait);
> >> +
> > 
> > I'm c/c Greg here, as I don't think, that, the way it is, it
> > belongs at V4L2 core.
> > 
> > I mean: if this is a problem that affects all drivers, it would should,
> > instead, be sitting at the driver's core.
> 
> What "problem" is trying to be solved here?  One where your specific
> device type races with your specific user api?  Doesn't sound very
> driver-core specific to me :)
> 
> As an example, what other bus/device type needs this?  If you can see
> others that do, then sure, move it into the core.  But for just one, I
> don't know if that's really needed here, do you?

This patch attempts to fix the race between a device being unbound from its 
driver (through sysfs or device unplug) and the driver accessing the device 
resources (either directly, for instance using MMIO for platform devices, or 
indirectly through bus-specific APIs, for instance for USB or I2C).

Drivers are not allowed to access device resources after the device is unbound 
from the driver. This is a requirement set by the device core, and on which 
the devres API relies. For instance an attempt to perform MMIO after unbind 
will oops as the MMIO memory mapped by devm_ioremap* will have been unmapped.

Some bus types already protect against such races, at least partially. The USB 
core, for instance, returns an error from usb_submit_urb() is the USB device 
associated with the URB has been disconnected. However, even there the API 
seems to be subject to race conditions as locking appears to be missing. Other 
bus types don't attempt to offer any protection (such as I2C), or simply can't 
when resources are accessed directly (such as platform devices).

I can see two ways to fix this issue. One of them is to protect individual 
device accesses. For USB or I2C devices, that would mean protecting every API 
call that can access the device against disconnection. For platform devices, 
that would mean wrapping MMIO access with functions offering similar 
protection. I don't think this is viable as it would introduce performance 
issues.

The other option is to protect device access in the entry points. For 
character devices, the entry points are the file operations. This is the 
approach I have selected for this patch series.

This series implements the protection for V4L2 ioctls in the v4l2_ioctl() 
entry point. It needs to be extended to other file operations, which I will do 
in the next version. However, the race condition is in no way specific to V4L2 
but is common to all devices that expose a character device to userspace. We 
could fix it in V4L2, and separately in DRM (https://lists.freedesktop.org/
archives/dri-devel/2017-September/152115.html), and separately in every 
subsystem, with a slightly different method each time, but that raises the 
question of whether a common implementation at the driver core (for the unbind 
part) and cdev (for the access part) wouldn't be better.
Laurent Pinchart Dec. 12, 2017, 3:32 p.m. UTC | #12
Hi Mauro,

On Tuesday, 12 December 2017 14:39:32 EET Mauro Carvalho Chehab wrote:
> Em Thu, 23 Nov 2017 15:21:01 +0100 Greg Kroah-Hartman escreveu:
> > On Thu, Nov 23, 2017 at 11:07:51AM -0200, Mauro Carvalho Chehab wrote:
> >> Em Thu, 16 Nov 2017 02:33:48 +0200 Laurent Pinchart escreveu:
> >>> Device unplug being asynchronous, it naturally races with operations
> >>> performed by userspace through ioctls or other file operations on
> >>> video device nodes.
> >>> 
> >>> This leads to potential access to freed memory or to other resources
> >>> during device access if unplug occurs during device access. To solve
> >>> this, we need to wait until all device access completes when
> >>> unplugging the device, and block all further access when the device is
> >>> being unplugged.
> >>> 
> >>> Three new functions are introduced. The video_device_enter() and
> >>> video_device_exit() functions must be used to mark entry and exit from
> >>> all code sections where the device can be accessed. The
> >>> video_device_unplug() function is then used in the unplug handler to
> >>> mark the device as being unplugged and wait for all access to
> >>> complete.
> >>> 
> >>> As an example mark the ioctl handler as a device access section. Other
> >>> file operations need to be protected too, and blocking ioctls (such as
> >>> VIDIOC_DQBUF) need to be handled as well.
> >>> 
> >>> Signed-off-by: Laurent Pinchart
> >>> <laurent.pinchart+renesas@ideasonboard.com>
> >>> ---
> >>> 
> >>>  drivers/media/v4l2-core/v4l2-dev.c | 57 ++++++++++++++++++++++++++++++
> >>>  include/media/v4l2-dev.h           | 47 ++++++++++++++++++++++++++++++
> >>>  2 files changed, 104 insertions(+)

[snip]

> >> I'm c/c Greg here, as I don't think, that, the way it is, it
> >> belongs at V4L2 core.
> >> 
> >> I mean: if this is a problem that affects all drivers, it would should,
> >> instead, be sitting at the driver's core.
> > 
> > What "problem" is trying to be solved here?  One where your specific
> > device type races with your specific user api?  Doesn't sound very
> > driver-core specific to me :)
> > 
> > As an example, what other bus/device type needs this?  If you can see
> > others that do, then sure, move it into the core.  But for just one, I
> > don't know if that's really needed here, do you?
> 
> The problem that this patch is trying to solve is related to
> hot-unplugging a platform device[1]. Quoting Laurent's comments about
> it on IRC:
> 
> 	"it applies to all platform devices at least"

Note how I said "at least" :-) I2C, SPI and PCI devices are affected too, and 
after a closer look at USB today I believe USB devices are affected as well.

> 	"I'm actually considering moving that code to the device core as
> 	 it applies to all drivers that have device nodes, but I'm not
> 	 sure that will be feasible it won't hurt other devices
> 	 it applies to I2C and SPI as well at least and PCI too"
> 
> [1] https://linuxtv.org/irc/irclogger_log/media-maint?date=2017-11-23,Thu
> 
> For USB drivers, hot-unplug seems to work fine for media drivers,
> although keeping it working require tests from time to time, as
> it is not hard to break hotplug support. so, currently, I don't see
> the need of anything like that for non-platform drivers.

I2C, SPI and PCI are non-platform drivers, and USB seems to be affected too. 
The race window is small, making it difficult to reproduce the problem, but 
with carefully placed delays it gets much easier to hit the race.

> My point here is that adding a new lock inside the media core that
> would be used for all media drivers, including the ones that don't need
> doesn't sound a good idea.

Why not, if it doesn't affect performances (or anything else) negatively ?

> So, if this is something that applies to all platform drivers (including
> non-media ones), or if are there anything that can be done at driver's core
> that would improve hotplug support for all buses, making it more stable or
> easier to implement, then it would make sense to improve the driver's core.
> If not, this sounds a driver-specific issue whose fix doesn't belong to the
> media core.

It's clearly not a driver-specific issue as most, if not all, drivers are 
affected.

I've replied to Greg's e-mail in this thread with more details, let's try to 
keep the discussion there to avoid splitting it in multiple sub-threads.
Sakari Ailus Dec. 14, 2017, 12:42 p.m. UTC | #13
Hi Laurent,

On Tue, Dec 12, 2017 at 04:42:23PM +0200, Laurent Pinchart wrote:
...
> > > diff --git a/drivers/media/v4l2-core/v4l2-dev.c
> > > b/drivers/media/v4l2-core/v4l2-dev.c index c647ba648805..c73c6d49e7cf
> > > 100644
> > > --- a/drivers/media/v4l2-core/v4l2-dev.c
> > > +++ b/drivers/media/v4l2-core/v4l2-dev.c
> > > @@ -156,6 +156,52 @@ void video_device_release_empty(struct video_device
> > > *vdev)> 
> > >  }
> > >  EXPORT_SYMBOL(video_device_release_empty);
> > > 
> > > +int video_device_enter(struct video_device *vdev)
> > > +{
> > > +	bool unplugged;
> > > +
> > > +	spin_lock(&vdev->unplug_lock);
> > > +	unplugged = vdev->unplugged;
> > > +	if (!unplugged)
> > > +		vdev->access_refcount++;
> > > +	spin_unlock(&vdev->unplug_lock);
> > > +
> > > +	return unplugged ? -ENODEV : 0;
> > > +}
> > > +EXPORT_SYMBOL_GPL(video_device_enter);
> > > +
> > > +void video_device_exit(struct video_device *vdev)
> > > +{
> > > +	bool wake_up;
> > > +
> > > +	spin_lock(&vdev->unplug_lock);
> > > +	WARN_ON(--vdev->access_refcount < 0);
> > > +	wake_up = vdev->access_refcount == 0;
> > > +	spin_unlock(&vdev->unplug_lock);
> > > +
> > > +	if (wake_up)
> > > +		wake_up(&vdev->unplug_wait);
> > > +}
> > > +EXPORT_SYMBOL_GPL(video_device_exit);
> > 
> > Is there a need to export the two, i.e. wouldn't you only call them from
> > the framework, or the same module?
> 
> There could be a need to call these functions from entry points that are not 
> controlled by the V4L2 core, such as sysfs or debugfs. We could keep the 
> functions internal for now and only export them when the need arises, but if 
> we want to document how drivers need to handle race conditions between device 
> access and device unbind, we need to have them exported.

Ack.

> 
> > > +
> > > +void video_device_unplug(struct video_device *vdev)
> > > +{
> > > +	bool unplug_blocked;
> > > +
> > > +	spin_lock(&vdev->unplug_lock);
> > > +	unplug_blocked = vdev->access_refcount > 0;
> > > +	vdev->unplugged = true;
> > 
> > Shouldn't this be set to false in video_register_device()?
> 
> Yes it should. I currently rely on the fact that the memory is zeroed when 
> allocated, but I shouldn't. I'll fix that.
> 
> > > +	spin_unlock(&vdev->unplug_lock);
> > > +
> > > +	if (!unplug_blocked)
> > > +		return;
> > 
> > Not necessary, wait_event_timeout() handles this already.
> 
> I'll fix this as well.
> 
> > > +
> > > +	if (!wait_event_timeout(vdev->unplug_wait, !vdev->access_refcount,
> > > +				msecs_to_jiffies(150000)))
> > > +		WARN(1, "Timeout waiting for device access to complete\n");
> > 
> > Why a timeout? Does this get somehow less problematic over time? :-)
> 
> Not quite :-) This should never happen, but driver and/or core bugs could 
> cause a timeout. In that case I think proceeding after a timeout is a better 
> option than deadlocking forever.

This also depends on the frame rate; you could have a very low frame rate
configured on a sensor and the device could be actually in middle of a DMA
operation while the timeout is hit.

I'm not sure if there's a number that can be said to be safe here.

Wouldn't it be better to kill the user space process using the device
instead? Naturally the wait will have to be interruptible.

...

> > > @@ -221,6 +228,12 @@ struct video_device
> > > 
> > >  	u32 device_caps;
> > > 
> > > +	/* unplug handling */
> > > +	bool unplugged;
> > > +	int access_refcount;
> > 
> > Could you use refcount_t instead, to avoid integer overflow issues?
> 
> I'd love to, but refcount_t has no provision for refcounts that start at 0.
> 
> void refcount_inc(refcount_t *r)
> {
>         WARN_ONCE(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-
> after-free.\n");
> }
> EXPORT_SYMBOL(refcount_inc);

Ah. I wonder if you could simply initialise in probe and decrement it again
in remove?

You could use refcount_inc_not_zero directly, too.

> 
> > > +	spinlock_t unplug_lock;
> > > +	wait_queue_head_t unplug_wait;
> > > +
> > >  	/* sysfs */
> > >  	struct device dev;
> > >  	struct cdev *cdev;
diff mbox

Patch

diff --git a/drivers/media/v4l2-core/v4l2-dev.c b/drivers/media/v4l2-core/v4l2-dev.c
index c647ba648805..c73c6d49e7cf 100644
--- a/drivers/media/v4l2-core/v4l2-dev.c
+++ b/drivers/media/v4l2-core/v4l2-dev.c
@@ -156,6 +156,52 @@  void video_device_release_empty(struct video_device *vdev)
 }
 EXPORT_SYMBOL(video_device_release_empty);
 
+int video_device_enter(struct video_device *vdev)
+{
+	bool unplugged;
+
+	spin_lock(&vdev->unplug_lock);
+	unplugged = vdev->unplugged;
+	if (!unplugged)
+		vdev->access_refcount++;
+	spin_unlock(&vdev->unplug_lock);
+
+	return unplugged ? -ENODEV : 0;
+}
+EXPORT_SYMBOL_GPL(video_device_enter);
+
+void video_device_exit(struct video_device *vdev)
+{
+	bool wake_up;
+
+	spin_lock(&vdev->unplug_lock);
+	WARN_ON(--vdev->access_refcount < 0);
+	wake_up = vdev->access_refcount == 0;
+	spin_unlock(&vdev->unplug_lock);
+
+	if (wake_up)
+		wake_up(&vdev->unplug_wait);
+}
+EXPORT_SYMBOL_GPL(video_device_exit);
+
+void video_device_unplug(struct video_device *vdev)
+{
+	bool unplug_blocked;
+
+	spin_lock(&vdev->unplug_lock);
+	unplug_blocked = vdev->access_refcount > 0;
+	vdev->unplugged = true;
+	spin_unlock(&vdev->unplug_lock);
+
+	if (!unplug_blocked)
+		return;
+
+	if (!wait_event_timeout(vdev->unplug_wait, !vdev->access_refcount,
+				msecs_to_jiffies(150000)))
+		WARN(1, "Timeout waiting for device access to complete\n");
+}
+EXPORT_SYMBOL_GPL(video_device_unplug);
+
 static inline void video_get(struct video_device *vdev)
 {
 	get_device(&vdev->dev);
@@ -351,6 +397,10 @@  static long v4l2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 	struct video_device *vdev = video_devdata(filp);
 	int ret = -ENODEV;
 
+	ret = video_device_enter(vdev);
+	if (ret < 0)
+		return ret;
+
 	if (vdev->fops->unlocked_ioctl) {
 		struct mutex *lock = v4l2_ioctl_get_lock(vdev, cmd);
 
@@ -358,11 +408,14 @@  static long v4l2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 			return -ERESTARTSYS;
 		if (video_is_registered(vdev))
 			ret = vdev->fops->unlocked_ioctl(filp, cmd, arg);
+		else
+			ret = -ENODEV;
 		if (lock)
 			mutex_unlock(lock);
 	} else
 		ret = -ENOTTY;
 
+	video_device_exit(vdev);
 	return ret;
 }
 
@@ -841,6 +894,10 @@  int __video_register_device(struct video_device *vdev, int type, int nr,
 	if (WARN_ON(!vdev->v4l2_dev))
 		return -EINVAL;
 
+	/* unplug support */
+	spin_lock_init(&vdev->unplug_lock);
+	init_waitqueue_head(&vdev->unplug_wait);
+
 	/* v4l2_fh support */
 	spin_lock_init(&vdev->fh_lock);
 	INIT_LIST_HEAD(&vdev->fh_list);
diff --git a/include/media/v4l2-dev.h b/include/media/v4l2-dev.h
index e657614521e3..365a94f91dc9 100644
--- a/include/media/v4l2-dev.h
+++ b/include/media/v4l2-dev.h
@@ -15,6 +15,7 @@ 
 #include <linux/cdev.h>
 #include <linux/mutex.h>
 #include <linux/videodev2.h>
+#include <linux/wait.h>
 
 #include <media/media-entity.h>
 
@@ -178,6 +179,12 @@  struct v4l2_file_operations {
  * @pipe: &struct media_pipeline
  * @fops: pointer to &struct v4l2_file_operations for the video device
  * @device_caps: device capabilities as used in v4l2_capabilities
+ * @unplugged: when set the device has been unplugged and no device access
+ *	section can be entered
+ * @access_refcount: number of device access section currently running for the
+ *	device
+ * @unplug_lock: protects unplugged and access_refcount
+ * @unplug_wait: wait queue to wait for device access sections to complete
  * @dev: &struct device for the video device
  * @cdev: character device
  * @v4l2_dev: pointer to &struct v4l2_device parent
@@ -221,6 +228,12 @@  struct video_device
 
 	u32 device_caps;
 
+	/* unplug handling */
+	bool unplugged;
+	int access_refcount;
+	spinlock_t unplug_lock;
+	wait_queue_head_t unplug_wait;
+
 	/* sysfs */
 	struct device dev;
 	struct cdev *cdev;
@@ -506,4 +519,38 @@  static inline int video_is_registered(struct video_device *vdev)
 	return test_bit(V4L2_FL_REGISTERED, &vdev->flags);
 }
 
+/**
+ * video_device_enter - enter a device access section
+ * @vdev: the video device
+ *
+ * This function marks and protects the beginning of a section that should not
+ * be entered after the device has been unplugged. The section end is marked
+ * with a call to video_device_exit(). Calls to this function can be nested.
+ *
+ * Returns:
+ * 0 on success or a negative error code if the device has been unplugged.
+ */
+int video_device_enter(struct video_device *vdev);
+
+/**
+ * video_device_exit - exit a device access section
+ * @vdev: the video device
+ *
+ * This function marks the end of a section entered with video_device_enter().
+ * It wakes up all tasks waiting on video_device_unplug() for device access
+ * sections to be exited.
+ */
+void video_device_exit(struct video_device *vdev);
+
+/**
+ * video_device_unplug - mark a device as unplugged
+ * @vdev: the video device
+ *
+ * Mark a device as unplugged, causing all subsequent calls to
+ * video_device_enter() to return an error. If a device access section is
+ * currently being executed the function waits until the section is exited as
+ * marked by a call to video_device_exit().
+ */
+void video_device_unplug(struct video_device *vdev);
+
 #endif /* _V4L2_DEV_H */