diff mbox

[v2,1/4] vfio-mdev: Fix remove race

Message ID 20161222202157.15541.12925.stgit@gimli.home (mailing list archive)
State New, archived
Headers show

Commit Message

Alex Williamson Dec. 22, 2016, 8:21 p.m. UTC
Using the mtty mdev sample driver we can generate a remove race by
starting one shell that continuously creates mtty devices and several
other shells all attempting to remove devices, in my case four remove
shells.  The fault occurs in mdev_remove_sysfs_files() where the
passed type arg is NULL, which suggests we've received a struct device
in mdev_device_remove() but it's in some sort of teardown state.  The
solution here is to make use of the accidentally unused list_head on
the mdev_device such that the mdev core keeps a list of all the mdev
devices.  This allows us to validate that we have a valid mdev before
we start removal, remove it from the list to prevent others from
working on it, and if the vendor driver refuses to remove, we can
re-add it to the list.

Cc: Kirti Wankhede <kwankhede@nvidia.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/mdev/mdev_core.c |   36 ++++++++++++++++++++++++++++++++++--
 1 file changed, 34 insertions(+), 2 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Kirti Wankhede Dec. 25, 2016, 5:09 p.m. UTC | #1
On 12/23/2016 1:51 AM, Alex Williamson wrote:
> Using the mtty mdev sample driver we can generate a remove race by
> starting one shell that continuously creates mtty devices and several
> other shells all attempting to remove devices, in my case four remove
> shells.  The fault occurs in mdev_remove_sysfs_files() where the
> passed type arg is NULL, which suggests we've received a struct device
> in mdev_device_remove() but it's in some sort of teardown state.  The
> solution here is to make use of the accidentally unused list_head on
> the mdev_device such that the mdev core keeps a list of all the mdev
> devices.  This allows us to validate that we have a valid mdev before
> we start removal, remove it from the list to prevent others from
> working on it, and if the vendor driver refuses to remove, we can
> re-add it to the list.
> 

Alex,

Writing 1 to 'remove' first removes the attribute file itself, i.e. it
calls device_remove_file_self(dev, attr). So if the file has already been
removed, device_remove_file_self() should return false. Doesn't it return
false in that case? kernfs_remove_self() holds the mutex that should
handle this condition.

Thanks,
Kirti.


> Cc: Kirti Wankhede <kwankhede@nvidia.com>
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> ---
>  drivers/vfio/mdev/mdev_core.c |   36 ++++++++++++++++++++++++++++++++++--
>  1 file changed, 34 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
> index be1ee89..6bb4d4c 100644
> --- a/drivers/vfio/mdev/mdev_core.c
> +++ b/drivers/vfio/mdev/mdev_core.c
> @@ -27,6 +27,9 @@
>  static DEFINE_MUTEX(parent_list_lock);
>  static struct class_compat *mdev_bus_compat_class;
>  
> +static LIST_HEAD(mdev_list);
> +static DEFINE_MUTEX(mdev_list_lock);
> +
>  static int _find_mdev_device(struct device *dev, void *data)
>  {
>  	struct mdev_device *mdev;
> @@ -316,6 +319,11 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
>  	dev_dbg(&mdev->dev, "MDEV: created\n");
>  
>  	mutex_unlock(&parent->lock);
> +
> +	mutex_lock(&mdev_list_lock);
> +	list_add(&mdev->next, &mdev_list);
> +	mutex_unlock(&mdev_list_lock);
> +
>  	return ret;
>  
>  create_failed:
> @@ -329,12 +337,30 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
>  
>  int mdev_device_remove(struct device *dev, bool force_remove)
>  {
> -	struct mdev_device *mdev;
> +	struct mdev_device *mdev, *tmp;
>  	struct parent_device *parent;
>  	struct mdev_type *type;
>  	int ret;
> +	bool found = false;
>  
>  	mdev = to_mdev_device(dev);
> +
> +	mutex_lock(&mdev_list_lock);
> +	list_for_each_entry(tmp, &mdev_list, next) {
> +		if (tmp == mdev) {
> +			found = true;
> +			break;
> +		}
> +	}
> +
> +	if (found)
> +		list_del(&mdev->next);
> +
> +	mutex_unlock(&mdev_list_lock);
> +
> +	if (!found)
> +		return -ENODEV;
> +
>  	type = to_mdev_type(mdev->type_kobj);
>  	parent = mdev->parent;
>  	mutex_lock(&parent->lock);
> @@ -342,6 +368,11 @@ int mdev_device_remove(struct device *dev, bool force_remove)
>  	ret = mdev_device_remove_ops(mdev, force_remove);
>  	if (ret) {
>  		mutex_unlock(&parent->lock);
> +
> +		mutex_lock(&mdev_list_lock);
> +		list_add(&mdev->next, &mdev_list);
> +		mutex_unlock(&mdev_list_lock);
> +
>  		return ret;
>  	}
>  
> @@ -349,7 +380,8 @@ int mdev_device_remove(struct device *dev, bool force_remove)
>  	device_unregister(dev);
>  	mutex_unlock(&parent->lock);
>  	mdev_put_parent(parent);
> -	return ret;
> +
> +	return 0;
>  }
>  
>  static int __init mdev_init(void)
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alex Williamson Dec. 25, 2016, 7:40 p.m. UTC | #2
On Sun, 25 Dec 2016 22:39:47 +0530
Kirti Wankhede <kwankhede@nvidia.com> wrote:

> On 12/23/2016 1:51 AM, Alex Williamson wrote:
> > Using the mtty mdev sample driver we can generate a remove race by
> > starting one shell that continuously creates mtty devices and several
> > other shells all attempting to remove devices, in my case four remove
> > shells.  The fault occurs in mdev_remove_sysfs_files() where the
> > passed type arg is NULL, which suggests we've received a struct device
> > in mdev_device_remove() but it's in some sort of teardown state.  The
> > solution here is to make use of the accidentally unused list_head on
> > the mdev_device such that the mdev core keeps a list of all the mdev
> > devices.  This allows us to validate that we have a valid mdev before
> > we start removal, remove it from the list to prevent others from
> > working on it, and if the vendor driver refuses to remove, we can
> > re-add it to the list.
> >   
> 
> Alex,
> 
> Writing 1 on 'remove' first removes itself, i.e. calls
> device_remove_file_self(dev, attr). So if the file is removed then
> device_remove_file_self() should return false, isn't that returns false?
> kernfs_remove_self() hold the mutex that should handle this condition.

In theory, I agree.  In practice I was able to generate the race
described.  We're getting through to call mdev_device_remove with
a struct device that resolves to an mdev where the type_kobj is
NULL, presumably it's been freed.  Maybe there's a better fix
within kernfs, but this sanitizes the mdev on our end to resolve
it.  To see the issue, simply run 'while true; do uuidgen >
create; done', then from a few other shells loop finding mdev
devices and remove any that are found.  Set dmesg to only print
critical messages or else it'll slow create and delete to the
point where it'll be difficult to get the race.  Thanks,

Alex
 
> > Cc: Kirti Wankhede <kwankhede@nvidia.com>
> > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> > ---
> >  drivers/vfio/mdev/mdev_core.c |   36 ++++++++++++++++++++++++++++++++++--
> >  1 file changed, 34 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
> > index be1ee89..6bb4d4c 100644
> > --- a/drivers/vfio/mdev/mdev_core.c
> > +++ b/drivers/vfio/mdev/mdev_core.c
> > @@ -27,6 +27,9 @@
> >  static DEFINE_MUTEX(parent_list_lock);
> >  static struct class_compat *mdev_bus_compat_class;
> >  
> > +static LIST_HEAD(mdev_list);
> > +static DEFINE_MUTEX(mdev_list_lock);
> > +
> >  static int _find_mdev_device(struct device *dev, void *data)
> >  {
> >  	struct mdev_device *mdev;
> > @@ -316,6 +319,11 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
> >  	dev_dbg(&mdev->dev, "MDEV: created\n");
> >  
> >  	mutex_unlock(&parent->lock);
> > +
> > +	mutex_lock(&mdev_list_lock);
> > +	list_add(&mdev->next, &mdev_list);
> > +	mutex_unlock(&mdev_list_lock);
> > +
> >  	return ret;
> >  
> >  create_failed:
> > @@ -329,12 +337,30 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
> >  
> >  int mdev_device_remove(struct device *dev, bool force_remove)
> >  {
> > -	struct mdev_device *mdev;
> > +	struct mdev_device *mdev, *tmp;
> >  	struct parent_device *parent;
> >  	struct mdev_type *type;
> >  	int ret;
> > +	bool found = false;
> >  
> >  	mdev = to_mdev_device(dev);
> > +
> > +	mutex_lock(&mdev_list_lock);
> > +	list_for_each_entry(tmp, &mdev_list, next) {
> > +		if (tmp == mdev) {
> > +			found = true;
> > +			break;
> > +		}
> > +	}
> > +
> > +	if (found)
> > +		list_del(&mdev->next);
> > +
> > +	mutex_unlock(&mdev_list_lock);
> > +
> > +	if (!found)
> > +		return -ENODEV;
> > +
> >  	type = to_mdev_type(mdev->type_kobj);
> >  	parent = mdev->parent;
> >  	mutex_lock(&parent->lock);
> > @@ -342,6 +368,11 @@ int mdev_device_remove(struct device *dev, bool force_remove)
> >  	ret = mdev_device_remove_ops(mdev, force_remove);
> >  	if (ret) {
> >  		mutex_unlock(&parent->lock);
> > +
> > +		mutex_lock(&mdev_list_lock);
> > +		list_add(&mdev->next, &mdev_list);
> > +		mutex_unlock(&mdev_list_lock);
> > +
> >  		return ret;
> >  	}
> >  
> > @@ -349,7 +380,8 @@ int mdev_device_remove(struct device *dev, bool force_remove)
> >  	device_unregister(dev);
> >  	mutex_unlock(&parent->lock);
> >  	mdev_put_parent(parent);
> > -	return ret;
> > +
> > +	return 0;
> >  }
> >  
> >  static int __init mdev_init(void)
> >   

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Kirti Wankhede Dec. 26, 2016, 3:28 a.m. UTC | #3
On 12/26/2016 1:10 AM, Alex Williamson wrote:
> On Sun, 25 Dec 2016 22:39:47 +0530
> Kirti Wankhede <kwankhede@nvidia.com> wrote:
> 
>> On 12/23/2016 1:51 AM, Alex Williamson wrote:
>>> Using the mtty mdev sample driver we can generate a remove race by
>>> starting one shell that continuously creates mtty devices and several
>>> other shells all attempting to remove devices, in my case four remove
>>> shells.  The fault occurs in mdev_remove_sysfs_files() where the
>>> passed type arg is NULL, which suggests we've received a struct device
>>> in mdev_device_remove() but it's in some sort of teardown state.  The
>>> solution here is to make use of the accidentally unused list_head on
>>> the mdev_device such that the mdev core keeps a list of all the mdev
>>> devices.  This allows us to validate that we have a valid mdev before
>>> we start removal, remove it from the list to prevent others from
>>> working on it, and if the vendor driver refuses to remove, we can
>>> re-add it to the list.
>>>   
>>
>> Alex,
>>
>> Writing 1 on 'remove' first removes itself, i.e. calls
>> device_remove_file_self(dev, attr). So if the file is removed then
>> device_remove_file_self() should return false, isn't that returns false?
>> kernfs_remove_self() hold the mutex that should handle this condition.
> 
> In theory, I agree.  In practice I was able to generate the race
> described.  We're getting through to call mdev_device_remove with
> a struct device that resolves to an mdev where the type_kobj is
> NULL, presumably it's been freed.  Maybe there's a better fix
> within kernfs, but this sanitizes the mdev on our end to resolve
> it.  To see the issue, simply run 'while true; do uuidgen >
> create; done', then from a few other shells loop finding mdev
> devices and remove any that are found.  Set dmesg to only print
> critical messages or else it'll slow create and delete to the
> point where it'll be difficult to get the race.  Thanks,
> 

I see. pci-sysfs also uses a mutex around its remove function, even after
device_remove_file_self() has returned true. Yes, there is probably a
better fix within kernfs.
This change looks good to me.

Thanks,
Kirti
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
index be1ee89..6bb4d4c 100644
--- a/drivers/vfio/mdev/mdev_core.c
+++ b/drivers/vfio/mdev/mdev_core.c
@@ -27,6 +27,9 @@ 
 static DEFINE_MUTEX(parent_list_lock);
 static struct class_compat *mdev_bus_compat_class;
 
+static LIST_HEAD(mdev_list);
+static DEFINE_MUTEX(mdev_list_lock);
+
 static int _find_mdev_device(struct device *dev, void *data)
 {
 	struct mdev_device *mdev;
@@ -316,6 +319,11 @@  int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
 	dev_dbg(&mdev->dev, "MDEV: created\n");
 
 	mutex_unlock(&parent->lock);
+
+	mutex_lock(&mdev_list_lock);
+	list_add(&mdev->next, &mdev_list);
+	mutex_unlock(&mdev_list_lock);
+
 	return ret;
 
 create_failed:
@@ -329,12 +337,30 @@  int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
 
 int mdev_device_remove(struct device *dev, bool force_remove)
 {
-	struct mdev_device *mdev;
+	struct mdev_device *mdev, *tmp;
 	struct parent_device *parent;
 	struct mdev_type *type;
 	int ret;
+	bool found = false;
 
 	mdev = to_mdev_device(dev);
+
+	mutex_lock(&mdev_list_lock);
+	list_for_each_entry(tmp, &mdev_list, next) {
+		if (tmp == mdev) {
+			found = true;
+			break;
+		}
+	}
+
+	if (found)
+		list_del(&mdev->next);
+
+	mutex_unlock(&mdev_list_lock);
+
+	if (!found)
+		return -ENODEV;
+
 	type = to_mdev_type(mdev->type_kobj);
 	parent = mdev->parent;
 	mutex_lock(&parent->lock);
@@ -342,6 +368,11 @@  int mdev_device_remove(struct device *dev, bool force_remove)
 	ret = mdev_device_remove_ops(mdev, force_remove);
 	if (ret) {
 		mutex_unlock(&parent->lock);
+
+		mutex_lock(&mdev_list_lock);
+		list_add(&mdev->next, &mdev_list);
+		mutex_unlock(&mdev_list_lock);
+
 		return ret;
 	}
 
@@ -349,7 +380,8 @@  int mdev_device_remove(struct device *dev, bool force_remove)
 	device_unregister(dev);
 	mutex_unlock(&parent->lock);
 	mdev_put_parent(parent);
-	return ret;
+
+	return 0;
 }
 
 static int __init mdev_init(void)