Message ID | 20170609220031.31986.80141.stgit@gimli.home (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Hi Alex, On 10/06/2017 00:00, Alex Williamson wrote: > If a device is bound to a non-vfio, non-whitelisted driver while a > group is in use, then the integrity of the group is compromised and > will result in hitting a BUG_ON. This code tries to avoid this case > by mangling driver_override to force a no-match for the driver. The > driver-core will either follow-up with a DRIVER_NOT_BOUND (preferred) > or BOUND_DRIVER, at which point we can remove the driver_override > mangling. > > A complication here is that even though the window between these > notifications is expected to be extremely small, the vfio group could > be removed, which would prevent us from finding the group again to > remove the driver_override. We therefore take a group reference when > adding to driver_override and release it when removed. A second > complication is that driver_override can be modified by the system > admin through sysfs. To avoid trivial interference, we add a non- > user-visible UUID to the group and use this as part of the mangle > string. > > The above blocks binding to a driver that would compromise the host, > but we'd also like to avoid reaching that step if possible. For this > we add a wait_event_timeout() with a short, 1 second timeout, which is > highly effective in allowing empty groups to finish cleanup. > > Signed-off-by: Alex Williamson <alex.williamson@redhat.com> > --- > drivers/vfio/vfio.c | 151 +++++++++++++++++++++++++++++++++++++++++++++++++-- > 1 file changed, 144 insertions(+), 7 deletions(-) > > diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c > index a25ee4930200..ea786d512faa 100644 > --- a/drivers/vfio/vfio.c > +++ b/drivers/vfio/vfio.c > @@ -25,6 +25,7 @@ > #include <linux/miscdevice.h> > #include <linux/module.h> > #include <linux/mutex.h> > +#include <linux/platform_device.h> > #include <linux/pci.h> > #include <linux/rwsem.h> > #include <linux/sched.h> > @@ -32,8 +33,10 @@ > #include <linux/stat.h> > #include <linux/string.h> > #include <linux/uaccess.h> > +#include <linux/uuid.h> > #include <linux/vfio.h> > #include <linux/wait.h> > +#include <linux/amba/bus.h> > > #define DRIVER_VERSION "0.3" > #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>" > @@ -95,6 +98,7 @@ struct vfio_group { > bool noiommu; > struct kvm *kvm; > struct blocking_notifier_head notifier; > + unsigned char uuid[16]; > }; > > struct vfio_device { > @@ -352,6 +356,8 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group) > > group->nb.notifier_call = vfio_iommu_group_notifier; > > + generate_random_uuid(group->uuid); > + > /* > * blocking notifiers acquire a rwsem around registering and hold > * it around callback. Therefore, need to register outside of > @@ -728,6 +734,116 @@ static int vfio_group_nb_verify(struct vfio_group *group, struct device *dev) > return vfio_dev_viable(dev, group); > } > > +#define VFIO_TAG_PREFIX "#vfio_group:" > + > +static char **vfio_find_driver_override(struct device *dev) > +{ > + if (dev_is_pci(dev)) { > + struct pci_dev *pdev = to_pci_dev(dev); > + return &pdev->driver_override; > + } else if (dev->bus == &platform_bus_type) { > + struct platform_device *pdev = to_platform_device(dev); > + return &pdev->driver_override; > +#ifdef CONFIG_ARM_AMBA > + } else if (dev->bus == &amba_bustype) { EXPORT_SYMBOL_GPL(amba_bustype); needs to be added in drivers/amba/bus.c otherwise this causes a link error ERROR: "amba_bustype" [drivers/vfio/vfio.ko] undefined! Otherwise it looks good to me and I 've tested this with dual port igb on ARM. Reviewed-by: Eric Auger <eric.auger@redhat.com> Tested-by: Eric Auger <eric.auger@redhat.com> Thanks Eric > + struct amba_device *adev = to_amba_device(dev); > + return &adev->driver_override; > +#endif > + } > + > + return NULL; > +} > + > +/* > + * If we're about to bind to something other than a known whitelisted driver > + * or known vfio bus driver, try to avert it with driver_override. > + */ > +static void vfio_group_nb_pre_bind(struct vfio_group *group, struct device *dev) > +{ > + struct vfio_bus_driver *driver; > + struct device_driver *drv = ACCESS_ONCE(dev->driver); > + char **driver_override; > + > + if (vfio_dev_whitelisted(dev, drv)) > + return; /* Binding to known "innocuous" device/driver */ > + > + mutex_lock(&vfio.bus_drivers_lock); > + list_for_each_entry(driver, &vfio.bus_drivers_list, vfio_next) { > + if (driver->drv == drv) { > + mutex_unlock(&vfio.bus_drivers_lock); > + return; /* Binding to known vfio bus driver, ok */ > + } > + } > + mutex_unlock(&vfio.bus_drivers_lock); > + > + /* Can we stall slightly to let users fall off? */ > + if (list_empty(&group->device_list)) { > + if (wait_event_timeout(vfio.release_q, > + !atomic_read(&group->container_users), HZ)) > + return; > + } > + > + driver_override = vfio_find_driver_override(dev); > + if (driver_override) { > + char tag[50], *new = NULL, *old = *driver_override; > + > + snprintf(tag, sizeof(tag), "%s%pU", > + VFIO_TAG_PREFIX, group->uuid); > + > + if (old && strstr(old, tag)) > + return; /* block already in place */ > + > + new = kasprintf(GFP_KERNEL, "%s%s", old ? old : "", tag); > + if (new) { > + *driver_override = new; > + kfree(old); > + vfio_group_get(group); > + dev_warn(dev, "vfio: Blocking unsafe driver bind\n"); > + return; > + } > + } > + > + dev_warn(dev, "vfio: Unsafe driver binding to in-use group!\n"); > +} > + > +/* If we've mangled driver_override, remove it */ > +static void vfio_group_nb_post_bind(struct vfio_group *group, > + struct device *dev) > +{ > + char **driver_override = vfio_find_driver_override(dev); > + > + if (driver_override && *driver_override) { > + char tag[50], *new, *start, *end, *old = *driver_override; > + > + snprintf(tag, sizeof(tag), "%s%pU", > + VFIO_TAG_PREFIX, group->uuid); > + > + start = strstr(old, tag); > + if (start) { > + end = start + strlen(tag); > + > + if (old + strlen(old) > end) > + memmove(start, end, > + strlen(old) - (end - old) + 1); > + else > + *start = 0; > + > + if (strlen(old)) { > + new = kasprintf(GFP_KERNEL, "%s", old); > + if (new) { > + *driver_override = new; > + kfree(old); > + } /* else, in-place terminated, ok */ > + } else { > + *driver_override = NULL; > + kfree(old); > + } > + > + vfio_group_put(group); > + } > + } > +} > + > static int vfio_iommu_group_notifier(struct notifier_block *nb, > unsigned long action, void *data) > { > @@ -757,14 +873,23 @@ static int vfio_iommu_group_notifier(struct notifier_block *nb, > */ > break; > case IOMMU_GROUP_NOTIFY_BIND_DRIVER: > - pr_debug("%s: Device %s, group %d binding to driver\n", > + pr_debug("%s: Device %s, group %d binding to driver %s\n", > __func__, dev_name(dev), > - iommu_group_id(group->iommu_group)); > + iommu_group_id(group->iommu_group), dev->driver->name); > + if (vfio_group_nb_verify(group, dev)) > + vfio_group_nb_pre_bind(group, dev); > + break; > + case IOMMU_GROUP_NOTIFY_DRIVER_NOT_BOUND: > + pr_debug("%s: Device %s, group %d binding fail to driver %s\n", > + __func__, dev_name(dev), > + iommu_group_id(group->iommu_group), dev->driver->name); > + vfio_group_nb_post_bind(group, dev); > break; > case IOMMU_GROUP_NOTIFY_BOUND_DRIVER: > pr_debug("%s: Device %s, group %d bound to driver %s\n", > __func__, dev_name(dev), > iommu_group_id(group->iommu_group), dev->driver->name); > + vfio_group_nb_post_bind(group, dev); > BUG_ON(vfio_group_nb_verify(group, dev)); > break; > case IOMMU_GROUP_NOTIFY_UNBIND_DRIVER: > @@ -1351,6 +1476,7 @@ static int vfio_group_unset_container(struct vfio_group *group) > if (users != 1) > return -EBUSY; > > + wake_up(&vfio.release_q); > __vfio_group_unset_container(group); > > return 0; > @@ -1364,7 +1490,11 @@ static int vfio_group_unset_container(struct vfio_group *group) > */ > static void vfio_group_try_dissolve_container(struct vfio_group *group) > { > - if (0 == atomic_dec_if_positive(&group->container_users)) > + int users = atomic_dec_if_positive(&group->container_users); > + > + wake_up(&vfio.release_q); > + > + if (!users) > __vfio_group_unset_container(group); > } > > @@ -1433,19 +1563,26 @@ static bool vfio_group_viable(struct vfio_group *group) > > static int vfio_group_add_container_user(struct vfio_group *group) > { > + int ret; > + > if (!atomic_inc_not_zero(&group->container_users)) > return -EINVAL; > > if (group->noiommu) { > - atomic_dec(&group->container_users); > - return -EPERM; > + ret = -EPERM; > + goto out; > } > if (!group->container->iommu_driver || !vfio_group_viable(group)) { > - atomic_dec(&group->container_users); > - return -EINVAL; > + ret = -EINVAL; > + goto out; > } > > return 0; > + > +out: > + atomic_dec(&group->container_users); > + wake_up(&vfio.release_q); > + return ret; > } > > static const struct file_operations vfio_device_fops; >
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index a25ee4930200..ea786d512faa 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -25,6 +25,7 @@ #include <linux/miscdevice.h> #include <linux/module.h> #include <linux/mutex.h> +#include <linux/platform_device.h> #include <linux/pci.h> #include <linux/rwsem.h> #include <linux/sched.h> @@ -32,8 +33,10 @@ #include <linux/stat.h> #include <linux/string.h> #include <linux/uaccess.h> +#include <linux/uuid.h> #include <linux/vfio.h> #include <linux/wait.h> +#include <linux/amba/bus.h> #define DRIVER_VERSION "0.3" #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>" @@ -95,6 +98,7 @@ struct vfio_group { bool noiommu; struct kvm *kvm; struct blocking_notifier_head notifier; + unsigned char uuid[16]; }; struct vfio_device { @@ -352,6 +356,8 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group) group->nb.notifier_call = vfio_iommu_group_notifier; + generate_random_uuid(group->uuid); + /* * blocking notifiers acquire a rwsem around registering and hold * it around callback. Therefore, need to register outside of @@ -728,6 +734,116 @@ static int vfio_group_nb_verify(struct vfio_group *group, struct device *dev) return vfio_dev_viable(dev, group); } +#define VFIO_TAG_PREFIX "#vfio_group:" + +static char **vfio_find_driver_override(struct device *dev) +{ + if (dev_is_pci(dev)) { + struct pci_dev *pdev = to_pci_dev(dev); + return &pdev->driver_override; + } else if (dev->bus == &platform_bus_type) { + struct platform_device *pdev = to_platform_device(dev); + return &pdev->driver_override; +#ifdef CONFIG_ARM_AMBA + } else if (dev->bus == &amba_bustype) { + struct amba_device *adev = to_amba_device(dev); + return &adev->driver_override; +#endif + } + + return NULL; +} + +/* + * If we're about to bind to something other than a known whitelisted driver + * or known vfio bus driver, try to avert it with driver_override. + */ +static void vfio_group_nb_pre_bind(struct vfio_group *group, struct device *dev) +{ + struct vfio_bus_driver *driver; + struct device_driver *drv = ACCESS_ONCE(dev->driver); + char **driver_override; + + if (vfio_dev_whitelisted(dev, drv)) + return; /* Binding to known "innocuous" device/driver */ + + mutex_lock(&vfio.bus_drivers_lock); + list_for_each_entry(driver, &vfio.bus_drivers_list, vfio_next) { + if (driver->drv == drv) { + mutex_unlock(&vfio.bus_drivers_lock); + return; /* Binding to known vfio bus driver, ok */ + } + } + mutex_unlock(&vfio.bus_drivers_lock); + + /* Can we stall slightly to let users fall off? */ + if (list_empty(&group->device_list)) { + if (wait_event_timeout(vfio.release_q, + !atomic_read(&group->container_users), HZ)) + return; + } + + driver_override = vfio_find_driver_override(dev); + if (driver_override) { + char tag[50], *new = NULL, *old = *driver_override; + + snprintf(tag, sizeof(tag), "%s%pU", + VFIO_TAG_PREFIX, group->uuid); + + if (old && strstr(old, tag)) + return; /* block already in place */ + + new = kasprintf(GFP_KERNEL, "%s%s", old ? old : "", tag); + if (new) { + *driver_override = new; + kfree(old); + vfio_group_get(group); + dev_warn(dev, "vfio: Blocking unsafe driver bind\n"); + return; + } + } + + dev_warn(dev, "vfio: Unsafe driver binding to in-use group!\n"); +} + +/* If we've mangled driver_override, remove it */ +static void vfio_group_nb_post_bind(struct vfio_group *group, + struct device *dev) +{ + char **driver_override = vfio_find_driver_override(dev); + + if (driver_override && *driver_override) { + char tag[50], *new, *start, *end, *old = *driver_override; + + snprintf(tag, sizeof(tag), "%s%pU", + VFIO_TAG_PREFIX, group->uuid); + + start = strstr(old, tag); + if (start) { + end = start + strlen(tag); + + if (old + strlen(old) > end) + memmove(start, end, + strlen(old) - (end - old) + 1); + else + *start = 0; + + if (strlen(old)) { + new = kasprintf(GFP_KERNEL, "%s", old); + if (new) { + *driver_override = new; + kfree(old); + } /* else, in-place terminated, ok */ + } else { + *driver_override = NULL; + kfree(old); + } + + vfio_group_put(group); + } + } +} + static int vfio_iommu_group_notifier(struct notifier_block *nb, unsigned long action, void *data) { @@ -757,14 +873,23 @@ static int vfio_iommu_group_notifier(struct notifier_block *nb, */ break; case IOMMU_GROUP_NOTIFY_BIND_DRIVER: - pr_debug("%s: Device %s, group %d binding to driver\n", + pr_debug("%s: Device %s, group %d binding to driver %s\n", __func__, dev_name(dev), - iommu_group_id(group->iommu_group)); + iommu_group_id(group->iommu_group), dev->driver->name); + if (vfio_group_nb_verify(group, dev)) + vfio_group_nb_pre_bind(group, dev); + break; + case IOMMU_GROUP_NOTIFY_DRIVER_NOT_BOUND: + pr_debug("%s: Device %s, group %d binding fail to driver %s\n", + __func__, dev_name(dev), + iommu_group_id(group->iommu_group), dev->driver->name); + vfio_group_nb_post_bind(group, dev); break; case IOMMU_GROUP_NOTIFY_BOUND_DRIVER: pr_debug("%s: Device %s, group %d bound to driver %s\n", __func__, dev_name(dev), iommu_group_id(group->iommu_group), dev->driver->name); + vfio_group_nb_post_bind(group, dev); BUG_ON(vfio_group_nb_verify(group, dev)); break; case IOMMU_GROUP_NOTIFY_UNBIND_DRIVER: @@ -1351,6 +1476,7 @@ static int vfio_group_unset_container(struct vfio_group *group) if (users != 1) return -EBUSY; + wake_up(&vfio.release_q); __vfio_group_unset_container(group); return 0; @@ -1364,7 +1490,11 @@ static int vfio_group_unset_container(struct vfio_group *group) */ static void vfio_group_try_dissolve_container(struct vfio_group *group) { - if (0 == atomic_dec_if_positive(&group->container_users)) + int users = atomic_dec_if_positive(&group->container_users); + + wake_up(&vfio.release_q); + + if (!users) __vfio_group_unset_container(group); } @@ -1433,19 +1563,26 @@ static bool vfio_group_viable(struct vfio_group *group) static int vfio_group_add_container_user(struct vfio_group *group) { + int ret; + if (!atomic_inc_not_zero(&group->container_users)) return -EINVAL; if (group->noiommu) { - atomic_dec(&group->container_users); - return -EPERM; + ret = -EPERM; + goto out; } if (!group->container->iommu_driver || !vfio_group_viable(group)) { - atomic_dec(&group->container_users); - return -EINVAL; + ret = -EINVAL; + goto out; } return 0; + +out: + atomic_dec(&group->container_users); + wake_up(&vfio.release_q); + return ret; } static const struct file_operations vfio_device_fops;
If a device is bound to a non-vfio, non-whitelisted driver while a group is in use, then the integrity of the group is compromised and will result in hitting a BUG_ON. This code tries to avoid this case by mangling driver_override to force a no-match for the driver. The driver-core will either follow-up with a DRIVER_NOT_BOUND (preferred) or BOUND_DRIVER, at which point we can remove the driver_override mangling. A complication here is that even though the window between these notifications is expected to be extremely small, the vfio group could be removed, which would prevent us from finding the group again to remove the driver_override. We therefore take a group reference when adding to driver_override and release it when removed. A second complication is that driver_override can be modified by the system admin through sysfs. To avoid trivial interference, we add a non- user-visible UUID to the group and use this as part of the mangle string. The above blocks binding to a driver that would compromise the host, but we'd also like to avoid reaching that step if possible. For this we add a wait_event_timeout() with a short, 1 second timeout, which is highly effective in allowing empty groups to finish cleanup. Signed-off-by: Alex Williamson <alex.williamson@redhat.com> --- drivers/vfio/vfio.c | 151 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 144 insertions(+), 7 deletions(-)