@@ -369,14 +369,17 @@ static int iommu_init_device(struct device *dev, const struct iommu_ops *ops)
if (ret)
goto err_release;
+ mutex_lock(&dev_iommu_group_lock);
group = ops->device_group(dev);
if (WARN_ON_ONCE(group == NULL))
group = ERR_PTR(-EINVAL);
if (IS_ERR(group)) {
+ mutex_unlock(&dev_iommu_group_lock);
ret = PTR_ERR(group);
goto err_unlink;
}
dev->iommu_group = group;
+ mutex_unlock(&dev_iommu_group_lock);
dev->iommu->iommu_dev = iommu_dev;
dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev);
@@ -434,7 +437,9 @@ static void iommu_deinit_device(struct device *dev)
}
/* Caller must put iommu_group */
+ mutex_lock(&dev_iommu_group_lock);
dev->iommu_group = NULL;
+ mutex_unlock(&dev_iommu_group_lock);
module_put(ops->owner);
dev_iommu_free(dev);
}
@@ -449,13 +454,11 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
if (!ops)
return -ENODEV;
/*
- * Serialise to avoid races between IOMMU drivers registering in
- * parallel and/or the "replay" calls from ACPI/OF code via client
- * driver probe. Once the latter have been cleaned up we should
- * probably be able to use device_lock() here to minimise the scope,
- * but for now enforcing a simple global ordering is fine.
+ * Allow __iommu_probe_device() to be safely called in parallel,
+ * both dev->iommu_group and the initial setup of dev->iommu are
+ * protected this way.
*/
- mutex_lock(&dev_iommu_group_lock);
+ device_lock(dev);
/* Device is probed already if in a group */
if (dev->iommu_group) {
@@ -501,7 +504,7 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
list_add_tail(&group->entry, group_list);
}
mutex_unlock(&group->mutex);
- mutex_unlock(&dev_iommu_group_lock);
+ device_unlock(dev);
if (dev_is_pci(dev))
iommu_dma_set_pci_32bit_workaround(dev);
@@ -516,8 +519,7 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
mutex_unlock(&group->mutex);
iommu_group_put(group);
out_unlock:
- mutex_unlock(&dev_iommu_group_lock);
-
+ device_unlock(dev);
return ret;
}
@@ -566,6 +568,7 @@ static void __iommu_group_remove_device(struct device *dev)
struct iommu_group *group = dev->iommu_group;
struct group_device *device;
+ device_lock_assert(dev);
mutex_lock(&group->mutex);
for_each_group_device(group, device) {
if (device->dev != dev)
@@ -590,14 +593,23 @@ static void __iommu_group_remove_device(struct device *dev)
static void iommu_release_device(struct device *dev)
{
- struct iommu_group *group = dev->iommu_group;
+ struct iommu_group *group;
+ /*
+ * This locking for dev->iommu_group is overkill when this is called
+ * from the BUS_NOTIFY_REMOVED_DEVICE notifier, as we don't expect any
+ * other threads to have a reference to the device at that point. Keep
+ * it because this isn't a performance path and helps lockdep analysis.
+ */
+ device_lock(dev);
+ group = dev->iommu_group;
if (group)
__iommu_group_remove_device(dev);
/* Free any fwspec if no iommu_driver was ever attached */
if (dev->iommu)
dev_iommu_free(dev);
+ device_unlock(dev);
}
static int __init iommu_set_def_domain_type(char *str)
@@ -1080,6 +1092,8 @@ static struct group_device *iommu_group_alloc_device(struct iommu_group *group,
int ret, i = 0;
struct group_device *device;
+ device_lock_assert(dev);
+
device = kzalloc(sizeof(*device), GFP_KERNEL);
if (!device)
return ERR_PTR(-ENOMEM);
@@ -1141,9 +1155,12 @@ int iommu_group_add_device(struct iommu_group *group, struct device *dev)
{
struct group_device *gdev;
+ device_lock(dev);
gdev = iommu_group_alloc_device(group, dev);
- if (IS_ERR(gdev))
+ if (IS_ERR(gdev)) {
+ device_unlock(dev);
return PTR_ERR(gdev);
+ }
iommu_group_ref_get(group);
dev->iommu_group = group;
@@ -1151,6 +1168,7 @@ int iommu_group_add_device(struct iommu_group *group, struct device *dev)
mutex_lock(&group->mutex);
list_add_tail(&gdev->list, &group->devices);
mutex_unlock(&group->mutex);
+ device_unlock(dev);
return 0;
}
EXPORT_SYMBOL_GPL(iommu_group_add_device);
@@ -1164,14 +1182,16 @@ EXPORT_SYMBOL_GPL(iommu_group_add_device);
*/
void iommu_group_remove_device(struct device *dev)
{
- struct iommu_group *group = dev->iommu_group;
+ struct iommu_group *group;
- if (!group)
- return;
+ device_lock(dev);
+ group = dev->iommu_group;
+ if (group) {
+ dev_info(dev, "Removing from iommu group %d\n", group->id);
+ __iommu_group_remove_device(dev);
+ }
+ device_unlock(dev);
- dev_info(dev, "Removing from iommu group %d\n", group->id);
-
- __iommu_group_remove_device(dev);
}
EXPORT_SYMBOL_GPL(iommu_group_remove_device);
Revise the locking for dev->iommu_group so that it has three safe ways to access it:

 - It is read by a probed device driver. So long as a device driver is
   probed, the dev->iommu_group will be guaranteed stable without further
   locking.

 - Read under the device_lock(); this primarily protects against parallel
   probe of the same device, and parallel probe/remove.

 - Read/Write under the global dev_iommu_group_lock. This is used during
   probe-time discovery of groups. Device drivers will scan unlocked
   portions of the device tree to locate an already existing group. These
   scans can access the dev->iommu_group under the global lock to
   single-thread determining and installing the group. This ensures that
   groups are reliably formed.

Narrow the scope of the global dev_iommu_group_lock to be only during the
dev->iommu_group setup, and not for the entire probing.

Prior patches removed the various races inherent to the probe process by
consolidating all the work under the group->mutex. In this configuration
it is fine if two devices race to the group_device step of a new
iommu_group; the group->mutex locking will ensure the group_device and
domain setup part remains properly ordered.

Add the missing locking on the remove paths. For iommu_deinit_device() it
is necessary to hold the dev_iommu_group_lock due to possible races during
probe error unwind.

Fully lock the iommu_group_add/remove_device() path so we can use lockdep
assertions. Other than lockdep this is redundant; VFIO no-iommu doesn't
use group clustering.

For iommu_release_device() it is redundant, as we expect no external
references to the struct device by this point, but it is harmless so add
the missing lock to allow lockdep assertions to work.

This resolves the remarks of the comment in __iommu_probe_device().

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/iommu.c | 54 +++++++++++++++++++++++++++++--------------------
 1 file changed, 37 insertions(+), 17 deletions(-)