diff mbox series

[12/12] drm/tegra: Optionally attach clients to the IOMMU

Message ID 20191028123718.3890217-13-thierry.reding@gmail.com (mailing list archive)
State New, archived
Headers show
Series drm/tegra: Support IOMMU-backed DMA API | expand

Commit Message

Thierry Reding Oct. 28, 2019, 12:37 p.m. UTC
From: Thierry Reding <treding@nvidia.com>

If a client is already attached to an IOMMU domain that is not the
shared domain, don't try to attach it again. This allows using the
IOMMU-backed DMA API.

Since the IOMMU-backed DMA API is now supported and there's no way
to detach from it on 64-bit ARM, don't bother to detach from it on
32-bit ARM either.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/drm.c | 66 +++++++++++++++++++++++++++----------
 drivers/gpu/drm/tegra/drm.h |  1 +
 2 files changed, 49 insertions(+), 18 deletions(-)
diff mbox series

Patch

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index efc8a27b9e6a..56e5e7a5c108 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -20,10 +20,6 @@ 
 #include <drm/drm_prime.h>
 #include <drm/drm_vblank.h>
 
-#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
-#include <asm/dma-iommu.h>
-#endif
-
 #include "drm.h"
 #include "gem.h"
 
@@ -908,30 +904,27 @@  int tegra_drm_unregister_client(struct tegra_drm *tegra,
 
 int host1x_client_iommu_attach(struct host1x_client *client)
 {
+	struct iommu_domain *domain = iommu_get_domain_for_dev(client->dev);
 	struct drm_device *drm = dev_get_drvdata(client->parent);
 	struct tegra_drm *tegra = drm->dev_private;
 	struct iommu_group *group = NULL;
 	int err;
 
-	if (tegra->domain) {
-		struct iommu_domain *domain;
+	/*
+	 * If the host1x client is already attached to an IOMMU domain that is
+	 * not the shared IOMMU domain, don't try to attach it to a different
+	 * domain. This allows using the IOMMU-backed DMA API.
+	 */
+	if (domain && domain != tegra->domain)
+		return 0;
 
+	if (tegra->domain) {
 		group = iommu_group_get(client->dev);
 		if (!group) {
 			dev_err(client->dev, "failed to get IOMMU group\n");
 			return -ENODEV;
 		}
 
-#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
-		if (client->dev->archdata.mapping) {
-			struct dma_iommu_mapping *mapping =
-				to_dma_iommu_mapping(client->dev);
-			arm_iommu_detach_device(client->dev);
-			arm_iommu_release_mapping(mapping);
-		}
-#endif
-
-		domain = iommu_get_domain_for_dev(client->dev);
 		if (domain != tegra->domain) {
 			err = iommu_attach_group(tegra->domain, group);
 			if (err < 0) {
@@ -939,6 +932,8 @@  int host1x_client_iommu_attach(struct host1x_client *client)
 				return err;
 			}
 		}
+
+		tegra->use_explicit_iommu = true;
 	}
 
 	client->group = group;
@@ -963,6 +958,7 @@  void host1x_client_iommu_detach(struct host1x_client *client)
 			iommu_detach_group(tegra->domain, client->group);
 
 		iommu_group_put(client->group);
+		client->group = NULL;
 	}
 }
 
@@ -1046,6 +1042,7 @@  void tegra_drm_free(struct tegra_drm *tegra, size_t size, void *virt,
 static int host1x_drm_probe(struct host1x_device *dev)
 {
 	struct drm_driver *driver = &tegra_drm_driver;
+	struct iommu_domain *domain;
 	struct tegra_drm *tegra;
 	struct drm_device *drm;
 	int err;
@@ -1060,7 +1057,36 @@  static int host1x_drm_probe(struct host1x_device *dev)
 		goto put;
 	}
 
-	if (iommu_present(&platform_bus_type)) {
+	/*
+	 * If the Tegra DRM clients are backed by an IOMMU, push buffers are
+	 * likely to be allocated beyond the 32-bit boundary if sufficient
+	 * system memory is available. This is problematic on earlier Tegra
+	 * generations where host1x supports a maximum of 32 address bits in
+	 * the GATHER opcode. In this case, unless host1x is behind an IOMMU
+	 * as well it won't be able to process buffers allocated beyond the
+	 * 32-bit boundary.
+	 *
+	 * The DMA API will use bounce buffers in this case, so that could
+	 * perhaps still be made to work, even if less efficient, but there
+	 * is another catch: in order to perform cache maintenance on pages
+	 * allocated for discontiguous buffers we need to map and unmap the
+	 * SG table representing these buffers. This is fine for something
+	 * small like a push buffer, but it exhausts the bounce buffer pool
+	 * (typically on the order of a few MiB) for framebuffers (many MiB
+	 * for any modern resolution).
+	 *
+	 * Work around this by making sure that Tegra DRM clients only use
+	 * an IOMMU if the parent host1x also uses an IOMMU.
+	 *
+	 * Note that there's still a small gap here that we don't cover: if
+	 * the DMA API is backed by an IOMMU there's no way to control which
+	 * device is attached to an IOMMU and which isn't, except via wiring
+	 * up the device tree appropriately. This is considered an problem
+	 * of integration, so care must be taken for the DT to be consistent.
+	 */
+	domain = iommu_get_domain_for_dev(drm->dev->parent);
+
+	if (domain && iommu_present(&platform_bus_type)) {
 		tegra->domain = iommu_domain_alloc(&platform_bus_type);
 		if (!tegra->domain) {
 			err = -ENOMEM;
@@ -1104,7 +1130,7 @@  static int host1x_drm_probe(struct host1x_device *dev)
 	if (err < 0)
 		goto fbdev;
 
-	if (tegra->domain) {
+	if (tegra->use_explicit_iommu) {
 		u64 carveout_start, carveout_end, gem_start, gem_end;
 		u64 dma_mask = dma_get_mask(&dev->dev);
 		dma_addr_t start, end;
@@ -1132,6 +1158,10 @@  static int host1x_drm_probe(struct host1x_device *dev)
 		DRM_DEBUG_DRIVER("  GEM: %#llx-%#llx\n", gem_start, gem_end);
 		DRM_DEBUG_DRIVER("  Carveout: %#llx-%#llx\n", carveout_start,
 				 carveout_end);
+	} else if (tegra->domain) {
+		iommu_domain_free(tegra->domain);
+		tegra->domain = NULL;
+		iova_cache_put();
 	}
 
 	if (tegra->hub) {
diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index 28f2820a7371..d941553f7a3d 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -36,6 +36,7 @@  struct tegra_drm {
 	struct drm_device *drm;
 
 	struct iommu_domain *domain;
+	bool use_explicit_iommu;
 	struct mutex mm_lock;
 	struct drm_mm mm;