@@ -867,7 +867,9 @@
qcom,skip-init;
qcom,register-save;
+
arm,smmu-enable-stall;
+ qcom,dynamic;
status = "okay";
};
@@ -18,7 +18,7 @@
#include <linux/dma-mapping.h>
#include <linux/of_reserved_mem.h>
#include "msm_gem.h"
-#include "msm_mmu.h"
+#include "msm_iommu.h"
#include "a5xx_gpu.h"
extern bool hang_debug;
@@ -209,6 +209,66 @@ static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
}
+static void a5xx_set_pagetable(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
+ struct msm_file_private *ctx)
+{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct msm_mmu *mmu = ctx->aspace->mmu;
+ struct msm_iommu *iommu = to_msm_iommu(mmu);
+
+ if (!iommu->ttbr0)
+ return;
+
+ /* Turn off protected mode */
+ OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
+ OUT_RING(ring, 0);
+
+ /* Turn on APRIV mode to access critical regions */
+ OUT_PKT4(ring, REG_A5XX_CP_CNTL, 1);
+ OUT_RING(ring, 1);
+
+ /* Make sure the ME is synchronized before starting the update */
+ OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);
+
+ /* Execute the table update */
+ OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 3);
+ OUT_RING(ring, lower_32_bits(iommu->ttbr0));
+ OUT_RING(ring, upper_32_bits(iommu->ttbr0));
+ OUT_RING(ring, iommu->contextidr);
+
+ /*
+ * Write the new TTBR0 to the preemption records - this will be used to
+ * reload the pagetable if the current ring gets preempted out.
+ */
+ OUT_PKT7(ring, CP_MEM_WRITE, 4);
+ OUT_RING(ring, lower_32_bits(rbmemptr(adreno_gpu, ring->id, ttbr0)));
+ OUT_RING(ring, upper_32_bits(rbmemptr(adreno_gpu, ring->id, ttbr0)));
+ OUT_RING(ring, lower_32_bits(iommu->ttbr0));
+ OUT_RING(ring, upper_32_bits(iommu->ttbr0));
+
+ /* Also write the current contextidr (ASID) */
+ OUT_PKT7(ring, CP_MEM_WRITE, 3);
+ OUT_RING(ring, lower_32_bits(rbmemptr(adreno_gpu, ring->id,
+ contextidr)));
+ OUT_RING(ring, upper_32_bits(rbmemptr(adreno_gpu, ring->id,
+ contextidr)));
+ OUT_RING(ring, iommu->contextidr);
+
+ /* Invalidate the draw state so we start off fresh */
+ OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
+ OUT_RING(ring, 0x40000);
+ OUT_RING(ring, 1);
+ OUT_RING(ring, 0);
+
+ /* Turn off APRIV */
+ OUT_PKT4(ring, REG_A5XX_CP_CNTL, 1);
+ OUT_RING(ring, 0);
+
+ /* Turn protected mode back on */
+ OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
+ OUT_RING(ring, 1);
+}
+
static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
struct msm_file_private *ctx)
{
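For reference, a5xx_set_pagetable() above relies on the msm_iommu wrapper from msm_iommu.h, introduced earlier in this series and not included in this excerpt. A minimal sketch of the assumed shape - the ttbr0/contextidr fields and to_msm_iommu() come from the usage above, the rest is hypothetical:

struct msm_iommu {
	struct msm_mmu base;
	uint64_t ttbr0;		/* pagetable base fed to CP_SMMU_TABLE_UPDATE */
	uint32_t contextidr;	/* ASID/CONTEXTIDR value for this pagetable */
};

#define to_msm_iommu(x) container_of(x, struct msm_iommu, base)

A ttbr0 of zero identifies the default (non-dynamic) address space, which is why the function returns early in that case.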
@@ -219,6 +279,8 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
struct msm_ringbuffer *ring = submit->ring;
unsigned int i, ibs = 0;
+ a5xx_set_pagetable(gpu, ring, ctx);
+
OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
OUT_RING(ring, 0x02);
@@ -47,6 +47,9 @@ struct a5xx_gpu {
atomic_t preempt_state;
struct timer_list preempt_timer;
+ struct a5xx_smmu_info *smmu_info;
+ struct drm_gem_object *smmu_info_bo;
+ uint64_t smmu_info_iova;
};
#define to_a5xx_gpu(x) container_of(x, struct a5xx_gpu, base)
@@ -127,6 +130,20 @@ struct a5xx_preempt_record {
*/
#define A5XX_PREEMPT_COUNTER_SIZE (16 * 4)
+/*
+ * This is a global structure that the preemption code uses to switch in the
+ * pagetable for the incoming process - the CP loads whatever pagetable is
+ * described here after preempting to a new ring.
+ */
+struct a5xx_smmu_info {
+ uint32_t magic;
+ uint32_t _pad4;
+ uint64_t ttbr0;
+ uint32_t asid;
+ uint32_t contextidr;
+};
+
+#define A5XX_SMMU_INFO_MAGIC 0x3618CDA3UL
int a5xx_power_init(struct msm_gpu *gpu);
void a5xx_gpmu_ucode_init(struct msm_gpu *gpu);
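Because the CP microcode reads struct a5xx_smmu_info directly, its layout is effectively ABI. A compile-time guard one could add (hypothetical, not part of this patch), e.g. at the top of a5xx_preempt_init(), to catch accidental repacking:

	/* magic + _pad4 + ttbr0 + asid + contextidr = 4 + 4 + 8 + 4 + 4 = 24 bytes */
	BUILD_BUG_ON(sizeof(struct a5xx_smmu_info) != 24);

The explicit _pad4 member keeps ttbr0 naturally aligned so the compiler does not insert padding of its own.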
@@ -12,6 +12,7 @@
*/
#include "msm_gem.h"
+#include "msm_iommu.h"
#include "a5xx_gpu.h"
static void *alloc_kernel_bo(struct drm_device *drm, struct msm_gpu *gpu,
@@ -172,6 +173,17 @@ void a5xx_preempt_trigger(struct msm_gpu *gpu)
a5xx_gpu->preempt[ring->id]->wptr = get_wptr(ring);
spin_unlock_irqrestore(&ring->lock, flags);
+ /* Read barrier to make sure we see the latest pagetable info */
+ rmb();
+
+ /* Set the SMMU info for the preemption */
+ if (a5xx_gpu->smmu_info) {
+ a5xx_gpu->smmu_info->ttbr0 =
+ adreno_gpu->memptrs->ttbr0[ring->id];
+ a5xx_gpu->smmu_info->contextidr =
+ adreno_gpu->memptrs->contextidr[ring->id];
+ }
+
/* Set the address of the incoming preemption record */
gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO,
REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI,
@@ -247,9 +259,10 @@ void a5xx_preempt_hw_init(struct msm_gpu *gpu)
}
}
- /* Write a 0 to signal that we aren't switching pagetables */
+ /* Tell the CP where to find the smmu_info buffer */
gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO,
- REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI, 0);
+ REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI,
+ a5xx_gpu->smmu_info_iova);
/* Reset the preemption state */
set_preempt_state(a5xx_gpu, PREEMPT_NONE);
@@ -311,6 +324,13 @@ void a5xx_preempt_fini(struct msm_gpu *gpu)
a5xx_gpu->preempt_bo[i] = NULL;
}
+
+ if (a5xx_gpu->smmu_info_bo) {
+ if (a5xx_gpu->smmu_info_iova)
+ msm_gem_put_iova(a5xx_gpu->smmu_info_bo, gpu->aspace);
+ drm_gem_object_unreference_unlocked(a5xx_gpu->smmu_info_bo);
+ a5xx_gpu->smmu_info_bo = NULL;
+ }
}
void a5xx_preempt_init(struct msm_gpu *gpu)
@@ -318,6 +338,9 @@ void a5xx_preempt_init(struct msm_gpu *gpu)
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
struct msm_ringbuffer *ring;
+ struct a5xx_smmu_info *ptr;
+ struct drm_gem_object *bo;
+ uint64_t iova;
int i;
/* No preemption if we only have one ring */
@@ -328,18 +351,34 @@ void a5xx_preempt_init(struct msm_gpu *gpu)
if (!ring)
continue;
- if (preempt_init_ring(a5xx_gpu, ring)) {
- /*
- * On any failure our adventure is over. Clean up and
- * set nr_rings to 1 to force preemption off
- */
- a5xx_preempt_fini(gpu);
- gpu->nr_rings = 1;
+ if (preempt_init_ring(a5xx_gpu, ring))
+ goto fail;
+ }
+
+ if (msm_iommu_allow_dynamic(gpu->aspace->mmu)) {
+ ptr = alloc_kernel_bo(gpu->dev, gpu,
+ sizeof(struct a5xx_smmu_info),
+ MSM_BO_UNCACHED, &bo, &iova);
- return;
- }
+ if (IS_ERR(ptr))
+ goto fail;
+
+ ptr->magic = A5XX_SMMU_INFO_MAGIC;
+
+ a5xx_gpu->smmu_info_bo = bo;
+ a5xx_gpu->smmu_info_iova = iova;
+ a5xx_gpu->smmu_info = ptr;
}
setup_timer(&a5xx_gpu->preempt_timer, a5xx_preempt_timer,
(unsigned long) a5xx_gpu);
+
+ return;
+fail:
+ /*
+ * On any failure our adventure is over. Clean up and
+ * set nr_rings to 1 to force preemption off
+ */
+ a5xx_preempt_fini(gpu);
+ gpu->nr_rings = 1;
}
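msm_iommu_allow_dynamic(), tested in the hunk above, and msm_iommu_new_dynamic(), used by msm_gem_address_space_create_instance() later in this patch, are provided by msm_iommu.c earlier in the series and are not part of this excerpt. A sketch of the contract assumed here (prototypes assumed, not confirmed by this excerpt):

/*
 * A dynamic MMU shares the parent's IOMMU context bank but owns a private
 * pagetable (TTBR0) and ASID that the GPU can switch to from the ring.
 */
struct msm_mmu *msm_iommu_new_dynamic(struct msm_mmu *parent);
bool msm_iommu_allow_dynamic(struct msm_mmu *mmu);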
@@ -93,6 +93,8 @@ struct adreno_info {
struct adreno_rbmemptrs {
volatile uint32_t rptr[MSM_GPU_MAX_RINGS];
volatile uint32_t fence[MSM_GPU_MAX_RINGS];
+ volatile uint64_t ttbr0[MSM_GPU_MAX_RINGS];
+ volatile uint32_t contextidr[MSM_GPU_MAX_RINGS];
};
struct adreno_gpu {
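The CP_MEM_WRITE packets in a5xx_set_pagetable() reach these new per-ring fields through the driver's existing rbmemptr() helper. A hedged reconstruction of how that address is computed (the real macro lives in adreno_gpu.h and may differ in detail):

/* Resolve the GPU iova of one per-ring slot in the shared memptrs buffer */
#define rbmemptr(adreno_gpu, index, member) \
	((adreno_gpu)->memptrs_iova + \
	 offsetof(struct adreno_rbmemptrs, member[(index)]))

The new fields are volatile because the CP writes them from the ring while the CPU reads them back in a5xx_preempt_trigger().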
@@ -22,6 +22,8 @@
#include "msm_fence.h"
#include "msm_gpu.h"
#include "msm_kms.h"
+#include "msm_gem.h"
+#include "msm_mmu.h"
/*
@@ -515,11 +517,37 @@ static int msm_open(struct drm_device *dev, struct drm_file *file)
*/
load_gpu(dev);
+ if (!priv->gpu)
+ return 0;
+
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
- ctx->aspace = priv->gpu->aspace;
+ /*
+ * FIXME: we will want to use a dynamic name of some sort
+ * FIXME: we will need a smarter way to set the range based on target
+ */
+ ctx->aspace = msm_gem_address_space_create_instance(
+ priv->gpu->aspace->mmu, "gpu", 0x100000000, 0x1ffffffff);
+
+ if (IS_ERR(ctx->aspace)) {
+ int ret = PTR_ERR(ctx->aspace);
+
+ /*
+ * If dynamic domains are not supported, everybody uses the same
+ * pagetable
+ */
+ if (ret == -EOPNOTSUPP)
+ ctx->aspace = priv->gpu->aspace;
+ else {
+ kfree(ctx);
+ return ret;
+ }
+ } else {
+ ctx->aspace->mmu->funcs->attach(ctx->aspace->mmu, NULL, 0);
+ }
+
file->driver_priv = ctx;
return 0;
@@ -534,10 +562,25 @@ static void msm_preclose(struct drm_device *dev, struct drm_file *file)
if (ctx == priv->lastctx)
priv->lastctx = NULL;
mutex_unlock(&dev->struct_mutex);
+}
+
+static void msm_postclose(struct drm_device *dev, struct drm_file *file)
+{
+ struct msm_drm_private *priv = dev->dev_private;
+ struct msm_file_private *ctx = file->driver_priv;
+
+ mutex_lock(&dev->struct_mutex);
+ if (ctx && ctx->aspace && ctx->aspace != priv->gpu->aspace) {
+ ctx->aspace->mmu->funcs->detach(ctx->aspace->mmu, NULL, 0);
+ msm_gem_address_space_put(ctx->aspace);
+ }
+ mutex_unlock(&dev->struct_mutex);
kfree(ctx);
}
+
static void msm_lastclose(struct drm_device *dev)
{
struct msm_drm_private *priv = dev->dev_private;
@@ -684,17 +727,6 @@ static int msm_ioctl_gem_cpu_fini(struct drm_device *dev, void *data,
return ret;
}
-static int msm_ioctl_gem_info_iova(struct drm_device *dev,
- struct drm_gem_object *obj, uint64_t *iova)
-{
- struct msm_drm_private *priv = dev->dev_private;
-
- if (!priv->gpu)
- return -EINVAL;
-
- return msm_gem_get_iova(obj, priv->gpu->aspace, iova);
-}
-
static int msm_ioctl_gem_info(struct drm_device *dev, void *data,
struct drm_file *file)
{
@@ -710,9 +742,10 @@ static int msm_ioctl_gem_info(struct drm_device *dev, void *data,
return -ENOENT;
if (args->flags & MSM_INFO_IOVA) {
+ struct msm_file_private *ctx = file->driver_priv;
uint64_t iova;
- ret = msm_ioctl_gem_info_iova(dev, obj, &iova);
+ ret = msm_gem_get_iova(obj, ctx->aspace, &iova);
if (!ret)
args->offset = iova;
} else {
@@ -818,6 +851,7 @@ static int msm_ioctl_gem_madvise(struct drm_device *dev, void *data,
DRIVER_MODESET,
.open = msm_open,
.preclose = msm_preclose,
+ .postclose = msm_postclose,
.lastclose = msm_lastclose,
.irq_handler = msm_irq,
.irq_preinstall = msm_irq_preinstall,
@@ -191,6 +191,9 @@ int msm_gem_map_vma(struct msm_gem_address_space *aspace,
struct msm_gem_address_space *
msm_gem_address_space_create(struct device *dev, struct iommu_domain *domain,
const char *name);
+struct msm_gem_address_space *
+msm_gem_address_space_create_instance(struct msm_mmu *parent, const char *name,
+ uint64_t start, uint64_t end);
void msm_gem_submit_free(struct msm_gem_submit *submit);
int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
@@ -85,9 +85,9 @@ void msm_gem_address_space_put(struct msm_gem_address_space *aspace)
return ret;
}
-struct msm_gem_address_space *
-msm_gem_address_space_create(struct device *dev, struct iommu_domain *domain,
- const char *name)
+static struct msm_gem_address_space *
+msm_gem_address_space_new(struct msm_mmu *mmu, const char *name,
+ uint64_t start, uint64_t end)
{
struct msm_gem_address_space *aspace;
@@ -96,12 +96,38 @@ struct msm_gem_address_space *
return ERR_PTR(-ENOMEM);
aspace->name = name;
- aspace->mmu = msm_iommu_new(dev, domain);
+ aspace->mmu = mmu;
- drm_mm_init(&aspace->mm, (domain->geometry.aperture_start >> PAGE_SHIFT),
- (domain->geometry.aperture_end >> PAGE_SHIFT) - 1);
+ drm_mm_init(&aspace->mm, (start >> PAGE_SHIFT),
+ (end >> PAGE_SHIFT) - 1);
kref_init(&aspace->kref);
return aspace;
}
+
+struct msm_gem_address_space *
+msm_gem_address_space_create(struct device *dev, struct iommu_domain *domain,
+ const char *name)
+{
+ struct msm_mmu *mmu = msm_iommu_new(dev, domain);
+
+ if (IS_ERR(mmu))
+ return ERR_CAST(mmu);
+
+ return msm_gem_address_space_new(mmu, name,
+ domain->geometry.aperture_start,
+ domain->geometry.aperture_end);
+}
+
+struct msm_gem_address_space *
+msm_gem_address_space_create_instance(struct msm_mmu *parent, const char *name,
+ uint64_t start, uint64_t end)
+{
+ struct msm_mmu *child = msm_iommu_new_dynamic(parent);
+
+ if (IS_ERR(child))
+ return ERR_CAST(child);
+
+ return msm_gem_address_space_new(child, name, start, end);
+}
Support per-instance pagetables for 5XX targets.

Per-instance pagetables allow each open DRM instance to have its own VM
memory space to prevent accidentally or maliciously copying or overwriting
buffers from other instances. It also opens the door for SVM, since any
given CPU-side address can be more reliably mapped into the instance's GPU
VM space without conflict.

To support this, create a new dynamic domain (pagetable) for each open DRM
file and map buffer objects for each instance into that pagetable. Use the
GPU to switch to the pagetable for the instance while doing a submit.

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
---
 arch/arm64/boot/dts/qcom/msm8996.dtsi     |  2 +
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c     | 64 ++++++++++++++++++++++++++++++-
 drivers/gpu/drm/msm/adreno/a5xx_gpu.h     | 17 ++++++++
 drivers/gpu/drm/msm/adreno/a5xx_preempt.c | 61 +++++++++++++++++++++++------
 drivers/gpu/drm/msm/adreno/adreno_gpu.h   |  2 +
 drivers/gpu/drm/msm/msm_drv.c             | 60 ++++++++++++++++++++++-------
 drivers/gpu/drm/msm/msm_drv.h             |  3 ++
 drivers/gpu/drm/msm/msm_gem_vma.c         | 38 +++++++++++++---
 8 files changed, 216 insertions(+), 31 deletions(-)
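A quick way to observe the change from userspace is the MSM_INFO_IOVA query touched in the msm_ioctl_gem_info() hunk above; after this patch the returned iova is resolved in the calling fd's own pagetable rather than in a single global GPU address space. A sketch (headers and helper name are illustrative, error handling trimmed):

#include <stdint.h>
#include <xf86drm.h>
#include <drm/msm_drm.h>

/* Ask the kernel for a buffer's GPU address in this instance's VM */
static uint64_t get_iova(int fd, uint32_t handle)
{
	struct drm_msm_gem_info req = {
		.handle = handle,
		.flags = MSM_INFO_IOVA,
	};

	if (drmIoctl(fd, DRM_IOCTL_MSM_GEM_INFO, &req))
		return 0;

	return req.offset;
}

Two open instances may now legitimately see overlapping iovas for different buffers, since each file gets its own 0x100000000 - 0x1ffffffff range.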