@@ -9,6 +9,8 @@
#include "a6xx_gmu.xml.h"
#include <linux/devfreq.h>
+#include <linux/bitfield.h>
+#include <linux/soc/qcom/llcc-qcom.h>
#define GPU_PAS_ID 13
@@ -969,6 +971,79 @@ static const u32 a6xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A6XX_CP_RB_CNTL),
};
+static void a6xx_llc_rmw(struct a6xx_gpu *a6xx_gpu, u32 reg, u32 mask, u32 or)
+{
+ msm_rmw(a6xx_gpu->llc_mmio + (reg << 2), mask, or);
+}
+
+static void a6xx_llc_write(struct a6xx_gpu *a6xx_gpu, u32 reg, u32 value)
+{
+ msm_writel(value, a6xx_gpu->llc_mmio + (reg << 2));
+}
+
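+/*
+ * Note: llcc_slice_deactivate() and llcc_slice_putd() are expected to
+ * tolerate ERR_PTR descriptors, so the helpers below can be called
+ * unconditionally even when a slice could not be obtained.
+ */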
+static void a6xx_llc_deactivate(struct a6xx_gpu *a6xx_gpu)
+{
+ llcc_slice_deactivate(a6xx_gpu->llc_slice);
+ llcc_slice_deactivate(a6xx_gpu->htw_llc_slice);
+}
+
+static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
+{
+ u32 cntl1_regval = 0;
+
+ if (IS_ERR(a6xx_gpu->llc_mmio))
+ return;
+
+ if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
+ u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);
+
+ gpu_scid &= 0x1f;
+ cntl1_regval = (gpu_scid << 0) | (gpu_scid << 5) | (gpu_scid << 10) |
+ (gpu_scid << 15) | (gpu_scid << 20);
+ }
+
+ if (!llcc_slice_activate(a6xx_gpu->htw_llc_slice)) {
+ u32 gpuhtw_scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice);
+
+ gpuhtw_scid &= 0x1f;
+ cntl1_regval |= FIELD_PREP(GENMASK(29, 25), gpuhtw_scid);
+ }
+
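+ /*
+ * gpu_scid is replicated into five 5-bit fields (bits [4:0], [9:5],
+ * [14:10], [19:15] and [24:20]) and gpuhtw_scid lands in bits [29:25].
+ * For example, hypothetical slice IDs of 2 (GPU) and 3 (HTW) would
+ * give cntl1_regval = 0x6210842.
+ */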
+ if (cntl1_regval) {
+ /*
+ * Program the slice IDs for the various GPU blocks and GPU MMU
+ * pagetables
+ */
+ a6xx_llc_write(a6xx_gpu, REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, cntl1_regval);
+
+ /*
+ * Program cacheability overrides to not allocate cache lines on
+ * a write miss
+ */
+ a6xx_llc_rmw(a6xx_gpu, REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03);
+ }
+}
+
+static void a6xx_llc_slices_destroy(struct a6xx_gpu *a6xx_gpu)
+{
+ llcc_slice_putd(a6xx_gpu->llc_slice);
+ llcc_slice_putd(a6xx_gpu->htw_llc_slice);
+}
+
+static void a6xx_llc_slices_init(struct platform_device *pdev,
+ struct a6xx_gpu *a6xx_gpu)
+{
+ a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem", "gpu_cx");
+ if (IS_ERR(a6xx_gpu->llc_mmio))
+ return;
+
+ a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU);
+ a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW);
+
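+ /*
+ * If neither slice is available there is nothing to program, so mark
+ * llc_mmio as an error to make a6xx_llc_activate() bail out early.
+ */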
+ if (IS_ERR(a6xx_gpu->llc_slice) && IS_ERR(a6xx_gpu->htw_llc_slice))
+ a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL);
+}
+
static int a6xx_pm_resume(struct msm_gpu *gpu)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
@@ -985,6 +1060,8 @@ static int a6xx_pm_resume(struct msm_gpu *gpu)
msm_gpu_resume_devfreq(gpu);
+ a6xx_llc_activate(a6xx_gpu);
+
return 0;
}
@@ -995,6 +1072,8 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu)
trace_msm_gpu_suspend(0);
+ a6xx_llc_deactivate(a6xx_gpu);
+
devfreq_suspend_device(gpu->devfreq.devfreq);
return a6xx_gmu_stop(a6xx_gpu);
@@ -1033,6 +1112,8 @@ static void a6xx_destroy(struct msm_gpu *gpu)
drm_gem_object_put(a6xx_gpu->sqe_bo);
}
+ a6xx_llc_slices_destroy(a6xx_gpu);
+
a6xx_gmu_remove(a6xx_gpu);
adreno_gpu_cleanup(adreno_gpu);
@@ -1132,6 +1213,8 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
if (adreno_is_a650(adreno_gpu))
adreno_gpu->base.hw_apriv = true;
+ a6xx_llc_slices_init(pdev, a6xx_gpu);
+
ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
if (ret) {
a6xx_destroy(&(a6xx_gpu->base.base));
@@ -22,6 +22,10 @@ struct a6xx_gpu {
struct msm_file_private *cur_ctx;
struct a6xx_gmu gmu;
+
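+ /* System cache (LLCC) support */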
+ void __iomem *llc_mmio;
+ void *llc_slice;
+ void *htw_llc_slice;
};
#define to_a6xx_gpu(x) container_of(x, struct a6xx_gpu, base)
@@ -16,6 +16,7 @@
#include <linux/soc/qcom/mdt_loader.h>
#include <soc/qcom/ocmem.h>
#include "adreno_gpu.h"
+#include "a6xx_gpu.h"
#include "msm_gem.h"
#include "msm_mmu.h"
@@ -190,10 +191,28 @@ adreno_iommu_create_address_space(struct msm_gpu *gpu,
struct platform_device *pdev)
{
struct iommu_domain *iommu = iommu_domain_alloc(&platform_bus_type);
- struct msm_mmu *mmu = msm_iommu_new(&pdev->dev, iommu);
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
struct msm_gem_address_space *aspace;
+ struct msm_mmu *mmu;
u64 start, size;
+ /*
+ * This allows the GPU to set the bus attributes required to use the
+ * system cache on behalf of the IOMMU page table walker.
+ */
+ if (!IS_ERR(a6xx_gpu->htw_llc_slice)) {
+ int gpu_htw_llc = 1;
+
+ iommu_domain_set_attr(iommu, DOMAIN_ATTR_SYS_CACHE, &gpu_htw_llc);
+ }
+
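+ /*
+ * The attribute is set before msm_iommu_new() attaches the device to
+ * the domain below, since domain attributes generally cannot be
+ * changed once the domain is live.
+ */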
+ mmu = msm_iommu_new(&pdev->dev, iommu);
+ if (IS_ERR(mmu)) {
+ iommu_domain_free(iommu);
+ return ERR_CAST(mmu);
+ }
+
/*
* Use the aperture start or SZ_16M, whichever is greater. This will
* ensure that we align with the allocated pagetable range while still