@@ -69,11 +69,58 @@ static bool a3xx_me_init(struct msm_gpu *gpu)
return a3xx_idle(gpu);
}
-static int a3xx_hw_init(struct msm_gpu *gpu)
+static int a3xx_ucode_init(struct msm_gpu *gpu)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
+ const struct firmware *fw;
uint32_t *ptr, len;
+ int i;
+
+ if (!a3xx_gpu->pm4) {
+ fw = adreno_request_fw(adreno_gpu, adreno_gpu->info->pm4fw);
+ if (IS_ERR(fw))
+ return PTR_ERR(fw);
+
+ a3xx_gpu->pm4 = fw;
+ }
+
+ if (!a3xx_gpu->pfp) {
+ fw = adreno_request_fw(adreno_gpu, adreno_gpu->info->pfpfw);
+ if (IS_ERR(fw))
+ return PTR_ERR(fw);
+
+ a3xx_gpu->pfp = fw;
+ }
+
+ /* Load PM4: */
+ ptr = (uint32_t *)(a3xx_gpu->pm4->data);
+ len = a3xx_gpu->pm4->size / 4;
+ DBG("loading PM4 ucode version: %x", ptr[1]);
+
+ gpu_write(gpu, REG_AXXX_CP_DEBUG,
+ AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE |
+ AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE);
+ gpu_write(gpu, REG_AXXX_CP_ME_RAM_WADDR, 0);
+ for (i = 1; i < len; i++)
+ gpu_write(gpu, REG_AXXX_CP_ME_RAM_DATA, ptr[i]);
+
+ /* Load PFP: */
+ ptr = (uint32_t *)(a3xx_gpu->pfp->data);
+ len = a3xx_gpu->pfp->size / 4;
+ DBG("loading PFP ucode version: %x", ptr[5]);
+
+ gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0);
+ for (i = 1; i < len; i++)
+ gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]);
+
+ return 0;
+}
+
+static int a3xx_hw_init(struct msm_gpu *gpu)
+{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
int i, ret;
DBG("%s", gpu->name);
@@ -225,6 +272,10 @@ static int a3xx_hw_init(struct msm_gpu *gpu)
if (ret)
return ret;
+ ret = a3xx_ucode_init(gpu);
+ if (ret)
+ return ret;
+
/* setup access protection: */
gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007);
@@ -249,33 +300,6 @@ static int a3xx_hw_init(struct msm_gpu *gpu)
/* VBIF registers */
gpu_write(gpu, REG_A3XX_CP_PROTECT(12), 0x6b00c000);
- /* NOTE: PM4/micro-engine firmware registers look to be the same
- * for a2xx and a3xx.. we could possibly push that part down to
- * adreno_gpu base class. Or push both PM4 and PFP but
- * parameterize the pfp ucode addr/data registers..
- */
-
- /* Load PM4: */
- ptr = (uint32_t *)(adreno_gpu->pm4->data);
- len = adreno_gpu->pm4->size / 4;
- DBG("loading PM4 ucode version: %x", ptr[1]);
-
- gpu_write(gpu, REG_AXXX_CP_DEBUG,
- AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE |
- AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE);
- gpu_write(gpu, REG_AXXX_CP_ME_RAM_WADDR, 0);
- for (i = 1; i < len; i++)
- gpu_write(gpu, REG_AXXX_CP_ME_RAM_DATA, ptr[i]);
-
- /* Load PFP: */
- ptr = (uint32_t *)(adreno_gpu->pfp->data);
- len = adreno_gpu->pfp->size / 4;
- DBG("loading PFP ucode version: %x", ptr[5]);
-
- gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0);
- for (i = 1; i < len; i++)
- gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]);
-
/* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
if (adreno_is_a305(adreno_gpu) || adreno_is_a306(adreno_gpu) ||
adreno_is_a320(adreno_gpu)) {
@@ -326,6 +350,9 @@ static void a3xx_destroy(struct msm_gpu *gpu)
DBG("%s", gpu->name);
+ release_firmware(a3xx_gpu->pm4);
+ release_firmware(a3xx_gpu->pfp);
+
adreno_gpu_cleanup(adreno_gpu);
#ifdef CONFIG_MSM_OCMEM
@@ -32,6 +32,8 @@ struct a3xx_gpu {
/* if OCMEM is used for GMEM: */
uint32_t ocmem_base;
void *ocmem_hdl;
+
+ const struct firmware *pm4, *pfp;
};
#define to_a3xx_gpu(x) container_of(x, struct a3xx_gpu, base)
@@ -141,12 +141,55 @@ static bool a4xx_me_init(struct msm_gpu *gpu)
return a4xx_idle(gpu);
}
-static int a4xx_hw_init(struct msm_gpu *gpu)
+static int a4xx_ucode_init(struct msm_gpu *gpu)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);
+ const struct firmware *fw;
uint32_t *ptr, len;
- int i, ret;
+ int i;
+
+ if (!a4xx_gpu->pm4) {
+ fw = adreno_request_fw(adreno_gpu, adreno_gpu->info->pm4fw);
+ if (IS_ERR(fw))
+ return PTR_ERR(fw);
+
+ a4xx_gpu->pm4 = fw;
+ }
+
+ if (!a4xx_gpu->pfp) {
+ fw = adreno_request_fw(adreno_gpu, adreno_gpu->info->pfpfw);
+ if (IS_ERR(fw))
+ return PTR_ERR(fw);
+
+ a4xx_gpu->pfp = fw;
+ }
+
+ /* Load PM4: */
+ ptr = (uint32_t *)(a4xx_gpu->pm4->data);
+ len = a4xx_gpu->pm4->size / 4;
+ DBG("loading PM4 ucode version: %u", ptr[0]);
+ gpu_write(gpu, REG_A4XX_CP_ME_RAM_WADDR, 0);
+ for (i = 1; i < len; i++)
+ gpu_write(gpu, REG_A4XX_CP_ME_RAM_DATA, ptr[i]);
+
+ /* Load PFP: */
+ ptr = (uint32_t *)(a4xx_gpu->pfp->data);
+ len = a4xx_gpu->pfp->size / 4;
+ DBG("loading PFP ucode version: %u", ptr[0]);
+
+ gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_ADDR, 0);
+ for (i = 1; i < len; i++)
+ gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_DATA, ptr[i]);
+
+ return 0;
+}
+
+static int a4xx_hw_init(struct msm_gpu *gpu)
+{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);
+ int ret;
if (adreno_is_a420(adreno_gpu)) {
gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT, 0x0001001F);
@@ -273,22 +316,9 @@ static int a4xx_hw_init(struct msm_gpu *gpu)
if (ret)
return ret;
- /* Load PM4: */
- ptr = (uint32_t *)(adreno_gpu->pm4->data);
- len = adreno_gpu->pm4->size / 4;
- DBG("loading PM4 ucode version: %u", ptr[0]);
- gpu_write(gpu, REG_A4XX_CP_ME_RAM_WADDR, 0);
- for (i = 1; i < len; i++)
- gpu_write(gpu, REG_A4XX_CP_ME_RAM_DATA, ptr[i]);
-
- /* Load PFP: */
- ptr = (uint32_t *)(adreno_gpu->pfp->data);
- len = adreno_gpu->pfp->size / 4;
- DBG("loading PFP ucode version: %u", ptr[0]);
-
- gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_ADDR, 0);
- for (i = 1; i < len; i++)
- gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_DATA, ptr[i]);
+ ret = a4xx_ucode_init(gpu);
+ if (ret)
+ return ret;
/* clear ME_HALT to start micro engine */
gpu_write(gpu, REG_A4XX_CP_ME_CNTL, 0);
@@ -324,6 +354,9 @@ static void a4xx_destroy(struct msm_gpu *gpu)
DBG("%s", gpu->name);
+ release_firmware(a4xx_gpu->pm4);
+ release_firmware(a4xx_gpu->pfp);
+
adreno_gpu_cleanup(adreno_gpu);
#ifdef CONFIG_MSM_OCMEM
@@ -27,6 +27,8 @@ struct a4xx_gpu {
/* if OCMEM is used for GMEM: */
uint32_t ocmem_base;
void *ocmem_hdl;
+
+ const struct firmware *pm4, *pfp;
};
#define to_a4xx_gpu(x) container_of(x, struct a4xx_gpu, base)
@@ -434,20 +434,30 @@ static int a5xx_preempt_start(struct msm_gpu *gpu)
static struct drm_gem_object *a5xx_ucode_load_bo(struct msm_gpu *gpu,
- const struct firmware *fw, u64 *iova)
+ const char *fwname, u64 *iova)
{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ const struct firmware *fw;
struct drm_gem_object *bo;
void *ptr;
+ fw = adreno_request_fw(adreno_gpu, fwname);
+ if (IS_ERR(fw))
+ return ERR_CAST(fw);
+
ptr = msm_gem_kernel_new_locked(gpu->dev, fw->size - 4,
MSM_BO_UNCACHED | MSM_BO_GPU_READONLY, gpu->aspace, &bo, iova);
- if (IS_ERR(ptr))
- return ERR_CAST(ptr);
+ if (IS_ERR(ptr)) {
+ bo = ERR_CAST(ptr);
+ goto out;
+ }
memcpy(ptr, &fw->data[4], fw->size - 4);
msm_gem_put_vaddr(bo);
+out:
+ release_firmware(fw);
return bo;
}
@@ -458,8 +468,8 @@ static int a5xx_ucode_init(struct msm_gpu *gpu)
int ret;
if (!a5xx_gpu->pm4_bo) {
- a5xx_gpu->pm4_bo = a5xx_ucode_load_bo(gpu, adreno_gpu->pm4,
- &a5xx_gpu->pm4_iova);
+ a5xx_gpu->pm4_bo = a5xx_ucode_load_bo(gpu,
+ adreno_gpu->info->pm4fw, &a5xx_gpu->pm4_iova);
if (IS_ERR(a5xx_gpu->pm4_bo)) {
ret = PTR_ERR(a5xx_gpu->pm4_bo);
@@ -471,8 +481,8 @@ static int a5xx_ucode_init(struct msm_gpu *gpu)
}
if (!a5xx_gpu->pfp_bo) {
- a5xx_gpu->pfp_bo = a5xx_ucode_load_bo(gpu, adreno_gpu->pfp,
- &a5xx_gpu->pfp_iova);
+ a5xx_gpu->pfp_bo = a5xx_ucode_load_bo(gpu,
+ adreno_gpu->info->pfpfw, &a5xx_gpu->pfp_iova);
if (IS_ERR(a5xx_gpu->pfp_bo)) {
ret = PTR_ERR(a5xx_gpu->pfp_bo);
@@ -138,29 +138,6 @@ int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value)
return ERR_PTR(-ENOENT);
}
-static int adreno_load_fw(struct adreno_gpu *adreno_gpu)
-{
- const struct firmware *fw;
-
- if (adreno_gpu->pm4)
- return 0;
-
- fw = adreno_request_fw(adreno_gpu, adreno_gpu->info->pm4fw);
- if (IS_ERR(fw))
- return PTR_ERR(fw);
- adreno_gpu->pm4 = fw;
-
- fw = adreno_request_fw(adreno_gpu, adreno_gpu->info->pfpfw);
- if (IS_ERR(fw)) {
- release_firmware(adreno_gpu->pm4);
- adreno_gpu->pm4 = NULL;
- return PTR_ERR(fw);
- }
- adreno_gpu->pfp = fw;
-
- return 0;
-}
-
int adreno_hw_init(struct msm_gpu *gpu)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
@@ -168,10 +145,6 @@ int adreno_hw_init(struct msm_gpu *gpu)
DBG("%s", gpu->name);
- ret = adreno_load_fw(adreno_gpu);
- if (ret)
- return ret;
-
for (i = 0; i < gpu->nr_rings; i++) {
struct msm_ringbuffer *ring = gpu->rb[i];
@@ -569,8 +542,5 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
void adreno_gpu_cleanup(struct adreno_gpu *adreno_gpu)
{
- release_firmware(adreno_gpu->pm4);
- release_firmware(adreno_gpu->pfp);
-
msm_gpu_cleanup(&adreno_gpu->base);
}
@@ -114,9 +114,6 @@ struct adreno_gpu {
FW_LOCATION_HELPER,
} fwloc;
- /* firmware: */
- const struct firmware *pm4, *pfp;
-
/*
* Register offsets are different between some GPUs.
* GPU specific offsets will be exported by GPU specific
Move microcode loading to be target specific. While this results in a bit more code duplication (especially between A3XX/A4XX), this gives us more flexibility for newer targets that don't need to keep an extra copy of the firmware data around in memory. Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org> --- drivers/gpu/drm/msm/adreno/a3xx_gpu.c | 83 ++++++++++++++++++++++----------- drivers/gpu/drm/msm/adreno/a3xx_gpu.h | 2 + drivers/gpu/drm/msm/adreno/a4xx_gpu.c | 69 ++++++++++++++++++++------- drivers/gpu/drm/msm/adreno/a4xx_gpu.h | 2 + drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 24 +++++++--- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 30 ------------ drivers/gpu/drm/msm/adreno/adreno_gpu.h | 3 -- 7 files changed, 127 insertions(+), 86 deletions(-)