Message ID | 20181017162818.8498-8-jcrouse@codeaurora.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/msm GPU updates for 4.21 | expand |
On 10/17/2018 9:58 PM, Jordan Crouse wrote: > Add support for gathering and dumping the a6xx GPU state including > registers, GMU registers, indexed registers, shader blocks, > context clusters and debugbus. > > Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org> > --- > drivers/gpu/drm/msm/Makefile | 1 + > drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 25 +- > drivers/gpu/drm/msm/adreno/a6xx_gmu.h | 3 + > drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 39 +- > drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 8 + > drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 1159 +++++++++++++++++++ > drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h | 430 +++++++ > 7 files changed, 1627 insertions(+), 38 deletions(-) > create mode 100644 drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c > create mode 100644 drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h > > diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile > index 19ab521d4c3a..33645c6539ee 100644 > --- a/drivers/gpu/drm/msm/Makefile > +++ b/drivers/gpu/drm/msm/Makefile > @@ -14,6 +14,7 @@ msm-y := \ > adreno/a6xx_gpu.o \ > adreno/a6xx_gmu.o \ > adreno/a6xx_hfi.o \ > + adreno/a6xx_gpu_state.o \ > hdmi/hdmi.o \ > hdmi/hdmi_audio.o \ > hdmi/hdmi_bridge.o \ > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c > index d4e98e5876bc..089b013d7bb9 100644 > --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c > +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c > @@ -51,10 +51,31 @@ static irqreturn_t a6xx_hfi_irq(int irq, void *data) > return IRQ_HANDLED; > } > > +bool a6xx_gmu_sptprac_is_on(struct a6xx_gmu *gmu) > +{ > + u32 val; > + > + /* This can be called from gpu state code so make sure GMU is valid */ > + if (IS_ERR_OR_NULL(gmu->mmio)) > + return false; > + > + val = gmu_read(gmu, REG_A6XX_GMU_SPTPRAC_PWR_CLK_STATUS); > + > + return !(val & > + (A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_SPTPRAC_GDSC_POWER_OFF | > + A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_SP_CLOCK_OFF)); > +} > + > /* Check to see if the GX rail is still powered */ > 
-static bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu) > +bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu) > { > - u32 val = gmu_read(gmu, REG_A6XX_GMU_SPTPRAC_PWR_CLK_STATUS); > + u32 val; > + > + /* This can be called from gpu state code so make sure GMU is valid */ > + if (IS_ERR_OR_NULL(gmu->mmio)) > + return false; > + > + val = gmu_read(gmu, REG_A6XX_GMU_SPTPRAC_PWR_CLK_STATUS); > > return !(val & > (A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_GDSC_POWER_OFF | > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h > index 35f765afae45..c721d9165d8e 100644 > --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h > +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h > @@ -164,4 +164,7 @@ void a6xx_hfi_init(struct a6xx_gmu *gmu); > int a6xx_hfi_start(struct a6xx_gmu *gmu, int boot_state); > void a6xx_hfi_stop(struct a6xx_gmu *gmu); > > +bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu); > +bool a6xx_gmu_sptprac_is_on(struct a6xx_gmu *gmu); > + > #endif > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > index 631257c297fd..3afd4df2e250 100644 > --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > @@ -645,33 +645,6 @@ static const u32 a6xx_register_offsets[REG_ADRENO_REGISTER_MAX] = { > REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A6XX_CP_RB_CNTL), > }; > > -static const u32 a6xx_registers[] = { > - 0x0000, 0x0002, 0x0010, 0x0010, 0x0012, 0x0012, 0x0018, 0x001b, > - 0x001e, 0x0032, 0x0038, 0x003c, 0x0042, 0x0042, 0x0044, 0x0044, > - 0x0047, 0x0047, 0x0056, 0x0056, 0x00ad, 0x00ae, 0x00b0, 0x00fb, > - 0x0100, 0x011d, 0x0200, 0x020d, 0x0210, 0x0213, 0x0218, 0x023d, > - 0x0400, 0x04f9, 0x0500, 0x0500, 0x0505, 0x050b, 0x050e, 0x0511, > - 0x0533, 0x0533, 0x0540, 0x0555, 0x0800, 0x0808, 0x0810, 0x0813, > - 0x0820, 0x0821, 0x0823, 0x0827, 0x0830, 0x0833, 0x0840, 0x0843, > - 0x084f, 0x086f, 0x0880, 0x088a, 0x08a0, 0x08ab, 0x08c0, 0x08c4, > - 0x08d0, 0x08dd, 0x08f0, 0x08f3, 0x0900, 0x0903, 
0x0908, 0x0911, > - 0x0928, 0x093e, 0x0942, 0x094d, 0x0980, 0x0984, 0x098d, 0x0996, > - 0x0998, 0x099e, 0x09a0, 0x09a6, 0x09a8, 0x09ae, 0x09b0, 0x09b1, > - 0x09c2, 0x09c8, 0x0a00, 0x0a03, 0x0c00, 0x0c04, 0x0c06, 0x0c06, > - 0x0c10, 0x0cd9, 0x0e00, 0x0e0e, 0x0e10, 0x0e13, 0x0e17, 0x0e19, > - 0x0e1c, 0x0e2b, 0x0e30, 0x0e32, 0x0e38, 0x0e39, 0x8600, 0x8601, > - 0x8610, 0x861b, 0x8620, 0x8620, 0x8628, 0x862b, 0x8630, 0x8637, > - 0x8e01, 0x8e01, 0x8e04, 0x8e05, 0x8e07, 0x8e08, 0x8e0c, 0x8e0c, > - 0x8e10, 0x8e1c, 0x8e20, 0x8e25, 0x8e28, 0x8e28, 0x8e2c, 0x8e2f, > - 0x8e3b, 0x8e3e, 0x8e40, 0x8e43, 0x8e50, 0x8e5e, 0x8e70, 0x8e77, > - 0x9600, 0x9604, 0x9624, 0x9637, 0x9e00, 0x9e01, 0x9e03, 0x9e0e, > - 0x9e11, 0x9e16, 0x9e19, 0x9e19, 0x9e1c, 0x9e1c, 0x9e20, 0x9e23, > - 0x9e30, 0x9e31, 0x9e34, 0x9e34, 0x9e70, 0x9e72, 0x9e78, 0x9e79, > - 0x9e80, 0x9fff, 0xa600, 0xa601, 0xa603, 0xa603, 0xa60a, 0xa60a, > - 0xa610, 0xa617, 0xa630, 0xa630, > - ~0 > -}; > - > static int a6xx_pm_resume(struct msm_gpu *gpu) > { > struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); > @@ -724,14 +697,6 @@ static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value) > return 0; > } > > -#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP) > -static void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state, > - struct drm_printer *p) > -{ > - adreno_show(gpu, state, p); > -} > -#endif > - > static struct msm_ringbuffer *a6xx_active_ring(struct msm_gpu *gpu) > { > struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); > @@ -796,6 +761,8 @@ static const struct adreno_gpu_funcs funcs = { > .gpu_busy = a6xx_gpu_busy, > .gpu_get_freq = a6xx_gmu_get_freq, > .gpu_set_freq = a6xx_gmu_set_freq, > + .gpu_state_get = a6xx_gpu_state_get, > + .gpu_state_put = a6xx_gpu_state_put, > }, > .get_timestamp = a6xx_get_timestamp, > }; > @@ -817,7 +784,7 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) > adreno_gpu = &a6xx_gpu->base; > gpu = &adreno_gpu->base; > > - adreno_gpu->registers = 
a6xx_registers; > + adreno_gpu->registers = NULL; > adreno_gpu->reg_offsets = a6xx_register_offsets; > > ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h > index 4127dcebc202..528a4cfe07cd 100644 > --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h > +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h > @@ -56,6 +56,14 @@ void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state); > > int a6xx_gmu_probe(struct a6xx_gpu *a6xx_gpu, struct device_node *node); > void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu); > + > void a6xx_gmu_set_freq(struct msm_gpu *gpu, unsigned long freq); > unsigned long a6xx_gmu_get_freq(struct msm_gpu *gpu); > + > +void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state, > + struct drm_printer *p); > + > +struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu); > +int a6xx_gpu_state_put(struct msm_gpu_state *state); > + > #endif /* __A6XX_GPU_H__ */ > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c > new file mode 100644 > index 000000000000..d46b94462bb5 > --- /dev/null > +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c > @@ -0,0 +1,1159 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* Copyright (c) 2018 The Linux Foundation. All rights reserved. 
*/ > + > +#include <linux/ascii85.h> > +#include "msm_gem.h" > +#include "a6xx_gpu.h" > +#include "a6xx_gmu.h" > +#include "a6xx_gpu_state.h" > +#include "a6xx_gmu.xml.h" > + > +struct a6xx_gpu_state_obj { > + const void *handle; > + u32 *data; > +}; > + > +struct a6xx_gpu_state { > + struct msm_gpu_state base; > + > + struct a6xx_gpu_state_obj *gmu_registers; > + int nr_gmu_registers; > + > + struct a6xx_gpu_state_obj *registers; > + int nr_registers; > + > + struct a6xx_gpu_state_obj *shaders; > + int nr_shaders; > + > + struct a6xx_gpu_state_obj *clusters; > + int nr_clusters; > + > + struct a6xx_gpu_state_obj *dbgahb_clusters; > + int nr_dbgahb_clusters; > + > + struct a6xx_gpu_state_obj *indexed_regs; > + int nr_indexed_regs; > + > + struct a6xx_gpu_state_obj *debugbus; > + int nr_debugbus; > + > + struct a6xx_gpu_state_obj *vbif_debugbus; > + > + struct a6xx_gpu_state_obj *cx_debugbus; > + int nr_cx_debugbus; > +}; > + > +static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val) > +{ > + in[0] = val; > + in[1] = (((u64) reg) << 44 | (1 << 21) | 1); > + > + return 2; > +} > + > +static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target) > +{ > + in[0] = target; > + in[1] = (((u64) reg) << 44 | dwords); > + > + return 2; > +} > + > +static inline int CRASHDUMP_FINI(u64 *in) > +{ > + in[0] = 0; > + in[1] = 0; > + > + return 2; > +} > + > +struct a6xx_crashdumper { > + void *ptr; > + struct drm_gem_object *bo; > + u64 iova; > +}; > + > +/* > + * Allocate 1MB for the crashdumper scratch region - 8k for the script and > + * the rest for the data > + */ > +#define A6XX_CD_DATA_OFFSET 8192 > +#define A6XX_CD_DATA_SIZE (SZ_1M - 8192) > + > +static int a6xx_crashdumper_init(struct msm_gpu *gpu, > + struct a6xx_crashdumper *dumper) > +{ > + dumper->ptr = msm_gem_kernel_new_locked(gpu->dev, > + SZ_1M, MSM_BO_UNCACHED, gpu->aspace, > + &dumper->bo, &dumper->iova); > + > + return IS_ERR(dumper->ptr) ? 
PTR_ERR(dumper->ptr) : 0; > +} > + > +static int a6xx_crashdumper_run(struct msm_gpu *gpu, > + struct a6xx_crashdumper *dumper) > +{ > + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); > + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); > + u32 val; > + int ret; > + > + if (IS_ERR_OR_NULL(dumper->ptr)) > + return -EINVAL; > + > + if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu)) > + return -EINVAL; > + > + /* Make sure all pending memory writes are posted */ > + wmb(); > + > + gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO, > + REG_A6XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova); > + > + gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1); > + > + ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val, > + val & 0x02, 100, 10000); > + > + gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0); > + > + return ret; > +} > + > +static void a6xx_crashdumper_free(struct msm_gpu *gpu, > + struct a6xx_crashdumper *dumper) > +{ > + msm_gem_put_iova(dumper->bo, gpu->aspace); > + msm_gem_put_vaddr(dumper->bo); > + > + drm_gem_object_unreference(dumper->bo); > +} > + > +/* read a value from the GX debug bus */ > +static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset, > + u32 *data) > +{ > + u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) | > + A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block); > + > + gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg); > + gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg); > + gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg); > + gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg); > + > + /* Wait 1 us to make sure the data is flowing */ > + udelay(1); > + > + data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2); > + data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1); > + > + return 2; > +} > + > +#define cxdbg_write(ptr, offset, val) \ > + msm_writel((val), (ptr) + ((offset) << 2)) > + > +#define cxdbg_read(ptr, offset) \ > + msm_readl((ptr) + ((offset) << 2)) > + > +/* read a value from the CX debug 
bus */ > +static int cx_debugbus_read(void *__iomem cxdbg, u32 block, u32 offset, > + u32 *data) > +{ > + u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) | > + A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block); > + > + cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg); > + cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg); > + cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg); > + cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg); > + > + /* Wait 1 us to make sure the data is flowing */ > + udelay(1); > + > + data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2); > + data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1); > + > + return 2; > +} > + > +/* Read a chunk of data from the VBIF debug bus */ > +static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1, > + u32 reg, int count, u32 *data) > +{ > + int i; > + > + gpu_write(gpu, ctrl0, reg); > + > + for (i = 0; i < count; i++) { > + gpu_write(gpu, ctrl1, i); > + data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT); > + } > + > + return count; > +} > + > +#define AXI_ARB_BLOCKS 2 > +#define XIN_AXI_BLOCKS 5 > +#define XIN_CORE_BLOCKS 4 > + > +#define VBIF_DEBUGBUS_BLOCK_SIZE \ > + ((16 * AXI_ARB_BLOCKS) + \ > + (18 * XIN_AXI_BLOCKS) + \ > + (12 * XIN_CORE_BLOCKS)) > + > +static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu, > + struct a6xx_gpu_state_obj *obj) > +{ > + u32 clk, *ptr; > + int i; > + > + obj->data = kcalloc(VBIF_DEBUGBUS_BLOCK_SIZE, sizeof(u32), GFP_KERNEL); > + obj->handle = NULL; > + > + /* Get the current clock setting */ > + clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON); > + > + /* Force on the bus so we can read it */ > + gpu_write(gpu, REG_A6XX_VBIF_CLKON, > + clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS); > + > + /* We will read from BUS2 first, so disable BUS1 */ > + gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0); > + > + /* Enable the VBIF bus for reading */ > + gpu_write(gpu, 
REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1); > + > + ptr = obj->data; > + > + for (i = 0; i < AXI_ARB_BLOCKS; i++) > + ptr += vbif_debugbus_read(gpu, > + REG_A6XX_VBIF_TEST_BUS2_CTRL0, > + REG_A6XX_VBIF_TEST_BUS2_CTRL1, > + 1 << (i + 16), 16, ptr); > + > + for (i = 0; i < XIN_AXI_BLOCKS; i++) > + ptr += vbif_debugbus_read(gpu, > + REG_A6XX_VBIF_TEST_BUS2_CTRL0, > + REG_A6XX_VBIF_TEST_BUS2_CTRL1, > + 1 << i, 18, ptr); > + > + /* Stop BUS2 so we can turn on BUS1 */ > + gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0); > + > + for (i = 0; i < XIN_CORE_BLOCKS; i++) > + ptr += vbif_debugbus_read(gpu, > + REG_A6XX_VBIF_TEST_BUS1_CTRL0, > + REG_A6XX_VBIF_TEST_BUS1_CTRL1, > + 1 << i, 12, ptr); > + > + /* Restore the VBIF clock setting */ > + gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk); > +} > + > +static void a6xx_get_debugbus_block(struct msm_gpu *gpu, > + const struct a6xx_debugbus_block *block, > + struct a6xx_gpu_state_obj *obj) > +{ > + int i; > + u32 *ptr; > + > + obj->data = kcalloc(block->count, sizeof(u64), GFP_KERNEL); > + if (!obj->data) > + return; > + > + obj->handle = block; > + > + for (ptr = obj->data, i = 0; i < block->count; i++) > + ptr += debugbus_read(gpu, block->id, i, ptr); > +} > + > +static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg, > + const struct a6xx_debugbus_block *block, > + struct a6xx_gpu_state_obj *obj) > +{ > + int i; > + u32 *ptr; > + > + obj->data = kcalloc(block->count, sizeof(u64), GFP_KERNEL); > + if (!obj->data) > + return; > + > + obj->handle = block; > + > + for (ptr = obj->data, i = 0; i < block->count; i++) > + ptr += cx_debugbus_read(cxdbg, block->id, i, ptr); > +} > + > +static void a6xx_get_debugbus(struct msm_gpu *gpu, > + struct a6xx_gpu_state *a6xx_state) > +{ > + struct resource *res; > + void __iomem *cxdbg = NULL; > + > + /* Set up the GX debug bus */ > + > + gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT, > + A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf)); > + > + gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM, > + 
A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf)); > + > + gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0); > + gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0); > + gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0); > + gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0); > + > + gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210); > + gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98); > + > + gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0); > + gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0); > + gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0); > + gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0); > + > + /* Set up the CX debug bus - it lives elsewhere in the system so do a > + * temporary ioremap for the registers > + */ > + res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM, > + "cx_dbgc"); > + > + if (res) > + cxdbg = ioremap(res->start, resource_size(res)); > + > + if (cxdbg) { > + cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT, > + A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf)); > + > + cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM, > + A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf)); > + > + cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0); > + cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0); > + cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0); > + cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0); > + > + cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, > + 0x76543210); > + cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, > + 0xFEDCBA98); > + > + cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0); > + cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0); > + cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0); > + cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0); > + } > + > + a6xx_state->debugbus = kcalloc(ARRAY_SIZE(a6xx_debugbus_blocks), > + sizeof(*a6xx_state->debugbus), GFP_KERNEL); > + > + if (a6xx_state->debugbus) { > + int i; > + > + for (i = 0; i < 
ARRAY_SIZE(a6xx_debugbus_blocks); i++) > + a6xx_get_debugbus_block(gpu, > + &a6xx_debugbus_blocks[i], > + &a6xx_state->debugbus[i]); > + > + a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks); > + } > + > + a6xx_state->vbif_debugbus = kzalloc(sizeof(*a6xx_state->vbif_debugbus), > + GFP_KERNEL); > + > + if (a6xx_state->vbif_debugbus) > + a6xx_get_vbif_debugbus_block(gpu, a6xx_state->vbif_debugbus); > + > + if (cxdbg) { > + a6xx_state->cx_debugbus = > + kcalloc(ARRAY_SIZE(a6xx_cx_debugbus_blocks), > + sizeof(*a6xx_state->cx_debugbus), GFP_KERNEL); > + > + if (a6xx_state->cx_debugbus) { > + int i; > + > + for (i = 0; i < ARRAY_SIZE(a6xx_cx_debugbus_blocks); i++) > + a6xx_get_cx_debugbus_block(cxdbg, > + &a6xx_cx_debugbus_blocks[i], > + &a6xx_state->cx_debugbus[i]); > + > + a6xx_state->nr_cx_debugbus = > + ARRAY_SIZE(a6xx_cx_debugbus_blocks); > + } > + > + iounmap(cxdbg); > + } > +} > + > +#define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1) > + > +/* Read a data cluster from behind the AHB aperture */ > +static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu, > + const struct a6xx_dbgahb_cluster *dbgahb, > + struct a6xx_gpu_state_obj *obj, > + struct a6xx_crashdumper *dumper) > +{ > + u64 *in = dumper->ptr; > + u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; > + size_t datasize; > + int i, regcount = 0; > + > + for (i = 0; i < A6XX_NUM_CONTEXTS; i++) { > + int j; > + > + in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, > + (dbgahb->statetype + i * 2) << 8); > + > + for (j = 0; j < dbgahb->count; j += 2) { > + int count = RANGE(dbgahb->registers, j); > + u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE + > + dbgahb->registers[j] - (dbgahb->base >> 2); > + > + in += CRASHDUMP_READ(in, offset, count, out); > + > + out += count * sizeof(u32); > + > + if (i == 0) > + regcount += count; > + } > + } > + > + CRASHDUMP_FINI(in); > + > + datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32); > + > + if (WARN_ON(datasize > A6XX_CD_DATA_SIZE)) > + return; > + > + 
if (a6xx_crashdumper_run(gpu, dumper)) > + return; > + > + obj->handle = dbgahb; > + obj->data = kmemdup(dumper->ptr + A6XX_CD_DATA_OFFSET, > + datasize, GFP_KERNEL); > +} > + > +static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu, > + struct a6xx_gpu_state *a6xx_state, > + struct a6xx_crashdumper *dumper) > +{ > + int i; > + > + a6xx_state->dbgahb_clusters = kcalloc(ARRAY_SIZE(a6xx_dbgahb_clusters), > + sizeof(*a6xx_state->dbgahb_clusters), GFP_KERNEL); > + > + if (!a6xx_state->dbgahb_clusters) > + return; > + > + a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters); > + > + for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++) > + a6xx_get_dbgahb_cluster(gpu, &a6xx_dbgahb_clusters[i], > + &a6xx_state->dbgahb_clusters[i], dumper); > +} > + > +/* Read a data cluster from the CP aperture with the crashdumper */ > +static void a6xx_get_cluster(struct msm_gpu *gpu, > + const struct a6xx_cluster *cluster, > + struct a6xx_gpu_state_obj *obj, > + struct a6xx_crashdumper *dumper) > +{ > + u64 *in = dumper->ptr; > + u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; > + size_t datasize; > + int i, regcount = 0; > + > + /* Some clusters need a selector register to be programmed too */ > + if (cluster->sel_reg) > + in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val); > + > + for (i = 0; i < A6XX_NUM_CONTEXTS; i++) { > + int j; > + > + in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD, > + (cluster->id << 8) | (i << 4) | i); > + > + for (j = 0; j < cluster->count; j += 2) { > + int count = RANGE(cluster->registers, j); > + > + in += CRASHDUMP_READ(in, cluster->registers[j], > + count, out); > + > + out += count * sizeof(u32); > + > + if (i == 0) > + regcount += count; > + } > + } > + > + CRASHDUMP_FINI(in); > + > + datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32); > + > + if (WARN_ON(datasize > A6XX_CD_DATA_SIZE)) > + return; > + > + if (a6xx_crashdumper_run(gpu, dumper)) > + return; > + > + obj->handle = cluster; > + obj->data = 
kmemdup(dumper->ptr + A6XX_CD_DATA_OFFSET, > + datasize, GFP_KERNEL); > +} > + > +static void a6xx_get_clusters(struct msm_gpu *gpu, > + struct a6xx_gpu_state *a6xx_state, > + struct a6xx_crashdumper *dumper) > +{ > + int i; > + > + a6xx_state->clusters = kcalloc(ARRAY_SIZE(a6xx_clusters), > + sizeof(*a6xx_state->clusters), GFP_KERNEL); > + > + if (!a6xx_state->clusters) > + return; > + > + a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters); > + > + for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++) > + a6xx_get_cluster(gpu, &a6xx_clusters[i], > + &a6xx_state->clusters[i], dumper); > +} > + > +/* Read a shader / debug block from the HLSQ aperture with the crashdumper */ > +static void a6xx_get_shader_block(struct msm_gpu *gpu, > + const struct a6xx_shader_block *block, > + struct a6xx_gpu_state_obj *obj, > + struct a6xx_crashdumper *dumper) > +{ > + u64 *in = dumper->ptr; > + size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32); > + int i; > + > + if (WARN_ON(datasize > A6XX_CD_DATA_SIZE)) > + return; > + > + for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) { > + in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, > + (block->type << 8) | i); > + > + in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE, > + block->size, dumper->iova + A6XX_CD_DATA_OFFSET); > + } > + > + CRASHDUMP_FINI(in); > + > + if (a6xx_crashdumper_run(gpu, dumper)) > + return; > + > + obj->handle = block; > + obj->data = kmemdup(dumper->ptr + A6XX_CD_DATA_OFFSET, > + datasize, GFP_KERNEL); > +} > + > +static void a6xx_get_shaders(struct msm_gpu *gpu, > + struct a6xx_gpu_state *a6xx_state, > + struct a6xx_crashdumper *dumper) > +{ > + int i; > + > + a6xx_state->shaders = kcalloc(ARRAY_SIZE(a6xx_shader_blocks), > + sizeof(*a6xx_state->shaders), GFP_KERNEL); > + > + if (!a6xx_state->shaders) > + return; > + > + a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks); > + > + for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++) > + a6xx_get_shader_block(gpu, 
&a6xx_shader_blocks[i], > + &a6xx_state->shaders[i], dumper); > +} > + > +/* Read registers from behind the HLSQ aperture with the crashdumper */ > +static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu, > + const struct a6xx_registers *regs, > + struct a6xx_gpu_state_obj *obj, > + struct a6xx_crashdumper *dumper) > + > +{ > + u64 *in = dumper->ptr; > + u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; > + int i, regcount = 0; > + > + in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1); > + > + for (i = 0; i < regs->count; i += 2) { > + u32 count = RANGE(regs->registers, i); > + u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE + > + regs->registers[i] - (regs->val0 >> 2); > + > + in += CRASHDUMP_READ(in, offset, count, out); > + > + out += count * sizeof(u32); > + regcount += count; > + } > + > + CRASHDUMP_FINI(in); > + > + if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE)) > + return; > + > + if (a6xx_crashdumper_run(gpu, dumper)) > + return; > + > + obj->handle = regs; > + obj->data = kmemdup(dumper->ptr + A6XX_CD_DATA_OFFSET, > + regcount * sizeof(u32), GFP_KERNEL); > +} > + > +/* Read a block of registers using the crashdumper */ > +static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu, > + const struct a6xx_registers *regs, > + struct a6xx_gpu_state_obj *obj, > + struct a6xx_crashdumper *dumper) > + > +{ > + u64 *in = dumper->ptr; > + u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; > + int i, regcount = 0; > + > + /* Some blocks might need to program a selector register first */ > + if (regs->val0) > + in += CRASHDUMP_WRITE(in, regs->val0, regs->val1); > + > + for (i = 0; i < regs->count; i += 2) { > + u32 count = RANGE(regs->registers, i); > + > + in += CRASHDUMP_READ(in, regs->registers[i], count, out); > + > + out += count * sizeof(u32); > + regcount += count; > + } > + > + CRASHDUMP_FINI(in); > + > + if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE)) > + return; > + > + if (a6xx_crashdumper_run(gpu, dumper)) > 
+ return; > + > + obj->handle = regs; > + obj->data = kmemdup(dumper->ptr + A6XX_CD_DATA_OFFSET, > + regcount * sizeof(u32), GFP_KERNEL); > +} > + > +/* Read a block of registers via AHB */ > +static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu, > + const struct a6xx_registers *regs, > + struct a6xx_gpu_state_obj *obj) > +{ > + int i, regcount = 0, index = 0; > + > + for (i = 0; i < regs->count; i += 2) > + regcount += RANGE(regs->registers, i); > + > + obj->handle = (const void *) regs; > + obj->data = kcalloc(regcount, sizeof(u32), GFP_KERNEL); > + if (!obj->data) > + return; > + > + for (i = 0; i < regs->count; i += 2) { > + u32 count = RANGE(regs->registers, i); > + int j; > + > + for (j = 0; j < count; j++) > + obj->data[index++] = gpu_read(gpu, > + regs->registers[i] + j); > + } > +} > + > +/* Read a block of GMU registers */ > +static void _a6xx_get_gmu_registers(struct msm_gpu *gpu, > + const struct a6xx_registers *regs, > + struct a6xx_gpu_state_obj *obj) > +{ > + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); > + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); > + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; > + int i, regcount = 0, index = 0; > + > + for (i = 0; i < regs->count; i += 2) > + regcount += RANGE(regs->registers, i); > + > + obj->handle = (const void *) regs; > + obj->data = kcalloc(regcount, sizeof(u32), GFP_KERNEL); > + if (!obj->data) > + return; > + > + for (i = 0; i < regs->count; i += 2) { > + u32 count = RANGE(regs->registers, i); > + int j; > + > + for (j = 0; j < count; j++) > + obj->data[index++] = gmu_read(gmu, > + regs->registers[i] + j); > + } > +} > + > +static void a6xx_get_gmu_registers(struct msm_gpu *gpu, > + struct a6xx_gpu_state *a6xx_state) > +{ > + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); > + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); > + > + a6xx_state->gmu_registers = kcalloc(2, > + sizeof(*a6xx_state->gmu_registers), GFP_KERNEL); > + > + if (!a6xx_state->gmu_registers) > + return; > + > 
+ a6xx_state->nr_gmu_registers = 2; > + > + /* Get the CX GMU registers from AHB */ > + _a6xx_get_gmu_registers(gpu, &a6xx_gmu_reglist[0], > + &a6xx_state->gmu_registers[0]); > + > + if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu)) > + return; > + > + /* Set the fence to ALLOW mode so we can access the registers */ > + gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0); > + > + _a6xx_get_gmu_registers(gpu, &a6xx_gmu_reglist[1], > + &a6xx_state->gmu_registers[1]); > +} > + > +static void a6xx_get_registers(struct msm_gpu *gpu, > + struct a6xx_gpu_state *a6xx_state, > + struct a6xx_crashdumper *dumper) > +{ > + int i, count = ARRAY_SIZE(a6xx_ahb_reglist) + > + ARRAY_SIZE(a6xx_reglist) + > + ARRAY_SIZE(a6xx_hlsq_reglist); > + int index = 0; > + > + a6xx_state->registers = kcalloc(count, sizeof(*a6xx_state->registers), > + GFP_KERNEL); > + > + if (!a6xx_state->registers) > + return; > + > + a6xx_state->nr_registers = count; > + > + for (i = 0; i < ARRAY_SIZE(a6xx_ahb_reglist); i++) > + a6xx_get_ahb_gpu_registers(gpu, > + &a6xx_ahb_reglist[i], > + &a6xx_state->registers[index++]); > + > + for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++) > + a6xx_get_crashdumper_registers(gpu, > + &a6xx_reglist[i], > + &a6xx_state->registers[index++], > + dumper); > + > + for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++) > + a6xx_get_crashdumper_hlsq_registers(gpu, > + &a6xx_hlsq_reglist[i], > + &a6xx_state->registers[index++], > + dumper); > +} > + > +/* Read a block of data from an indexed register pair */ > +static void a6xx_get_indexed_regs(struct msm_gpu *gpu, > + const struct a6xx_indexed_registers *indexed, > + struct a6xx_gpu_state_obj *obj) > +{ > + int i; > + > + obj->handle = (const void *) indexed; > + obj->data = kcalloc(indexed->count, sizeof(u32), GFP_KERNEL); > + if (!obj->data) > + return; > + > + /* All the indexed banks start at address 0 */ > + gpu_write(gpu, indexed->addr, 0); > + > + /* Read the data - each read increments the internal address by 1 */ > + for (i = 0; i < 
indexed->count; i++) > + obj->data[i] = gpu_read(gpu, indexed->data); > +} > + > +static void a6xx_get_indexed_registers(struct msm_gpu *gpu, > + struct a6xx_gpu_state *a6xx_state) > +{ > + u32 mempool_size; > + int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1; > + int i; > + > + a6xx_state->indexed_regs = kcalloc(count, > + sizeof(a6xx_state->indexed_regs), GFP_KERNEL); > + if (!a6xx_state->indexed_regs) > + return; > + > + for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++) > + a6xx_get_indexed_regs(gpu, &a6xx_indexed_reglist[i], > + &a6xx_state->indexed_regs[i]); > + > + /* Set the CP mempool size to 0 to stabilize it while dumping */ > + mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE); > + gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0); > + > + /* Get the contents of the CP mempool */ > + a6xx_get_indexed_regs(gpu, &a6xx_cp_mempool_indexed, > + &a6xx_state->indexed_regs[i]); > + > + /* > + * Offset 0x2000 in the mempool is the size - copy the saved size over > + * so the data is consistent > + */ > + a6xx_state->indexed_regs[i].data[0x2000] = mempool_size; > + > + /* Restore the size in the hardware */ > + gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size); > + > + a6xx_state->nr_indexed_regs = count; > +} > + > +struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu) > +{ > + struct a6xx_crashdumper dumper = { 0 }; > + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); > + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); > + struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state), > + GFP_KERNEL); > + > + if (!a6xx_state) > + return ERR_PTR(-ENOMEM); > + > + /* Get the generic state from the adreno core */ > + adreno_gpu_state_get(gpu, &a6xx_state->base); > + > + a6xx_get_gmu_registers(gpu, a6xx_state); > + > + /* If GX isn't on the rest of the data isn't going to be accessible */ > + if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu)) > + return &a6xx_state->base; > + > + /* Get the banks of indexed registers */ > + 
a6xx_get_indexed_registers(gpu, a6xx_state); > + > + /* Try to initialize the crashdumper */ > + if (!a6xx_crashdumper_init(gpu, &dumper)) { > + a6xx_get_registers(gpu, a6xx_state, &dumper); > + a6xx_get_shaders(gpu, a6xx_state, &dumper); > + a6xx_get_clusters(gpu, a6xx_state, &dumper); > + a6xx_get_dbgahb_clusters(gpu, a6xx_state, &dumper); > + > + a6xx_crashdumper_free(gpu, &dumper); > + } > + > + a6xx_get_debugbus(gpu, a6xx_state); > + > + return &a6xx_state->base; > +} > + > +void a6xx_gpu_state_destroy(struct kref *kref) > +{ > + struct msm_gpu_state *state = container_of(kref, > + struct msm_gpu_state, ref); > + struct a6xx_gpu_state *a6xx_state = container_of(state, > + struct a6xx_gpu_state, base); > + int i; > + > + for (i = 0; i < a6xx_state->nr_gmu_registers; i++) > + kfree(a6xx_state->gmu_registers[i].data); > + > + kfree(a6xx_state->gmu_registers); > + > + for (i = 0; i < a6xx_state->nr_registers; i++) > + kfree(a6xx_state->registers[i].data); > + > + kfree(a6xx_state->registers); > + > + for (i = 0; i < a6xx_state->nr_shaders; i++) > + kfree(a6xx_state->shaders[i].data); > + > + kfree(a6xx_state->shaders); > + > + for (i = 0; i < a6xx_state->nr_clusters; i++) > + kfree(a6xx_state->clusters[i].data); > + > + kfree(a6xx_state->clusters); > + > + for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++) > + kfree(a6xx_state->dbgahb_clusters[i].data); > + > + kfree(a6xx_state->dbgahb_clusters); > + > + for (i = 0; i < a6xx_state->nr_indexed_regs; i++) > + kfree(a6xx_state->indexed_regs[i].data); > + > + kfree(a6xx_state->indexed_regs); > + > + for (i = 0; i < a6xx_state->nr_debugbus; i++) > + kfree(a6xx_state->debugbus[i].data); > + > + kfree(a6xx_state->debugbus); > + > + if (a6xx_state->vbif_debugbus) > + kfree(a6xx_state->vbif_debugbus->data); > + > + kfree(a6xx_state->vbif_debugbus); > + > + for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) > + kfree(a6xx_state->cx_debugbus[i].data); > + > + kfree(a6xx_state->cx_debugbus); nit - remove the extra line There 
is a leak here - we need to free the base class: adreno_gpu_state_destroy(state); > + > + > + kfree(a6xx_state); > +} > + > +int a6xx_gpu_state_put(struct msm_gpu_state *state) > +{ > + if (IS_ERR_OR_NULL(state)) > + return 1; > + > + return kref_put(&state->ref, a6xx_gpu_state_destroy); > +} > + > +static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count, > + struct drm_printer *p) > +{ > + int i, index = 0; > + > + if (!data) > + return; > + > + for (i = 0; i < count; i += 2) { > + u32 count = RANGE(registers, i); > + u32 offset = registers[i]; > + int j; > + > + for (j = 0; j < count; index++, offset++, j++) { > + if (data[index] == 0xdeafbead) > + continue; > + > + drm_printf(p, " - { offset: 0x%06x, value: 0x%08x }\n", > + offset << 2, data[index]); > + } > + } > +} > + > +static void print_ascii85(struct drm_printer *p, size_t len, u32 *data) > +{ > + char out[ASCII85_BUFSZ]; > + long i, l, datalen = 0; > + > + for (i = 0; i < len >> 2; i++) { > + if (data[i]) > + datalen = (i << 2) + 1; This should be: datalen = (i + 1) << 2; > + } > + > + if (datalen == 0) > + return; > + > + drm_puts(p, " data: !!ascii85 |\n"); > + drm_puts(p, " "); > + > + > + l = ascii85_encode_len(datalen); > + > + for (i = 0; i < l; i++) > + drm_puts(p, ascii85_encode(data[i], out)); > + > + drm_puts(p, "\n"); > +} > + > +static void print_name(struct drm_printer *p, const char *fmt, const char *name) > +{ > + drm_puts(p, fmt); > + drm_puts(p, name); > + drm_puts(p, "\n"); > +} > + > +static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj, > + struct drm_printer *p) > +{ > + const struct a6xx_shader_block *block = obj->handle; > + int i; > + > + if (!obj->handle) > + return; > + > + print_name(p, " - type: ", block->name); > + > + for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) { > + drm_printf(p, " - bank: %d\n", i); > + drm_printf(p, " size: %d\n", block->size); > + > + if (!obj->data) > + continue; > + > + print_ascii85(p, block->size << 2, > + obj->data + 
(block->size * i)); > + } > +} > + > +static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data, > + struct drm_printer *p) > +{ > + int ctx, index = 0; > + > + for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) { > + int j; > + > + drm_printf(p, " - context: %d\n", ctx); > + > + for (j = 0; j < size; j += 2) { > + u32 count = RANGE(registers, j); > + u32 offset = registers[j]; > + int k; > + > + for (k = 0; k < count; index++, offset++, k++) { > + if (data[index] == 0xdeafbead) > + continue; > + > + drm_printf(p, " - { offset: 0x%06x, value: 0x%08x }\n", > + offset << 2, data[index]); > + } > + } > + } > +} > + > +static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj, > + struct drm_printer *p) > +{ > + const struct a6xx_dbgahb_cluster *dbgahb = obj->handle; > + > + if (dbgahb) { > + print_name(p, " - cluster-name: ", dbgahb->name); > + a6xx_show_cluster_data(dbgahb->registers, dbgahb->count, > + obj->data, p); > + } > +} > + > +static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj, > + struct drm_printer *p) > +{ > + const struct a6xx_cluster *cluster = obj->handle; > + > + if (cluster) { > + print_name(p, " - cluster-name: ", cluster->name); > + a6xx_show_cluster_data(cluster->registers, cluster->count, > + obj->data, p); > + } > +} > + > +static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj, > + struct drm_printer *p) > +{ > + const struct a6xx_indexed_registers *indexed = obj->handle; > + > + if (!indexed) > + return; > + > + print_name(p, " - regs-name: ", indexed->name); > + drm_printf(p, " dwords: %d\n", indexed->count); > + > + print_ascii85(p, indexed->count << 2, obj->data); > +} > + > +static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block, > + u32 *data, struct drm_printer *p) > +{ > + if (block) { > + print_name(p, " - debugbus-block: ", block->name); > + > + /* > + * count for regular debugbus data is in quadwords, > + * but print the size in dwords for consistency > + */ > + 
drm_printf(p, " count: %d\n", block->count << 1); > + > + print_ascii85(p, block->count << 3, data); > + } > +} > + > +static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state, > + struct drm_printer *p) > +{ > + int i; > + > + for (i = 0; i < a6xx_state->nr_debugbus; i++) { > + struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i]; > + > + a6xx_show_debugbus_block(obj->handle, obj->data, p); > + } > + > + if (a6xx_state->vbif_debugbus) { > + struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus; > + > + drm_puts(p, " - debugbus-block: A6XX_DBGBUS_VBIF\n"); > + drm_printf(p, " count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE); > + > + /* vbif debugbus data is in dwords. Confusing, huh? */ > + print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data); > + } > + > + for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) { > + struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i]; > + > + a6xx_show_debugbus_block(obj->handle, obj->data, p); > + } > +} > + > +void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state, > + struct drm_printer *p) > +{ > + struct a6xx_gpu_state *a6xx_state = container_of(state, > + struct a6xx_gpu_state, base); > + int i; > + > + if (IS_ERR_OR_NULL(state)) > + return; > + > + adreno_show(gpu, state, p); > + > + drm_puts(p, "registers:\n"); > + for (i = 0; i < a6xx_state->nr_registers; i++) { > + struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i]; > + const struct a6xx_registers *regs = obj->handle; > + > + if (!obj->handle) > + continue; > + > + a6xx_show_registers(regs->registers, obj->data, regs->count, p); > + } > + > + drm_puts(p, "registers-gmu:\n"); > + for (i = 0; i < a6xx_state->nr_gmu_registers; i++) { > + struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i]; > + const struct a6xx_registers *regs = obj->handle; > + > + if (!obj->handle) > + continue; > + > + a6xx_show_registers(regs->registers, obj->data, regs->count, p); > + } > + > + drm_puts(p, "indexed-registers:\n"); > + for (i = 0; i < 
a6xx_state->nr_indexed_regs; i++) > + a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p); > + > + drm_puts(p, "shader-blocks:\n"); > + for (i = 0; i < a6xx_state->nr_shaders; i++) > + a6xx_show_shader(&a6xx_state->shaders[i], p); > + > + drm_puts(p, "clusters:\n"); > + for (i = 0; i < a6xx_state->nr_clusters; i++) > + a6xx_show_cluster(&a6xx_state->clusters[i], p); > + > + for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++) > + a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p); > + > + drm_puts(p, "debugbus:\n"); > + a6xx_show_debugbus(a6xx_state, p); > +} > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h > new file mode 100644 > index 000000000000..68cccfa2870a > --- /dev/null > +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h > @@ -0,0 +1,430 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* Copyright (c) 2018 The Linux Foundation. All rights reserved. */ > + > +#ifndef _A6XX_CRASH_DUMP_H_ > +#define _A6XX_CRASH_DUMP_H_ > + > +#include "a6xx.xml.h" > + > +#define A6XX_NUM_CONTEXTS 2 > +#define A6XX_NUM_SHADER_BANKS 3 > + > +static const u32 a6xx_gras_cluster[] = { > + 0x8000, 0x8006, 0x8010, 0x8092, 0x8094, 0x809d, 0x80a0, 0x80a6, > + 0x80af, 0x80f1, 0x8100, 0x8107, 0x8109, 0x8109, 0x8110, 0x8110, > + 0x8400, 0x840b, > +}; > + > +static const u32 a6xx_ps_cluster_rac[] = { > + 0x8800, 0x8806, 0x8809, 0x8811, 0x8818, 0x881e, 0x8820, 0x8865, > + 0x8870, 0x8879, 0x8880, 0x8889, 0x8890, 0x8891, 0x8898, 0x8898, > + 0x88c0, 0x88c1, 0x88d0, 0x88e3, 0x8900, 0x890c, 0x890f, 0x891a, > + 0x8c00, 0x8c01, 0x8c08, 0x8c10, 0x8c17, 0x8c1f, 0x8c26, 0x8c33, > +}; > + > +static const u32 a6xx_ps_cluster_rbp[] = { > + 0x88f0, 0x88f3, 0x890d, 0x890e, 0x8927, 0x8928, 0x8bf0, 0x8bf1, > + 0x8c02, 0x8c07, 0x8c11, 0x8c16, 0x8c20, 0x8c25, > +}; > + > +static const u32 a6xx_ps_cluster[] = { > + 0x9200, 0x9216, 0x9218, 0x9236, 0x9300, 0x9306, > +}; > + > +static const u32 a6xx_fe_cluster[] = { > + 0x9300, 0x9306, 0x9800, 
0x9806, 0x9b00, 0x9b07, 0xa000, 0xa009, > + 0xa00e, 0xa0ef, 0xa0f8, 0xa0f8, > +}; > + > +static const u32 a6xx_pc_vs_cluster[] = { > + 0x9100, 0x9108, 0x9300, 0x9306, 0x9980, 0x9981, 0x9b00, 0x9b07, > +}; > + > +#define CLUSTER_FE 0 > +#define CLUSTER_SP_VS 1 > +#define CLUSTER_PC_VS 2 > +#define CLUSTER_GRAS 3 > +#define CLUSTER_SP_PS 4 > +#define CLUSTER_PS 5 > + > +#define CLUSTER(_id, _reg, _sel_reg, _sel_val) \ > + { .id = _id, .name = #_id,\ > + .registers = _reg, \ > + .count = ARRAY_SIZE(_reg), \ > + .sel_reg = _sel_reg, .sel_val = _sel_val } > + > +static const struct a6xx_cluster { > + u32 id; > + const char *name; > + const u32 *registers; > + size_t count; > + u32 sel_reg; > + u32 sel_val; > +} a6xx_clusters[] = { > + CLUSTER(CLUSTER_GRAS, a6xx_gras_cluster, 0, 0), > + CLUSTER(CLUSTER_PS, a6xx_ps_cluster_rac, REG_A6XX_RB_RB_SUB_BLOCK_SEL_CNTL_CD, 0x0), > + CLUSTER(CLUSTER_PS, a6xx_ps_cluster_rbp, REG_A6XX_RB_RB_SUB_BLOCK_SEL_CNTL_CD, 0x9), > + CLUSTER(CLUSTER_PS, a6xx_ps_cluster, 0, 0), > + CLUSTER(CLUSTER_FE, a6xx_fe_cluster, 0, 0), > + CLUSTER(CLUSTER_PC_VS, a6xx_pc_vs_cluster, 0, 0), > +}; > + > +static const u32 a6xx_sp_vs_hlsq_cluster[] = { > + 0xb800, 0xb803, 0xb820, 0xb822, > +}; > + > +static const u32 a6xx_sp_vs_sp_cluster[] = { > + 0xa800, 0xa824, 0xa830, 0xa83c, 0xa840, 0xa864, 0xa870, 0xa895, > + 0xa8a0, 0xa8af, 0xa8c0, 0xa8c3, > +}; > + > +static const u32 a6xx_hlsq_duplicate_cluster[] = { > + 0xbb10, 0xbb11, 0xbb20, 0xbb29, > +}; > + > +static const u32 a6xx_hlsq_2d_duplicate_cluster[] = { > + 0xbd80, 0xbd80, > +}; > + > +static const u32 a6xx_sp_duplicate_cluster[] = { > + 0xab00, 0xab00, 0xab04, 0xab05, 0xab10, 0xab1b, 0xab20, 0xab20, > +}; > + > +static const u32 a6xx_tp_duplicate_cluster[] = { > + 0xb300, 0xb307, 0xb309, 0xb309, 0xb380, 0xb382, > +}; > + > +static const u32 a6xx_sp_ps_hlsq_cluster[] = { > + 0xb980, 0xb980, 0xb982, 0xb987, 0xb990, 0xb99b, 0xb9a0, 0xb9a2, > + 0xb9c0, 0xb9c9, > +}; > + > +static const u32 
a6xx_sp_ps_hlsq_2d_cluster[] = { > + 0xbd80, 0xbd80, > +}; > + > +static const u32 a6xx_sp_ps_sp_cluster[] = { > + 0xa980, 0xa9a8, 0xa9b0, 0xa9bc, 0xa9d0, 0xa9d3, 0xa9e0, 0xa9f3, > + 0xaa00, 0xaa00, 0xaa30, 0xaa31, > +}; > + > +static const u32 a6xx_sp_ps_sp_2d_cluster[] = { > + 0xacc0, 0xacc0, > +}; > + > +static const u32 a6xx_sp_ps_tp_cluster[] = { > + 0xb180, 0xb183, 0xb190, 0xb191, > +}; > + > +static const u32 a6xx_sp_ps_tp_2d_cluster[] = { > + 0xb4c0, 0xb4d1, > +}; > + > +#define CLUSTER_DBGAHB(_id, _base, _type, _reg) \ > + { .name = #_id, .statetype = _type, .base = _base, \ > + .registers = _reg, .count = ARRAY_SIZE(_reg) } > + > +static const struct a6xx_dbgahb_cluster { > + const char *name; > + u32 statetype; > + u32 base; > + const u32 *registers; > + size_t count; > +} a6xx_dbgahb_clusters[] = { > + CLUSTER_DBGAHB(CLUSTER_SP_VS, 0x0002e000, 0x41, a6xx_sp_vs_hlsq_cluster), > + CLUSTER_DBGAHB(CLUSTER_SP_VS, 0x0002a000, 0x21, a6xx_sp_vs_sp_cluster), > + CLUSTER_DBGAHB(CLUSTER_SP_VS, 0x0002e000, 0x41, a6xx_hlsq_duplicate_cluster), > + CLUSTER_DBGAHB(CLUSTER_SP_VS, 0x0002f000, 0x45, a6xx_hlsq_2d_duplicate_cluster), > + CLUSTER_DBGAHB(CLUSTER_SP_VS, 0x0002a000, 0x21, a6xx_sp_duplicate_cluster), > + CLUSTER_DBGAHB(CLUSTER_SP_VS, 0x0002c000, 0x1, a6xx_tp_duplicate_cluster), > + CLUSTER_DBGAHB(CLUSTER_SP_PS, 0x0002e000, 0x42, a6xx_sp_ps_hlsq_cluster), > + CLUSTER_DBGAHB(CLUSTER_SP_PS, 0x0002f000, 0x46, a6xx_sp_ps_hlsq_2d_cluster), > + CLUSTER_DBGAHB(CLUSTER_SP_PS, 0x0002a000, 0x22, a6xx_sp_ps_sp_cluster), > + CLUSTER_DBGAHB(CLUSTER_SP_PS, 0x0002b000, 0x26, a6xx_sp_ps_sp_2d_cluster), > + CLUSTER_DBGAHB(CLUSTER_SP_PS, 0x0002c000, 0x2, a6xx_sp_ps_tp_cluster), > + CLUSTER_DBGAHB(CLUSTER_SP_PS, 0x0002d000, 0x6, a6xx_sp_ps_tp_2d_cluster), > + CLUSTER_DBGAHB(CLUSTER_SP_PS, 0x0002e000, 0x42, a6xx_hlsq_duplicate_cluster), > + CLUSTER_DBGAHB(CLUSTER_SP_PS, 0x0002a000, 0x22, a6xx_sp_duplicate_cluster), > + CLUSTER_DBGAHB(CLUSTER_SP_PS, 0x0002c000, 0x2, 
a6xx_tp_duplicate_cluster), > +}; > + > +static const u32 a6xx_hlsq_registers[] = { > + 0xbe00, 0xbe01, 0xbe04, 0xbe05, 0xbe08, 0xbe09, 0xbe10, 0xbe15, > + 0xbe20, 0xbe23, > +}; > + > +static const u32 a6xx_sp_registers[] = { > + 0xae00, 0xae04, 0xae0c, 0xae0c, 0xae0f, 0xae2b, 0xae30, 0xae32, > + 0xae35, 0xae35, 0xae3a, 0xae3f, 0xae50, 0xae52, > +}; > + > +static const u32 a6xx_tp_registers[] = { > + 0xb600, 0xb601, 0xb604, 0xb605, 0xb610, 0xb61b, 0xb620, 0xb623, > +}; > + > +struct a6xx_registers { > + const u32 *registers; > + size_t count; > + u32 val0; > + u32 val1; > +}; > + > +#define HLSQ_DBG_REGS(_base, _type, _array) \ > + { .val0 = _base, .val1 = _type, .registers = _array, \ > + .count = ARRAY_SIZE(_array), } > + > +static const struct a6xx_registers a6xx_hlsq_reglist[] = { > + HLSQ_DBG_REGS(0x0002F800, 0x40, a6xx_hlsq_registers), > + HLSQ_DBG_REGS(0x0002B800, 0x20, a6xx_sp_registers), > + HLSQ_DBG_REGS(0x0002D800, 0x0, a6xx_tp_registers), > +}; > + > +#define SHADER(_type, _size) \ > + { .type = _type, .name = #_type, .size = _size } > + > +static const struct a6xx_shader_block { > + const char *name; > + u32 type; > + u32 size; > +} a6xx_shader_blocks[] = { > + SHADER(A6XX_TP0_TMO_DATA, 0x200), > + SHADER(A6XX_TP0_SMO_DATA, 0x80), > + SHADER(A6XX_TP0_MIPMAP_BASE_DATA, 0x3c0), > + SHADER(A6XX_TP1_TMO_DATA, 0x200), > + SHADER(A6XX_TP1_SMO_DATA, 0x80), > + SHADER(A6XX_TP1_MIPMAP_BASE_DATA, 0x3c0), > + SHADER(A6XX_SP_INST_DATA, 0x800), > + SHADER(A6XX_SP_LB_0_DATA, 0x800), > + SHADER(A6XX_SP_LB_1_DATA, 0x800), > + SHADER(A6XX_SP_LB_2_DATA, 0x800), > + SHADER(A6XX_SP_LB_3_DATA, 0x800), > + SHADER(A6XX_SP_LB_4_DATA, 0x800), > + SHADER(A6XX_SP_LB_5_DATA, 0x200), > + SHADER(A6XX_SP_CB_BINDLESS_DATA, 0x2000), > + SHADER(A6XX_SP_CB_LEGACY_DATA, 0x280), > + SHADER(A6XX_SP_UAV_DATA, 0x80), > + SHADER(A6XX_SP_INST_TAG, 0x80), > + SHADER(A6XX_SP_CB_BINDLESS_TAG, 0x80), > + SHADER(A6XX_SP_TMO_UMO_TAG, 0x80), > + SHADER(A6XX_SP_SMO_TAG, 0x80), > + 
SHADER(A6XX_SP_STATE_DATA, 0x3f), > + SHADER(A6XX_HLSQ_CHUNK_CVS_RAM, 0x1c0), > + SHADER(A6XX_HLSQ_CHUNK_CPS_RAM, 0x280), > + SHADER(A6XX_HLSQ_CHUNK_CVS_RAM_TAG, 0x40), > + SHADER(A6XX_HLSQ_CHUNK_CPS_RAM_TAG, 0x40), > + SHADER(A6XX_HLSQ_ICB_CVS_CB_BASE_TAG, 0x4), > + SHADER(A6XX_HLSQ_ICB_CPS_CB_BASE_TAG, 0x4), > + SHADER(A6XX_HLSQ_CVS_MISC_RAM, 0x1c0), > + SHADER(A6XX_HLSQ_CPS_MISC_RAM, 0x580), > + SHADER(A6XX_HLSQ_INST_RAM, 0x800), > + SHADER(A6XX_HLSQ_GFX_CVS_CONST_RAM, 0x800), > + SHADER(A6XX_HLSQ_GFX_CPS_CONST_RAM, 0x800), > + SHADER(A6XX_HLSQ_CVS_MISC_RAM_TAG, 0x8), > + SHADER(A6XX_HLSQ_CPS_MISC_RAM_TAG, 0x4), > + SHADER(A6XX_HLSQ_INST_RAM_TAG, 0x80), > + SHADER(A6XX_HLSQ_GFX_CVS_CONST_RAM_TAG, 0xc), > + SHADER(A6XX_HLSQ_GFX_CPS_CONST_RAM_TAG, 0x10), > + SHADER(A6XX_HLSQ_PWR_REST_RAM, 0x28), > + SHADER(A6XX_HLSQ_PWR_REST_TAG, 0x14), > + SHADER(A6XX_HLSQ_DATAPATH_META, 0x40), > + SHADER(A6XX_HLSQ_FRONTEND_META, 0x40), > + SHADER(A6XX_HLSQ_INDIRECT_META, 0x40), > +}; > + > +static const u32 a6xx_rb_rac_registers[] = { > + 0x8e04, 0x8e05, 0x8e07, 0x8e08, 0x8e10, 0x8e1c, 0x8e20, 0x8e25, > + 0x8e28, 0x8e28, 0x8e2c, 0x8e2f, 0x8e50, 0x8e52, > +}; > + > +static const u32 a6xx_rb_rbp_registers[] = { > + 0x8e01, 0x8e01, 0x8e0c, 0x8e0c, 0x8e3b, 0x8e3e, 0x8e40, 0x8e43, > + 0x8e53, 0x8e5f, 0x8e70, 0x8e77, > +}; > + > +static const u32 a6xx_registers[] = { > + /* RBBM */ > + 0x0000, 0x0002, 0x0010, 0x0010, 0x0012, 0x0012, 0x0018, 0x001b, > + 0x001e, 0x0032, 0x0038, 0x003c, 0x0042, 0x0042, 0x0044, 0x0044, > + 0x0047, 0x0047, 0x0056, 0x0056, 0x00ad, 0x00ae, 0x00b0, 0x00fb, > + 0x0100, 0x011d, 0x0200, 0x020d, 0x0218, 0x023d, 0x0400, 0x04f9, > + 0x0500, 0x0500, 0x0505, 0x050b, 0x050e, 0x0511, 0x0533, 0x0533, > + 0x0540, 0x0555, > + /* CP */ > + 0x0800, 0x0808, 0x0810, 0x0813, 0x0820, 0x0821, 0x0823, 0x0824, > + 0x0826, 0x0827, 0x0830, 0x0833, 0x0840, 0x0843, 0x084f, 0x086f, > + 0x0880, 0x088a, 0x08a0, 0x08ab, 0x08c0, 0x08c4, 0x08d0, 0x08dd, > + 0x08f0, 0x08f3, 0x0900, 0x0903, 
0x0908, 0x0911, 0x0928, 0x093e, > + 0x0942, 0x094d, 0x0980, 0x0984, 0x098d, 0x0996, 0x0998, 0x099e, > + 0x09a0, 0x09a6, 0x09a8, 0x09ae, 0x09b0, 0x09b1, 0x09c2, 0x09c8, > + 0x0a00, 0x0a03, > + /* VSC */ > + 0x0c00, 0x0c04, 0x0c06, 0x0c06, 0x0c10, 0x0cd9, 0x0e00, 0x0e0e, > + /* UCHE */ > + 0x0e10, 0x0e13, 0x0e17, 0x0e19, 0x0e1c, 0x0e2b, 0x0e30, 0x0e32, > + 0x0e38, 0x0e39, > + /* GRAS */ > + 0x8600, 0x8601, 0x8610, 0x861b, 0x8620, 0x8620, 0x8628, 0x862b, > + 0x8630, 0x8637, > + /* VPC */ > + 0x9600, 0x9604, 0x9624, 0x9637, > + /* PC */ > + 0x9e00, 0x9e01, 0x9e03, 0x9e0e, 0x9e11, 0x9e16, 0x9e19, 0x9e19, > + 0x9e1c, 0x9e1c, 0x9e20, 0x9e23, 0x9e30, 0x9e31, 0x9e34, 0x9e34, > + 0x9e70, 0x9e72, 0x9e78, 0x9e79, 0x9e80, 0x9fff, > + /* VFD */ > + 0xa600, 0xa601, 0xa603, 0xa603, 0xa60a, 0xa60a, 0xa610, 0xa617, > + 0xa630, 0xa630, > +}; > + > +#define REGS(_array, _sel_reg, _sel_val) \ > + { .registers = _array, .count = ARRAY_SIZE(_array), \ > + .val0 = _sel_reg, .val1 = _sel_val } > + > +static const struct a6xx_registers a6xx_reglist[] = { > + REGS(a6xx_registers, 0, 0), > + REGS(a6xx_rb_rac_registers, REG_A6XX_RB_RB_SUB_BLOCK_SEL_CNTL_CD, 0), > + REGS(a6xx_rb_rbp_registers, REG_A6XX_RB_RB_SUB_BLOCK_SEL_CNTL_CD, 9), > +}; > + > +static const u32 a6xx_ahb_registers[] = { > + /* RBBM_STATUS - RBBM_STATUS3 */ > + 0x210, 0x213, > + /* CP_STATUS_1 */ > + 0x825, 0x825, > +}; > + > +static const u32 a6xx_vbif_registers[] = { > + 0x3000, 0x3007, 0x300c, 0x3014, 0x3018, 0x302d, 0x3030, 0x3031, > + 0x3034, 0x3036, 0x303c, 0x303d, 0x3040, 0x3040, 0x3042, 0x3042, > + 0x3049, 0x3049, 0x3058, 0x3058, 0x305a, 0x3061, 0x3064, 0x3068, > + 0x306c, 0x306d, 0x3080, 0x3088, 0x308b, 0x308c, 0x3090, 0x3094, > + 0x3098, 0x3098, 0x309c, 0x309c, 0x30c0, 0x30c0, 0x30c8, 0x30c8, > + 0x30d0, 0x30d0, 0x30d8, 0x30d8, 0x30e0, 0x30e0, 0x3100, 0x3100, > + 0x3108, 0x3108, 0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120, > + 0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131, 0x3154, 0x3154, > + 0x3156, 0x3156, 
0x3158, 0x3158, 0x315a, 0x315a, 0x315c, 0x315c, > + 0x315e, 0x315e, 0x3160, 0x3160, 0x3162, 0x3162, 0x340c, 0x340c, > + 0x3410, 0x3410, 0x3800, 0x3801, > +}; > + > +static const struct a6xx_registers a6xx_ahb_reglist[] = { > + REGS(a6xx_ahb_registers, 0, 0), > + REGS(a6xx_vbif_registers, 0, 0), > +}; > + > +static const u32 a6xx_gmu_gx_registers[] = { > + /* GMU GX */ > + 0x0000, 0x0000, 0x0010, 0x0013, 0x0016, 0x0016, 0x0018, 0x001b, > + 0x001e, 0x001e, 0x0020, 0x0023, 0x0026, 0x0026, 0x0028, 0x002b, > + 0x002e, 0x002e, 0x0030, 0x0033, 0x0036, 0x0036, 0x0038, 0x003b, > + 0x003e, 0x003e, 0x0040, 0x0043, 0x0046, 0x0046, 0x0080, 0x0084, > + 0x0100, 0x012b, 0x0140, 0x0140, > +}; > + > +static const u32 a6xx_gmu_cx_registers[] = { > + /* GMU CX */ > + 0x4c00, 0x4c07, 0x4c10, 0x4c12, 0x4d00, 0x4d00, 0x4d07, 0x4d0a, > + 0x5000, 0x5004, 0x5007, 0x5008, 0x500b, 0x500c, 0x500f, 0x501c, > + 0x5024, 0x502a, 0x502d, 0x5030, 0x5040, 0x5053, 0x5087, 0x5089, > + 0x50a0, 0x50a2, 0x50a4, 0x50af, 0x50c0, 0x50c3, 0x50d0, 0x50d0, > + 0x50e4, 0x50e4, 0x50e8, 0x50ec, 0x5100, 0x5103, 0x5140, 0x5140, > + 0x5142, 0x5144, 0x514c, 0x514d, 0x514f, 0x5151, 0x5154, 0x5154, > + 0x5157, 0x5158, 0x515d, 0x515d, 0x5162, 0x5162, 0x5164, 0x5165, > + 0x5180, 0x5186, 0x5190, 0x519e, 0x51c0, 0x51c0, 0x51c5, 0x51cc, > + 0x51e0, 0x51e2, 0x51f0, 0x51f0, 0x5200, 0x5201, > + /* GPU RSCC */ > + 0x8c8c, 0x8c8c, 0x8d01, 0x8d02, 0x8f40, 0x8f42, 0x8f44, 0x8f47, > + 0x8f4c, 0x8f87, 0x8fec, 0x8fef, 0x8ff4, 0x902f, 0x9094, 0x9097, > + 0x909c, 0x90d7, 0x913c, 0x913f, 0x9144, 0x917f, > + /* GMU AO */ > + 0x9300, 0x9316, 0x9400, 0x9400, > + /* GPU CC */ > + 0x9800, 0x9812, 0x9840, 0x9852, 0x9c00, 0x9c04, 0x9c07, 0x9c0b, > + 0x9c15, 0x9c1c, 0x9c1e, 0x9c2d, 0x9c3c, 0x9c3d, 0x9c3f, 0x9c40, > + 0x9c42, 0x9c49, 0x9c58, 0x9c5a, 0x9d40, 0x9d5e, 0xa000, 0xa002, > + 0xa400, 0xa402, 0xac00, 0xac02, 0xb000, 0xb002, 0xb400, 0xb402, > + 0xb800, 0xb802, > + /* GPU CC ACD */ > + 0xbc00, 0xbc16, 0xbc20, 0xbc27, > +}; > + > +static 
const struct a6xx_registers a6xx_gmu_reglist[] = { > + REGS(a6xx_gmu_cx_registers, 0, 0), > + REGS(a6xx_gmu_gx_registers, 0, 0), > +}; > + > +static const struct a6xx_indexed_registers { > + const char *name; > + u32 addr; > + u32 data; > + u32 count; > +} a6xx_indexed_reglist[] = { > + { "CP_SEQ_STAT", REG_A6XX_CP_SQE_STAT_ADDR, > + REG_A6XX_CP_SQE_STAT_DATA, 0x33 }, > + { "CP_DRAW_STATE", REG_A6XX_CP_DRAW_STATE_ADDR, > + REG_A6XX_CP_DRAW_STATE_DATA, 0x100 }, > + { "CP_UCODE_DBG_DATA", REG_A6XX_CP_SQE_UCODE_DBG_ADDR, > + REG_A6XX_CP_SQE_UCODE_DBG_DATA, 0x6000 }, > + { "CP_ROQ", REG_A6XX_CP_ROQ_DBG_ADDR, > + REG_A6XX_CP_ROQ_DBG_DATA, 0x400 }, > +}; > + > +static const struct a6xx_indexed_registers a6xx_cp_mempool_indexed = { > + "CP_MEMPOOOL", REG_A6XX_CP_MEM_POOL_DBG_ADDR, > + REG_A6XX_CP_MEM_POOL_DBG_DATA, 0x2060, > +}; > + > +#define DEBUGBUS(_id, _count) { .id = _id, .name = #_id, .count = _count } > + > +static const struct a6xx_debugbus_block { > + const char *name; > + u32 id; > + u32 count; > +} a6xx_debugbus_blocks[] = { > + DEBUGBUS(A6XX_DBGBUS_CP, 0x100), > + DEBUGBUS(A6XX_DBGBUS_RBBM, 0x100), > + DEBUGBUS(A6XX_DBGBUS_HLSQ, 0x100), > + DEBUGBUS(A6XX_DBGBUS_UCHE, 0x100), > + DEBUGBUS(A6XX_DBGBUS_DPM, 0x100), > + DEBUGBUS(A6XX_DBGBUS_TESS, 0x100), > + DEBUGBUS(A6XX_DBGBUS_PC, 0x100), > + DEBUGBUS(A6XX_DBGBUS_VFDP, 0x100), > + DEBUGBUS(A6XX_DBGBUS_VPC, 0x100), > + DEBUGBUS(A6XX_DBGBUS_TSE, 0x100), > + DEBUGBUS(A6XX_DBGBUS_RAS, 0x100), > + DEBUGBUS(A6XX_DBGBUS_VSC, 0x100), > + DEBUGBUS(A6XX_DBGBUS_COM, 0x100), > + DEBUGBUS(A6XX_DBGBUS_LRZ, 0x100), > + DEBUGBUS(A6XX_DBGBUS_A2D, 0x100), > + DEBUGBUS(A6XX_DBGBUS_CCUFCHE, 0x100), > + DEBUGBUS(A6XX_DBGBUS_RBP, 0x100), > + DEBUGBUS(A6XX_DBGBUS_DCS, 0x100), > + DEBUGBUS(A6XX_DBGBUS_DBGC, 0x100), > + DEBUGBUS(A6XX_DBGBUS_GMU_GX, 0x100), > + DEBUGBUS(A6XX_DBGBUS_TPFCHE, 0x100), > + DEBUGBUS(A6XX_DBGBUS_GPC, 0x100), > + DEBUGBUS(A6XX_DBGBUS_LARC, 0x100), > + DEBUGBUS(A6XX_DBGBUS_HLSQ_SPTP, 0x100), > + 
DEBUGBUS(A6XX_DBGBUS_RB_0, 0x100), > + DEBUGBUS(A6XX_DBGBUS_RB_1, 0x100), > + DEBUGBUS(A6XX_DBGBUS_UCHE_WRAPPER, 0x100), > + DEBUGBUS(A6XX_DBGBUS_CCU_0, 0x100), > + DEBUGBUS(A6XX_DBGBUS_CCU_1, 0x100), > + DEBUGBUS(A6XX_DBGBUS_VFD_0, 0x100), > + DEBUGBUS(A6XX_DBGBUS_VFD_1, 0x100), > + DEBUGBUS(A6XX_DBGBUS_VFD_2, 0x100), > + DEBUGBUS(A6XX_DBGBUS_VFD_3, 0x100), > + DEBUGBUS(A6XX_DBGBUS_SP_0, 0x100), > + DEBUGBUS(A6XX_DBGBUS_SP_1, 0x100), > + DEBUGBUS(A6XX_DBGBUS_TPL1_0, 0x100), > + DEBUGBUS(A6XX_DBGBUS_TPL1_1, 0x100), > + DEBUGBUS(A6XX_DBGBUS_TPL1_2, 0x100), > + DEBUGBUS(A6XX_DBGBUS_TPL1_3, 0x100), > +}; > + > +static const struct a6xx_debugbus_block a6xx_cx_debugbus_blocks[] = { > + DEBUGBUS(A6XX_DBGBUS_GMU_CX, 0x100), > + DEBUGBUS(A6XX_DBGBUS_CX, 0x100), > +}; > + > +#endif >
diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile index 19ab521d4c3a..33645c6539ee 100644 --- a/drivers/gpu/drm/msm/Makefile +++ b/drivers/gpu/drm/msm/Makefile @@ -14,6 +14,7 @@ msm-y := \ adreno/a6xx_gpu.o \ adreno/a6xx_gmu.o \ adreno/a6xx_hfi.o \ + adreno/a6xx_gpu_state.o \ hdmi/hdmi.o \ hdmi/hdmi_audio.o \ hdmi/hdmi_bridge.o \ diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index d4e98e5876bc..089b013d7bb9 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -51,10 +51,31 @@ static irqreturn_t a6xx_hfi_irq(int irq, void *data) return IRQ_HANDLED; } +bool a6xx_gmu_sptprac_is_on(struct a6xx_gmu *gmu) +{ + u32 val; + + /* This can be called from gpu state code so make sure GMU is valid */ + if (IS_ERR_OR_NULL(gmu->mmio)) + return false; + + val = gmu_read(gmu, REG_A6XX_GMU_SPTPRAC_PWR_CLK_STATUS); + + return !(val & + (A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_SPTPRAC_GDSC_POWER_OFF | + A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_SP_CLOCK_OFF)); +} + /* Check to see if the GX rail is still powered */ -static bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu) +bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu) { - u32 val = gmu_read(gmu, REG_A6XX_GMU_SPTPRAC_PWR_CLK_STATUS); + u32 val; + + /* This can be called from gpu state code so make sure GMU is valid */ + if (IS_ERR_OR_NULL(gmu->mmio)) + return false; + + val = gmu_read(gmu, REG_A6XX_GMU_SPTPRAC_PWR_CLK_STATUS); return !(val & (A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_GDSC_POWER_OFF | diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h index 35f765afae45..c721d9165d8e 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h @@ -164,4 +164,7 @@ void a6xx_hfi_init(struct a6xx_gmu *gmu); int a6xx_hfi_start(struct a6xx_gmu *gmu, int boot_state); void a6xx_hfi_stop(struct a6xx_gmu *gmu); +bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu); +bool 
a6xx_gmu_sptprac_is_on(struct a6xx_gmu *gmu); + #endif diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 631257c297fd..3afd4df2e250 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -645,33 +645,6 @@ static const u32 a6xx_register_offsets[REG_ADRENO_REGISTER_MAX] = { REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A6XX_CP_RB_CNTL), }; -static const u32 a6xx_registers[] = { - 0x0000, 0x0002, 0x0010, 0x0010, 0x0012, 0x0012, 0x0018, 0x001b, - 0x001e, 0x0032, 0x0038, 0x003c, 0x0042, 0x0042, 0x0044, 0x0044, - 0x0047, 0x0047, 0x0056, 0x0056, 0x00ad, 0x00ae, 0x00b0, 0x00fb, - 0x0100, 0x011d, 0x0200, 0x020d, 0x0210, 0x0213, 0x0218, 0x023d, - 0x0400, 0x04f9, 0x0500, 0x0500, 0x0505, 0x050b, 0x050e, 0x0511, - 0x0533, 0x0533, 0x0540, 0x0555, 0x0800, 0x0808, 0x0810, 0x0813, - 0x0820, 0x0821, 0x0823, 0x0827, 0x0830, 0x0833, 0x0840, 0x0843, - 0x084f, 0x086f, 0x0880, 0x088a, 0x08a0, 0x08ab, 0x08c0, 0x08c4, - 0x08d0, 0x08dd, 0x08f0, 0x08f3, 0x0900, 0x0903, 0x0908, 0x0911, - 0x0928, 0x093e, 0x0942, 0x094d, 0x0980, 0x0984, 0x098d, 0x0996, - 0x0998, 0x099e, 0x09a0, 0x09a6, 0x09a8, 0x09ae, 0x09b0, 0x09b1, - 0x09c2, 0x09c8, 0x0a00, 0x0a03, 0x0c00, 0x0c04, 0x0c06, 0x0c06, - 0x0c10, 0x0cd9, 0x0e00, 0x0e0e, 0x0e10, 0x0e13, 0x0e17, 0x0e19, - 0x0e1c, 0x0e2b, 0x0e30, 0x0e32, 0x0e38, 0x0e39, 0x8600, 0x8601, - 0x8610, 0x861b, 0x8620, 0x8620, 0x8628, 0x862b, 0x8630, 0x8637, - 0x8e01, 0x8e01, 0x8e04, 0x8e05, 0x8e07, 0x8e08, 0x8e0c, 0x8e0c, - 0x8e10, 0x8e1c, 0x8e20, 0x8e25, 0x8e28, 0x8e28, 0x8e2c, 0x8e2f, - 0x8e3b, 0x8e3e, 0x8e40, 0x8e43, 0x8e50, 0x8e5e, 0x8e70, 0x8e77, - 0x9600, 0x9604, 0x9624, 0x9637, 0x9e00, 0x9e01, 0x9e03, 0x9e0e, - 0x9e11, 0x9e16, 0x9e19, 0x9e19, 0x9e1c, 0x9e1c, 0x9e20, 0x9e23, - 0x9e30, 0x9e31, 0x9e34, 0x9e34, 0x9e70, 0x9e72, 0x9e78, 0x9e79, - 0x9e80, 0x9fff, 0xa600, 0xa601, 0xa603, 0xa603, 0xa60a, 0xa60a, - 0xa610, 0xa617, 0xa630, 0xa630, - ~0 -}; - static int a6xx_pm_resume(struct 
msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); @@ -724,14 +697,6 @@ static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value) return 0; } -#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP) -static void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state, - struct drm_printer *p) -{ - adreno_show(gpu, state, p); -} -#endif - static struct msm_ringbuffer *a6xx_active_ring(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); @@ -796,6 +761,8 @@ static const struct adreno_gpu_funcs funcs = { .gpu_busy = a6xx_gpu_busy, .gpu_get_freq = a6xx_gmu_get_freq, .gpu_set_freq = a6xx_gmu_set_freq, + .gpu_state_get = a6xx_gpu_state_get, + .gpu_state_put = a6xx_gpu_state_put, }, .get_timestamp = a6xx_get_timestamp, }; @@ -817,7 +784,7 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) adreno_gpu = &a6xx_gpu->base; gpu = &adreno_gpu->base; - adreno_gpu->registers = a6xx_registers; + adreno_gpu->registers = NULL; adreno_gpu->reg_offsets = a6xx_register_offsets; ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h index 4127dcebc202..528a4cfe07cd 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h @@ -56,6 +56,14 @@ void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state); int a6xx_gmu_probe(struct a6xx_gpu *a6xx_gpu, struct device_node *node); void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu); + void a6xx_gmu_set_freq(struct msm_gpu *gpu, unsigned long freq); unsigned long a6xx_gmu_get_freq(struct msm_gpu *gpu); + +void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state, + struct drm_printer *p); + +struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu); +int a6xx_gpu_state_put(struct msm_gpu_state *state); + #endif /* __A6XX_GPU_H__ */ diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c 
new file mode 100644 index 000000000000..d46b94462bb5 --- /dev/null +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c @@ -0,0 +1,1159 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018 The Linux Foundation. All rights reserved. */ + +#include <linux/ascii85.h> +#include "msm_gem.h" +#include "a6xx_gpu.h" +#include "a6xx_gmu.h" +#include "a6xx_gpu_state.h" +#include "a6xx_gmu.xml.h" + +struct a6xx_gpu_state_obj { + const void *handle; + u32 *data; +}; + +struct a6xx_gpu_state { + struct msm_gpu_state base; + + struct a6xx_gpu_state_obj *gmu_registers; + int nr_gmu_registers; + + struct a6xx_gpu_state_obj *registers; + int nr_registers; + + struct a6xx_gpu_state_obj *shaders; + int nr_shaders; + + struct a6xx_gpu_state_obj *clusters; + int nr_clusters; + + struct a6xx_gpu_state_obj *dbgahb_clusters; + int nr_dbgahb_clusters; + + struct a6xx_gpu_state_obj *indexed_regs; + int nr_indexed_regs; + + struct a6xx_gpu_state_obj *debugbus; + int nr_debugbus; + + struct a6xx_gpu_state_obj *vbif_debugbus; + + struct a6xx_gpu_state_obj *cx_debugbus; + int nr_cx_debugbus; +}; + +static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val) +{ + in[0] = val; + in[1] = (((u64) reg) << 44 | (1 << 21) | 1); + + return 2; +} + +static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target) +{ + in[0] = target; + in[1] = (((u64) reg) << 44 | dwords); + + return 2; +} + +static inline int CRASHDUMP_FINI(u64 *in) +{ + in[0] = 0; + in[1] = 0; + + return 2; +} + +struct a6xx_crashdumper { + void *ptr; + struct drm_gem_object *bo; + u64 iova; +}; + +/* + * Allocate 1MB for the crashdumper scratch region - 8k for the script and + * the rest for the data + */ +#define A6XX_CD_DATA_OFFSET 8192 +#define A6XX_CD_DATA_SIZE (SZ_1M - 8192) + +static int a6xx_crashdumper_init(struct msm_gpu *gpu, + struct a6xx_crashdumper *dumper) +{ + dumper->ptr = msm_gem_kernel_new_locked(gpu->dev, + SZ_1M, MSM_BO_UNCACHED, gpu->aspace, + &dumper->bo, &dumper->iova); + + return 
IS_ERR(dumper->ptr) ? PTR_ERR(dumper->ptr) : 0; +} + +static int a6xx_crashdumper_run(struct msm_gpu *gpu, + struct a6xx_crashdumper *dumper) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + u32 val; + int ret; + + if (IS_ERR_OR_NULL(dumper->ptr)) + return -EINVAL; + + if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu)) + return -EINVAL; + + /* Make sure all pending memory writes are posted */ + wmb(); + + gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO, + REG_A6XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova); + + gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1); + + ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val, + val & 0x02, 100, 10000); + + gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0); + + return ret; +} + +static void a6xx_crashdumper_free(struct msm_gpu *gpu, + struct a6xx_crashdumper *dumper) +{ + msm_gem_put_iova(dumper->bo, gpu->aspace); + msm_gem_put_vaddr(dumper->bo); + + drm_gem_object_unreference(dumper->bo); +} + +/* read a value from the GX debug bus */ +static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset, + u32 *data) +{ + u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) | + A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block); + + gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg); + gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg); + gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg); + gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg); + + /* Wait 1 us to make sure the data is flowing */ + udelay(1); + + data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2); + data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1); + + return 2; +} + +#define cxdbg_write(ptr, offset, val) \ + msm_writel((val), (ptr) + ((offset) << 2)) + +#define cxdbg_read(ptr, offset) \ + msm_readl((ptr) + ((offset) << 2)) + +/* read a value from the CX debug bus */ +static int cx_debugbus_read(void *__iomem cxdbg, u32 block, u32 offset, + u32 *data) +{ + u32 reg = 
A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) | + A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block); + + cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg); + cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg); + cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg); + cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg); + + /* Wait 1 us to make sure the data is flowing */ + udelay(1); + + data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2); + data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1); + + return 2; +} + +/* Read a chunk of data from the VBIF debug bus */ +static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1, + u32 reg, int count, u32 *data) +{ + int i; + + gpu_write(gpu, ctrl0, reg); + + for (i = 0; i < count; i++) { + gpu_write(gpu, ctrl1, i); + data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT); + } + + return count; +} + +#define AXI_ARB_BLOCKS 2 +#define XIN_AXI_BLOCKS 5 +#define XIN_CORE_BLOCKS 4 + +#define VBIF_DEBUGBUS_BLOCK_SIZE \ + ((16 * AXI_ARB_BLOCKS) + \ + (18 * XIN_AXI_BLOCKS) + \ + (12 * XIN_CORE_BLOCKS)) + +static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu, + struct a6xx_gpu_state_obj *obj) +{ + u32 clk, *ptr; + int i; + + obj->data = kcalloc(VBIF_DEBUGBUS_BLOCK_SIZE, sizeof(u32), GFP_KERNEL); + obj->handle = NULL; + + /* Get the current clock setting */ + clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON); + + /* Force on the bus so we can read it */ + gpu_write(gpu, REG_A6XX_VBIF_CLKON, + clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS); + + /* We will read from BUS2 first, so disable BUS1 */ + gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0); + + /* Enable the VBIF bus for reading */ + gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1); + + ptr = obj->data; + + for (i = 0; i < AXI_ARB_BLOCKS; i++) + ptr += vbif_debugbus_read(gpu, + REG_A6XX_VBIF_TEST_BUS2_CTRL0, + REG_A6XX_VBIF_TEST_BUS2_CTRL1, + 1 << (i + 16), 16, ptr); + + for (i = 0; i < XIN_AXI_BLOCKS; i++) 
+ ptr += vbif_debugbus_read(gpu, + REG_A6XX_VBIF_TEST_BUS2_CTRL0, + REG_A6XX_VBIF_TEST_BUS2_CTRL1, + 1 << i, 18, ptr); + + /* Stop BUS2 so we can turn on BUS1 */ + gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0); + + for (i = 0; i < XIN_CORE_BLOCKS; i++) + ptr += vbif_debugbus_read(gpu, + REG_A6XX_VBIF_TEST_BUS1_CTRL0, + REG_A6XX_VBIF_TEST_BUS1_CTRL1, + 1 << i, 12, ptr); + + /* Restore the VBIF clock setting */ + gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk); +} + +static void a6xx_get_debugbus_block(struct msm_gpu *gpu, + const struct a6xx_debugbus_block *block, + struct a6xx_gpu_state_obj *obj) +{ + int i; + u32 *ptr; + + obj->data = kcalloc(block->count, sizeof(u64), GFP_KERNEL); + if (!obj->data) + return; + + obj->handle = block; + + for (ptr = obj->data, i = 0; i < block->count; i++) + ptr += debugbus_read(gpu, block->id, i, ptr); +} + +static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg, + const struct a6xx_debugbus_block *block, + struct a6xx_gpu_state_obj *obj) +{ + int i; + u32 *ptr; + + obj->data = kcalloc(block->count, sizeof(u64), GFP_KERNEL); + if (!obj->data) + return; + + obj->handle = block; + + for (ptr = obj->data, i = 0; i < block->count; i++) + ptr += cx_debugbus_read(cxdbg, block->id, i, ptr); +} + +static void a6xx_get_debugbus(struct msm_gpu *gpu, + struct a6xx_gpu_state *a6xx_state) +{ + struct resource *res; + void __iomem *cxdbg = NULL; + + /* Set up the GX debug bus */ + + gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT, + A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf)); + + gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM, + A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf)); + + gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0); + gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0); + gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0); + gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0); + + gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210); + gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98); + + gpu_write(gpu, 
REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0); + gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0); + gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0); + gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0); + + /* Set up the CX debug bus - it lives elsewhere in the system so do a + * temporary ioremap for the registers + */ + res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM, + "cx_dbgc"); + + if (res) + cxdbg = ioremap(res->start, resource_size(res)); + + if (cxdbg) { + cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT, + A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf)); + + cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM, + A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf)); + + cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0); + cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0); + cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0); + cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0); + + cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, + 0x76543210); + cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, + 0xFEDCBA98); + + cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0); + cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0); + cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0); + cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0); + } + + a6xx_state->debugbus = kcalloc(ARRAY_SIZE(a6xx_debugbus_blocks), + sizeof(*a6xx_state->debugbus), GFP_KERNEL); + + if (a6xx_state->debugbus) { + int i; + + for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++) + a6xx_get_debugbus_block(gpu, + &a6xx_debugbus_blocks[i], + &a6xx_state->debugbus[i]); + + a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks); + } + + a6xx_state->vbif_debugbus = kzalloc(sizeof(*a6xx_state->vbif_debugbus), + GFP_KERNEL); + + if (a6xx_state->vbif_debugbus) + a6xx_get_vbif_debugbus_block(gpu, a6xx_state->vbif_debugbus); + + if (cxdbg) { + a6xx_state->cx_debugbus = + kcalloc(ARRAY_SIZE(a6xx_cx_debugbus_blocks), + sizeof(*a6xx_state->cx_debugbus), GFP_KERNEL); + + if 
(a6xx_state->cx_debugbus) { + int i; + + for (i = 0; i < ARRAY_SIZE(a6xx_cx_debugbus_blocks); i++) + a6xx_get_cx_debugbus_block(cxdbg, + &a6xx_cx_debugbus_blocks[i], + &a6xx_state->cx_debugbus[i]); + + a6xx_state->nr_cx_debugbus = + ARRAY_SIZE(a6xx_cx_debugbus_blocks); + } + + iounmap(cxdbg); + } +} + +#define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1) + +/* Read a data cluster from behind the AHB aperture */ +static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu, + const struct a6xx_dbgahb_cluster *dbgahb, + struct a6xx_gpu_state_obj *obj, + struct a6xx_crashdumper *dumper) +{ + u64 *in = dumper->ptr; + u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; + size_t datasize; + int i, regcount = 0; + + for (i = 0; i < A6XX_NUM_CONTEXTS; i++) { + int j; + + in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, + (dbgahb->statetype + i * 2) << 8); + + for (j = 0; j < dbgahb->count; j += 2) { + int count = RANGE(dbgahb->registers, j); + u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE + + dbgahb->registers[j] - (dbgahb->base >> 2); + + in += CRASHDUMP_READ(in, offset, count, out); + + out += count * sizeof(u32); + + if (i == 0) + regcount += count; + } + } + + CRASHDUMP_FINI(in); + + datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32); + + if (WARN_ON(datasize > A6XX_CD_DATA_SIZE)) + return; + + if (a6xx_crashdumper_run(gpu, dumper)) + return; + + obj->handle = dbgahb; + obj->data = kmemdup(dumper->ptr + A6XX_CD_DATA_OFFSET, + datasize, GFP_KERNEL); +} + +static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu, + struct a6xx_gpu_state *a6xx_state, + struct a6xx_crashdumper *dumper) +{ + int i; + + a6xx_state->dbgahb_clusters = kcalloc(ARRAY_SIZE(a6xx_dbgahb_clusters), + sizeof(*a6xx_state->dbgahb_clusters), GFP_KERNEL); + + if (!a6xx_state->dbgahb_clusters) + return; + + a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters); + + for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++) + a6xx_get_dbgahb_cluster(gpu, &a6xx_dbgahb_clusters[i], + 
&a6xx_state->dbgahb_clusters[i], dumper); +} + +/* Read a data cluster from the CP aperture with the crashdumper */ +static void a6xx_get_cluster(struct msm_gpu *gpu, + const struct a6xx_cluster *cluster, + struct a6xx_gpu_state_obj *obj, + struct a6xx_crashdumper *dumper) +{ + u64 *in = dumper->ptr; + u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; + size_t datasize; + int i, regcount = 0; + + /* Some clusters need a selector register to be programmed too */ + if (cluster->sel_reg) + in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val); + + for (i = 0; i < A6XX_NUM_CONTEXTS; i++) { + int j; + + in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD, + (cluster->id << 8) | (i << 4) | i); + + for (j = 0; j < cluster->count; j += 2) { + int count = RANGE(cluster->registers, j); + + in += CRASHDUMP_READ(in, cluster->registers[j], + count, out); + + out += count * sizeof(u32); + + if (i == 0) + regcount += count; + } + } + + CRASHDUMP_FINI(in); + + datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32); + + if (WARN_ON(datasize > A6XX_CD_DATA_SIZE)) + return; + + if (a6xx_crashdumper_run(gpu, dumper)) + return; + + obj->handle = cluster; + obj->data = kmemdup(dumper->ptr + A6XX_CD_DATA_OFFSET, + datasize, GFP_KERNEL); +} + +static void a6xx_get_clusters(struct msm_gpu *gpu, + struct a6xx_gpu_state *a6xx_state, + struct a6xx_crashdumper *dumper) +{ + int i; + + a6xx_state->clusters = kcalloc(ARRAY_SIZE(a6xx_clusters), + sizeof(*a6xx_state->clusters), GFP_KERNEL); + + if (!a6xx_state->clusters) + return; + + a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters); + + for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++) + a6xx_get_cluster(gpu, &a6xx_clusters[i], + &a6xx_state->clusters[i], dumper); +} + +/* Read a shader / debug block from the HLSQ aperture with the crashdumper */ +static void a6xx_get_shader_block(struct msm_gpu *gpu, + const struct a6xx_shader_block *block, + struct a6xx_gpu_state_obj *obj, + struct a6xx_crashdumper *dumper) +{ + u64 *in = dumper->ptr; + 
size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32); + int i; + + if (WARN_ON(datasize > A6XX_CD_DATA_SIZE)) + return; + + for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) { + in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, + (block->type << 8) | i); + + in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE, + block->size, dumper->iova + A6XX_CD_DATA_OFFSET); + } + + CRASHDUMP_FINI(in); + + if (a6xx_crashdumper_run(gpu, dumper)) + return; + + obj->handle = block; + obj->data = kmemdup(dumper->ptr + A6XX_CD_DATA_OFFSET, + datasize, GFP_KERNEL); +} + +static void a6xx_get_shaders(struct msm_gpu *gpu, + struct a6xx_gpu_state *a6xx_state, + struct a6xx_crashdumper *dumper) +{ + int i; + + a6xx_state->shaders = kcalloc(ARRAY_SIZE(a6xx_shader_blocks), + sizeof(*a6xx_state->shaders), GFP_KERNEL); + + if (!a6xx_state->shaders) + return; + + a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks); + + for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++) + a6xx_get_shader_block(gpu, &a6xx_shader_blocks[i], + &a6xx_state->shaders[i], dumper); +} + +/* Read registers from behind the HLSQ aperture with the crashdumper */ +static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu, + const struct a6xx_registers *regs, + struct a6xx_gpu_state_obj *obj, + struct a6xx_crashdumper *dumper) + +{ + u64 *in = dumper->ptr; + u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; + int i, regcount = 0; + + in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1); + + for (i = 0; i < regs->count; i += 2) { + u32 count = RANGE(regs->registers, i); + u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE + + regs->registers[i] - (regs->val0 >> 2); + + in += CRASHDUMP_READ(in, offset, count, out); + + out += count * sizeof(u32); + regcount += count; + } + + CRASHDUMP_FINI(in); + + if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE)) + return; + + if (a6xx_crashdumper_run(gpu, dumper)) + return; + + obj->handle = regs; + obj->data = kmemdup(dumper->ptr + 
A6XX_CD_DATA_OFFSET, + regcount * sizeof(u32), GFP_KERNEL); +} + +/* Read a block of registers using the crashdumper */ +static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu, + const struct a6xx_registers *regs, + struct a6xx_gpu_state_obj *obj, + struct a6xx_crashdumper *dumper) + +{ + u64 *in = dumper->ptr; + u64 out = dumper->iova + A6XX_CD_DATA_OFFSET; + int i, regcount = 0; + + /* Some blocks might need to program a selector register first */ + if (regs->val0) + in += CRASHDUMP_WRITE(in, regs->val0, regs->val1); + + for (i = 0; i < regs->count; i += 2) { + u32 count = RANGE(regs->registers, i); + + in += CRASHDUMP_READ(in, regs->registers[i], count, out); + + out += count * sizeof(u32); + regcount += count; + } + + CRASHDUMP_FINI(in); + + if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE)) + return; + + if (a6xx_crashdumper_run(gpu, dumper)) + return; + + obj->handle = regs; + obj->data = kmemdup(dumper->ptr + A6XX_CD_DATA_OFFSET, + regcount * sizeof(u32), GFP_KERNEL); +} + +/* Read a block of registers via AHB */ +static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu, + const struct a6xx_registers *regs, + struct a6xx_gpu_state_obj *obj) +{ + int i, regcount = 0, index = 0; + + for (i = 0; i < regs->count; i += 2) + regcount += RANGE(regs->registers, i); + + obj->handle = (const void *) regs; + obj->data = kcalloc(regcount, sizeof(u32), GFP_KERNEL); + if (!obj->data) + return; + + for (i = 0; i < regs->count; i += 2) { + u32 count = RANGE(regs->registers, i); + int j; + + for (j = 0; j < count; j++) + obj->data[index++] = gpu_read(gpu, + regs->registers[i] + j); + } +} + +/* Read a block of GMU registers */ +static void _a6xx_get_gmu_registers(struct msm_gpu *gpu, + const struct a6xx_registers *regs, + struct a6xx_gpu_state_obj *obj) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; + int i, regcount = 0, index = 0; + + for (i = 0; i < 
regs->count; i += 2) + regcount += RANGE(regs->registers, i); + + obj->handle = (const void *) regs; + obj->data = kcalloc(regcount, sizeof(u32), GFP_KERNEL); + if (!obj->data) + return; + + for (i = 0; i < regs->count; i += 2) { + u32 count = RANGE(regs->registers, i); + int j; + + for (j = 0; j < count; j++) + obj->data[index++] = gmu_read(gmu, + regs->registers[i] + j); + } +} + +static void a6xx_get_gmu_registers(struct msm_gpu *gpu, + struct a6xx_gpu_state *a6xx_state) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + + a6xx_state->gmu_registers = kcalloc(2, + sizeof(*a6xx_state->gmu_registers), GFP_KERNEL); + + if (!a6xx_state->gmu_registers) + return; + + a6xx_state->nr_gmu_registers = 2; + + /* Get the CX GMU registers from AHB */ + _a6xx_get_gmu_registers(gpu, &a6xx_gmu_reglist[0], + &a6xx_state->gmu_registers[0]); + + if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu)) + return; + + /* Set the fence to ALLOW mode so we can access the registers */ + gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0); + + _a6xx_get_gmu_registers(gpu, &a6xx_gmu_reglist[1], + &a6xx_state->gmu_registers[1]); +} + +static void a6xx_get_registers(struct msm_gpu *gpu, + struct a6xx_gpu_state *a6xx_state, + struct a6xx_crashdumper *dumper) +{ + int i, count = ARRAY_SIZE(a6xx_ahb_reglist) + + ARRAY_SIZE(a6xx_reglist) + + ARRAY_SIZE(a6xx_hlsq_reglist); + int index = 0; + + a6xx_state->registers = kcalloc(count, sizeof(*a6xx_state->registers), + GFP_KERNEL); + + if (!a6xx_state->registers) + return; + + a6xx_state->nr_registers = count; + + for (i = 0; i < ARRAY_SIZE(a6xx_ahb_reglist); i++) + a6xx_get_ahb_gpu_registers(gpu, + &a6xx_ahb_reglist[i], + &a6xx_state->registers[index++]); + + for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++) + a6xx_get_crashdumper_registers(gpu, + &a6xx_reglist[i], + &a6xx_state->registers[index++], + dumper); + + for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++) + a6xx_get_crashdumper_hlsq_registers(gpu, + 
&a6xx_hlsq_reglist[i], + &a6xx_state->registers[index++], + dumper); +} + +/* Read a block of data from an indexed register pair */ +static void a6xx_get_indexed_regs(struct msm_gpu *gpu, + const struct a6xx_indexed_registers *indexed, + struct a6xx_gpu_state_obj *obj) +{ + int i; + + obj->handle = (const void *) indexed; + obj->data = kcalloc(indexed->count, sizeof(u32), GFP_KERNEL); + if (!obj->data) + return; + + /* All the indexed banks start at address 0 */ + gpu_write(gpu, indexed->addr, 0); + + /* Read the data - each read increments the internal address by 1 */ + for (i = 0; i < indexed->count; i++) + obj->data[i] = gpu_read(gpu, indexed->data); +} + +static void a6xx_get_indexed_registers(struct msm_gpu *gpu, + struct a6xx_gpu_state *a6xx_state) +{ + u32 mempool_size; + int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1; + int i; + + a6xx_state->indexed_regs = kcalloc(count, + sizeof(a6xx_state->indexed_regs), GFP_KERNEL); + if (!a6xx_state->indexed_regs) + return; + + for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++) + a6xx_get_indexed_regs(gpu, &a6xx_indexed_reglist[i], + &a6xx_state->indexed_regs[i]); + + /* Set the CP mempool size to 0 to stabilize it while dumping */ + mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE); + gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0); + + /* Get the contents of the CP mempool */ + a6xx_get_indexed_regs(gpu, &a6xx_cp_mempool_indexed, + &a6xx_state->indexed_regs[i]); + + /* + * Offset 0x2000 in the mempool is the size - copy the saved size over + * so the data is consistent + */ + a6xx_state->indexed_regs[i].data[0x2000] = mempool_size; + + /* Restore the size in the hardware */ + gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size); + + a6xx_state->nr_indexed_regs = count; +} + +struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu) +{ + struct a6xx_crashdumper dumper = { 0 }; + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + struct 
a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state), + GFP_KERNEL); + + if (!a6xx_state) + return ERR_PTR(-ENOMEM); + + /* Get the generic state from the adreno core */ + adreno_gpu_state_get(gpu, &a6xx_state->base); + + a6xx_get_gmu_registers(gpu, a6xx_state); + + /* If GX isn't on the rest of the data isn't going to be accessible */ + if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu)) + return &a6xx_state->base; + + /* Get the banks of indexed registers */ + a6xx_get_indexed_registers(gpu, a6xx_state); + + /* Try to initialize the crashdumper */ + if (!a6xx_crashdumper_init(gpu, &dumper)) { + a6xx_get_registers(gpu, a6xx_state, &dumper); + a6xx_get_shaders(gpu, a6xx_state, &dumper); + a6xx_get_clusters(gpu, a6xx_state, &dumper); + a6xx_get_dbgahb_clusters(gpu, a6xx_state, &dumper); + + a6xx_crashdumper_free(gpu, &dumper); + } + + a6xx_get_debugbus(gpu, a6xx_state); + + return &a6xx_state->base; +} + +void a6xx_gpu_state_destroy(struct kref *kref) +{ + struct msm_gpu_state *state = container_of(kref, + struct msm_gpu_state, ref); + struct a6xx_gpu_state *a6xx_state = container_of(state, + struct a6xx_gpu_state, base); + int i; + + for (i = 0; i < a6xx_state->nr_gmu_registers; i++) + kfree(a6xx_state->gmu_registers[i].data); + + kfree(a6xx_state->gmu_registers); + + for (i = 0; i < a6xx_state->nr_registers; i++) + kfree(a6xx_state->registers[i].data); + + kfree(a6xx_state->registers); + + for (i = 0; i < a6xx_state->nr_shaders; i++) + kfree(a6xx_state->shaders[i].data); + + kfree(a6xx_state->shaders); + + for (i = 0; i < a6xx_state->nr_clusters; i++) + kfree(a6xx_state->clusters[i].data); + + kfree(a6xx_state->clusters); + + for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++) + kfree(a6xx_state->dbgahb_clusters[i].data); + + kfree(a6xx_state->dbgahb_clusters); + + for (i = 0; i < a6xx_state->nr_indexed_regs; i++) + kfree(a6xx_state->indexed_regs[i].data); + + kfree(a6xx_state->indexed_regs); + + for (i = 0; i < a6xx_state->nr_debugbus; i++) + 
kfree(a6xx_state->debugbus[i].data); + + kfree(a6xx_state->debugbus); + + if (a6xx_state->vbif_debugbus) + kfree(a6xx_state->vbif_debugbus->data); + + kfree(a6xx_state->vbif_debugbus); + + for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) + kfree(a6xx_state->cx_debugbus[i].data); + + kfree(a6xx_state->cx_debugbus); + + + kfree(a6xx_state); +} + +int a6xx_gpu_state_put(struct msm_gpu_state *state) +{ + if (IS_ERR_OR_NULL(state)) + return 1; + + return kref_put(&state->ref, a6xx_gpu_state_destroy); +} + +static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count, + struct drm_printer *p) +{ + int i, index = 0; + + if (!data) + return; + + for (i = 0; i < count; i += 2) { + u32 count = RANGE(registers, i); + u32 offset = registers[i]; + int j; + + for (j = 0; j < count; index++, offset++, j++) { + if (data[index] == 0xdeafbead) + continue; + + drm_printf(p, " - { offset: 0x%06x, value: 0x%08x }\n", + offset << 2, data[index]); + } + } +} + +static void print_ascii85(struct drm_printer *p, size_t len, u32 *data) +{ + char out[ASCII85_BUFSZ]; + long i, l, datalen = 0; + + for (i = 0; i < len >> 2; i++) { + if (data[i]) + datalen = (i << 2) + 1; + } + + if (datalen == 0) + return; + + drm_puts(p, " data: !!ascii85 |\n"); + drm_puts(p, " "); + + + l = ascii85_encode_len(datalen); + + for (i = 0; i < l; i++) + drm_puts(p, ascii85_encode(data[i], out)); + + drm_puts(p, "\n"); +} + +static void print_name(struct drm_printer *p, const char *fmt, const char *name) +{ + drm_puts(p, fmt); + drm_puts(p, name); + drm_puts(p, "\n"); +} + +static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj, + struct drm_printer *p) +{ + const struct a6xx_shader_block *block = obj->handle; + int i; + + if (!obj->handle) + return; + + print_name(p, " - type: ", block->name); + + for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) { + drm_printf(p, " - bank: %d\n", i); + drm_printf(p, " size: %d\n", block->size); + + if (!obj->data) + continue; + + print_ascii85(p, block->size << 2, 
+ obj->data + (block->size * i)); + } +} + +static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data, + struct drm_printer *p) +{ + int ctx, index = 0; + + for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) { + int j; + + drm_printf(p, " - context: %d\n", ctx); + + for (j = 0; j < size; j += 2) { + u32 count = RANGE(registers, j); + u32 offset = registers[j]; + int k; + + for (k = 0; k < count; index++, offset++, k++) { + if (data[index] == 0xdeafbead) + continue; + + drm_printf(p, " - { offset: 0x%06x, value: 0x%08x }\n", + offset << 2, data[index]); + } + } + } +} + +static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj, + struct drm_printer *p) +{ + const struct a6xx_dbgahb_cluster *dbgahb = obj->handle; + + if (dbgahb) { + print_name(p, " - cluster-name: ", dbgahb->name); + a6xx_show_cluster_data(dbgahb->registers, dbgahb->count, + obj->data, p); + } +} + +static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj, + struct drm_printer *p) +{ + const struct a6xx_cluster *cluster = obj->handle; + + if (cluster) { + print_name(p, " - cluster-name: ", cluster->name); + a6xx_show_cluster_data(cluster->registers, cluster->count, + obj->data, p); + } +} + +static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj, + struct drm_printer *p) +{ + const struct a6xx_indexed_registers *indexed = obj->handle; + + if (!indexed) + return; + + print_name(p, " - regs-name: ", indexed->name); + drm_printf(p, " dwords: %d\n", indexed->count); + + print_ascii85(p, indexed->count << 2, obj->data); +} + +static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block, + u32 *data, struct drm_printer *p) +{ + if (block) { + print_name(p, " - debugbus-block: ", block->name); + + /* + * count for regular debugbus data is in quadwords, + * but print the size in dwords for consistency + */ + drm_printf(p, " count: %d\n", block->count << 1); + + print_ascii85(p, block->count << 3, data); + } +} + +static void a6xx_show_debugbus(struct 
a6xx_gpu_state *a6xx_state, + struct drm_printer *p) +{ + int i; + + for (i = 0; i < a6xx_state->nr_debugbus; i++) { + struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i]; + + a6xx_show_debugbus_block(obj->handle, obj->data, p); + } + + if (a6xx_state->vbif_debugbus) { + struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus; + + drm_puts(p, " - debugbus-block: A6XX_DBGBUS_VBIF\n"); + drm_printf(p, " count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE); + + /* vbif debugbus data is in dwords. Confusing, huh? */ + print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data); + } + + for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) { + struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i]; + + a6xx_show_debugbus_block(obj->handle, obj->data, p); + } +} + +void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state, + struct drm_printer *p) +{ + struct a6xx_gpu_state *a6xx_state = container_of(state, + struct a6xx_gpu_state, base); + int i; + + if (IS_ERR_OR_NULL(state)) + return; + + adreno_show(gpu, state, p); + + drm_puts(p, "registers:\n"); + for (i = 0; i < a6xx_state->nr_registers; i++) { + struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i]; + const struct a6xx_registers *regs = obj->handle; + + if (!obj->handle) + continue; + + a6xx_show_registers(regs->registers, obj->data, regs->count, p); + } + + drm_puts(p, "registers-gmu:\n"); + for (i = 0; i < a6xx_state->nr_gmu_registers; i++) { + struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i]; + const struct a6xx_registers *regs = obj->handle; + + if (!obj->handle) + continue; + + a6xx_show_registers(regs->registers, obj->data, regs->count, p); + } + + drm_puts(p, "indexed-registers:\n"); + for (i = 0; i < a6xx_state->nr_indexed_regs; i++) + a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p); + + drm_puts(p, "shader-blocks:\n"); + for (i = 0; i < a6xx_state->nr_shaders; i++) + a6xx_show_shader(&a6xx_state->shaders[i], p); + + drm_puts(p, "clusters:\n"); + for (i = 0; i < 
a6xx_state->nr_clusters; i++) + a6xx_show_cluster(&a6xx_state->clusters[i], p); + + for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++) + a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p); + + drm_puts(p, "debugbus:\n"); + a6xx_show_debugbus(a6xx_state, p); +} diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h new file mode 100644 index 000000000000..68cccfa2870a --- /dev/null +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h @@ -0,0 +1,430 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018 The Linux Foundation. All rights reserved. */ + +#ifndef _A6XX_CRASH_DUMP_H_ +#define _A6XX_CRASH_DUMP_H_ + +#include "a6xx.xml.h" + +#define A6XX_NUM_CONTEXTS 2 +#define A6XX_NUM_SHADER_BANKS 3 + +static const u32 a6xx_gras_cluster[] = { + 0x8000, 0x8006, 0x8010, 0x8092, 0x8094, 0x809d, 0x80a0, 0x80a6, + 0x80af, 0x80f1, 0x8100, 0x8107, 0x8109, 0x8109, 0x8110, 0x8110, + 0x8400, 0x840b, +}; + +static const u32 a6xx_ps_cluster_rac[] = { + 0x8800, 0x8806, 0x8809, 0x8811, 0x8818, 0x881e, 0x8820, 0x8865, + 0x8870, 0x8879, 0x8880, 0x8889, 0x8890, 0x8891, 0x8898, 0x8898, + 0x88c0, 0x88c1, 0x88d0, 0x88e3, 0x8900, 0x890c, 0x890f, 0x891a, + 0x8c00, 0x8c01, 0x8c08, 0x8c10, 0x8c17, 0x8c1f, 0x8c26, 0x8c33, +}; + +static const u32 a6xx_ps_cluster_rbp[] = { + 0x88f0, 0x88f3, 0x890d, 0x890e, 0x8927, 0x8928, 0x8bf0, 0x8bf1, + 0x8c02, 0x8c07, 0x8c11, 0x8c16, 0x8c20, 0x8c25, +}; + +static const u32 a6xx_ps_cluster[] = { + 0x9200, 0x9216, 0x9218, 0x9236, 0x9300, 0x9306, +}; + +static const u32 a6xx_fe_cluster[] = { + 0x9300, 0x9306, 0x9800, 0x9806, 0x9b00, 0x9b07, 0xa000, 0xa009, + 0xa00e, 0xa0ef, 0xa0f8, 0xa0f8, +}; + +static const u32 a6xx_pc_vs_cluster[] = { + 0x9100, 0x9108, 0x9300, 0x9306, 0x9980, 0x9981, 0x9b00, 0x9b07, +}; + +#define CLUSTER_FE 0 +#define CLUSTER_SP_VS 1 +#define CLUSTER_PC_VS 2 +#define CLUSTER_GRAS 3 +#define CLUSTER_SP_PS 4 +#define CLUSTER_PS 5 + +#define CLUSTER(_id, _reg, _sel_reg, _sel_val) \ 
+ { .id = _id, .name = #_id,\ + .registers = _reg, \ + .count = ARRAY_SIZE(_reg), \ + .sel_reg = _sel_reg, .sel_val = _sel_val } + +static const struct a6xx_cluster { + u32 id; + const char *name; + const u32 *registers; + size_t count; + u32 sel_reg; + u32 sel_val; +} a6xx_clusters[] = { + CLUSTER(CLUSTER_GRAS, a6xx_gras_cluster, 0, 0), + CLUSTER(CLUSTER_PS, a6xx_ps_cluster_rac, REG_A6XX_RB_RB_SUB_BLOCK_SEL_CNTL_CD, 0x0), + CLUSTER(CLUSTER_PS, a6xx_ps_cluster_rbp, REG_A6XX_RB_RB_SUB_BLOCK_SEL_CNTL_CD, 0x9), + CLUSTER(CLUSTER_PS, a6xx_ps_cluster, 0, 0), + CLUSTER(CLUSTER_FE, a6xx_fe_cluster, 0, 0), + CLUSTER(CLUSTER_PC_VS, a6xx_pc_vs_cluster, 0, 0), +}; + +static const u32 a6xx_sp_vs_hlsq_cluster[] = { + 0xb800, 0xb803, 0xb820, 0xb822, +}; + +static const u32 a6xx_sp_vs_sp_cluster[] = { + 0xa800, 0xa824, 0xa830, 0xa83c, 0xa840, 0xa864, 0xa870, 0xa895, + 0xa8a0, 0xa8af, 0xa8c0, 0xa8c3, +}; + +static const u32 a6xx_hlsq_duplicate_cluster[] = { + 0xbb10, 0xbb11, 0xbb20, 0xbb29, +}; + +static const u32 a6xx_hlsq_2d_duplicate_cluster[] = { + 0xbd80, 0xbd80, +}; + +static const u32 a6xx_sp_duplicate_cluster[] = { + 0xab00, 0xab00, 0xab04, 0xab05, 0xab10, 0xab1b, 0xab20, 0xab20, +}; + +static const u32 a6xx_tp_duplicate_cluster[] = { + 0xb300, 0xb307, 0xb309, 0xb309, 0xb380, 0xb382, +}; + +static const u32 a6xx_sp_ps_hlsq_cluster[] = { + 0xb980, 0xb980, 0xb982, 0xb987, 0xb990, 0xb99b, 0xb9a0, 0xb9a2, + 0xb9c0, 0xb9c9, +}; + +static const u32 a6xx_sp_ps_hlsq_2d_cluster[] = { + 0xbd80, 0xbd80, +}; + +static const u32 a6xx_sp_ps_sp_cluster[] = { + 0xa980, 0xa9a8, 0xa9b0, 0xa9bc, 0xa9d0, 0xa9d3, 0xa9e0, 0xa9f3, + 0xaa00, 0xaa00, 0xaa30, 0xaa31, +}; + +static const u32 a6xx_sp_ps_sp_2d_cluster[] = { + 0xacc0, 0xacc0, +}; + +static const u32 a6xx_sp_ps_tp_cluster[] = { + 0xb180, 0xb183, 0xb190, 0xb191, +}; + +static const u32 a6xx_sp_ps_tp_2d_cluster[] = { + 0xb4c0, 0xb4d1, +}; + +#define CLUSTER_DBGAHB(_id, _base, _type, _reg) \ + { .name = #_id, .statetype = _type, .base = 
_base, \ + .registers = _reg, .count = ARRAY_SIZE(_reg) } + +static const struct a6xx_dbgahb_cluster { + const char *name; + u32 statetype; + u32 base; + const u32 *registers; + size_t count; +} a6xx_dbgahb_clusters[] = { + CLUSTER_DBGAHB(CLUSTER_SP_VS, 0x0002e000, 0x41, a6xx_sp_vs_hlsq_cluster), + CLUSTER_DBGAHB(CLUSTER_SP_VS, 0x0002a000, 0x21, a6xx_sp_vs_sp_cluster), + CLUSTER_DBGAHB(CLUSTER_SP_VS, 0x0002e000, 0x41, a6xx_hlsq_duplicate_cluster), + CLUSTER_DBGAHB(CLUSTER_SP_VS, 0x0002f000, 0x45, a6xx_hlsq_2d_duplicate_cluster), + CLUSTER_DBGAHB(CLUSTER_SP_VS, 0x0002a000, 0x21, a6xx_sp_duplicate_cluster), + CLUSTER_DBGAHB(CLUSTER_SP_VS, 0x0002c000, 0x1, a6xx_tp_duplicate_cluster), + CLUSTER_DBGAHB(CLUSTER_SP_PS, 0x0002e000, 0x42, a6xx_sp_ps_hlsq_cluster), + CLUSTER_DBGAHB(CLUSTER_SP_PS, 0x0002f000, 0x46, a6xx_sp_ps_hlsq_2d_cluster), + CLUSTER_DBGAHB(CLUSTER_SP_PS, 0x0002a000, 0x22, a6xx_sp_ps_sp_cluster), + CLUSTER_DBGAHB(CLUSTER_SP_PS, 0x0002b000, 0x26, a6xx_sp_ps_sp_2d_cluster), + CLUSTER_DBGAHB(CLUSTER_SP_PS, 0x0002c000, 0x2, a6xx_sp_ps_tp_cluster), + CLUSTER_DBGAHB(CLUSTER_SP_PS, 0x0002d000, 0x6, a6xx_sp_ps_tp_2d_cluster), + CLUSTER_DBGAHB(CLUSTER_SP_PS, 0x0002e000, 0x42, a6xx_hlsq_duplicate_cluster), + CLUSTER_DBGAHB(CLUSTER_SP_PS, 0x0002a000, 0x22, a6xx_sp_duplicate_cluster), + CLUSTER_DBGAHB(CLUSTER_SP_PS, 0x0002c000, 0x2, a6xx_tp_duplicate_cluster), +}; + +static const u32 a6xx_hlsq_registers[] = { + 0xbe00, 0xbe01, 0xbe04, 0xbe05, 0xbe08, 0xbe09, 0xbe10, 0xbe15, + 0xbe20, 0xbe23, +}; + +static const u32 a6xx_sp_registers[] = { + 0xae00, 0xae04, 0xae0c, 0xae0c, 0xae0f, 0xae2b, 0xae30, 0xae32, + 0xae35, 0xae35, 0xae3a, 0xae3f, 0xae50, 0xae52, +}; + +static const u32 a6xx_tp_registers[] = { + 0xb600, 0xb601, 0xb604, 0xb605, 0xb610, 0xb61b, 0xb620, 0xb623, +}; + +struct a6xx_registers { + const u32 *registers; + size_t count; + u32 val0; + u32 val1; +}; + +#define HLSQ_DBG_REGS(_base, _type, _array) \ + { .val0 = _base, .val1 = _type, .registers = _array, \ + 
.count = ARRAY_SIZE(_array), } + +static const struct a6xx_registers a6xx_hlsq_reglist[] = { + HLSQ_DBG_REGS(0x0002F800, 0x40, a6xx_hlsq_registers), + HLSQ_DBG_REGS(0x0002B800, 0x20, a6xx_sp_registers), + HLSQ_DBG_REGS(0x0002D800, 0x0, a6xx_tp_registers), +}; + +#define SHADER(_type, _size) \ + { .type = _type, .name = #_type, .size = _size } + +static const struct a6xx_shader_block { + const char *name; + u32 type; + u32 size; +} a6xx_shader_blocks[] = { + SHADER(A6XX_TP0_TMO_DATA, 0x200), + SHADER(A6XX_TP0_SMO_DATA, 0x80), + SHADER(A6XX_TP0_MIPMAP_BASE_DATA, 0x3c0), + SHADER(A6XX_TP1_TMO_DATA, 0x200), + SHADER(A6XX_TP1_SMO_DATA, 0x80), + SHADER(A6XX_TP1_MIPMAP_BASE_DATA, 0x3c0), + SHADER(A6XX_SP_INST_DATA, 0x800), + SHADER(A6XX_SP_LB_0_DATA, 0x800), + SHADER(A6XX_SP_LB_1_DATA, 0x800), + SHADER(A6XX_SP_LB_2_DATA, 0x800), + SHADER(A6XX_SP_LB_3_DATA, 0x800), + SHADER(A6XX_SP_LB_4_DATA, 0x800), + SHADER(A6XX_SP_LB_5_DATA, 0x200), + SHADER(A6XX_SP_CB_BINDLESS_DATA, 0x2000), + SHADER(A6XX_SP_CB_LEGACY_DATA, 0x280), + SHADER(A6XX_SP_UAV_DATA, 0x80), + SHADER(A6XX_SP_INST_TAG, 0x80), + SHADER(A6XX_SP_CB_BINDLESS_TAG, 0x80), + SHADER(A6XX_SP_TMO_UMO_TAG, 0x80), + SHADER(A6XX_SP_SMO_TAG, 0x80), + SHADER(A6XX_SP_STATE_DATA, 0x3f), + SHADER(A6XX_HLSQ_CHUNK_CVS_RAM, 0x1c0), + SHADER(A6XX_HLSQ_CHUNK_CPS_RAM, 0x280), + SHADER(A6XX_HLSQ_CHUNK_CVS_RAM_TAG, 0x40), + SHADER(A6XX_HLSQ_CHUNK_CPS_RAM_TAG, 0x40), + SHADER(A6XX_HLSQ_ICB_CVS_CB_BASE_TAG, 0x4), + SHADER(A6XX_HLSQ_ICB_CPS_CB_BASE_TAG, 0x4), + SHADER(A6XX_HLSQ_CVS_MISC_RAM, 0x1c0), + SHADER(A6XX_HLSQ_CPS_MISC_RAM, 0x580), + SHADER(A6XX_HLSQ_INST_RAM, 0x800), + SHADER(A6XX_HLSQ_GFX_CVS_CONST_RAM, 0x800), + SHADER(A6XX_HLSQ_GFX_CPS_CONST_RAM, 0x800), + SHADER(A6XX_HLSQ_CVS_MISC_RAM_TAG, 0x8), + SHADER(A6XX_HLSQ_CPS_MISC_RAM_TAG, 0x4), + SHADER(A6XX_HLSQ_INST_RAM_TAG, 0x80), + SHADER(A6XX_HLSQ_GFX_CVS_CONST_RAM_TAG, 0xc), + SHADER(A6XX_HLSQ_GFX_CPS_CONST_RAM_TAG, 0x10), + SHADER(A6XX_HLSQ_PWR_REST_RAM, 0x28), + 
SHADER(A6XX_HLSQ_PWR_REST_TAG, 0x14), + SHADER(A6XX_HLSQ_DATAPATH_META, 0x40), + SHADER(A6XX_HLSQ_FRONTEND_META, 0x40), + SHADER(A6XX_HLSQ_INDIRECT_META, 0x40), +}; + /* Two RB register lists. a6xx_reglist pairs each with REG_A6XX_RB_RB_SUB_BLOCK_SEL_CNTL_CD and a different second word (0 for rac, 9 for rbp). */ +static const u32 a6xx_rb_rac_registers[] = { + 0x8e04, 0x8e05, 0x8e07, 0x8e08, 0x8e10, 0x8e1c, 0x8e20, 0x8e25, + 0x8e28, 0x8e28, 0x8e2c, 0x8e2f, 0x8e50, 0x8e52, +}; + +static const u32 a6xx_rb_rbp_registers[] = { + 0x8e01, 0x8e01, 0x8e0c, 0x8e0c, 0x8e3b, 0x8e3e, 0x8e40, 0x8e43, + 0x8e53, 0x8e5f, 0x8e70, 0x8e77, +}; + /* General register list, grouped by the hardware block named in each section comment. */ +static const u32 a6xx_registers[] = { + /* RBBM */ + 0x0000, 0x0002, 0x0010, 0x0010, 0x0012, 0x0012, 0x0018, 0x001b, + 0x001e, 0x0032, 0x0038, 0x003c, 0x0042, 0x0042, 0x0044, 0x0044, + 0x0047, 0x0047, 0x0056, 0x0056, 0x00ad, 0x00ae, 0x00b0, 0x00fb, + 0x0100, 0x011d, 0x0200, 0x020d, 0x0218, 0x023d, 0x0400, 0x04f9, + 0x0500, 0x0500, 0x0505, 0x050b, 0x050e, 0x0511, 0x0533, 0x0533, + 0x0540, 0x0555, + /* CP */ + 0x0800, 0x0808, 0x0810, 0x0813, 0x0820, 0x0821, 0x0823, 0x0824, + 0x0826, 0x0827, 0x0830, 0x0833, 0x0840, 0x0843, 0x084f, 0x086f, + 0x0880, 0x088a, 0x08a0, 0x08ab, 0x08c0, 0x08c4, 0x08d0, 0x08dd, + 0x08f0, 0x08f3, 0x0900, 0x0903, 0x0908, 0x0911, 0x0928, 0x093e, + 0x0942, 0x094d, 0x0980, 0x0984, 0x098d, 0x0996, 0x0998, 0x099e, + 0x09a0, 0x09a6, 0x09a8, 0x09ae, 0x09b0, 0x09b1, 0x09c2, 0x09c8, + 0x0a00, 0x0a03, + /* VSC */ + 0x0c00, 0x0c04, 0x0c06, 0x0c06, 0x0c10, 0x0cd9, 0x0e00, 0x0e0e, + /* UCHE */ + 0x0e10, 0x0e13, 0x0e17, 0x0e19, 0x0e1c, 0x0e2b, 0x0e30, 0x0e32, + 0x0e38, 0x0e39, + /* GRAS */ + 0x8600, 0x8601, 0x8610, 0x861b, 0x8620, 0x8620, 0x8628, 0x862b, + 0x8630, 0x8637, + /* VPC */ + 0x9600, 0x9604, 0x9624, 0x9637, + /* PC */ + 0x9e00, 0x9e01, 0x9e03, 0x9e0e, 0x9e11, 0x9e16, 0x9e19, 0x9e19, + 0x9e1c, 0x9e1c, 0x9e20, 0x9e23, 0x9e30, 0x9e31, 0x9e34, 0x9e34, + 0x9e70, 0x9e72, 0x9e78, 0x9e79, 0x9e80, 0x9fff, + /* VFD */ + 0xa600, 0xa601, 0xa603, 0xa603, 0xa60a, 0xa60a, 0xa610, 0xa617, + 0xa630, 0xa630, +}; + /* REGS(): build a struct a6xx_registers entry; val0/val1 receive _sel_reg/_sel_val. */ +#define REGS(_array, _sel_reg, _sel_val) \ + { .registers = _array, 
.count = ARRAY_SIZE(_array), \ + .val0 = _sel_reg, .val1 = _sel_val } + /* Register lists with their (val0, val1) pair; the plain a6xx_registers list uses 0, 0. NOTE(review): val0/val1 presumably name a select register and the value to program before reading the list - confirm in the dump code. */ +static const struct a6xx_registers a6xx_reglist[] = { + REGS(a6xx_registers, 0, 0), + REGS(a6xx_rb_rac_registers, REG_A6XX_RB_RB_SUB_BLOCK_SEL_CNTL_CD, 0), + REGS(a6xx_rb_rbp_registers, REG_A6XX_RB_RB_SUB_BLOCK_SEL_CNTL_CD, 9), +}; + /* NOTE(review): judging by the range comment on the first pair, entries are inclusive (first, last) register pairs and a single register repeats its offset - confirm against the code that walks the array. */ +static const u32 a6xx_ahb_registers[] = { + /* RBBM_STATUS - RBBM_STATUS3 */ + 0x210, 0x213, + /* CP_STATUS_1 */ + 0x825, 0x825, +}; + +static const u32 a6xx_vbif_registers[] = { + 0x3000, 0x3007, 0x300c, 0x3014, 0x3018, 0x302d, 0x3030, 0x3031, + 0x3034, 0x3036, 0x303c, 0x303d, 0x3040, 0x3040, 0x3042, 0x3042, + 0x3049, 0x3049, 0x3058, 0x3058, 0x305a, 0x3061, 0x3064, 0x3068, + 0x306c, 0x306d, 0x3080, 0x3088, 0x308b, 0x308c, 0x3090, 0x3094, + 0x3098, 0x3098, 0x309c, 0x309c, 0x30c0, 0x30c0, 0x30c8, 0x30c8, + 0x30d0, 0x30d0, 0x30d8, 0x30d8, 0x30e0, 0x30e0, 0x3100, 0x3100, + 0x3108, 0x3108, 0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120, + 0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131, 0x3154, 0x3154, + 0x3156, 0x3156, 0x3158, 0x3158, 0x315a, 0x315a, 0x315c, 0x315c, + 0x315e, 0x315e, 0x3160, 0x3160, 0x3162, 0x3162, 0x340c, 0x340c, + 0x3410, 0x3410, 0x3800, 0x3801, +}; + /* Groups a6xx_ahb_registers and a6xx_vbif_registers, both with val0 = val1 = 0. */ +static const struct a6xx_registers a6xx_ahb_reglist[] = { + REGS(a6xx_ahb_registers, 0, 0), + REGS(a6xx_vbif_registers, 0, 0), +}; + +static const u32 a6xx_gmu_gx_registers[] = { + /* GMU GX */ + 0x0000, 0x0000, 0x0010, 0x0013, 0x0016, 0x0016, 0x0018, 0x001b, + 0x001e, 0x001e, 0x0020, 0x0023, 0x0026, 0x0026, 0x0028, 0x002b, + 0x002e, 0x002e, 0x0030, 0x0033, 0x0036, 0x0036, 0x0038, 0x003b, + 0x003e, 0x003e, 0x0040, 0x0043, 0x0046, 0x0046, 0x0080, 0x0084, + 0x0100, 0x012b, 0x0140, 0x0140, +}; + +static const u32 a6xx_gmu_cx_registers[] = { + /* GMU CX */ + 0x4c00, 0x4c07, 0x4c10, 0x4c12, 0x4d00, 0x4d00, 0x4d07, 0x4d0a, + 0x5000, 0x5004, 0x5007, 0x5008, 0x500b, 0x500c, 0x500f, 0x501c, + 0x5024, 0x502a, 0x502d, 0x5030, 0x5040, 0x5053, 0x5087, 0x5089, + 0x50a0, 0x50a2, 0x50a4, 0x50af, 0x50c0, 0x50c3, 
0x50d0, 0x50d0, + 0x50e4, 0x50e4, 0x50e8, 0x50ec, 0x5100, 0x5103, 0x5140, 0x5140, + 0x5142, 0x5144, 0x514c, 0x514d, 0x514f, 0x5151, 0x5154, 0x5154, + 0x5157, 0x5158, 0x515d, 0x515d, 0x5162, 0x5162, 0x5164, 0x5165, + 0x5180, 0x5186, 0x5190, 0x519e, 0x51c0, 0x51c0, 0x51c5, 0x51cc, + 0x51e0, 0x51e2, 0x51f0, 0x51f0, 0x5200, 0x5201, + /* GPU RSCC */ + 0x8c8c, 0x8c8c, 0x8d01, 0x8d02, 0x8f40, 0x8f42, 0x8f44, 0x8f47, + 0x8f4c, 0x8f87, 0x8fec, 0x8fef, 0x8ff4, 0x902f, 0x9094, 0x9097, + 0x909c, 0x90d7, 0x913c, 0x913f, 0x9144, 0x917f, + /* GMU AO */ + 0x9300, 0x9316, 0x9400, 0x9400, + /* GPU CC */ + 0x9800, 0x9812, 0x9840, 0x9852, 0x9c00, 0x9c04, 0x9c07, 0x9c0b, + 0x9c15, 0x9c1c, 0x9c1e, 0x9c2d, 0x9c3c, 0x9c3d, 0x9c3f, 0x9c40, + 0x9c42, 0x9c49, 0x9c58, 0x9c5a, 0x9d40, 0x9d5e, 0xa000, 0xa002, + 0xa400, 0xa402, 0xac00, 0xac02, 0xb000, 0xb002, 0xb400, 0xb402, + 0xb800, 0xb802, + /* GPU CC ACD */ + 0xbc00, 0xbc16, 0xbc20, 0xbc27, +}; + /* GMU register lists: CX first, then GX, both with val0 = val1 = 0. */ +static const struct a6xx_registers a6xx_gmu_reglist[] = { + REGS(a6xx_gmu_cx_registers, 0, 0), + REGS(a6xx_gmu_gx_registers, 0, 0), +}; + /* Indexed register banks, each described by a name, an addr register, a data register and a count. NOTE(review): the first name string reads "CP_SEQ_STAT" although its registers are REG_A6XX_CP_SQE_STAT_*; if the name is consumed by tooling, confirm before changing it. */ +static const struct a6xx_indexed_registers { + const char *name; + u32 addr; + u32 data; + u32 count; +} a6xx_indexed_reglist[] = { + { "CP_SEQ_STAT", REG_A6XX_CP_SQE_STAT_ADDR, + REG_A6XX_CP_SQE_STAT_DATA, 0x33 }, + { "CP_DRAW_STATE", REG_A6XX_CP_DRAW_STATE_ADDR, + REG_A6XX_CP_DRAW_STATE_DATA, 0x100 }, + { "CP_UCODE_DBG_DATA", REG_A6XX_CP_SQE_UCODE_DBG_ADDR, + REG_A6XX_CP_SQE_UCODE_DBG_DATA, 0x6000 }, + { "CP_ROQ", REG_A6XX_CP_ROQ_DBG_ADDR, + REG_A6XX_CP_ROQ_DBG_DATA, 0x400 }, +}; + /* CP mem pool bank, kept outside a6xx_indexed_reglist. NOTE(review): the name string "CP_MEMPOOOL" has three O's - likely a typo for CP_MEMPOOL; confirm with consumers of the name before changing it. */ +static const struct a6xx_indexed_registers a6xx_cp_mempool_indexed = { + "CP_MEMPOOOL", REG_A6XX_CP_MEM_POOL_DBG_ADDR, + REG_A6XX_CP_MEM_POOL_DBG_DATA, 0x2060, +}; + /* DEBUGBUS(): the entry name is the stringified _id identifier. */ +#define DEBUGBUS(_id, _count) { .id = _id, .name = #_id, .count = _count } + /* Debugbus block table: a name, an id and a count per block. */ +static const struct a6xx_debugbus_block { + const char *name; + u32 id; + u32 count; +} a6xx_debugbus_blocks[] = { + DEBUGBUS(A6XX_DBGBUS_CP, 0x100), + DEBUGBUS(A6XX_DBGBUS_RBBM, 0x100), + 
DEBUGBUS(A6XX_DBGBUS_HLSQ, 0x100), + DEBUGBUS(A6XX_DBGBUS_UCHE, 0x100), + DEBUGBUS(A6XX_DBGBUS_DPM, 0x100), + DEBUGBUS(A6XX_DBGBUS_TESS, 0x100), + DEBUGBUS(A6XX_DBGBUS_PC, 0x100), + DEBUGBUS(A6XX_DBGBUS_VFDP, 0x100), + DEBUGBUS(A6XX_DBGBUS_VPC, 0x100), + DEBUGBUS(A6XX_DBGBUS_TSE, 0x100), + DEBUGBUS(A6XX_DBGBUS_RAS, 0x100), + DEBUGBUS(A6XX_DBGBUS_VSC, 0x100), + DEBUGBUS(A6XX_DBGBUS_COM, 0x100), + DEBUGBUS(A6XX_DBGBUS_LRZ, 0x100), + DEBUGBUS(A6XX_DBGBUS_A2D, 0x100), + DEBUGBUS(A6XX_DBGBUS_CCUFCHE, 0x100), + DEBUGBUS(A6XX_DBGBUS_RBP, 0x100), + DEBUGBUS(A6XX_DBGBUS_DCS, 0x100), + DEBUGBUS(A6XX_DBGBUS_DBGC, 0x100), + DEBUGBUS(A6XX_DBGBUS_GMU_GX, 0x100), + DEBUGBUS(A6XX_DBGBUS_TPFCHE, 0x100), + DEBUGBUS(A6XX_DBGBUS_GPC, 0x100), + DEBUGBUS(A6XX_DBGBUS_LARC, 0x100), + DEBUGBUS(A6XX_DBGBUS_HLSQ_SPTP, 0x100), + DEBUGBUS(A6XX_DBGBUS_RB_0, 0x100), + DEBUGBUS(A6XX_DBGBUS_RB_1, 0x100), + DEBUGBUS(A6XX_DBGBUS_UCHE_WRAPPER, 0x100), + DEBUGBUS(A6XX_DBGBUS_CCU_0, 0x100), + DEBUGBUS(A6XX_DBGBUS_CCU_1, 0x100), + DEBUGBUS(A6XX_DBGBUS_VFD_0, 0x100), + DEBUGBUS(A6XX_DBGBUS_VFD_1, 0x100), + DEBUGBUS(A6XX_DBGBUS_VFD_2, 0x100), + DEBUGBUS(A6XX_DBGBUS_VFD_3, 0x100), + DEBUGBUS(A6XX_DBGBUS_SP_0, 0x100), + DEBUGBUS(A6XX_DBGBUS_SP_1, 0x100), + DEBUGBUS(A6XX_DBGBUS_TPL1_0, 0x100), + DEBUGBUS(A6XX_DBGBUS_TPL1_1, 0x100), + DEBUGBUS(A6XX_DBGBUS_TPL1_2, 0x100), + DEBUGBUS(A6XX_DBGBUS_TPL1_3, 0x100), +}; + /* Separate table holding the two CX-side debugbus blocks (A6XX_DBGBUS_GMU_CX and A6XX_DBGBUS_CX). */ +static const struct a6xx_debugbus_block a6xx_cx_debugbus_blocks[] = { + DEBUGBUS(A6XX_DBGBUS_GMU_CX, 0x100), + DEBUGBUS(A6XX_DBGBUS_CX, 0x100), +}; + +#endif
Add support for gathering and dumping the a6xx GPU state including registers, GMU registers, indexed registers, shader blocks, context clusters and debugbus. Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org> --- drivers/gpu/drm/msm/Makefile | 1 + drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 25 +- drivers/gpu/drm/msm/adreno/a6xx_gmu.h | 3 + drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 39 +- drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 8 + drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 1159 +++++++++++++++++++ drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h | 430 +++++++ 7 files changed, 1627 insertions(+), 38 deletions(-) create mode 100644 drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c create mode 100644 drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h