[PATCHv4,4/5] drm/msm: add a3xx gpu support

Message ID	1377371170-20135-5-git-send-email-robdclark@gmail.com (mailing list archive)
State	Accepted
Headers	show Return-Path: <dri-devel-bounces+patchwork-dri-devel=patchwork.kernel.org@lists.freedesktop.org> From: Rob Clark <robdclark@gmail.com> To: dri-devel@lists.freedesktop.org Subject: [PATCHv4 4/5] drm/msm: add a3xx gpu support Date: Sat, 24 Aug 2013 15:06:09 -0400 Message-Id: <1377371170-20135-5-git-send-email-robdclark@gmail.com> In-Reply-To: <1377371170-20135-1-git-send-email-robdclark@gmail.com> References: <1377371170-20135-1-git-send-email-robdclark@gmail.com> Cc: linux-arm-msm@vger.kernel.org Precedence: list MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Sender: dri-devel-bounces+patchwork-dri-devel=patchwork.kernel.org@lists.freedesktop.org Errors-To: dri-devel-bounces+patchwork-dri-devel=patchwork.kernel.org@lists.freedesktop.org

diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile index 4068122..439dfb5 100644 --- a/drivers/gpu/drm/msm/Makefile +++ b/drivers/gpu/drm/msm/Makefile @@ -4,6 +4,8 @@ ifeq (, $(findstring -W,$(EXTRA_CFLAGS))) endif msm-y := \ + adreno/adreno_gpu.o \ + adreno/a3xx_gpu.o \ hdmi/hdmi.o \ hdmi/hdmi_connector.o \ hdmi/hdmi_i2c.o \ @@ -18,7 +20,10 @@ msm-y := \ msm_connector.o \ msm_drv.o \ msm_fb.o \ - msm_gem.o + msm_gem.o \ + msm_gem_submit.o \ + msm_gpu.o \ + msm_ringbuffer.o msm-$(CONFIG_DRM_MSM_FBDEV) += msm_fbdev.o diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c new file mode 100644 index 0000000..13d61bb --- /dev/null +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c @@ -0,0 +1,501 @@ +/* + * Copyright (C) 2013 Red Hat + * Author: Rob Clark <robdclark@gmail.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "a3xx_gpu.h" + +#define A3XX_INT0_MASK \ + (A3XX_INT0_RBBM_AHB_ERROR | \ + A3XX_INT0_RBBM_ATB_BUS_OVERFLOW | \ + A3XX_INT0_CP_T0_PACKET_IN_IB | \ + A3XX_INT0_CP_OPCODE_ERROR | \ + A3XX_INT0_CP_RESERVED_BIT_ERROR | \ + A3XX_INT0_CP_HW_FAULT | \ + A3XX_INT0_CP_IB1_INT | \ + A3XX_INT0_CP_IB2_INT | \ + A3XX_INT0_CP_RB_INT | \ + A3XX_INT0_CP_REG_PROTECT_FAULT | \ + A3XX_INT0_CP_AHB_ERROR_HALT | \ + A3XX_INT0_UCHE_OOB_ACCESS) + +static struct platform_device *a3xx_pdev; + +static void a3xx_me_init(struct msm_gpu *gpu) +{ + struct msm_ringbuffer *ring = gpu->rb; + + OUT_PKT3(ring, CP_ME_INIT, 17); + OUT_RING(ring, 0x000003f7); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000080); + OUT_RING(ring, 0x00000100); + OUT_RING(ring, 0x00000180); + OUT_RING(ring, 0x00006600); + OUT_RING(ring, 0x00000150); + OUT_RING(ring, 0x0000014e); + OUT_RING(ring, 0x00000154); + OUT_RING(ring, 0x00000001); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + gpu->funcs->flush(gpu); + gpu->funcs->idle(gpu); +} + +static int a3xx_hw_init(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + uint32_t *ptr, len; + int i, ret; + + DBG("%s", gpu->name); + + if (adreno_is_a305(adreno_gpu)) { + /* Set up 16 deep read/write request queues: */ + gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010); + gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010); + gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010); + gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010); + gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303); + gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010); + gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010); + /* Enable WR-REQ: */ + gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff); + /* Set up round robin arbitration between both 
AXI ports: */ + gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030); + /* Set up AOOO: */ + gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c); + gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c); + + } else if (adreno_is_a320(adreno_gpu)) { + /* Set up 16 deep read/write request queues: */ + gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010); + gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010); + gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010); + gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010); + gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303); + gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010); + gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010); + /* Enable WR-REQ: */ + gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff); + /* Set up round robin arbitration between both AXI ports: */ + gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030); + /* Set up AOOO: */ + gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c); + gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c); + /* Enable 1K sort: */ + gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x000000ff); + gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4); + + } else if (adreno_is_a330(adreno_gpu)) { + /* Set up 16 deep read/write request queues: */ + gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818); + gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x18181818); + gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x18181818); + gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x18181818); + gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303); + gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818); + gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x18181818); + /* Enable WR-REQ: */ + gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f); + /* Set up round robin arbitration between both AXI ports: */ + gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030); + /* Set up VBIF_ROUND_ROBIN_QOS_ARB: */ + gpu_write(gpu, 
REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001); + /* Set up AOOO: */ + gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000ffff); + gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0xffffffff); + /* Enable 1K sort: */ + gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001ffff); + gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4); + /* Disable VBIF clock gating. This is to enable AXI running + * higher frequency than GPU: + */ + gpu_write(gpu, REG_A3XX_VBIF_CLKON, 0x00000001); + + } else { + BUG(); + } + + /* Make all blocks contribute to the GPU BUSY perf counter: */ + gpu_write(gpu, REG_A3XX_RBBM_GPU_BUSY_MASKED, 0xffffffff); + + /* Tune the hystersis counters for SP and CP idle detection: */ + gpu_write(gpu, REG_A3XX_RBBM_SP_HYST_CNT, 0x10); + gpu_write(gpu, REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10); + + /* Enable the RBBM error reporting bits. This lets us get + * useful information on failure: + */ + gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL0, 0x00000001); + + /* Enable AHB error reporting: */ + gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL1, 0xa6ffffff); + + /* Turn on the power counters: */ + gpu_write(gpu, REG_A3XX_RBBM_RBBM_CTL, 0x00030000); + + /* Turn on hang detection - this spews a lot of useful information + * into the RBBM registers on a hang: + */ + gpu_write(gpu, REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL, 0x00010fff); + + /* Enable 64-byte cacheline size. 
HW Default is 32-byte (0x000000E0): */ + gpu_write(gpu, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001); + + /* Enable Clock gating: */ + gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff); + + /* Set the OCMEM base address for A330 */ +//TODO: +// if (adreno_is_a330(adreno_gpu)) { +// gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR, +// (unsigned int)(a3xx_gpu->ocmem_base >> 14)); +// } + + /* Turn on performance counters: */ + gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01); + + /* Set SP perfcounter 7 to count SP_FS_FULL_ALU_INSTRUCTIONS + * we will use this to augment our hang detection: + */ + gpu_write(gpu, REG_A3XX_SP_PERFCOUNTER7_SELECT, + SP_FS_FULL_ALU_INSTRUCTIONS); + + gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK); + + ret = adreno_hw_init(gpu); + if (ret) + return ret; + + /* setup access protection: */ + gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007); + + /* RBBM registers */ + gpu_write(gpu, REG_A3XX_CP_PROTECT(0), 0x63000040); + gpu_write(gpu, REG_A3XX_CP_PROTECT(1), 0x62000080); + gpu_write(gpu, REG_A3XX_CP_PROTECT(2), 0x600000cc); + gpu_write(gpu, REG_A3XX_CP_PROTECT(3), 0x60000108); + gpu_write(gpu, REG_A3XX_CP_PROTECT(4), 0x64000140); + gpu_write(gpu, REG_A3XX_CP_PROTECT(5), 0x66000400); + + /* CP registers */ + gpu_write(gpu, REG_A3XX_CP_PROTECT(6), 0x65000700); + gpu_write(gpu, REG_A3XX_CP_PROTECT(7), 0x610007d8); + gpu_write(gpu, REG_A3XX_CP_PROTECT(8), 0x620007e0); + gpu_write(gpu, REG_A3XX_CP_PROTECT(9), 0x61001178); + gpu_write(gpu, REG_A3XX_CP_PROTECT(10), 0x64001180); + + /* RB registers */ + gpu_write(gpu, REG_A3XX_CP_PROTECT(11), 0x60003300); + + /* VBIF registers */ + gpu_write(gpu, REG_A3XX_CP_PROTECT(12), 0x6b00c000); + + /* NOTE: PM4/micro-engine firmware registers look to be the same + * for a2xx and a3xx.. we could possibly push that part down to + * adreno_gpu base class. Or push both PM4 and PFP but + * parameterize the pfp ucode addr/data registers.. 
+ */ + + /* Load PM4: */ + ptr = (uint32_t *)(adreno_gpu->pm4->data); + len = adreno_gpu->pm4->size / 4; + DBG("loading PM4 ucode version: %u", ptr[0]); + + gpu_write(gpu, REG_AXXX_CP_DEBUG, + AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE | + AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE); + gpu_write(gpu, REG_AXXX_CP_ME_RAM_WADDR, 0); + for (i = 1; i < len; i++) + gpu_write(gpu, REG_AXXX_CP_ME_RAM_DATA, ptr[i]); + + /* Load PFP: */ + ptr = (uint32_t *)(adreno_gpu->pfp->data); + len = adreno_gpu->pfp->size / 4; + DBG("loading PFP ucode version: %u", ptr[0]); + + gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0); + for (i = 1; i < len; i++) + gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]); + + /* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */ + if (adreno_is_a305(adreno_gpu) || adreno_is_a320(adreno_gpu)) + gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS, + AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(2) | + AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(6) | + AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(14)); + + + /* clear ME_HALT to start micro engine */ + gpu_write(gpu, REG_AXXX_CP_ME_CNTL, 0); + + a3xx_me_init(gpu); + + return 0; +} + +static void a3xx_destroy(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu); + + DBG("%s", gpu->name); + + adreno_gpu_cleanup(adreno_gpu); + put_device(&a3xx_gpu->pdev->dev); + kfree(a3xx_gpu); +} + +static void a3xx_idle(struct msm_gpu *gpu) +{ + unsigned long t; + + /* wait for ringbuffer to drain: */ + adreno_idle(gpu); + + t = jiffies + ADRENO_IDLE_TIMEOUT; + + /* then wait for GPU to finish: */ + do { + uint32_t rbbm_status = gpu_read(gpu, REG_A3XX_RBBM_STATUS); + if (!(rbbm_status & A3XX_RBBM_STATUS_GPU_BUSY)) + return; + } while(time_before(jiffies, t)); + + DRM_ERROR("timeout waiting for %s to idle!\n", gpu->name); + + /* TODO maybe we need to reset GPU here to recover from hang? 
*/ +} + +static irqreturn_t a3xx_irq(struct msm_gpu *gpu) +{ + uint32_t status; + + status = gpu_read(gpu, REG_A3XX_RBBM_INT_0_STATUS); + DBG("%s: %08x", gpu->name, status); + + // TODO + + gpu_write(gpu, REG_A3XX_RBBM_INT_CLEAR_CMD, status); + + msm_gpu_retire(gpu); + + return IRQ_HANDLED; +} + +#ifdef CONFIG_DEBUG_FS +static const unsigned int a3xx_registers[] = { + 0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027, + 0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c, + 0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5, + 0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1, + 0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd, + 0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff, + 0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f, + 0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f, + 0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e, + 0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f, + 0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7, + 0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05, + 0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65, + 0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7, + 0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09, + 0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069, + 0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075, + 0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109, + 0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115, + 0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0, + 0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e, + 0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8, + 0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7, + 0x22ff, 0x22ff, 0x2340, 0x2343, 0x2348, 0x2349, 0x2350, 0x2356, + 0x2360, 0x2360, 0x2440, 0x2440, 0x2444, 
0x2444, 0x2448, 0x244d, + 0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472, + 0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef, + 0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 0x2510, 0x2511, + 0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, 0x25ec, 0x25ed, + 0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617, 0x261a, 0x261a, + 0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce, + 0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec, + 0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743, 0x2748, 0x2749, + 0x2750, 0x2756, 0x2760, 0x2760, 0x300c, 0x300e, 0x301c, 0x301d, + 0x302a, 0x302a, 0x302c, 0x302d, 0x3030, 0x3031, 0x3034, 0x3036, + 0x303c, 0x303c, 0x305e, 0x305f, +}; + +static void a3xx_show(struct msm_gpu *gpu, struct seq_file *m) +{ + int i; + + adreno_show(gpu, m); + seq_printf(m, "status: %08x\n", + gpu_read(gpu, REG_A3XX_RBBM_STATUS)); + + /* dump these out in a form that can be parsed by demsm: */ + seq_printf(m, "IO:region %s 00000000 00020000\n", gpu->name); + for (i = 0; i < ARRAY_SIZE(a3xx_registers); i += 2) { + uint32_t start = a3xx_registers[i]; + uint32_t end = a3xx_registers[i+1]; + uint32_t addr; + + for (addr = start; addr <= end; addr++) { + uint32_t val = gpu_read(gpu, addr); + seq_printf(m, "IO:R %08x %08x\n", addr<<2, val); + } + } +} +#endif + +static const struct adreno_gpu_funcs funcs = { + .base = { + .get_param = adreno_get_param, + .hw_init = a3xx_hw_init, + .pm_suspend = msm_gpu_pm_suspend, + .pm_resume = msm_gpu_pm_resume, + .last_fence = adreno_last_fence, + .submit = adreno_submit, + .flush = adreno_flush, + .idle = a3xx_idle, + .irq = a3xx_irq, + .destroy = a3xx_destroy, +#ifdef CONFIG_DEBUG_FS + .show = a3xx_show, +#endif + }, +}; + +struct msm_gpu *a3xx_gpu_init(struct drm_device *dev) +{ + struct a3xx_gpu *a3xx_gpu = NULL; + struct msm_gpu *gpu; + struct platform_device *pdev = a3xx_pdev; + struct adreno_platform_config *config; + int ret; + + if (!pdev) { + dev_err(dev->dev, "no a3xx 
device\n"); + ret = -ENXIO; + goto fail; + } + + config = pdev->dev.platform_data; + + a3xx_gpu = kzalloc(sizeof(*a3xx_gpu), GFP_KERNEL); + if (!a3xx_gpu) { + ret = -ENOMEM; + goto fail; + } + + gpu = &a3xx_gpu->base.base; + + get_device(&pdev->dev); + a3xx_gpu->pdev = pdev; + + gpu->fast_rate = config->fast_rate; + gpu->slow_rate = config->slow_rate; + gpu->bus_freq = config->bus_freq; + + DBG("fast_rate=%u, slow_rate=%u, bus_freq=%u", + gpu->fast_rate, gpu->slow_rate, gpu->bus_freq); + + ret = adreno_gpu_init(dev, pdev, &a3xx_gpu->base, + &funcs, config->rev); + if (ret) + goto fail; + + return &a3xx_gpu->base.base; + +fail: + if (a3xx_gpu) + a3xx_destroy(&a3xx_gpu->base.base); + + return ERR_PTR(ret); +} + +/* + * The a3xx device: + */ + +static int a3xx_probe(struct platform_device *pdev) +{ + static struct adreno_platform_config config = {}; +#ifdef CONFIG_OF + /* TODO */ +#else + uint32_t version = socinfo_get_version(); + if (cpu_is_apq8064ab()) { + config.fast_rate = 450000000; + config.slow_rate = 27000000; + config.bus_freq = 4; + config.rev = ADRENO_REV(3, 2, 1, 0); + } else if (cpu_is_apq8064() || cpu_is_msm8960ab()) { + config.fast_rate = 400000000; + config.slow_rate = 27000000; + config.bus_freq = 4; + + if (SOCINFO_VERSION_MAJOR(version) == 2) + config.rev = ADRENO_REV(3, 2, 0, 2); + else if ((SOCINFO_VERSION_MAJOR(version) == 1) && + (SOCINFO_VERSION_MINOR(version) == 1)) + config.rev = ADRENO_REV(3, 2, 0, 1); + else + config.rev = ADRENO_REV(3, 2, 0, 0); + + } else if (cpu_is_msm8930()) { + config.fast_rate = 400000000; + config.slow_rate = 27000000; + config.bus_freq = 3; + + if ((SOCINFO_VERSION_MAJOR(version) == 1) && + (SOCINFO_VERSION_MINOR(version) == 2)) + config.rev = ADRENO_REV(3, 0, 5, 2); + else + config.rev = ADRENO_REV(3, 0, 5, 0); + + } +#endif + pdev->dev.platform_data = &config; + a3xx_pdev = pdev; + return 0; +} + +static int a3xx_remove(struct platform_device *pdev) +{ + a3xx_pdev = NULL; + return 0; +} + +static struct 
platform_driver a3xx_driver = { + .probe = a3xx_probe, + .remove = a3xx_remove, + .driver.name = "kgsl-3d0", +}; + +void __init a3xx_register(void) +{ + platform_driver_register(&a3xx_driver); +} + +void __exit a3xx_unregister(void) +{ + platform_driver_unregister(&a3xx_driver); +} diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.h b/drivers/gpu/drm/msm/adreno/a3xx_gpu.h new file mode 100644 index 0000000..32c398c --- /dev/null +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.h @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2013 Red Hat + * Author: Rob Clark <robdclark@gmail.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __A3XX_GPU_H__ +#define __A3XX_GPU_H__ + +#include "adreno_gpu.h" +#include "a3xx.xml.h" + +struct a3xx_gpu { + struct adreno_gpu base; + struct platform_device *pdev; +}; +#define to_a3xx_gpu(x) container_of(x, struct a3xx_gpu, base) + +#endif /* __A3XX_GPU_H__ */ diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c new file mode 100644 index 0000000..282163e --- /dev/null +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -0,0 +1,350 @@ +/* + * Copyright (C) 2013 Red Hat + * Author: Rob Clark <robdclark@gmail.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "adreno_gpu.h" +#include "msm_gem.h" + +struct adreno_info { + struct adreno_rev rev; + uint32_t revn; + const char *name; + const char *pm4fw, *pfpfw; + uint32_t gmem; +}; + +#define ANY_ID 0xff + +static const struct adreno_info gpulist[] = { + { + .rev = ADRENO_REV(3, 0, 5, ANY_ID), + .revn = 305, + .name = "A305", + .pm4fw = "a300_pm4.fw", + .pfpfw = "a300_pfp.fw", + .gmem = SZ_256K, + }, { + .rev = ADRENO_REV(3, 2, ANY_ID, ANY_ID), + .revn = 320, + .name = "A320", + .pm4fw = "a300_pm4.fw", + .pfpfw = "a300_pfp.fw", + .gmem = SZ_512K, + }, { + .rev = ADRENO_REV(3, 3, 0, 0), + .revn = 330, + .name = "A330", + .pm4fw = "a330_pm4.fw", + .pfpfw = "a330_pfp.fw", + .gmem = SZ_1M, + }, +}; + +#define RB_SIZE SZ_32K +#define RB_BLKSIZE 16 + +int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + + switch (param) { + case MSM_PARAM_GPU_ID: + *value = adreno_gpu->info->revn; + return 0; + case MSM_PARAM_GMEM_SIZE: + *value = adreno_gpu->info->gmem; + return 0; + default: + DBG("%s: invalid param: %u", gpu->name, param); + return -EINVAL; + } +} + +#define rbmemptr(adreno_gpu, member) \ + ((adreno_gpu)->memptrs_iova + offsetof(struct adreno_rbmemptrs, member)) + +int adreno_hw_init(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + + DBG("%s", gpu->name); + + /* Setup REG_CP_RB_CNTL: */ + gpu_write(gpu, REG_AXXX_CP_RB_CNTL, + /* size is log2(quad-words): */ + AXXX_CP_RB_CNTL_BUFSZ(ilog2(gpu->rb->size / 8)) | + AXXX_CP_RB_CNTL_BLKSZ(RB_BLKSIZE)); + + /* 
Setup ringbuffer address: */ + gpu_write(gpu, REG_AXXX_CP_RB_BASE, gpu->rb_iova); + gpu_write(gpu, REG_AXXX_CP_RB_RPTR_ADDR, rbmemptr(adreno_gpu, rptr)); + + /* Setup scratch/timestamp: */ + gpu_write(gpu, REG_AXXX_SCRATCH_ADDR, rbmemptr(adreno_gpu, fence)); + + gpu_write(gpu, REG_AXXX_SCRATCH_UMSK, 0x1); + + return 0; +} + +static uint32_t get_wptr(struct msm_ringbuffer *ring) +{ + return ring->cur - ring->start; +} + +uint32_t adreno_last_fence(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + return adreno_gpu->memptrs->fence; +} + +int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, + struct msm_file_private *ctx) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct msm_drm_private *priv = gpu->dev->dev_private; + struct msm_ringbuffer *ring = gpu->rb; + unsigned i, ibs = 0; + + adreno_gpu->last_fence = submit->fence; + + for (i = 0; i < submit->nr_cmds; i++) { + switch (submit->cmd[i].type) { + case MSM_SUBMIT_CMD_IB_TARGET_BUF: + /* ignore IB-targets */ + break; + case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: + /* ignore if there has not been a ctx switch: */ + if (priv->lastctx == ctx) + break; + case MSM_SUBMIT_CMD_BUF: + OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2); + OUT_RING(ring, submit->cmd[i].iova); + OUT_RING(ring, submit->cmd[i].size); + ibs++; + break; + } + } + + /* on a320, at least, we seem to need to pad things out to an + * even number of qwords to avoid issue w/ CP hanging on wrap- + * around: + */ + if (ibs % 2) + OUT_PKT2(ring); + + OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1); + OUT_RING(ring, submit->fence); + + if (adreno_is_a3xx(adreno_gpu)) { + /* Flush HLSQ lazy updates to make sure there is nothing + * pending for indirect loads after the timestamp has + * passed: + */ + OUT_PKT3(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, HLSQ_FLUSH); + + OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1); + OUT_RING(ring, 0x00000000); + } + + OUT_PKT3(ring, CP_EVENT_WRITE, 3); + OUT_RING(ring, CACHE_FLUSH_TS); + 
OUT_RING(ring, rbmemptr(adreno_gpu, fence)); + OUT_RING(ring, submit->fence); + + /* we could maybe be clever and only CP_COND_EXEC the interrupt: */ + OUT_PKT3(ring, CP_INTERRUPT, 1); + OUT_RING(ring, 0x80000000); + +#if 0 + if (adreno_is_a3xx(adreno_gpu)) { + /* Dummy set-constant to trigger context rollover */ + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG)); + OUT_RING(ring, 0x00000000); + } +#endif + + gpu->funcs->flush(gpu); + + return 0; +} + +void adreno_flush(struct msm_gpu *gpu) +{ + uint32_t wptr = get_wptr(gpu->rb); + + /* ensure writes to ringbuffer have hit system memory: */ + mb(); + + gpu_write(gpu, REG_AXXX_CP_RB_WPTR, wptr); +} + +void adreno_idle(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + uint32_t rptr, wptr = get_wptr(gpu->rb); + unsigned long t; + + t = jiffies + ADRENO_IDLE_TIMEOUT; + + /* then wait for CP to drain ringbuffer: */ + do { + rptr = adreno_gpu->memptrs->rptr; + if (rptr == wptr) + return; + } while(time_before(jiffies, t)); + + DRM_ERROR("timeout waiting for %s to drain ringbuffer!\n", gpu->name); + + /* TODO maybe we need to reset GPU here to recover from hang? 
*/ +} + +#ifdef CONFIG_DEBUG_FS +void adreno_show(struct msm_gpu *gpu, struct seq_file *m) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + + seq_printf(m, "revision: %d (%d.%d.%d.%d)\n", + adreno_gpu->info->revn, adreno_gpu->rev.core, + adreno_gpu->rev.major, adreno_gpu->rev.minor, + adreno_gpu->rev.patchid); + + seq_printf(m, "fence: %d/%d\n", adreno_gpu->memptrs->fence, + adreno_gpu->last_fence); + seq_printf(m, "rptr: %d\n", adreno_gpu->memptrs->rptr); + seq_printf(m, "wptr: %d\n", adreno_gpu->memptrs->wptr); + seq_printf(m, "rb wptr: %d\n", get_wptr(gpu->rb)); +} +#endif + +void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + uint32_t freedwords; + do { + uint32_t size = gpu->rb->size / 4; + uint32_t wptr = get_wptr(gpu->rb); + uint32_t rptr = adreno_gpu->memptrs->rptr; + freedwords = (rptr + (size - 1) - wptr) % size; + } while(freedwords < ndwords); +} + +static const char *iommu_ports[] = { + "gfx3d_user", "gfx3d_priv", + "gfx3d1_user", "gfx3d1_priv", +}; + +static inline bool _rev_match(uint8_t entry, uint8_t id) +{ + return (entry == ANY_ID) || (entry == id); +} + +int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, + struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs, + struct adreno_rev rev) +{ + int i, ret; + + /* identify gpu: */ + for (i = 0; i < ARRAY_SIZE(gpulist); i++) { + const struct adreno_info *info = &gpulist[i]; + if (_rev_match(info->rev.core, rev.core) && + _rev_match(info->rev.major, rev.major) && + _rev_match(info->rev.minor, rev.minor) && + _rev_match(info->rev.patchid, rev.patchid)) { + gpu->info = info; + gpu->revn = info->revn; + break; + } + } + + if (i == ARRAY_SIZE(gpulist)) { + dev_err(drm->dev, "Unknown GPU revision: %u.%u.%u.%u\n", + rev.core, rev.major, rev.minor, rev.patchid); + return -ENXIO; + } + + DBG("Found GPU: %s (%u.%u.%u.%u)", gpu->info->name, + rev.core, rev.major, rev.minor, rev.patchid); + + gpu->funcs = 
funcs; + gpu->rev = rev; + + ret = request_firmware(&gpu->pm4, gpu->info->pm4fw, drm->dev); + if (ret) { + dev_err(drm->dev, "failed to load %s PM4 firmware: %d\n", + gpu->info->pm4fw, ret); + return ret; + } + + ret = request_firmware(&gpu->pfp, gpu->info->pfpfw, drm->dev); + if (ret) { + dev_err(drm->dev, "failed to load %s PFP firmware: %d\n", + gpu->info->pfpfw, ret); + return ret; + } + + ret = msm_gpu_init(drm, pdev, &gpu->base, &funcs->base, + gpu->info->name, "kgsl_3d0_reg_memory", "kgsl_3d0_irq", + RB_SIZE); + if (ret) + return ret; + + ret = msm_iommu_attach(drm, gpu->base.iommu, + iommu_ports, ARRAY_SIZE(iommu_ports)); + if (ret) + return ret; + + gpu->memptrs_bo = msm_gem_new(drm, sizeof(*gpu->memptrs), + MSM_BO_UNCACHED); + if (IS_ERR(gpu->memptrs_bo)) { + ret = PTR_ERR(gpu->memptrs_bo); + gpu->memptrs_bo = NULL; + dev_err(drm->dev, "could not allocate memptrs: %d\n", ret); + return ret; + } + + gpu->memptrs = msm_gem_vaddr_locked(gpu->memptrs_bo); + if (!gpu->memptrs) { + dev_err(drm->dev, "could not vmap memptrs\n"); + return -ENOMEM; + } + + ret = msm_gem_get_iova_locked(gpu->memptrs_bo, gpu->base.id, + &gpu->memptrs_iova); + if (ret) { + dev_err(drm->dev, "could not map memptrs: %d\n", ret); + return ret; + } + + return 0; +} + +void adreno_gpu_cleanup(struct adreno_gpu *gpu) +{ + if (gpu->memptrs_bo) { + if (gpu->memptrs_iova) + msm_gem_put_iova(gpu->memptrs_bo, gpu->base.id); + drm_gem_object_unreference(gpu->memptrs_bo); + } + if (gpu->pm4) + release_firmware(gpu->pm4); + if (gpu->pfp) + release_firmware(gpu->pfp); + msm_gpu_cleanup(&gpu->base); +} diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h new file mode 100644 index 0000000..6b49c4f --- /dev/null +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -0,0 +1,142 @@ +/* + * Copyright (C) 2013 Red Hat + * Author: Rob Clark <robdclark@gmail.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the 
GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ADRENO_GPU_H__ +#define __ADRENO_GPU_H__ + +#include <linux/firmware.h> + +#include "msm_gpu.h" + +#include "adreno_common.xml.h" +#include "adreno_pm4.xml.h" + +struct adreno_rev { + uint8_t core; + uint8_t major; + uint8_t minor; + uint8_t patchid; +}; + +#define ADRENO_REV(core, major, minor, patchid) \ + ((struct adreno_rev){ core, major, minor, patchid }) + +struct adreno_gpu_funcs { + struct msm_gpu_funcs base; +}; + +struct adreno_info; + +struct adreno_rbmemptrs { + volatile uint32_t rptr; + volatile uint32_t wptr; + volatile uint32_t fence; +}; + +struct adreno_gpu { + struct msm_gpu base; + struct adreno_rev rev; + const struct adreno_info *info; + uint32_t revn; /* numeric revision name */ + const struct adreno_gpu_funcs *funcs; + + uint32_t last_fence; + + /* firmware: */ + const struct firmware *pm4, *pfp; + + /* ringbuffer rptr/wptr: */ + // TODO should this be in msm_ringbuffer? I think it would be + // different for z180.. + struct adreno_rbmemptrs *memptrs; + struct drm_gem_object *memptrs_bo; + uint32_t memptrs_iova; +}; +#define to_adreno_gpu(x) container_of(x, struct adreno_gpu, base) + +/* platform config data (ie. 
from DT, or pdata) */ +struct adreno_platform_config { + struct adreno_rev rev; + uint32_t fast_rate, slow_rate, bus_freq; +}; + +#define ADRENO_IDLE_TIMEOUT (20 * 1000) + +static inline bool adreno_is_a3xx(struct adreno_gpu *gpu) +{ + return (gpu->revn >= 300) && (gpu->revn < 400); +} + +static inline bool adreno_is_a305(struct adreno_gpu *gpu) +{ + return gpu->revn == 305; +} + +static inline bool adreno_is_a320(struct adreno_gpu *gpu) +{ + return gpu->revn == 320; +} + +static inline bool adreno_is_a330(struct adreno_gpu *gpu) +{ + return gpu->revn == 330; +} + +int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value); +int adreno_hw_init(struct msm_gpu *gpu); +uint32_t adreno_last_fence(struct msm_gpu *gpu); +int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, + struct msm_file_private *ctx); +void adreno_flush(struct msm_gpu *gpu); +void adreno_idle(struct msm_gpu *gpu); +#ifdef CONFIG_DEBUG_FS +void adreno_show(struct msm_gpu *gpu, struct seq_file *m); +#endif +void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords); + +int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, + struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs, + struct adreno_rev rev); +void adreno_gpu_cleanup(struct adreno_gpu *gpu); + + +/* ringbuffer helpers (the parts that are adreno specific) */ + +static inline void +OUT_PKT0(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt) +{ + adreno_wait_ring(ring->gpu, cnt+1); + OUT_RING(ring, CP_TYPE0_PKT | ((cnt-1) << 16) | (regindx & 0x7FFF)); +} + +/* no-op packet: */ +static inline void +OUT_PKT2(struct msm_ringbuffer *ring) +{ + adreno_wait_ring(ring->gpu, 1); + OUT_RING(ring, CP_TYPE2_PKT); +} + +static inline void +OUT_PKT3(struct msm_ringbuffer *ring, uint8_t opcode, uint16_t cnt) +{ + adreno_wait_ring(ring->gpu, cnt+1); + OUT_RING(ring, CP_TYPE3_PKT | ((cnt-1) << 16) | ((opcode & 0xFF) << 8)); +} + + +#endif /* __ADRENO_GPU_H__ */ diff --git 
a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index b5ae0db..864c977 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -16,6 +16,7 @@ */ #include "msm_drv.h" +#include "msm_gpu.h" #include <mach/iommu.h> @@ -135,6 +136,7 @@ static int msm_unload(struct drm_device *dev) { struct msm_drm_private *priv = dev->dev_private; struct msm_kms *kms = priv->kms; + struct msm_gpu *gpu = priv->gpu; drm_kms_helper_poll_fini(dev); drm_mode_config_cleanup(dev); @@ -152,6 +154,12 @@ static int msm_unload(struct drm_device *dev) kms->funcs->destroy(kms); } + if (gpu) { + mutex_lock(&dev->struct_mutex); + gpu->funcs->pm_suspend(gpu); + gpu->funcs->destroy(gpu); + mutex_unlock(&dev->struct_mutex); + } dev->dev_private = NULL; @@ -176,6 +184,7 @@ static int msm_load(struct drm_device *dev, unsigned long flags) dev->dev_private = priv; priv->wq = alloc_ordered_workqueue("msm", 0); + init_waitqueue_head(&priv->fence_event); INIT_LIST_HEAD(&priv->inactive_list); @@ -240,12 +249,70 @@ fail: return ret; } +static void load_gpu(struct drm_device *dev) +{ + struct msm_drm_private *priv = dev->dev_private; + struct msm_gpu *gpu; + + if (priv->gpu) + return; + + mutex_lock(&dev->struct_mutex); + gpu = a3xx_gpu_init(dev); + if (IS_ERR(gpu)) { + dev_warn(dev->dev, "failed to load a3xx gpu\n"); + gpu = NULL; + /* not fatal */ + } + mutex_unlock(&dev->struct_mutex); + + if (gpu) { + int ret; + gpu->funcs->pm_resume(gpu); + ret = gpu->funcs->hw_init(gpu); + if (ret) { + dev_err(dev->dev, "gpu hw init failed: %d\n", ret); + gpu->funcs->destroy(gpu); + gpu = NULL; + } + } + + priv->gpu = gpu; +} + +static int msm_open(struct drm_device *dev, struct drm_file *file) +{ + struct msm_file_private *ctx; + + /* For now, load gpu on open.. to avoid the requirement of having + * firmware in the initrd. 
+ */ + load_gpu(dev); + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + file->driver_priv = ctx; + + return 0; +} + static void msm_preclose(struct drm_device *dev, struct drm_file *file) { struct msm_drm_private *priv = dev->dev_private; + struct msm_file_private *ctx = file->driver_priv; struct msm_kms *kms = priv->kms; + if (kms) kms->funcs->preclose(kms, file); + + mutex_lock(&dev->struct_mutex); + if (ctx == priv->lastctx) + priv->lastctx = NULL; + mutex_unlock(&dev->struct_mutex); + + kfree(ctx); } static void msm_lastclose(struct drm_device *dev) @@ -316,11 +383,30 @@ static void msm_disable_vblank(struct drm_device *dev, int crtc_id) */ #ifdef CONFIG_DEBUG_FS +static int msm_gpu_show(struct drm_device *dev, struct seq_file *m) +{ + struct msm_drm_private *priv = dev->dev_private; + struct msm_gpu *gpu = priv->gpu; + + if (gpu) { + seq_printf(m, "%s Status:\n", gpu->name); + gpu->funcs->show(gpu, m); + } + + return 0; +} + static int msm_gem_show(struct drm_device *dev, struct seq_file *m) { struct msm_drm_private *priv = dev->dev_private; + struct msm_gpu *gpu = priv->gpu; + + if (gpu) { + seq_printf(m, "Active Objects (%s):\n", gpu->name); + msm_gem_describe_objects(&gpu->active_list, m); + } - seq_printf(m, "All Objects:\n"); + seq_printf(m, "Inactive Objects:\n"); msm_gem_describe_objects(&priv->inactive_list, m); return 0; @@ -375,6 +461,7 @@ static int show_locked(struct seq_file *m, void *arg) } static struct drm_info_list msm_debugfs_list[] = { + {"gpu", show_locked, 0, msm_gpu_show}, {"gem", show_locked, 0, msm_gem_show}, { "mm", show_locked, 0, msm_mm_show }, { "fb", show_locked, 0, msm_fb_show }, @@ -404,6 +491,158 @@ static void msm_debugfs_cleanup(struct drm_minor *minor) } #endif +/* + * Fences: + */ + +int msm_wait_fence_interruptable(struct drm_device *dev, uint32_t fence, + struct timespec *timeout) +{ + struct msm_drm_private *priv = dev->dev_private; + unsigned long timeout_jiffies = 
timespec_to_jiffies(timeout); + unsigned long start_jiffies = jiffies; + unsigned long remaining_jiffies; + int ret; + + if (time_after(start_jiffies, timeout_jiffies)) + remaining_jiffies = 0; + else + remaining_jiffies = timeout_jiffies - start_jiffies; + + ret = wait_event_interruptible_timeout(priv->fence_event, + priv->completed_fence >= fence, + remaining_jiffies); + if (ret == 0) { + DBG("timeout waiting for fence: %u (completed: %u)", + fence, priv->completed_fence); + ret = -ETIMEDOUT; + } else if (ret != -ERESTARTSYS) { + ret = 0; + } + + return ret; +} + +/* call under struct_mutex */ +void msm_update_fence(struct drm_device *dev, uint32_t fence) +{ + struct msm_drm_private *priv = dev->dev_private; + + if (fence > priv->completed_fence) { + priv->completed_fence = fence; + wake_up_all(&priv->fence_event); + } +} + +/* + * DRM ioctls: + */ + +static int msm_ioctl_get_param(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct msm_drm_private *priv = dev->dev_private; + struct drm_msm_param *args = data; + struct msm_gpu *gpu; + + /* for now, we just have 3d pipe.. 
eventually this would need to + * be more clever to dispatch to appropriate gpu module: + */ + if (args->pipe != MSM_PIPE_3D0) + return -EINVAL; + + gpu = priv->gpu; + + if (!gpu) + return -ENXIO; + + return gpu->funcs->get_param(gpu, args->param, &args->value); +} + +static int msm_ioctl_gem_new(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_msm_gem_new *args = data; + return msm_gem_new_handle(dev, file, args->size, + args->flags, &args->handle); +} + +#define TS(t) ((struct timespec){ .tv_sec = (t).tv_sec, .tv_nsec = (t).tv_nsec }) + +static int msm_ioctl_gem_cpu_prep(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_msm_gem_cpu_prep *args = data; + struct drm_gem_object *obj; + int ret; + + obj = drm_gem_object_lookup(dev, file, args->handle); + if (!obj) + return -ENOENT; + + ret = msm_gem_cpu_prep(obj, args->op, &TS(args->timeout)); + + drm_gem_object_unreference_unlocked(obj); + + return ret; +} + +static int msm_ioctl_gem_cpu_fini(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_msm_gem_cpu_fini *args = data; + struct drm_gem_object *obj; + int ret; + + obj = drm_gem_object_lookup(dev, file, args->handle); + if (!obj) + return -ENOENT; + + ret = msm_gem_cpu_fini(obj); + + drm_gem_object_unreference_unlocked(obj); + + return ret; +} + +static int msm_ioctl_gem_info(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_msm_gem_info *args = data; + struct drm_gem_object *obj; + int ret = 0; + + if (args->pad) + return -EINVAL; + + obj = drm_gem_object_lookup(dev, file, args->handle); + if (!obj) + return -ENOENT; + + args->offset = msm_gem_mmap_offset(obj); + + drm_gem_object_unreference_unlocked(obj); + + return ret; +} + +static int msm_ioctl_wait_fence(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_msm_wait_fence *args = data; + return msm_wait_fence_interruptable(dev, args->fence, &TS(args->timeout)); +} + +static const 
struct drm_ioctl_desc msm_ioctls[] = { + DRM_IOCTL_DEF_DRV(MSM_GET_PARAM, msm_ioctl_get_param, DRM_UNLOCKED|DRM_AUTH), + DRM_IOCTL_DEF_DRV(MSM_GEM_NEW, msm_ioctl_gem_new, DRM_UNLOCKED|DRM_AUTH), + DRM_IOCTL_DEF_DRV(MSM_GEM_INFO, msm_ioctl_gem_info, DRM_UNLOCKED|DRM_AUTH), + DRM_IOCTL_DEF_DRV(MSM_GEM_CPU_PREP, msm_ioctl_gem_cpu_prep, DRM_UNLOCKED|DRM_AUTH), + DRM_IOCTL_DEF_DRV(MSM_GEM_CPU_FINI, msm_ioctl_gem_cpu_fini, DRM_UNLOCKED|DRM_AUTH), + DRM_IOCTL_DEF_DRV(MSM_GEM_SUBMIT, msm_ioctl_gem_submit, DRM_UNLOCKED|DRM_AUTH), + DRM_IOCTL_DEF_DRV(MSM_WAIT_FENCE, msm_ioctl_wait_fence, DRM_UNLOCKED|DRM_AUTH), +}; + static const struct vm_operations_struct vm_ops = { .fault = msm_gem_fault, .open = drm_gem_vm_open, @@ -428,6 +667,7 @@ static struct drm_driver msm_driver = { .driver_features = DRIVER_HAVE_IRQ | DRIVER_GEM | DRIVER_MODESET, .load = msm_load, .unload = msm_unload, + .open = msm_open, .preclose = msm_preclose, .lastclose = msm_lastclose, .irq_handler = msm_irq, @@ -446,6 +686,8 @@ static struct drm_driver msm_driver = { .debugfs_init = msm_debugfs_init, .debugfs_cleanup = msm_debugfs_cleanup, #endif + .ioctls = msm_ioctls, + .num_ioctls = DRM_MSM_NUM_IOCTLS, .fops = &fops, .name = "msm", .desc = "MSM Snapdragon DRM", @@ -514,6 +756,7 @@ static int __init msm_drm_register(void) { DBG("init"); hdmi_register(); + a3xx_register(); return platform_driver_register(&msm_platform_driver); } @@ -522,6 +765,7 @@ static void __exit msm_drm_unregister(void) DBG("fini"); platform_driver_unregister(&msm_platform_driver); hdmi_unregister(); + a3xx_unregister(); } module_init(msm_drm_register); diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 36f8ba2..34c36b2 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -40,17 +40,34 @@ #include <drm/drmP.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_fb_helper.h> +#include <drm/msm_drm.h> struct msm_kms; +struct msm_gpu; -#define NUM_DOMAINS 1 /* one for KMS, then one 
per gpu core (?) */ +#define NUM_DOMAINS 2 /* one for KMS, then one per gpu core (?) */ + +struct msm_file_private { + /* currently we don't do anything useful with this.. but when + * per-context address spaces are supported we'd keep track of + * the context's page-tables here. + */ + int dummy; +}; struct msm_drm_private { struct msm_kms *kms; + /* when we have more than one 'msm_gpu' these need to be an array: */ + struct msm_gpu *gpu; + struct msm_file_private *lastctx; + struct drm_fb_helper *fbdev; + uint32_t next_fence, completed_fence; + wait_queue_head_t fence_event; + /* list of GEM objects: */ struct list_head inactive_list; @@ -108,6 +125,13 @@ int msm_register_iommu(struct drm_device *dev, struct iommu_domain *iommu); int msm_iommu_attach(struct drm_device *dev, struct iommu_domain *iommu, const char **names, int cnt); +int msm_wait_fence_interruptable(struct drm_device *dev, uint32_t fence, + struct timespec *timeout); +void msm_update_fence(struct drm_device *dev, uint32_t fence); + +int msm_ioctl_gem_submit(struct drm_device *dev, void *data, + struct drm_file *file); + int msm_gem_mmap(struct file *filp, struct vm_area_struct *vma); int msm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf); uint64_t msm_gem_mmap_offset(struct drm_gem_object *obj); @@ -125,6 +149,12 @@ void *msm_gem_vaddr_locked(struct drm_gem_object *obj); void *msm_gem_vaddr(struct drm_gem_object *obj); int msm_gem_queue_inactive_work(struct drm_gem_object *obj, struct work_struct *work); +void msm_gem_move_to_active(struct drm_gem_object *obj, + struct msm_gpu *gpu, uint32_t fence); +void msm_gem_move_to_inactive(struct drm_gem_object *obj); +int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op, + struct timespec *timeout); +int msm_gem_cpu_fini(struct drm_gem_object *obj); void msm_gem_free_object(struct drm_gem_object *obj); int msm_gem_new_handle(struct drm_device *dev, struct drm_file *file, uint32_t size, uint32_t flags, uint32_t *handle); @@ -168,20 
+198,14 @@ static inline int align_pitch(int width, int bpp) /* for the generated headers: */ #define INVALID_IDX(idx) ({BUG(); 0;}) +#define fui(x) ({BUG(); 0;}) +#define util_float_to_half(x) ({BUG(); 0;}) + #define FIELD(val, name) (((val) & name ## __MASK) >> name ## __SHIFT) /* for conditionally setting boolean flag(s): */ #define COND(bool, val) ((bool) ? (val) : 0) -/* just put these here until we start adding driver private ioctls: */ -// TODO might shuffle these around.. just need something for now.. -#define MSM_BO_CACHE_MASK 0x0000000f -#define MSM_BO_SCANOUT 0x00010000 /* scanout capable */ - -#define MSM_BO_CACHED 0x00000001 /* default */ -#define MSM_BO_WC 0x0000002 -#define MSM_BO_UNCACHED 0x00000004 - #endif /* __MSM_DRV_H__ */ diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index a52e6cc..6b5a6c8 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -20,6 +20,7 @@ #include "msm_drv.h" #include "msm_gem.h" +#include "msm_gpu.h" /* called with dev->struct_mutex held */ @@ -375,10 +376,74 @@ int msm_gem_queue_inactive_work(struct drm_gem_object *obj, { struct drm_device *dev = obj->dev; struct msm_drm_private *priv = dev->dev_private; + struct msm_gem_object *msm_obj = to_msm_bo(obj); + int ret = 0; + + mutex_lock(&dev->struct_mutex); + if (!list_empty(&work->entry)) { + ret = -EINVAL; + } else if (is_active(msm_obj)) { + list_add_tail(&work->entry, &msm_obj->inactive_work); + } else { + queue_work(priv->wq, work); + } + mutex_unlock(&dev->struct_mutex); + + return ret; +} + +void msm_gem_move_to_active(struct drm_gem_object *obj, + struct msm_gpu *gpu, uint32_t fence) +{ + struct msm_gem_object *msm_obj = to_msm_bo(obj); + msm_obj->gpu = gpu; + msm_obj->fence = fence; + list_del_init(&msm_obj->mm_list); + list_add_tail(&msm_obj->mm_list, &gpu->active_list); +} + +void msm_gem_move_to_inactive(struct drm_gem_object *obj) +{ + struct drm_device *dev = obj->dev; + struct msm_drm_private *priv = 
dev->dev_private; + struct msm_gem_object *msm_obj = to_msm_bo(obj); + + WARN_ON(!mutex_is_locked(&dev->struct_mutex)); + + msm_obj->gpu = NULL; + msm_obj->fence = 0; + list_del_init(&msm_obj->mm_list); + list_add_tail(&msm_obj->mm_list, &priv->inactive_list); + + while (!list_empty(&msm_obj->inactive_work)) { + struct work_struct *work; + + work = list_first_entry(&msm_obj->inactive_work, + struct work_struct, entry); + + list_del_init(&work->entry); + queue_work(priv->wq, work); + } +} + +int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op, + struct timespec *timeout) +{ + struct drm_device *dev = obj->dev; + struct msm_gem_object *msm_obj = to_msm_bo(obj); + int ret = 0; + + if (is_active(msm_obj) && !(op & MSM_PREP_NOSYNC)) + ret = msm_wait_fence_interruptable(dev, msm_obj->fence, timeout); + + /* TODO cache maintenance */ - /* just a place-holder until we have gpu.. */ - queue_work(priv->wq, work); + return ret; +} +int msm_gem_cpu_fini(struct drm_gem_object *obj) +{ + /* TODO cache maintenance */ return 0; } @@ -390,8 +455,9 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m) uint64_t off = drm_vma_node_start(&obj->vma_node); WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - seq_printf(m, "%08x: %2d (%2d) %08llx %p %d\n", - msm_obj->flags, obj->name, obj->refcount.refcount.counter, + seq_printf(m, "%08x: %c(%d) %2d (%2d) %08llx %p %d\n", + msm_obj->flags, is_active(msm_obj) ? 
'A' : 'I', + msm_obj->fence, obj->name, obj->refcount.refcount.counter, off, msm_obj->vaddr, obj->size); } @@ -421,6 +487,9 @@ void msm_gem_free_object(struct drm_gem_object *obj) WARN_ON(!mutex_is_locked(&dev->struct_mutex)); + /* object should not be on active list: */ + WARN_ON(is_active(msm_obj)); + list_del(&msm_obj->mm_list); for (id = 0; id < ARRAY_SIZE(msm_obj->domain); id++) { @@ -439,6 +508,9 @@ void msm_gem_free_object(struct drm_gem_object *obj) put_pages(obj); + if (msm_obj->resv == &msm_obj->_resv) + reservation_object_fini(msm_obj->resv); + drm_gem_object_release(obj); kfree(msm_obj); @@ -508,7 +580,11 @@ struct drm_gem_object *msm_gem_new(struct drm_device *dev, msm_obj->flags = flags; + msm_obj->resv = &msm_obj->_resv; + reservation_object_init(msm_obj->resv); + INIT_LIST_HEAD(&msm_obj->submit_entry); + INIT_LIST_HEAD(&msm_obj->inactive_work); list_add_tail(&msm_obj->mm_list, &priv->inactive_list); return obj; diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index fcafd19..d746f13 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -18,6 +18,7 @@ #ifndef __MSM_GEM_H__ #define __MSM_GEM_H__ +#include <linux/reservation.h> #include "msm_drv.h" struct msm_gem_object { @@ -25,7 +26,27 @@ struct msm_gem_object { uint32_t flags; + /* And object is either: + * inactive - on priv->inactive_list + * active - on one one of the gpu's active_list.. well, at + * least for now we don't have (I don't think) hw sync between + * 2d and 3d one devices which have both, meaning we need to + * block on submit if a bo is already on other ring + * + */ struct list_head mm_list; + struct msm_gpu *gpu; /* non-null if active */ + uint32_t fence; + + /* Transiently in the process of submit ioctl, objects associated + * with the submit are on submit->bo_list.. this only lasts for + * the duration of the ioctl, so one bo can never be on multiple + * submit lists. 
+ */ + struct list_head submit_entry; + + /* work defered until bo is inactive: */ + struct list_head inactive_work; struct page **pages; struct sg_table *sgt; @@ -35,7 +56,44 @@ struct msm_gem_object { // XXX uint32_t iova; } domain[NUM_DOMAINS]; + + /* normally (resv == &_resv) except for imported bo's */ + struct reservation_object *resv; + struct reservation_object _resv; }; #define to_msm_bo(x) container_of(x, struct msm_gem_object, base) +static inline bool is_active(struct msm_gem_object *msm_obj) +{ + return msm_obj->gpu != NULL; +} + +#define MAX_CMDS 4 + +/* Created per submit-ioctl, to track bo's and cmdstream bufs, etc, + * associated with the cmdstream submission for synchronization (and + * make it easier to unwind when things go wrong, etc). This only + * lasts for the duration of the submit-ioctl. + */ +struct msm_gem_submit { + struct drm_device *dev; + struct msm_gpu *gpu; + struct list_head bo_list; + struct ww_acquire_ctx ticket; + uint32_t fence; + bool valid; + unsigned int nr_cmds; + unsigned int nr_bos; + struct { + uint32_t type; + uint32_t size; /* in dwords */ + uint32_t iova; + } cmd[MAX_CMDS]; + struct { + uint32_t flags; + struct msm_gem_object *obj; + uint32_t iova; + } bos[0]; +}; + #endif /* __MSM_GEM_H__ */ diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c new file mode 100644 index 0000000..3e1ef3a --- /dev/null +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -0,0 +1,412 @@ +/* + * Copyright (C) 2013 Red Hat + * Author: Rob Clark <robdclark@gmail.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "msm_drv.h" +#include "msm_gpu.h" +#include "msm_gem.h" + +/* + * Cmdstream submission: + */ + +#define BO_INVALID_FLAGS ~(MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE) +/* make sure these don't conflict w/ MSM_SUBMIT_BO_x */ +#define BO_VALID 0x8000 +#define BO_LOCKED 0x4000 +#define BO_PINNED 0x2000 + +static inline void __user *to_user_ptr(u64 address) +{ + return (void __user *)(uintptr_t)address; +} + +/* Allocate a submit with room for 'nr' bo slots and initialize its list and + * ww-acquire ticket. Returns NULL if 'nr' is out of range or on allocation + * failure. + */ +static struct msm_gem_submit *submit_create(struct drm_device *dev, + struct msm_gpu *gpu, int nr) +{ + struct msm_gem_submit *submit; + size_t sz; + + /* 'nr' comes straight from userspace (args->nr_bos), reject values + * that would make the size computation below wrap around: + */ + if ((nr < 0) || ((size_t)nr > + ((SIZE_MAX - sizeof(*submit)) / sizeof(submit->bos[0])))) + return NULL; + + sz = sizeof(*submit) + ((size_t)nr * sizeof(submit->bos[0])); + + submit = kmalloc(sz, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY); + if (submit) { + submit->dev = dev; + submit->gpu = gpu; + + /* initially, until copy_from_user() and bo lookup succeeds: */ + submit->nr_bos = 0; + submit->nr_cmds = 0; + + INIT_LIST_HEAD(&submit->bo_list); + ww_acquire_init(&submit->ticket, &reservation_ww_class); + } + + return submit; +} + +static int submit_lookup_objects(struct msm_gem_submit *submit, + struct drm_msm_gem_submit *args, struct drm_file *file) +{ + unsigned i; + int ret = 0; + + spin_lock(&file->table_lock); + + for (i = 0; i < args->nr_bos; i++) { + struct drm_msm_gem_submit_bo submit_bo; + struct drm_gem_object *obj; + struct msm_gem_object *msm_obj; + void __user *userptr = + to_user_ptr(args->bos + (i * sizeof(submit_bo))); + + ret = copy_from_user(&submit_bo, userptr, sizeof(submit_bo)); + if (ret) { + ret = -EFAULT; + goto out_unlock; + } + + if (submit_bo.flags & BO_INVALID_FLAGS) { + DBG("invalid flags: %x", submit_bo.flags); + ret = -EINVAL; + goto out_unlock; + } + + submit->bos[i].flags = submit_bo.flags; + /* in validate_objects() we figure out if this is true: */ + submit->bos[i].iova = submit_bo.presumed; + + /* normally use drm_gem_object_lookup(), but for 
bulk lookup + * all under single table_lock just hit object_idr directly: + */ + obj = idr_find(&file->object_idr, submit_bo.handle); + if (!obj) { + DBG("invalid handle %u at index %u", submit_bo.handle, i); + ret = -EINVAL; + goto out_unlock; + } + + msm_obj = to_msm_bo(obj); + + if (!list_empty(&msm_obj->submit_entry)) { + DBG("handle %u at index %u already on submit list", + submit_bo.handle, i); + ret = -EINVAL; + goto out_unlock; + } + + drm_gem_object_reference(obj); + + submit->bos[i].obj = msm_obj; + + list_add_tail(&msm_obj->submit_entry, &submit->bo_list); + } + +out_unlock: + submit->nr_bos = i; + spin_unlock(&file->table_lock); + + return ret; +} + +static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, int i) +{ + struct msm_gem_object *msm_obj = submit->bos[i].obj; + + if (submit->bos[i].flags & BO_PINNED) + msm_gem_put_iova(&msm_obj->base, submit->gpu->id); + + if (submit->bos[i].flags & BO_LOCKED) + ww_mutex_unlock(&msm_obj->resv->lock); + + if (!(submit->bos[i].flags & BO_VALID)) + submit->bos[i].iova = 0; + + submit->bos[i].flags &= ~(BO_LOCKED | BO_PINNED); +} + +/* This is where we make sure all the bo's are reserved and pin'd: */ +static int submit_validate_objects(struct msm_gem_submit *submit) +{ + int contended, slow_locked = -1, i, ret = 0; + +retry: + submit->valid = true; + + for (i = 0; i < submit->nr_bos; i++) { + struct msm_gem_object *msm_obj = submit->bos[i].obj; + uint32_t iova; + + if (slow_locked == i) + slow_locked = -1; + + contended = i; + + if (!(submit->bos[i].flags & BO_LOCKED)) { + ret = ww_mutex_lock_interruptible(&msm_obj->resv->lock, + &submit->ticket); + if (ret) + goto fail; + submit->bos[i].flags |= BO_LOCKED; + } + + + /* if locking succeeded, pin bo: */ + ret = msm_gem_get_iova(&msm_obj->base, + submit->gpu->id, &iova); + + /* this would break the logic in the fail path.. 
there is no + * reason for this to happen, but just to be on the safe side + * let's notice if this starts happening in the future: + */ + WARN_ON(ret == -EDEADLK); + + if (ret) + goto fail; + + submit->bos[i].flags |= BO_PINNED; + + if (iova == submit->bos[i].iova) { + submit->bos[i].flags |= BO_VALID; + } else { + submit->bos[i].iova = iova; + submit->bos[i].flags &= ~BO_VALID; + submit->valid = false; + } + } + + ww_acquire_done(&submit->ticket); + + return 0; + +fail: + for (; i >= 0; i--) + submit_unlock_unpin_bo(submit, i); + + if (slow_locked > 0) + submit_unlock_unpin_bo(submit, slow_locked); + + if (ret == -EDEADLK) { + struct msm_gem_object *msm_obj = submit->bos[contended].obj; + /* we lost out in a seqno race, lock and retry.. */ + ret = ww_mutex_lock_slow_interruptible(&msm_obj->resv->lock, + &submit->ticket); + if (!ret) { + submit->bos[contended].flags |= BO_LOCKED; + slow_locked = contended; + goto retry; + } + } + + return ret; +} + +static int submit_bo(struct msm_gem_submit *submit, uint32_t idx, + struct msm_gem_object **obj, uint32_t *iova, bool *valid) +{ + if (idx >= submit->nr_bos) { + DBG("invalid buffer index: %u (out of %u)", idx, submit->nr_bos); + return EINVAL; + } + + if (obj) + *obj = submit->bos[idx].obj; + if (iova) + *iova = submit->bos[idx].iova; + if (valid) + *valid = !!(submit->bos[idx].flags & BO_VALID); + + return 0; +} + +/* process the reloc's and patch up the cmdstream as needed: */ +static int submit_reloc(struct msm_gem_submit *submit, struct msm_gem_object *obj, + uint32_t offset, uint32_t nr_relocs, uint64_t relocs) +{ + uint32_t i, last_offset = 0; + uint32_t *ptr; + int ret; + + if (offset % 4) { + DBG("non-aligned cmdstream buffer: %u", offset); + return -EINVAL; + } + + /* For now, just map the entire thing. Eventually we probably + * to do it page-by-page, w/ kmap() if not vmap()d.. 
+ */ + ptr = msm_gem_vaddr(&obj->base); + + if (IS_ERR(ptr)) { + ret = PTR_ERR(ptr); + DBG("failed to map: %d", ret); + return ret; + } + + for (i = 0; i < nr_relocs; i++) { + struct drm_msm_gem_submit_reloc submit_reloc; + void __user *userptr = + to_user_ptr(relocs + (i * sizeof(submit_reloc))); + uint32_t iova, off; + bool valid; + + ret = copy_from_user(&submit_reloc, userptr, sizeof(submit_reloc)); + if (ret) + return -EFAULT; + + if (submit_reloc.submit_offset % 4) { + DBG("non-aligned reloc offset: %u", + submit_reloc.submit_offset); + return -EINVAL; + } + + /* offset in dwords: */ + off = submit_reloc.submit_offset / 4; + + if ((off >= (obj->base.size / 4)) || + (off < last_offset)) { + DBG("invalid offset %u at reloc %u", off, i); + return -EINVAL; + } + + ret = submit_bo(submit, submit_reloc.reloc_idx, NULL, &iova, &valid); + if (ret) + return ret; + + if (valid) + continue; + + iova += submit_reloc.reloc_offset; + + if (submit_reloc.shift < 0) + iova >>= -submit_reloc.shift; + else + iova <<= submit_reloc.shift; + + ptr[off] = iova | submit_reloc.or; + + last_offset = off; + } + + return 0; +} + +static void submit_cleanup(struct msm_gem_submit *submit, bool fail) +{ + unsigned i; + + mutex_lock(&submit->dev->struct_mutex); + for (i = 0; i < submit->nr_bos; i++) { + struct msm_gem_object *msm_obj = submit->bos[i].obj; + submit_unlock_unpin_bo(submit, i); + list_del_init(&msm_obj->submit_entry); + drm_gem_object_unreference(&msm_obj->base); + } + mutex_unlock(&submit->dev->struct_mutex); + + ww_acquire_fini(&submit->ticket); + kfree(submit); +} + +int msm_ioctl_gem_submit(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct msm_drm_private *priv = dev->dev_private; + struct drm_msm_gem_submit *args = data; + struct msm_file_private *ctx = file->driver_priv; + struct msm_gem_submit *submit; + struct msm_gpu *gpu; + unsigned i; + int ret; + + /* for now, we just have 3d pipe.. 
eventually this would need to + * be more clever to dispatch to appropriate gpu module: + */ + if (args->pipe != MSM_PIPE_3D0) + return -EINVAL; + + gpu = priv->gpu; + + /* a failed gpu load is not fatal, so priv->gpu can be NULL here; + * bail out the same way msm_ioctl_get_param() does: + */ + if (!gpu) + return -ENXIO; + + if (args->nr_cmds > MAX_CMDS) + return -EINVAL; + + submit = submit_create(dev, gpu, args->nr_bos); + if (!submit) { + ret = -ENOMEM; + goto out; + } + + ret = submit_lookup_objects(submit, args, file); + if (ret) + goto out; + + ret = submit_validate_objects(submit); + if (ret) + goto out; + + for (i = 0; i < args->nr_cmds; i++) { + struct drm_msm_gem_submit_cmd submit_cmd; + void __user *userptr = + to_user_ptr(args->cmds + (i * sizeof(submit_cmd))); + struct msm_gem_object *msm_obj; + uint32_t iova; + + ret = copy_from_user(&submit_cmd, userptr, sizeof(submit_cmd)); + if (ret) { + ret = -EFAULT; + goto out; + } + + ret = submit_bo(submit, submit_cmd.submit_idx, + &msm_obj, &iova, NULL); + if (ret) + goto out; + + if (submit_cmd.size % 4) { + DBG("non-aligned cmdstream buffer size: %u", + submit_cmd.size); + ret = -EINVAL; + goto out; + } + + /* the cmdstream starts at submit_offset within the bo, so the + * offset has to be part of the bounds check too (second test + * cannot underflow, size is already known to be < bo size): + */ + if ((submit_cmd.size >= msm_obj->base.size) || + (submit_cmd.submit_offset > + (msm_obj->base.size - submit_cmd.size))) { + DBG("invalid cmdstream size: %u", submit_cmd.size); + ret = -EINVAL; + goto out; + } + + submit->cmd[i].type = submit_cmd.type; + submit->cmd[i].size = submit_cmd.size / 4; + submit->cmd[i].iova = iova + submit_cmd.submit_offset; + + if (submit->valid) + continue; + + ret = submit_reloc(submit, msm_obj, submit_cmd.submit_offset, + submit_cmd.nr_relocs, submit_cmd.relocs); + if (ret) + goto out; + } + + submit->nr_cmds = i; + + ret = msm_gpu_submit(gpu, submit, ctx); + + args->fence = submit->fence; + +out: + if (submit) + submit_cleanup(submit, !!ret); + return ret; +} diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c new file mode 100644 index 0000000..7c6541e --- /dev/null +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -0,0 +1,411 @@ +/* + * Copyright (C) 2013 Red Hat + * Author: Rob Clark <robdclark@gmail.com> + * + * This program is free software; you can redistribute it and/or modify it + * 
under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "msm_gpu.h" +#include "msm_gem.h" + + +/* + * Power Management: + */ + +#ifdef CONFIG_MSM_BUS_SCALING +#include <mach/board.h> +#include <mach/kgsl.h> +/* register a bus-scaling client (gpu->bsc) from the platform data's + * bus_scale_table, if there is one: + */ +static void bs_init(struct msm_gpu *gpu, struct platform_device *pdev) +{ + struct drm_device *dev = gpu->dev; + struct kgsl_device_platform_data *pdata; + + if (!pdev) { + dev_err(dev->dev, "could not find dtv pdata\n"); + return; + } + + /* only touch pdev after the NULL check above: */ + pdata = pdev->dev.platform_data; + + if (pdata && pdata->bus_scale_table) { + gpu->bsc = msm_bus_scale_register_client(pdata->bus_scale_table); + DBG("bus scale client: %08x", gpu->bsc); + } +} + +static void bs_fini(struct msm_gpu *gpu) +{ + if (gpu->bsc) { + msm_bus_scale_unregister_client(gpu->bsc); + gpu->bsc = 0; + } +} + +static void bs_set(struct msm_gpu *gpu, int idx) +{ + if (gpu->bsc) { + DBG("set bus scaling: %d", idx); + msm_bus_scale_client_update_request(gpu->bsc, idx); + } +} +#else +static void bs_init(struct msm_gpu *gpu, struct platform_device *pdev) {} +static void bs_fini(struct msm_gpu *gpu) {} +static void bs_set(struct msm_gpu *gpu, int idx) {} +#endif + +static int enable_pwrrail(struct msm_gpu *gpu) +{ + struct drm_device *dev = gpu->dev; + int ret = 0; + + if (gpu->gpu_reg) { + ret = regulator_enable(gpu->gpu_reg); + if (ret) { + dev_err(dev->dev, "failed to enable 'gpu_reg': %d\n", ret); + return ret; 
+ } + } + + return 0; +} + +static int disable_pwrrail(struct msm_gpu *gpu) +{ + if (gpu->gpu_cx) + regulator_disable(gpu->gpu_cx); + if (gpu->gpu_reg) + regulator_disable(gpu->gpu_reg); + return 0; +} + +static int enable_clk(struct msm_gpu *gpu) +{ + struct clk *rate_clk = NULL; + int i; + + /* NOTE: kgsl_pwrctrl_clk() ignores grp_clks[0].. */ + for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i > 0; i--) { + if (gpu->grp_clks[i]) { + clk_prepare(gpu->grp_clks[i]); + rate_clk = gpu->grp_clks[i]; + } + } + + if (rate_clk && gpu->fast_rate) + clk_set_rate(rate_clk, gpu->fast_rate); + + for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i > 0; i--) + if (gpu->grp_clks[i]) + clk_enable(gpu->grp_clks[i]); + + return 0; +} + +static int disable_clk(struct msm_gpu *gpu) +{ + struct clk *rate_clk = NULL; + int i; + + /* NOTE: kgsl_pwrctrl_clk() ignores grp_clks[0].. */ + for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i > 0; i--) { + if (gpu->grp_clks[i]) { + clk_disable(gpu->grp_clks[i]); + rate_clk = gpu->grp_clks[i]; + } + } + + if (rate_clk && gpu->slow_rate) + clk_set_rate(rate_clk, gpu->slow_rate); + + for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i > 0; i--) + if (gpu->grp_clks[i]) + clk_unprepare(gpu->grp_clks[i]); + + return 0; +} + +static int enable_axi(struct msm_gpu *gpu) +{ + if (gpu->ebi1_clk) + clk_prepare_enable(gpu->ebi1_clk); + if (gpu->bus_freq) + bs_set(gpu, gpu->bus_freq); + return 0; +} + +static int disable_axi(struct msm_gpu *gpu) +{ + if (gpu->ebi1_clk) + clk_disable_unprepare(gpu->ebi1_clk); + if (gpu->bus_freq) + bs_set(gpu, 0); + return 0; +} + +int msm_gpu_pm_resume(struct msm_gpu *gpu) +{ + int ret; + + DBG("%s", gpu->name); + + ret = enable_pwrrail(gpu); + if (ret) + return ret; + + ret = enable_clk(gpu); + if (ret) + return ret; + + ret = enable_axi(gpu); + if (ret) + return ret; + + return 0; +} + +int msm_gpu_pm_suspend(struct msm_gpu *gpu) +{ + int ret; + + DBG("%s", gpu->name); + + ret = disable_axi(gpu); + if (ret) + return ret; + + ret = disable_clk(gpu); + if (ret) 
+ return ret; + + ret = disable_pwrrail(gpu); + if (ret) + return ret; + + return 0; +} + +/* + * Cmdstream submission/retirement: + */ + +static void retire_worker(struct work_struct *work) +{ + struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work); + struct drm_device *dev = gpu->dev; + uint32_t fence = gpu->funcs->last_fence(gpu); + + mutex_lock(&dev->struct_mutex); + + while (!list_empty(&gpu->active_list)) { + struct msm_gem_object *obj; + + obj = list_first_entry(&gpu->active_list, + struct msm_gem_object, mm_list); + + if (obj->fence <= fence) { + /* move to inactive: */ + msm_gem_move_to_inactive(&obj->base); + msm_gem_put_iova(&obj->base, gpu->id); + drm_gem_object_unreference(&obj->base); + } else { + break; + } + } + + msm_update_fence(gpu->dev, fence); + + mutex_unlock(&dev->struct_mutex); +} + +/* call from irq handler to schedule work to retire bo's */ +void msm_gpu_retire(struct msm_gpu *gpu) +{ + struct msm_drm_private *priv = gpu->dev->dev_private; + queue_work(priv->wq, &gpu->retire_work); +} + +/* add bo's to gpu's ring, and kick gpu: */ +int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, + struct msm_file_private *ctx) +{ + struct drm_device *dev = gpu->dev; + struct msm_drm_private *priv = dev->dev_private; + int i, ret; + + mutex_lock(&dev->struct_mutex); + + submit->fence = ++priv->next_fence; + + ret = gpu->funcs->submit(gpu, submit, ctx); + priv->lastctx = ctx; + + for (i = 0; i < submit->nr_bos; i++) { + struct msm_gem_object *msm_obj = submit->bos[i].obj; + + /* can't happen yet.. 
but when we add 2d support we'll have + * to deal w/ cross-ring synchronization: + */ + WARN_ON(is_active(msm_obj) && (msm_obj->gpu != gpu)); + + if (!is_active(msm_obj)) { + uint32_t iova; + + /* ring takes a reference to the bo and iova: */ + drm_gem_object_reference(&msm_obj->base); + msm_gem_get_iova_locked(&msm_obj->base, + submit->gpu->id, &iova); + } + + msm_gem_move_to_active(&msm_obj->base, gpu, submit->fence); + } + mutex_unlock(&dev->struct_mutex); + + return ret; +} + +/* + * Init/Cleanup: + */ + +static irqreturn_t irq_handler(int irq, void *data) +{ + struct msm_gpu *gpu = data; + return gpu->funcs->irq(gpu); +} + +static const char *clk_names[] = { + "src_clk", "core_clk", "iface_clk", "mem_clk", "mem_iface_clk", +}; + +int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, + struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs, + const char *name, const char *ioname, const char *irqname, int ringsz) +{ + int i, ret; + + gpu->dev = drm; + gpu->funcs = funcs; + gpu->name = name; + + INIT_LIST_HEAD(&gpu->active_list); + INIT_WORK(&gpu->retire_work, retire_worker); + + BUG_ON(ARRAY_SIZE(clk_names) != ARRAY_SIZE(gpu->grp_clks)); + + /* Map registers: */ + gpu->mmio = msm_ioremap(pdev, ioname, name); + if (IS_ERR(gpu->mmio)) { + ret = PTR_ERR(gpu->mmio); + goto fail; + } + + /* Get Interrupt: */ + gpu->irq = platform_get_irq_byname(pdev, irqname); + if (gpu->irq < 0) { + ret = gpu->irq; + dev_err(drm->dev, "failed to get irq: %d\n", ret); + goto fail; + } + + ret = devm_request_irq(&pdev->dev, gpu->irq, irq_handler, + IRQF_TRIGGER_HIGH, gpu->name, gpu); + if (ret) { + dev_err(drm->dev, "failed to request IRQ%u: %d\n", gpu->irq, ret); + goto fail; + } + + /* Acquire clocks: */ + for (i = 0; i < ARRAY_SIZE(clk_names); i++) { + gpu->grp_clks[i] = devm_clk_get(&pdev->dev, clk_names[i]); + DBG("grp_clks[%s]: %p", clk_names[i], gpu->grp_clks[i]); + if (IS_ERR(gpu->grp_clks[i])) + gpu->grp_clks[i] = NULL; + } + + gpu->ebi1_clk = 
devm_clk_get(&pdev->dev, "bus_clk"); + DBG("ebi1_clk: %p", gpu->ebi1_clk); + if (IS_ERR(gpu->ebi1_clk)) + gpu->ebi1_clk = NULL; + + /* Acquire regulators: */ + gpu->gpu_reg = devm_regulator_get(&pdev->dev, "vdd"); + DBG("gpu_reg: %p", gpu->gpu_reg); + if (IS_ERR(gpu->gpu_reg)) + gpu->gpu_reg = NULL; + + gpu->gpu_cx = devm_regulator_get(&pdev->dev, "vddcx"); + DBG("gpu_cx: %p", gpu->gpu_cx); + if (IS_ERR(gpu->gpu_cx)) + gpu->gpu_cx = NULL; + + /* Setup IOMMU.. eventually we will (I think) do this once per context + * and have separate page tables per context. For now, to keep things + * simple and to get something working, just use a single address space: + */ + gpu->iommu = iommu_domain_alloc(&platform_bus_type); + if (!gpu->iommu) { + dev_err(drm->dev, "failed to allocate IOMMU\n"); + ret = -ENOMEM; + goto fail; + } + gpu->id = msm_register_iommu(drm, gpu->iommu); + + /* Create ringbuffer: */ + gpu->rb = msm_ringbuffer_new(gpu, ringsz); + if (IS_ERR(gpu->rb)) { + ret = PTR_ERR(gpu->rb); + gpu->rb = NULL; + dev_err(drm->dev, "could not create ringbuffer: %d\n", ret); + goto fail; + } + + ret = msm_gem_get_iova_locked(gpu->rb->bo, gpu->id, &gpu->rb_iova); + if (ret) { + gpu->rb_iova = 0; + dev_err(drm->dev, "could not map ringbuffer: %d\n", ret); + goto fail; + } + + bs_init(gpu, pdev); + + return 0; + +fail: + return ret; +} + +void msm_gpu_cleanup(struct msm_gpu *gpu) +{ + DBG("%s", gpu->name); + + WARN_ON(!list_empty(&gpu->active_list)); + + bs_fini(gpu); + + if (gpu->rb) { + if (gpu->rb_iova) + msm_gem_put_iova(gpu->rb->bo, gpu->id); + msm_ringbuffer_destroy(gpu->rb); + } + + if (gpu->iommu) + iommu_domain_free(gpu->iommu); +} diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h new file mode 100644 index 0000000..8d2cd6c --- /dev/null +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -0,0 +1,114 @@ +/* + * Copyright (C) 2013 Red Hat + * Author: Rob Clark <robdclark@gmail.com> + * + * This program is free software; you can redistribute it and/or 
modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __MSM_GPU_H__ +#define __MSM_GPU_H__ + +#include <linux/clk.h> +#include <linux/regulator/consumer.h> + +#include "msm_drv.h" +#include "msm_ringbuffer.h" + +struct msm_gem_submit; + +/* So far, with hardware that I've seen to date, we can have: + * + zero, one, or two z180 2d cores + * + a3xx or a2xx 3d core, which share a common CP (the firmware + * for the CP seems to implement some different PM4 packet types + * but the basics of cmdstream submission are the same) + * + * Which means that the eventual complete "class" hierarchy, once + * support for all past and present hw is in place, becomes: + * + msm_gpu + * + adreno_gpu + * + a3xx_gpu + * + a2xx_gpu + * + z180_gpu + */ +struct msm_gpu_funcs { + int (*get_param)(struct msm_gpu *gpu, uint32_t param, uint64_t *value); + int (*hw_init)(struct msm_gpu *gpu); + int (*pm_suspend)(struct msm_gpu *gpu); + int (*pm_resume)(struct msm_gpu *gpu); + int (*submit)(struct msm_gpu *gpu, struct msm_gem_submit *submit, + struct msm_file_private *ctx); + void (*flush)(struct msm_gpu *gpu); + void (*idle)(struct msm_gpu *gpu); + irqreturn_t (*irq)(struct msm_gpu *irq); + uint32_t (*last_fence)(struct msm_gpu *gpu); + void (*destroy)(struct msm_gpu *gpu); +#ifdef CONFIG_DEBUG_FS + /* show GPU status in debugfs: */ + void (*show)(struct msm_gpu *gpu, struct seq_file *m); +#endif +}; + +struct msm_gpu { + const char *name; + struct drm_device *dev; + const struct msm_gpu_funcs *funcs; + + struct 
msm_ringbuffer *rb; + uint32_t rb_iova; + + /* list of GEM active objects: */ + struct list_head active_list; + + /* worker for handling active-list retiring: */ + struct work_struct retire_work; + + void __iomem *mmio; + int irq; + + struct iommu_domain *iommu; + int id; + + /* Power Control: */ + struct regulator *gpu_reg, *gpu_cx; + struct clk *ebi1_clk, *grp_clks[5]; + uint32_t fast_rate, slow_rate, bus_freq; + uint32_t bsc; +}; + +static inline void gpu_write(struct msm_gpu *gpu, u32 reg, u32 data) +{ + msm_writel(data, gpu->mmio + (reg << 2)); +} + +static inline u32 gpu_read(struct msm_gpu *gpu, u32 reg) +{ + return msm_readl(gpu->mmio + (reg << 2)); +} + +int msm_gpu_pm_suspend(struct msm_gpu *gpu); +int msm_gpu_pm_resume(struct msm_gpu *gpu); + +void msm_gpu_retire(struct msm_gpu *gpu); +int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, + struct msm_file_private *ctx); + +int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, + struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs, + const char *name, const char *ioname, const char *irqname, int ringsz); +void msm_gpu_cleanup(struct msm_gpu *gpu); + +struct msm_gpu *a3xx_gpu_init(struct drm_device *dev); +void __init a3xx_register(void); +void __exit a3xx_unregister(void); + +#endif /* __MSM_GPU_H__ */ diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c new file mode 100644 index 0000000..8171537d --- /dev/null +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2013 Red Hat + * Author: Rob Clark <robdclark@gmail.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "msm_ringbuffer.h" +#include "msm_gpu.h" + +struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size) +{ + struct msm_ringbuffer *ring; + int ret; + + size = ALIGN(size, 4); /* size should be dword aligned */ + + ring = kzalloc(sizeof(*ring), GFP_KERNEL); + if (!ring) { + ret = -ENOMEM; + goto fail; + } + + ring->gpu = gpu; + ring->bo = msm_gem_new(gpu->dev, size, MSM_BO_WC); + if (IS_ERR(ring->bo)) { + ret = PTR_ERR(ring->bo); + ring->bo = NULL; + goto fail; + } + + ring->start = msm_gem_vaddr_locked(ring->bo); + ring->end = ring->start + (size / 4); + ring->cur = ring->start; + + ring->size = size; + + return ring; + +fail: + if (ring) + msm_ringbuffer_destroy(ring); + return ERR_PTR(ret); +} + +void msm_ringbuffer_destroy(struct msm_ringbuffer *ring) +{ + if (ring->bo) + drm_gem_object_unreference(ring->bo); + kfree(ring); +} diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h b/drivers/gpu/drm/msm/msm_ringbuffer.h new file mode 100644 index 0000000..6e0e104 --- /dev/null +++ b/drivers/gpu/drm/msm/msm_ringbuffer.h @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2013 Red Hat + * Author: Rob Clark <robdclark@gmail.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef __MSM_RINGBUFFER_H__ +#define __MSM_RINGBUFFER_H__ + +#include "msm_drv.h" + +struct msm_ringbuffer { + struct msm_gpu *gpu; + int size; + struct drm_gem_object *bo; + uint32_t *start, *end, *cur; +}; + +struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size); +void msm_ringbuffer_destroy(struct msm_ringbuffer *ring); + +/* ringbuffer helpers (the parts that are same for a3xx/a2xx/z180..) */ + +static inline void +OUT_RING(struct msm_ringbuffer *ring, uint32_t data) +{ + if (ring->cur == ring->end) + ring->cur = ring->start; + *(ring->cur++) = data; +} + +#endif /* __MSM_RINGBUFFER_H__ */ diff --git a/include/uapi/drm/Kbuild b/include/uapi/drm/Kbuild index 119487e..2d9a25d 100644 --- a/include/uapi/drm/Kbuild +++ b/include/uapi/drm/Kbuild @@ -16,3 +16,4 @@ header-y += sis_drm.h header-y += tegra_drm.h header-y += via_drm.h header-y += vmwgfx_drm.h +header-y += msm_drm.h diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h new file mode 100644 index 0000000..d3c6207 --- /dev/null +++ b/include/uapi/drm/msm_drm.h @@ -0,0 +1,207 @@ +/* + * Copyright (C) 2013 Red Hat + * Author: Rob Clark <robdclark@gmail.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef __MSM_DRM_H__ +#define __MSM_DRM_H__ + +#include <stddef.h> +#include <drm/drm.h> + +/* Please note that modifications to all structs defined here are + * subject to backwards-compatibility constraints: + * 1) Do not use pointers, use uint64_t instead for 32 bit / 64 bit + * user/kernel compatibility + * 2) Keep fields aligned to their size + * 3) Because of how drm_ioctl() works, we can add new fields at + * the end of an ioctl if some care is taken: drm_ioctl() will + * zero out the new fields at the tail of the ioctl, so a zero + * value should have a backwards compatible meaning. And for + * output params, userspace won't see the newly added output + * fields.. so that has to be somehow ok. + */ + +#define MSM_PIPE_NONE 0x00 +#define MSM_PIPE_2D0 0x01 +#define MSM_PIPE_2D1 0x02 +#define MSM_PIPE_3D0 0x10 + +/* timeouts are specified in clock-monotonic absolute times (to simplify + * restarting interrupted ioctls). The following struct is logically the + * same as 'struct timespec' but 32/64b ABI safe. 
+ */ +struct drm_msm_timespec { + int64_t tv_sec; /* seconds */ + int64_t tv_nsec; /* nanoseconds */ +}; + +#define MSM_PARAM_GPU_ID 0x01 +#define MSM_PARAM_GMEM_SIZE 0x02 + +struct drm_msm_param { + uint32_t pipe; /* in, MSM_PIPE_x */ + uint32_t param; /* in, MSM_PARAM_x */ + uint64_t value; /* out (get_param) or in (set_param) */ +}; + +/* + * GEM buffers: + */ + +#define MSM_BO_SCANOUT 0x00000001 /* scanout capable */ +#define MSM_BO_GPU_READONLY 0x00000002 +#define MSM_BO_CACHE_MASK 0x000f0000 +/* cache modes */ +#define MSM_BO_CACHED 0x00010000 +#define MSM_BO_WC 0x00020000 +#define MSM_BO_UNCACHED 0x00040000 + +struct drm_msm_gem_new { + uint64_t size; /* in */ + uint32_t flags; /* in, mask of MSM_BO_x */ + uint32_t handle; /* out */ +}; + +struct drm_msm_gem_info { + uint32_t handle; /* in */ + uint32_t pad; + uint64_t offset; /* out, offset to pass to mmap() */ +}; + +#define MSM_PREP_READ 0x01 +#define MSM_PREP_WRITE 0x02 +#define MSM_PREP_NOSYNC 0x04 + +struct drm_msm_gem_cpu_prep { + uint32_t handle; /* in */ + uint32_t op; /* in, mask of MSM_PREP_x */ + struct drm_msm_timespec timeout; /* in */ +}; + +struct drm_msm_gem_cpu_fini { + uint32_t handle; /* in */ +}; + +/* + * Cmdstream Submission: + */ + +/* The value written into the cmdstream is logically: + * + * ((relocbuf->gpuaddr + reloc_offset) << shift) | or + * + * When we have GPU's w/ >32bit ptrs, it should be possible to deal + * with this by emit'ing two reloc entries with appropriate shift + * values. Or a new MSM_SUBMIT_CMD_x type would also be an option. + * + * NOTE that reloc's must be sorted by order of increasing submit_offset, + * otherwise EINVAL. 
+ */ +struct drm_msm_gem_submit_reloc { + uint32_t submit_offset; /* in, offset from submit_bo */ + uint32_t or; /* in, value OR'd with result */ + int32_t shift; /* in, amount of left shift (can be negative) */ + uint32_t reloc_idx; /* in, index of reloc_bo buffer */ + uint64_t reloc_offset; /* in, offset from start of reloc_bo */ +}; + +/* submit-types: + * BUF - this cmd buffer is executed normally. + * IB_TARGET_BUF - this cmd buffer is an IB target. Reloc's are + * processed normally, but the kernel does not set up an IB to + * this buffer in the first-level ringbuffer + * CTX_RESTORE_BUF - only executed if there has been a GPU context + * switch since the last SUBMIT ioctl + */ +#define MSM_SUBMIT_CMD_BUF 0x0001 +#define MSM_SUBMIT_CMD_IB_TARGET_BUF 0x0002 +#define MSM_SUBMIT_CMD_CTX_RESTORE_BUF 0x0003 +struct drm_msm_gem_submit_cmd { + uint32_t type; /* in, one of MSM_SUBMIT_CMD_x */ + uint32_t submit_idx; /* in, index of submit_bo cmdstream buffer */ + uint32_t submit_offset; /* in, offset into submit_bo */ + uint32_t size; /* in, cmdstream size */ + uint32_t pad; + uint32_t nr_relocs; /* in, number of submit_reloc's */ + uint64_t __user relocs; /* in, ptr to array of submit_reloc's */ +}; + +/* Each buffer referenced elsewhere in the cmdstream submit (i.e. the + * cmdstream buffer(s) themselves or reloc entries) has one (and only + * one) entry in the submit->bos[] table. + * + * As an optimization, the current buffer (gpu virtual address) can be + * passed back through the 'presumed' field. If on a subsequent reloc, + * userspace passes back a 'presumed' address that is still valid, + * then patching the cmdstream for this entry is skipped. This can + * avoid kernel needing to map/access the cmdstream bo in the common + * case. 
+ */ +#define MSM_SUBMIT_BO_READ 0x0001 +#define MSM_SUBMIT_BO_WRITE 0x0002 +struct drm_msm_gem_submit_bo { + uint32_t flags; /* in, mask of MSM_SUBMIT_BO_x */ + uint32_t handle; /* in, GEM handle */ + uint64_t presumed; /* in/out, presumed buffer address */ +}; + +/* Each cmdstream submit consists of a table of buffers involved, and + * one or more cmdstream buffers. This allows for conditional execution + * (context-restore), and IB buffers needed for per tile/bin draw cmds. + */ +struct drm_msm_gem_submit { + uint32_t pipe; /* in, MSM_PIPE_x */ + uint32_t fence; /* out */ + uint32_t nr_bos; /* in, number of submit_bo's */ + uint32_t nr_cmds; /* in, number of submit_cmd's */ + uint64_t __user bos; /* in, ptr to array of submit_bo's */ + uint64_t __user cmds; /* in, ptr to array of submit_cmd's */ +}; + +/* The normal way to synchronize with the GPU is just to CPU_PREP on + * a buffer if you need to access it from the CPU (other cmdstream + * submission from same or other contexts, PAGE_FLIP ioctl, etc, all + * handle the required synchronization under the hood). This ioctl + * mainly just exists as a way to implement the gallium pipe_fence + * APIs without requiring a dummy bo to synchronize on. 
+ */ +struct drm_msm_wait_fence { + uint32_t fence; /* in */ + uint32_t pad; + struct drm_msm_timespec timeout; /* in */ +}; + +#define DRM_MSM_GET_PARAM 0x00 +/* placeholder: +#define DRM_MSM_SET_PARAM 0x01 + */ +#define DRM_MSM_GEM_NEW 0x02 +#define DRM_MSM_GEM_INFO 0x03 +#define DRM_MSM_GEM_CPU_PREP 0x04 +#define DRM_MSM_GEM_CPU_FINI 0x05 +#define DRM_MSM_GEM_SUBMIT 0x06 +#define DRM_MSM_WAIT_FENCE 0x07 +#define DRM_MSM_NUM_IOCTLS 0x08 + +#define DRM_IOCTL_MSM_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GET_PARAM, struct drm_msm_param) +#define DRM_IOCTL_MSM_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_NEW, struct drm_msm_gem_new) +#define DRM_IOCTL_MSM_GEM_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_INFO, struct drm_msm_gem_info) +#define DRM_IOCTL_MSM_GEM_CPU_PREP DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_GEM_CPU_PREP, struct drm_msm_gem_cpu_prep) +#define DRM_IOCTL_MSM_GEM_CPU_FINI DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_GEM_CPU_FINI, struct drm_msm_gem_cpu_fini) +#define DRM_IOCTL_MSM_GEM_SUBMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_SUBMIT, struct drm_msm_gem_submit) +#define DRM_IOCTL_MSM_WAIT_FENCE DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_WAIT_FENCE, struct drm_msm_wait_fence) + +#endif /* __MSM_DRM_H__ */

[PATCHv4,4/5] drm/msm: add a3xx gpu support

Commit Message

Patch