diff options
author | Rob Clark <robdclark@gmail.com> | 2013-07-19 12:59:32 -0400 |
---|---|---|
committer | Rob Clark <robdclark@gmail.com> | 2013-08-24 14:57:18 -0400 |
commit | 7198e6b03155f6dadecadba004eb83b81a6ffe4c (patch) | |
tree | ed4ae3e859fd9a722524242a145008d13606a95a /drivers/gpu/drm/msm/adreno | |
parent | 902e6eb851a78ad9e3db006c1e1df71841f633e2 (diff) |
drm/msm: add a3xx gpu support
Add initial support for a3xx 3d core.
So far, with hardware that I've seen to date, we can have:
+ zero, one, or two z180 2d cores
+ a3xx or a2xx 3d core, which share a common CP (the firmware
for the CP seems to implement some different PM4 packet types
but the basics of cmdstream submission are the same)
Which means that the eventual complete "class" hierarchy, once
support for all past and present hw is in place, becomes:
+ msm_gpu
+ adreno_gpu
+ a3xx_gpu
+ a2xx_gpu
+ z180_gpu
This commit splits out the parts that will eventually be common
between a2xx/a3xx into adreno_gpu, and the parts that are even
common to z180 into msm_gpu.
Note that there is no cmdstream validation required. All memory access
from the GPU is via IOMMU/MMU. So as long as you don't map silly things
to the GPU, there isn't much damage that the GPU can do.
Signed-off-by: Rob Clark <robdclark@gmail.com>
Diffstat (limited to 'drivers/gpu/drm/msm/adreno')
-rw-r--r-- | drivers/gpu/drm/msm/adreno/a3xx_gpu.c | 501 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/adreno/a3xx_gpu.h | 30 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/adreno/adreno_gpu.c | 350 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/adreno/adreno_gpu.h | 142 |
4 files changed, 1023 insertions, 0 deletions
diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c new file mode 100644 index 00000000000..13d61bbed30 --- /dev/null +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c @@ -0,0 +1,501 @@ +/* + * Copyright (C) 2013 Red Hat + * Author: Rob Clark <robdclark@gmail.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "a3xx_gpu.h" + +#define A3XX_INT0_MASK \ + (A3XX_INT0_RBBM_AHB_ERROR | \ + A3XX_INT0_RBBM_ATB_BUS_OVERFLOW | \ + A3XX_INT0_CP_T0_PACKET_IN_IB | \ + A3XX_INT0_CP_OPCODE_ERROR | \ + A3XX_INT0_CP_RESERVED_BIT_ERROR | \ + A3XX_INT0_CP_HW_FAULT | \ + A3XX_INT0_CP_IB1_INT | \ + A3XX_INT0_CP_IB2_INT | \ + A3XX_INT0_CP_RB_INT | \ + A3XX_INT0_CP_REG_PROTECT_FAULT | \ + A3XX_INT0_CP_AHB_ERROR_HALT | \ + A3XX_INT0_UCHE_OOB_ACCESS) + +static struct platform_device *a3xx_pdev; + +static void a3xx_me_init(struct msm_gpu *gpu) +{ + struct msm_ringbuffer *ring = gpu->rb; + + OUT_PKT3(ring, CP_ME_INIT, 17); + OUT_RING(ring, 0x000003f7); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000080); + OUT_RING(ring, 0x00000100); + OUT_RING(ring, 0x00000180); + OUT_RING(ring, 0x00006600); + OUT_RING(ring, 0x00000150); + OUT_RING(ring, 0x0000014e); + OUT_RING(ring, 0x00000154); + OUT_RING(ring, 0x00000001); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + gpu->funcs->flush(gpu); + gpu->funcs->idle(gpu); +} + +static int a3xx_hw_init(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + uint32_t *ptr, len; + int i, ret; + + DBG("%s", gpu->name); + + if (adreno_is_a305(adreno_gpu)) { + /* Set up 16 deep read/write request queues: */ + gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010); + gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010); + gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010); + gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010); + gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303); + gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010); + gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010); + /* Enable WR-REQ: */ + gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff); + /* Set up round robin arbitration between both AXI ports: */ + gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030); + /* Set up AOOO: */ + gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c); + gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c); + + } else if (adreno_is_a320(adreno_gpu)) { + /* Set up 16 deep read/write request queues: */ + gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010); + gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010); + gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010); + gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010); + gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303); + gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010); + gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010); + /* Enable WR-REQ: */ + gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff); + /* Set up round robin arbitration between both AXI ports: */ + gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030); + /* Set up AOOO: */ + gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c); + gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c); + /* Enable 1K sort: */ + gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x000000ff); + gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4); + + } else if (adreno_is_a330(adreno_gpu)) { + /* Set up 16 deep read/write request queues: */ + gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818); + gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x18181818); + gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x18181818); + gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x18181818); + gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303); + gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818); + gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x18181818); + /* Enable WR-REQ: */ + gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f); + /* Set up round robin arbitration between both AXI ports: */ + gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030); + /* Set up VBIF_ROUND_ROBIN_QOS_ARB: */ + gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001); + /* Set up AOOO: */ + gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000ffff); + gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0xffffffff); + /* Enable 1K sort: */ + gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001ffff); + gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4); + /* Disable VBIF clock gating. This is to enable AXI running + * higher frequency than GPU: + */ + gpu_write(gpu, REG_A3XX_VBIF_CLKON, 0x00000001); + + } else { + BUG(); + } + + /* Make all blocks contribute to the GPU BUSY perf counter: */ + gpu_write(gpu, REG_A3XX_RBBM_GPU_BUSY_MASKED, 0xffffffff); + + /* Tune the hystersis counters for SP and CP idle detection: */ + gpu_write(gpu, REG_A3XX_RBBM_SP_HYST_CNT, 0x10); + gpu_write(gpu, REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10); + + /* Enable the RBBM error reporting bits. This lets us get + * useful information on failure: + */ + gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL0, 0x00000001); + + /* Enable AHB error reporting: */ + gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL1, 0xa6ffffff); + + /* Turn on the power counters: */ + gpu_write(gpu, REG_A3XX_RBBM_RBBM_CTL, 0x00030000); + + /* Turn on hang detection - this spews a lot of useful information + * into the RBBM registers on a hang: + */ + gpu_write(gpu, REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL, 0x00010fff); + + /* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0): */ + gpu_write(gpu, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001); + + /* Enable Clock gating: */ + gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff); + + /* Set the OCMEM base address for A330 */ +//TODO: +// if (adreno_is_a330(adreno_gpu)) { +// gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR, +// (unsigned int)(a3xx_gpu->ocmem_base >> 14)); +// } + + /* Turn on performance counters: */ + gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01); + + /* Set SP perfcounter 7 to count SP_FS_FULL_ALU_INSTRUCTIONS + * we will use this to augment our hang detection: + */ + gpu_write(gpu, REG_A3XX_SP_PERFCOUNTER7_SELECT, + SP_FS_FULL_ALU_INSTRUCTIONS); + + gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK); + + ret = adreno_hw_init(gpu); + if (ret) + return ret; + + /* setup access protection: */ + gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007); + + /* RBBM registers */ + gpu_write(gpu, REG_A3XX_CP_PROTECT(0), 0x63000040); + gpu_write(gpu, REG_A3XX_CP_PROTECT(1), 0x62000080); + gpu_write(gpu, REG_A3XX_CP_PROTECT(2), 0x600000cc); + gpu_write(gpu, REG_A3XX_CP_PROTECT(3), 0x60000108); + gpu_write(gpu, REG_A3XX_CP_PROTECT(4), 0x64000140); + gpu_write(gpu, REG_A3XX_CP_PROTECT(5), 0x66000400); + + /* CP registers */ + gpu_write(gpu, REG_A3XX_CP_PROTECT(6), 0x65000700); + gpu_write(gpu, REG_A3XX_CP_PROTECT(7), 0x610007d8); + gpu_write(gpu, REG_A3XX_CP_PROTECT(8), 0x620007e0); + gpu_write(gpu, REG_A3XX_CP_PROTECT(9), 0x61001178); + gpu_write(gpu, REG_A3XX_CP_PROTECT(10), 0x64001180); + + /* RB registers */ + gpu_write(gpu, REG_A3XX_CP_PROTECT(11), 0x60003300); + + /* VBIF registers */ + gpu_write(gpu, REG_A3XX_CP_PROTECT(12), 0x6b00c000); + + /* NOTE: PM4/micro-engine firmware registers look to be the same + * for a2xx and a3xx.. we could possibly push that part down to + * adreno_gpu base class. Or push both PM4 and PFP but + * parameterize the pfp ucode addr/data registers.. + */ + + /* Load PM4: */ + ptr = (uint32_t *)(adreno_gpu->pm4->data); + len = adreno_gpu->pm4->size / 4; + DBG("loading PM4 ucode version: %u", ptr[0]); + + gpu_write(gpu, REG_AXXX_CP_DEBUG, + AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE | + AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE); + gpu_write(gpu, REG_AXXX_CP_ME_RAM_WADDR, 0); + for (i = 1; i < len; i++) + gpu_write(gpu, REG_AXXX_CP_ME_RAM_DATA, ptr[i]); + + /* Load PFP: */ + ptr = (uint32_t *)(adreno_gpu->pfp->data); + len = adreno_gpu->pfp->size / 4; + DBG("loading PFP ucode version: %u", ptr[0]); + + gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0); + for (i = 1; i < len; i++) + gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]); + + /* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */ + if (adreno_is_a305(adreno_gpu) || adreno_is_a320(adreno_gpu)) + gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS, + AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(2) | + AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(6) | + AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(14)); + + + /* clear ME_HALT to start micro engine */ + gpu_write(gpu, REG_AXXX_CP_ME_CNTL, 0); + + a3xx_me_init(gpu); + + return 0; +} + +static void a3xx_destroy(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu); + + DBG("%s", gpu->name); + + adreno_gpu_cleanup(adreno_gpu); + put_device(&a3xx_gpu->pdev->dev); + kfree(a3xx_gpu); +} + +static void a3xx_idle(struct msm_gpu *gpu) +{ + unsigned long t; + + /* wait for ringbuffer to drain: */ + adreno_idle(gpu); + + t = jiffies + ADRENO_IDLE_TIMEOUT; + + /* then wait for GPU to finish: */ + do { + uint32_t rbbm_status = gpu_read(gpu, REG_A3XX_RBBM_STATUS); + if (!(rbbm_status & A3XX_RBBM_STATUS_GPU_BUSY)) + return; + } while(time_before(jiffies, t)); + + DRM_ERROR("timeout waiting for %s to idle!\n", gpu->name); + + /* TODO maybe we need to reset GPU here to recover from hang? */ +} + +static irqreturn_t a3xx_irq(struct msm_gpu *gpu) +{ + uint32_t status; + + status = gpu_read(gpu, REG_A3XX_RBBM_INT_0_STATUS); + DBG("%s: %08x", gpu->name, status); + + // TODO + + gpu_write(gpu, REG_A3XX_RBBM_INT_CLEAR_CMD, status); + + msm_gpu_retire(gpu); + + return IRQ_HANDLED; +} + +#ifdef CONFIG_DEBUG_FS +static const unsigned int a3xx_registers[] = { + 0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027, + 0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c, + 0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5, + 0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1, + 0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd, + 0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff, + 0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f, + 0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f, + 0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e, + 0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f, + 0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7, + 0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05, + 0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65, + 0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7, + 0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09, + 0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069, + 0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075, + 0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109, + 0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115, + 0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0, + 0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e, + 0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8, + 0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7, + 0x22ff, 0x22ff, 0x2340, 0x2343, 0x2348, 0x2349, 0x2350, 0x2356, + 0x2360, 0x2360, 0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d, + 0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472, + 0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef, + 0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 0x2510, 0x2511, + 0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, 0x25ec, 0x25ed, + 0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617, 0x261a, 0x261a, + 0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce, + 0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec, + 0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743, 0x2748, 0x2749, + 0x2750, 0x2756, 0x2760, 0x2760, 0x300c, 0x300e, 0x301c, 0x301d, + 0x302a, 0x302a, 0x302c, 0x302d, 0x3030, 0x3031, 0x3034, 0x3036, + 0x303c, 0x303c, 0x305e, 0x305f, +}; + +static void a3xx_show(struct msm_gpu *gpu, struct seq_file *m) +{ + int i; + + adreno_show(gpu, m); + seq_printf(m, "status: %08x\n", + gpu_read(gpu, REG_A3XX_RBBM_STATUS)); + + /* dump these out in a form that can be parsed by demsm: */ + seq_printf(m, "IO:region %s 00000000 00020000\n", gpu->name); + for (i = 0; i < ARRAY_SIZE(a3xx_registers); i += 2) { + uint32_t start = a3xx_registers[i]; + uint32_t end = a3xx_registers[i+1]; + uint32_t addr; + + for (addr = start; addr <= end; addr++) { + uint32_t val = gpu_read(gpu, addr); + seq_printf(m, "IO:R %08x %08x\n", addr<<2, val); + } + } +} +#endif + +static const struct adreno_gpu_funcs funcs = { + .base = { + .get_param = adreno_get_param, + .hw_init = a3xx_hw_init, + .pm_suspend = msm_gpu_pm_suspend, + .pm_resume = msm_gpu_pm_resume, + .last_fence = adreno_last_fence, + .submit = adreno_submit, + .flush = adreno_flush, + .idle = a3xx_idle, + .irq = a3xx_irq, + .destroy = a3xx_destroy, +#ifdef CONFIG_DEBUG_FS + .show = a3xx_show, +#endif + }, +}; + +struct msm_gpu *a3xx_gpu_init(struct drm_device *dev) +{ + struct a3xx_gpu *a3xx_gpu = NULL; + struct msm_gpu *gpu; + struct platform_device *pdev = a3xx_pdev; + struct adreno_platform_config *config; + int ret; + + if (!pdev) { + dev_err(dev->dev, "no a3xx device\n"); + ret = -ENXIO; + goto fail; + } + + config = pdev->dev.platform_data; + + a3xx_gpu = kzalloc(sizeof(*a3xx_gpu), GFP_KERNEL); + if (!a3xx_gpu) { + ret = -ENOMEM; + goto fail; + } + + gpu = &a3xx_gpu->base.base; + + get_device(&pdev->dev); + a3xx_gpu->pdev = pdev; + + gpu->fast_rate = config->fast_rate; + gpu->slow_rate = config->slow_rate; + gpu->bus_freq = config->bus_freq; + + DBG("fast_rate=%u, slow_rate=%u, bus_freq=%u", + gpu->fast_rate, gpu->slow_rate, gpu->bus_freq); + + ret = adreno_gpu_init(dev, pdev, &a3xx_gpu->base, + &funcs, config->rev); + if (ret) + goto fail; + + return &a3xx_gpu->base.base; + +fail: + if (a3xx_gpu) + a3xx_destroy(&a3xx_gpu->base.base); + + return ERR_PTR(ret); +} + +/* + * The a3xx device: + */ + +static int a3xx_probe(struct platform_device *pdev) +{ + static struct adreno_platform_config config = {}; +#ifdef CONFIG_OF + /* TODO */ +#else + uint32_t version = socinfo_get_version(); + if (cpu_is_apq8064ab()) { + config.fast_rate = 450000000; + config.slow_rate = 27000000; + config.bus_freq = 4; + config.rev = ADRENO_REV(3, 2, 1, 0); + } else if (cpu_is_apq8064() || cpu_is_msm8960ab()) { + config.fast_rate = 400000000; + config.slow_rate = 27000000; + config.bus_freq = 4; + + if (SOCINFO_VERSION_MAJOR(version) == 2) + config.rev = ADRENO_REV(3, 2, 0, 2); + else if ((SOCINFO_VERSION_MAJOR(version) == 1) && + (SOCINFO_VERSION_MINOR(version) == 1)) + config.rev = ADRENO_REV(3, 2, 0, 1); + else + config.rev = ADRENO_REV(3, 2, 0, 0); + + } else if (cpu_is_msm8930()) { + config.fast_rate = 400000000; + config.slow_rate = 27000000; + config.bus_freq = 3; + + if ((SOCINFO_VERSION_MAJOR(version) == 1) && + (SOCINFO_VERSION_MINOR(version) == 2)) + config.rev = ADRENO_REV(3, 0, 5, 2); + else + config.rev = ADRENO_REV(3, 0, 5, 0); + + } +#endif + pdev->dev.platform_data = &config; + a3xx_pdev = pdev; + return 0; +} + +static int a3xx_remove(struct platform_device *pdev) +{ + a3xx_pdev = NULL; + return 0; +} + +static struct platform_driver a3xx_driver = { + .probe = a3xx_probe, + .remove = a3xx_remove, + .driver.name = "kgsl-3d0", +}; + +void __init a3xx_register(void) +{ + platform_driver_register(&a3xx_driver); +} + +void __exit a3xx_unregister(void) +{ + platform_driver_unregister(&a3xx_driver); +} diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.h b/drivers/gpu/drm/msm/adreno/a3xx_gpu.h new file mode 100644 index 00000000000..32c398c2d00 --- /dev/null +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.h @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2013 Red Hat + * Author: Rob Clark <robdclark@gmail.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __A3XX_GPU_H__ +#define __A3XX_GPU_H__ + +#include "adreno_gpu.h" +#include "a3xx.xml.h" + +struct a3xx_gpu { + struct adreno_gpu base; + struct platform_device *pdev; +}; +#define to_a3xx_gpu(x) container_of(x, struct a3xx_gpu, base) + +#endif /* __A3XX_GPU_H__ */ diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c new file mode 100644 index 00000000000..282163ee3fa --- /dev/null +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -0,0 +1,350 @@ +/* + * Copyright (C) 2013 Red Hat + * Author: Rob Clark <robdclark@gmail.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "adreno_gpu.h" +#include "msm_gem.h" + +struct adreno_info { + struct adreno_rev rev; + uint32_t revn; + const char *name; + const char *pm4fw, *pfpfw; + uint32_t gmem; +}; + +#define ANY_ID 0xff + +static const struct adreno_info gpulist[] = { + { + .rev = ADRENO_REV(3, 0, 5, ANY_ID), + .revn = 305, + .name = "A305", + .pm4fw = "a300_pm4.fw", + .pfpfw = "a300_pfp.fw", + .gmem = SZ_256K, + }, { + .rev = ADRENO_REV(3, 2, ANY_ID, ANY_ID), + .revn = 320, + .name = "A320", + .pm4fw = "a300_pm4.fw", + .pfpfw = "a300_pfp.fw", + .gmem = SZ_512K, + }, { + .rev = ADRENO_REV(3, 3, 0, 0), + .revn = 330, + .name = "A330", + .pm4fw = "a330_pm4.fw", + .pfpfw = "a330_pfp.fw", + .gmem = SZ_1M, + }, +}; + +#define RB_SIZE SZ_32K +#define RB_BLKSIZE 16 + +int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + + switch (param) { + case MSM_PARAM_GPU_ID: + *value = adreno_gpu->info->revn; + return 0; + case MSM_PARAM_GMEM_SIZE: + *value = adreno_gpu->info->gmem; + return 0; + default: + DBG("%s: invalid param: %u", gpu->name, param); + return -EINVAL; + } +} + +#define rbmemptr(adreno_gpu, member) \ + ((adreno_gpu)->memptrs_iova + offsetof(struct adreno_rbmemptrs, member)) + +int adreno_hw_init(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + + DBG("%s", gpu->name); + + /* Setup REG_CP_RB_CNTL: */ + gpu_write(gpu, REG_AXXX_CP_RB_CNTL, + /* size is log2(quad-words): */ + AXXX_CP_RB_CNTL_BUFSZ(ilog2(gpu->rb->size / 8)) | + AXXX_CP_RB_CNTL_BLKSZ(RB_BLKSIZE)); + + /* Setup ringbuffer address: */ + gpu_write(gpu, REG_AXXX_CP_RB_BASE, gpu->rb_iova); + gpu_write(gpu, REG_AXXX_CP_RB_RPTR_ADDR, rbmemptr(adreno_gpu, rptr)); + + /* Setup scratch/timestamp: */ + gpu_write(gpu, REG_AXXX_SCRATCH_ADDR, rbmemptr(adreno_gpu, fence)); + + gpu_write(gpu, REG_AXXX_SCRATCH_UMSK, 0x1); + + return 0; +} + +static uint32_t get_wptr(struct msm_ringbuffer *ring) +{ + return ring->cur - ring->start; +} + +uint32_t adreno_last_fence(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + return adreno_gpu->memptrs->fence; +} + +int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, + struct msm_file_private *ctx) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct msm_drm_private *priv = gpu->dev->dev_private; + struct msm_ringbuffer *ring = gpu->rb; + unsigned i, ibs = 0; + + adreno_gpu->last_fence = submit->fence; + + for (i = 0; i < submit->nr_cmds; i++) { + switch (submit->cmd[i].type) { + case MSM_SUBMIT_CMD_IB_TARGET_BUF: + /* ignore IB-targets */ + break; + case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: + /* ignore if there has not been a ctx switch: */ + if (priv->lastctx == ctx) + break; + case MSM_SUBMIT_CMD_BUF: + OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2); + OUT_RING(ring, submit->cmd[i].iova); + OUT_RING(ring, submit->cmd[i].size); + ibs++; + break; + } + } + + /* on a320, at least, we seem to need to pad things out to an + * even number of qwords to avoid issue w/ CP hanging on wrap- + * around: + */ + if (ibs % 2) + OUT_PKT2(ring); + + OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1); + OUT_RING(ring, submit->fence); + + if (adreno_is_a3xx(adreno_gpu)) { + /* Flush HLSQ lazy updates to make sure there is nothing + * pending for indirect loads after the timestamp has + * passed: + */ + OUT_PKT3(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, HLSQ_FLUSH); + + OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1); + OUT_RING(ring, 0x00000000); + } + + OUT_PKT3(ring, CP_EVENT_WRITE, 3); + OUT_RING(ring, CACHE_FLUSH_TS); + OUT_RING(ring, rbmemptr(adreno_gpu, fence)); + OUT_RING(ring, submit->fence); + + /* we could maybe be clever and only CP_COND_EXEC the interrupt: */ + OUT_PKT3(ring, CP_INTERRUPT, 1); + OUT_RING(ring, 0x80000000); + +#if 0 + if (adreno_is_a3xx(adreno_gpu)) { + /* Dummy set-constant to trigger context rollover */ + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG)); + OUT_RING(ring, 0x00000000); + } +#endif + + gpu->funcs->flush(gpu); + + return 0; +} + +void adreno_flush(struct msm_gpu *gpu) +{ + uint32_t wptr = get_wptr(gpu->rb); + + /* ensure writes to ringbuffer have hit system memory: */ + mb(); + + gpu_write(gpu, REG_AXXX_CP_RB_WPTR, wptr); +} + +void adreno_idle(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + uint32_t rptr, wptr = get_wptr(gpu->rb); + unsigned long t; + + t = jiffies + ADRENO_IDLE_TIMEOUT; + + /* then wait for CP to drain ringbuffer: */ + do { + rptr = adreno_gpu->memptrs->rptr; + if (rptr == wptr) + return; + } while(time_before(jiffies, t)); + + DRM_ERROR("timeout waiting for %s to drain ringbuffer!\n", gpu->name); + + /* TODO maybe we need to reset GPU here to recover from hang? */ +} + +#ifdef CONFIG_DEBUG_FS +void adreno_show(struct msm_gpu *gpu, struct seq_file *m) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + + seq_printf(m, "revision: %d (%d.%d.%d.%d)\n", + adreno_gpu->info->revn, adreno_gpu->rev.core, + adreno_gpu->rev.major, adreno_gpu->rev.minor, + adreno_gpu->rev.patchid); + + seq_printf(m, "fence: %d/%d\n", adreno_gpu->memptrs->fence, + adreno_gpu->last_fence); + seq_printf(m, "rptr: %d\n", adreno_gpu->memptrs->rptr); + seq_printf(m, "wptr: %d\n", adreno_gpu->memptrs->wptr); + seq_printf(m, "rb wptr: %d\n", get_wptr(gpu->rb)); +} +#endif + +void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + uint32_t freedwords; + do { + uint32_t size = gpu->rb->size / 4; + uint32_t wptr = get_wptr(gpu->rb); + uint32_t rptr = adreno_gpu->memptrs->rptr; + freedwords = (rptr + (size - 1) - wptr) % size; + } while(freedwords < ndwords); +} + +static const char *iommu_ports[] = { + "gfx3d_user", "gfx3d_priv", + "gfx3d1_user", "gfx3d1_priv", +}; + +static inline bool _rev_match(uint8_t entry, uint8_t id) +{ + return (entry == ANY_ID) || (entry == id); +} + +int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, + struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs, + struct adreno_rev rev) +{ + int i, ret; + + /* identify gpu: */ + for (i = 0; i < ARRAY_SIZE(gpulist); i++) { + const struct adreno_info *info = &gpulist[i]; + if (_rev_match(info->rev.core, rev.core) && + _rev_match(info->rev.major, rev.major) && + _rev_match(info->rev.minor, rev.minor) && + _rev_match(info->rev.patchid, rev.patchid)) { + gpu->info = info; + gpu->revn = info->revn; + break; + } + } + + if (i == ARRAY_SIZE(gpulist)) { + dev_err(drm->dev, "Unknown GPU revision: %u.%u.%u.%u\n", + rev.core, rev.major, rev.minor, rev.patchid); + return -ENXIO; + } + + DBG("Found GPU: %s (%u.%u.%u.%u)", gpu->info->name, + rev.core, rev.major, rev.minor, rev.patchid); + + gpu->funcs = funcs; + gpu->rev = rev; + + ret = request_firmware(&gpu->pm4, gpu->info->pm4fw, drm->dev); + if (ret) { + dev_err(drm->dev, "failed to load %s PM4 firmware: %d\n", + gpu->info->pm4fw, ret); + return ret; + } + + ret = request_firmware(&gpu->pfp, gpu->info->pfpfw, drm->dev); + if (ret) { + dev_err(drm->dev, "failed to load %s PFP firmware: %d\n", + gpu->info->pfpfw, ret); + return ret; + } + + ret = msm_gpu_init(drm, pdev, &gpu->base, &funcs->base, + gpu->info->name, "kgsl_3d0_reg_memory", "kgsl_3d0_irq", + RB_SIZE); + if (ret) + return ret; + + ret = msm_iommu_attach(drm, gpu->base.iommu, + iommu_ports, ARRAY_SIZE(iommu_ports)); + if (ret) + return ret; + + gpu->memptrs_bo = msm_gem_new(drm, sizeof(*gpu->memptrs), + MSM_BO_UNCACHED); + if (IS_ERR(gpu->memptrs_bo)) { + ret = PTR_ERR(gpu->memptrs_bo); + gpu->memptrs_bo = NULL; + dev_err(drm->dev, "could not allocate memptrs: %d\n", ret); + return ret; + } + + gpu->memptrs = msm_gem_vaddr_locked(gpu->memptrs_bo); + if (!gpu->memptrs) { + dev_err(drm->dev, "could not vmap memptrs\n"); + return -ENOMEM; + } + + ret = msm_gem_get_iova_locked(gpu->memptrs_bo, gpu->base.id, + &gpu->memptrs_iova); + if (ret) { + dev_err(drm->dev, "could not map memptrs: %d\n", ret); + return ret; + } + + return 0; +} + +void adreno_gpu_cleanup(struct adreno_gpu *gpu) +{ + if (gpu->memptrs_bo) { + if (gpu->memptrs_iova) + msm_gem_put_iova(gpu->memptrs_bo, gpu->base.id); + drm_gem_object_unreference(gpu->memptrs_bo); + } + if (gpu->pm4) + release_firmware(gpu->pm4); + if (gpu->pfp) + release_firmware(gpu->pfp); + msm_gpu_cleanup(&gpu->base); +} diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h new file mode 100644 index 00000000000..6b49c4f27fe --- /dev/null +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -0,0 +1,142 @@ +/* + * Copyright (C) 2013 Red Hat + * Author: Rob Clark <robdclark@gmail.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ADRENO_GPU_H__ +#define __ADRENO_GPU_H__ + +#include <linux/firmware.h> + +#include "msm_gpu.h" + +#include "adreno_common.xml.h" +#include "adreno_pm4.xml.h" + +struct adreno_rev { + uint8_t core; + uint8_t major; + uint8_t minor; + uint8_t patchid; +}; + +#define ADRENO_REV(core, major, minor, patchid) \ + ((struct adreno_rev){ core, major, minor, patchid }) + +struct adreno_gpu_funcs { + struct msm_gpu_funcs base; +}; + +struct adreno_info; + +struct adreno_rbmemptrs { + volatile uint32_t rptr; + volatile uint32_t wptr; + volatile uint32_t fence; +}; + +struct adreno_gpu { + struct msm_gpu base; + struct adreno_rev rev; + const struct adreno_info *info; + uint32_t revn; /* numeric revision name */ + const struct adreno_gpu_funcs *funcs; + + uint32_t last_fence; + + /* firmware: */ + const struct firmware *pm4, *pfp; + + /* ringbuffer rptr/wptr: */ + // TODO should this be in msm_ringbuffer? I think it would be + // different for z180.. + struct adreno_rbmemptrs *memptrs; + struct drm_gem_object *memptrs_bo; + uint32_t memptrs_iova; +}; +#define to_adreno_gpu(x) container_of(x, struct adreno_gpu, base) + +/* platform config data (ie. from DT, or pdata) */ +struct adreno_platform_config { + struct adreno_rev rev; + uint32_t fast_rate, slow_rate, bus_freq; +}; + +#define ADRENO_IDLE_TIMEOUT (20 * 1000) + +static inline bool adreno_is_a3xx(struct adreno_gpu *gpu) +{ + return (gpu->revn >= 300) && (gpu->revn < 400); +} + +static inline bool adreno_is_a305(struct adreno_gpu *gpu) +{ + return gpu->revn == 305; +} + +static inline bool adreno_is_a320(struct adreno_gpu *gpu) +{ + return gpu->revn == 320; +} + +static inline bool adreno_is_a330(struct adreno_gpu *gpu) +{ + return gpu->revn == 330; +} + +int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value); +int adreno_hw_init(struct msm_gpu *gpu); +uint32_t adreno_last_fence(struct msm_gpu *gpu); +int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, + struct msm_file_private *ctx); +void adreno_flush(struct msm_gpu *gpu); +void adreno_idle(struct msm_gpu *gpu); +#ifdef CONFIG_DEBUG_FS +void adreno_show(struct msm_gpu *gpu, struct seq_file *m); +#endif +void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords); + +int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, + struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs, + struct adreno_rev rev); +void adreno_gpu_cleanup(struct adreno_gpu *gpu); + + +/* ringbuffer helpers (the parts that are adreno specific) */ + +static inline void +OUT_PKT0(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt) +{ + adreno_wait_ring(ring->gpu, cnt+1); + OUT_RING(ring, CP_TYPE0_PKT | ((cnt-1) << 16) | (regindx & 0x7FFF)); +} + +/* no-op packet: */ +static inline void +OUT_PKT2(struct msm_ringbuffer *ring) +{ + adreno_wait_ring(ring->gpu, 1); + OUT_RING(ring, CP_TYPE2_PKT); +} + +static inline void +OUT_PKT3(struct msm_ringbuffer *ring, uint8_t opcode, uint16_t cnt) +{ + adreno_wait_ring(ring->gpu, cnt+1); + OUT_RING(ring, CP_TYPE3_PKT | ((cnt-1) << 16) | ((opcode & 0xFF) << 8)); +} + + +#endif /* __ADRENO_GPU_H__ */ |