diff options
Diffstat (limited to 'drivers/gpu/drm/msm/adreno/a3xx_gpu.c')
| -rw-r--r-- | drivers/gpu/drm/msm/adreno/a3xx_gpu.c | 303 |
1 files changed, 255 insertions, 48 deletions
diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c index 035bd13dc8b..942e09d898a 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c @@ -15,6 +15,10 @@ * this program. If not, see <http://www.gnu.org/licenses/>. */ +#ifdef CONFIG_MSM_OCMEM +# include <mach/ocmem.h> +#endif + #include "a3xx_gpu.h" #define A3XX_INT0_MASK \ @@ -31,7 +35,11 @@ A3XX_INT0_CP_AHB_ERROR_HALT | \ A3XX_INT0_UCHE_OOB_ACCESS) -static struct platform_device *a3xx_pdev; + +static bool hang_debug = false; +MODULE_PARM_DESC(hang_debug, "Dump registers when hang is detected (can be slow!)"); +module_param_named(hang_debug, hang_debug, bool, 0600); +static void a3xx_dump(struct msm_gpu *gpu); static void a3xx_me_init(struct msm_gpu *gpu) { @@ -63,6 +71,7 @@ static void a3xx_me_init(struct msm_gpu *gpu) static int a3xx_hw_init(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu); uint32_t *ptr, len; int i, ret; @@ -105,6 +114,21 @@ static int a3xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x000000ff); gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4); + } else if (adreno_is_a330v2(adreno_gpu)) { + /* + * Most of the VBIF registers on 8974v2 have the correct + * values at power on, so we won't modify those if we don't + * need to + */ + /* Enable 1k sort: */ + gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f); + gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4); + /* Enable WR-REQ: */ + gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f); + gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303); + /* Set up VBIF_ROUND_ROBIN_QOS_ARB: */ + gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003); + } else if (adreno_is_a330(adreno_gpu)) { /* Set up 16 deep read/write request queues: */ gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818); @@ -121,10 +145,10 @@ static int a3xx_hw_init(struct msm_gpu *gpu) /* Set up VBIF_ROUND_ROBIN_QOS_ARB: */ gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001); /* Set up AOOO: */ - gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000ffff); - gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0xffffffff); + gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003f); + gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003f003f); /* Enable 1K sort: */ - gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001ffff); + gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f); gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4); /* Disable VBIF clock gating. This is to enable AXI running * higher frequency than GPU: @@ -162,23 +186,32 @@ static int a3xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001); /* Enable Clock gating: */ - gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff); - - /* Set the OCMEM base address for A330 */ -//TODO: -// if (adreno_is_a330(adreno_gpu)) { -// gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR, -// (unsigned int)(a3xx_gpu->ocmem_base >> 14)); -// } + if (adreno_is_a320(adreno_gpu)) + gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff); + else if (adreno_is_a330v2(adreno_gpu)) + gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa); + else if (adreno_is_a330(adreno_gpu)) + gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbffcffff); + + if (adreno_is_a330v2(adreno_gpu)) + gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x05515455); + else if (adreno_is_a330(adreno_gpu)) + gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000); + + /* Set the OCMEM base address for A330, etc */ + if (a3xx_gpu->ocmem_hdl) { + gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR, + (unsigned int)(a3xx_gpu->ocmem_base >> 14)); + } /* Turn on performance counters: */ gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01); - /* Set SP perfcounter 7 to count SP_FS_FULL_ALU_INSTRUCTIONS - * we will use this to augment our hang detection: - */ - gpu_write(gpu, REG_A3XX_SP_PERFCOUNTER7_SELECT, - SP_FS_FULL_ALU_INSTRUCTIONS); + /* Enable the perfcntrs that we use.. */ + for (i = 0; i < gpu->num_perfcntrs; i++) { + const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i]; + gpu_write(gpu, perfcntr->select_reg, perfcntr->select_val); + } gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK); @@ -219,7 +252,7 @@ static int a3xx_hw_init(struct msm_gpu *gpu) /* Load PM4: */ ptr = (uint32_t *)(adreno_gpu->pm4->data); len = adreno_gpu->pm4->size / 4; - DBG("loading PM4 ucode version: %u", ptr[0]); + DBG("loading PM4 ucode version: %x", ptr[1]); gpu_write(gpu, REG_AXXX_CP_DEBUG, AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE | @@ -231,19 +264,26 @@ static int a3xx_hw_init(struct msm_gpu *gpu) /* Load PFP: */ ptr = (uint32_t *)(adreno_gpu->pfp->data); len = adreno_gpu->pfp->size / 4; - DBG("loading PFP ucode version: %u", ptr[0]); + DBG("loading PFP ucode version: %x", ptr[5]); gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0); for (i = 1; i < len; i++) gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]); /* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */ - if (adreno_is_a305(adreno_gpu) || adreno_is_a320(adreno_gpu)) + if (adreno_is_a305(adreno_gpu) || adreno_is_a320(adreno_gpu)) { gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS, AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(2) | AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(6) | AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(14)); - + } else if (adreno_is_a330(adreno_gpu)) { + /* NOTE: this (value take from downstream android driver) + * includes some bits outside of the known bitfields. But + * A330 has this "MERCIU queue" thing too, which might + * explain a new bitfield or reshuffling: + */ + gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS, 0x003e2008); + } /* clear ME_HALT to start micro engine */ gpu_write(gpu, REG_AXXX_CP_ME_CNTL, 0); @@ -253,6 +293,17 @@ static int a3xx_hw_init(struct msm_gpu *gpu) return 0; } +static void a3xx_recover(struct msm_gpu *gpu) +{ + /* dump registers before resetting gpu, if enabled: */ + if (hang_debug) + a3xx_dump(gpu); + gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 1); + gpu_read(gpu, REG_A3XX_RBBM_SW_RESET_CMD); + gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0); + adreno_recover(gpu); +} + static void a3xx_destroy(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); @@ -261,27 +312,24 @@ static void a3xx_destroy(struct msm_gpu *gpu) DBG("%s", gpu->name); adreno_gpu_cleanup(adreno_gpu); - put_device(&a3xx_gpu->pdev->dev); + +#ifdef CONFIG_MSM_OCMEM + if (a3xx_gpu->ocmem_base) + ocmem_free(OCMEM_GRAPHICS, a3xx_gpu->ocmem_hdl); +#endif + kfree(a3xx_gpu); } static void a3xx_idle(struct msm_gpu *gpu) { - unsigned long t; - /* wait for ringbuffer to drain: */ adreno_idle(gpu); - t = jiffies + ADRENO_IDLE_TIMEOUT; - /* then wait for GPU to finish: */ - do { - uint32_t rbbm_status = gpu_read(gpu, REG_A3XX_RBBM_STATUS); - if (!(rbbm_status & A3XX_RBBM_STATUS_GPU_BUSY)) - return; - } while(time_before(jiffies, t)); - - DRM_ERROR("timeout waiting for %s to idle!\n", gpu->name); + if (spin_until(!(gpu_read(gpu, REG_A3XX_RBBM_STATUS) & + A3XX_RBBM_STATUS_GPU_BUSY))) + DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name); /* TODO maybe we need to reset GPU here to recover from hang? */ } @@ -302,7 +350,6 @@ static irqreturn_t a3xx_irq(struct msm_gpu *gpu) return IRQ_HANDLED; } -#ifdef CONFIG_DEBUG_FS static const unsigned int a3xx_registers[] = { 0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027, 0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c, @@ -342,11 +389,18 @@ static const unsigned int a3xx_registers[] = { 0x303c, 0x303c, 0x305e, 0x305f, }; +#ifdef CONFIG_DEBUG_FS static void a3xx_show(struct msm_gpu *gpu, struct seq_file *m) { + struct drm_device *dev = gpu->dev; int i; adreno_show(gpu, m); + + mutex_lock(&dev->struct_mutex); + + gpu->funcs->pm_resume(gpu); + seq_printf(m, "status: %08x\n", gpu_read(gpu, REG_A3XX_RBBM_STATUS)); @@ -362,16 +416,43 @@ static void a3xx_show(struct msm_gpu *gpu, struct seq_file *m) seq_printf(m, "IO:R %08x %08x\n", addr<<2, val); } } + + gpu->funcs->pm_suspend(gpu); + + mutex_unlock(&dev->struct_mutex); } #endif +/* would be nice to not have to duplicate the _show() stuff with printk(): */ +static void a3xx_dump(struct msm_gpu *gpu) +{ + int i; + + adreno_dump(gpu); + printk("status: %08x\n", + gpu_read(gpu, REG_A3XX_RBBM_STATUS)); + + /* dump these out in a form that can be parsed by demsm: */ + printk("IO:region %s 00000000 00020000\n", gpu->name); + for (i = 0; i < ARRAY_SIZE(a3xx_registers); i += 2) { + uint32_t start = a3xx_registers[i]; + uint32_t end = a3xx_registers[i+1]; + uint32_t addr; + + for (addr = start; addr <= end; addr++) { + uint32_t val = gpu_read(gpu, addr); + printk("IO:R %08x %08x\n", addr<<2, val); + } + } +} + static const struct adreno_gpu_funcs funcs = { .base = { .get_param = adreno_get_param, .hw_init = a3xx_hw_init, .pm_suspend = msm_gpu_pm_suspend, .pm_resume = msm_gpu_pm_resume, - .recover = adreno_recover, + .recover = a3xx_recover, .last_fence = adreno_last_fence, .submit = adreno_submit, .flush = adreno_flush, @@ -384,11 +465,20 @@ static const struct adreno_gpu_funcs funcs = { }, }; +static const struct msm_gpu_perfcntr perfcntrs[] = { + { REG_A3XX_SP_PERFCOUNTER6_SELECT, REG_A3XX_RBBM_PERFCTR_SP_6_LO, + SP_ALU_ACTIVE_CYCLES, "ALUACTIVE" }, + { REG_A3XX_SP_PERFCOUNTER7_SELECT, REG_A3XX_RBBM_PERFCTR_SP_7_LO, + SP_FS_FULL_ALU_INSTRUCTIONS, "ALUFULL" }, +}; + struct msm_gpu *a3xx_gpu_init(struct drm_device *dev) { struct a3xx_gpu *a3xx_gpu = NULL; + struct adreno_gpu *adreno_gpu; struct msm_gpu *gpu; - struct platform_device *pdev = a3xx_pdev; + struct msm_drm_private *priv = dev->dev_private; + struct platform_device *pdev = priv->gpu_pdev; struct adreno_platform_config *config; int ret; @@ -406,24 +496,57 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev) goto fail; } - gpu = &a3xx_gpu->base.base; + adreno_gpu = &a3xx_gpu->base; + gpu = &adreno_gpu->base; - get_device(&pdev->dev); a3xx_gpu->pdev = pdev; gpu->fast_rate = config->fast_rate; gpu->slow_rate = config->slow_rate; gpu->bus_freq = config->bus_freq; +#ifdef CONFIG_MSM_BUS_SCALING + gpu->bus_scale_table = config->bus_scale_table; +#endif DBG("fast_rate=%u, slow_rate=%u, bus_freq=%u", gpu->fast_rate, gpu->slow_rate, gpu->bus_freq); - ret = adreno_gpu_init(dev, pdev, &a3xx_gpu->base, - &funcs, config->rev); + gpu->perfcntrs = perfcntrs; + gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs); + + ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, config->rev); if (ret) goto fail; - return &a3xx_gpu->base.base; + /* if needed, allocate gmem: */ + if (adreno_is_a330(adreno_gpu)) { +#ifdef CONFIG_MSM_OCMEM + /* TODO this is different/missing upstream: */ + struct ocmem_buf *ocmem_hdl = + ocmem_allocate(OCMEM_GRAPHICS, adreno_gpu->gmem); + + a3xx_gpu->ocmem_hdl = ocmem_hdl; + a3xx_gpu->ocmem_base = ocmem_hdl->addr; + adreno_gpu->gmem = ocmem_hdl->len; + DBG("using %dK of OCMEM at 0x%08x", adreno_gpu->gmem / 1024, + a3xx_gpu->ocmem_base); +#endif + } + + if (!gpu->mmu) { + /* TODO we think it is possible to configure the GPU to + * restrict access to VRAM carveout. But the required + * registers are unknown. For now just bail out and + * limp along with just modesetting. If it turns out + * to not be possible to restrict access, then we must + * implement a cmdstream validator. + */ + dev_err(dev->dev, "No memory protection without IOMMU\n"); + ret = -ENXIO; + goto fail; + } + + return gpu; fail: if (a3xx_gpu) @@ -436,19 +559,66 @@ fail: * The a3xx device: */ -static int a3xx_probe(struct platform_device *pdev) +#if defined(CONFIG_MSM_BUS_SCALING) && !defined(CONFIG_OF) +# include <mach/kgsl.h> +#endif + +static void set_gpu_pdev(struct drm_device *dev, + struct platform_device *pdev) +{ + struct msm_drm_private *priv = dev->dev_private; + priv->gpu_pdev = pdev; +} + +static int a3xx_bind(struct device *dev, struct device *master, void *data) { static struct adreno_platform_config config = {}; #ifdef CONFIG_OF - /* TODO */ + struct device_node *child, *node = dev->of_node; + u32 val; + int ret; + + ret = of_property_read_u32(node, "qcom,chipid", &val); + if (ret) { + dev_err(dev, "could not find chipid: %d\n", ret); + return ret; + } + + config.rev = ADRENO_REV((val >> 24) & 0xff, + (val >> 16) & 0xff, (val >> 8) & 0xff, val & 0xff); + + /* find clock rates: */ + config.fast_rate = 0; + config.slow_rate = ~0; + for_each_child_of_node(node, child) { + if (of_device_is_compatible(child, "qcom,gpu-pwrlevels")) { + struct device_node *pwrlvl; + for_each_child_of_node(child, pwrlvl) { + ret = of_property_read_u32(pwrlvl, "qcom,gpu-freq", &val); + if (ret) { + dev_err(dev, "could not find gpu-freq: %d\n", ret); + return ret; + } + config.fast_rate = max(config.fast_rate, val); + config.slow_rate = min(config.slow_rate, val); + } + } + } + + if (!config.fast_rate) { + dev_err(dev, "could not find clk rates\n"); + return -ENXIO; + } + #else + struct kgsl_device_platform_data *pdata = dev->platform_data; uint32_t version = socinfo_get_version(); if (cpu_is_apq8064ab()) { config.fast_rate = 450000000; config.slow_rate = 27000000; config.bus_freq = 4; config.rev = ADRENO_REV(3, 2, 1, 0); - } else if (cpu_is_apq8064() || cpu_is_msm8960ab()) { + } else if (cpu_is_apq8064()) { config.fast_rate = 400000000; config.slow_rate = 27000000; config.bus_freq = 4; @@ -461,6 +631,16 @@ static int a3xx_probe(struct platform_device *pdev) else config.rev = ADRENO_REV(3, 2, 0, 0); + } else if (cpu_is_msm8960ab()) { + config.fast_rate = 400000000; + config.slow_rate = 320000000; + config.bus_freq = 4; + + if (SOCINFO_VERSION_MINOR(version) == 0) + config.rev = ADRENO_REV(3, 2, 1, 0); + else + config.rev = ADRENO_REV(3, 2, 1, 1); + } else if (cpu_is_msm8930()) { config.fast_rate = 400000000; config.slow_rate = 27000000; @@ -473,22 +653,49 @@ static int a3xx_probe(struct platform_device *pdev) config.rev = ADRENO_REV(3, 0, 5, 0); } +# ifdef CONFIG_MSM_BUS_SCALING + config.bus_scale_table = pdata->bus_scale_table; +# endif #endif - pdev->dev.platform_data = &config; - a3xx_pdev = pdev; + dev->platform_data = &config; + set_gpu_pdev(dev_get_drvdata(master), to_platform_device(dev)); return 0; } +static void a3xx_unbind(struct device *dev, struct device *master, + void *data) +{ + set_gpu_pdev(dev_get_drvdata(master), NULL); +} + +static const struct component_ops a3xx_ops = { + .bind = a3xx_bind, + .unbind = a3xx_unbind, +}; + +static int a3xx_probe(struct platform_device *pdev) +{ + return component_add(&pdev->dev, &a3xx_ops); +} + static int a3xx_remove(struct platform_device *pdev) { - a3xx_pdev = NULL; + component_del(&pdev->dev, &a3xx_ops); return 0; } +static const struct of_device_id dt_match[] = { + { .compatible = "qcom,kgsl-3d0" }, + {} +}; + static struct platform_driver a3xx_driver = { .probe = a3xx_probe, .remove = a3xx_remove, - .driver.name = "kgsl-3d0", + .driver = { + .name = "kgsl-3d0", + .of_match_table = dt_match, + }, }; void __init a3xx_register(void) |
