diff options
Diffstat (limited to 'drivers/gpu/drm/msm')
33 files changed, 1869 insertions, 259 deletions
diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig index c69d1e07a3a..f1238896785 100644 --- a/drivers/gpu/drm/msm/Kconfig +++ b/drivers/gpu/drm/msm/Kconfig @@ -3,7 +3,7 @@ config DRM_MSM tristate "MSM DRM" depends on DRM depends on MSM_IOMMU - depends on (ARCH_MSM && ARCH_MSM8960) || (ARM && COMPILE_TEST) + depends on ARCH_QCOM || (ARM && COMPILE_TEST) select DRM_KMS_HELPER select SHMEM select TMPFS diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile index 4f977a593be..93ca49c8df4 100644 --- a/drivers/gpu/drm/msm/Makefile +++ b/drivers/gpu/drm/msm/Makefile @@ -7,6 +7,7 @@ msm-y := \ adreno/adreno_gpu.o \ adreno/a3xx_gpu.o \ hdmi/hdmi.o \ + hdmi/hdmi_audio.o \ hdmi/hdmi_bridge.o \ hdmi/hdmi_connector.o \ hdmi/hdmi_i2c.o \ @@ -33,6 +34,8 @@ msm-y := \ msm_gem_submit.o \ msm_gpu.o \ msm_iommu.o \ + msm_perf.o \ + msm_rd.o \ msm_ringbuffer.o msm-$(CONFIG_DRM_MSM_FBDEV) += msm_fbdev.o diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c index 461df93e825..942e09d898a 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c @@ -35,7 +35,11 @@ A3XX_INT0_CP_AHB_ERROR_HALT | \ A3XX_INT0_UCHE_OOB_ACCESS) -static struct platform_device *a3xx_pdev; + +static bool hang_debug = false; +MODULE_PARM_DESC(hang_debug, "Dump registers when hang is detected (can be slow!)"); +module_param_named(hang_debug, hang_debug, bool, 0600); +static void a3xx_dump(struct msm_gpu *gpu); static void a3xx_me_init(struct msm_gpu *gpu) { @@ -203,11 +207,11 @@ static int a3xx_hw_init(struct msm_gpu *gpu) /* Turn on performance counters: */ gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01); - /* Set SP perfcounter 7 to count SP_FS_FULL_ALU_INSTRUCTIONS - * we will use this to augment our hang detection: - */ - gpu_write(gpu, REG_A3XX_SP_PERFCOUNTER7_SELECT, - SP_FS_FULL_ALU_INSTRUCTIONS); + /* Enable the perfcntrs that we use.. */ + for (i = 0; i < gpu->num_perfcntrs; i++) { + const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i]; + gpu_write(gpu, perfcntr->select_reg, perfcntr->select_val); + } gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK); @@ -291,6 +295,9 @@ static int a3xx_hw_init(struct msm_gpu *gpu) static void a3xx_recover(struct msm_gpu *gpu) { + /* dump registers before resetting gpu, if enabled: */ + if (hang_debug) + a3xx_dump(gpu); gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 1); gpu_read(gpu, REG_A3XX_RBBM_SW_RESET_CMD); gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0); @@ -311,27 +318,18 @@ static void a3xx_destroy(struct msm_gpu *gpu) ocmem_free(OCMEM_GRAPHICS, a3xx_gpu->ocmem_hdl); #endif - put_device(&a3xx_gpu->pdev->dev); kfree(a3xx_gpu); } static void a3xx_idle(struct msm_gpu *gpu) { - unsigned long t; - /* wait for ringbuffer to drain: */ adreno_idle(gpu); - t = jiffies + ADRENO_IDLE_TIMEOUT; - /* then wait for GPU to finish: */ - do { - uint32_t rbbm_status = gpu_read(gpu, REG_A3XX_RBBM_STATUS); - if (!(rbbm_status & A3XX_RBBM_STATUS_GPU_BUSY)) - return; - } while(time_before(jiffies, t)); - - DRM_ERROR("timeout waiting for %s to idle!\n", gpu->name); + if (spin_until(!(gpu_read(gpu, REG_A3XX_RBBM_STATUS) & + A3XX_RBBM_STATUS_GPU_BUSY))) + DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name); /* TODO maybe we need to reset GPU here to recover from hang? */ } @@ -352,7 +350,6 @@ static irqreturn_t a3xx_irq(struct msm_gpu *gpu) return IRQ_HANDLED; } -#ifdef CONFIG_DEBUG_FS static const unsigned int a3xx_registers[] = { 0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027, 0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c, @@ -392,11 +389,18 @@ static const unsigned int a3xx_registers[] = { 0x303c, 0x303c, 0x305e, 0x305f, }; +#ifdef CONFIG_DEBUG_FS static void a3xx_show(struct msm_gpu *gpu, struct seq_file *m) { + struct drm_device *dev = gpu->dev; int i; adreno_show(gpu, m); + + mutex_lock(&dev->struct_mutex); + + gpu->funcs->pm_resume(gpu); + seq_printf(m, "status: %08x\n", gpu_read(gpu, REG_A3XX_RBBM_STATUS)); @@ -412,9 +416,36 @@ static void a3xx_show(struct msm_gpu *gpu, struct seq_file *m) seq_printf(m, "IO:R %08x %08x\n", addr<<2, val); } } + + gpu->funcs->pm_suspend(gpu); + + mutex_unlock(&dev->struct_mutex); } #endif +/* would be nice to not have to duplicate the _show() stuff with printk(): */ +static void a3xx_dump(struct msm_gpu *gpu) +{ + int i; + + adreno_dump(gpu); + printk("status: %08x\n", + gpu_read(gpu, REG_A3XX_RBBM_STATUS)); + + /* dump these out in a form that can be parsed by demsm: */ + printk("IO:region %s 00000000 00020000\n", gpu->name); + for (i = 0; i < ARRAY_SIZE(a3xx_registers); i += 2) { + uint32_t start = a3xx_registers[i]; + uint32_t end = a3xx_registers[i+1]; + uint32_t addr; + + for (addr = start; addr <= end; addr++) { + uint32_t val = gpu_read(gpu, addr); + printk("IO:R %08x %08x\n", addr<<2, val); + } + } +} + static const struct adreno_gpu_funcs funcs = { .base = { .get_param = adreno_get_param, @@ -434,12 +465,20 @@ static const struct adreno_gpu_funcs funcs = { }, }; +static const struct msm_gpu_perfcntr perfcntrs[] = { + { REG_A3XX_SP_PERFCOUNTER6_SELECT, REG_A3XX_RBBM_PERFCTR_SP_6_LO, + SP_ALU_ACTIVE_CYCLES, "ALUACTIVE" }, + { REG_A3XX_SP_PERFCOUNTER7_SELECT, REG_A3XX_RBBM_PERFCTR_SP_7_LO, + SP_FS_FULL_ALU_INSTRUCTIONS, "ALUFULL" }, +}; + struct msm_gpu *a3xx_gpu_init(struct drm_device *dev) { struct a3xx_gpu *a3xx_gpu = NULL; struct adreno_gpu *adreno_gpu; struct msm_gpu *gpu; - struct platform_device *pdev = a3xx_pdev; + struct msm_drm_private *priv = dev->dev_private; + struct platform_device *pdev = priv->gpu_pdev; struct adreno_platform_config *config; int ret; @@ -460,7 +499,6 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev) adreno_gpu = &a3xx_gpu->base; gpu = &adreno_gpu->base; - get_device(&pdev->dev); a3xx_gpu->pdev = pdev; gpu->fast_rate = config->fast_rate; @@ -473,6 +511,9 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev) DBG("fast_rate=%u, slow_rate=%u, bus_freq=%u", gpu->fast_rate, gpu->slow_rate, gpu->bus_freq); + gpu->perfcntrs = perfcntrs; + gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs); + ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, config->rev); if (ret) goto fail; @@ -522,17 +563,24 @@ fail: # include <mach/kgsl.h> #endif -static int a3xx_probe(struct platform_device *pdev) +static void set_gpu_pdev(struct drm_device *dev, + struct platform_device *pdev) +{ + struct msm_drm_private *priv = dev->dev_private; + priv->gpu_pdev = pdev; +} + +static int a3xx_bind(struct device *dev, struct device *master, void *data) { static struct adreno_platform_config config = {}; #ifdef CONFIG_OF - struct device_node *child, *node = pdev->dev.of_node; + struct device_node *child, *node = dev->of_node; u32 val; int ret; ret = of_property_read_u32(node, "qcom,chipid", &val); if (ret) { - dev_err(&pdev->dev, "could not find chipid: %d\n", ret); + dev_err(dev, "could not find chipid: %d\n", ret); return ret; } @@ -548,7 +596,7 @@ static int a3xx_probe(struct platform_device *pdev) for_each_child_of_node(child, pwrlvl) { ret = of_property_read_u32(pwrlvl, "qcom,gpu-freq", &val); if (ret) { - dev_err(&pdev->dev, "could not find gpu-freq: %d\n", ret); + dev_err(dev, "could not find gpu-freq: %d\n", ret); return ret; } config.fast_rate = max(config.fast_rate, val); @@ -558,12 +606,12 @@ static int a3xx_probe(struct platform_device *pdev) } if (!config.fast_rate) { - dev_err(&pdev->dev, "could not find clk rates\n"); + dev_err(dev, "could not find clk rates\n"); return -ENXIO; } #else - struct kgsl_device_platform_data *pdata = pdev->dev.platform_data; + struct kgsl_device_platform_data *pdata = dev->platform_data; uint32_t version = socinfo_get_version(); if (cpu_is_apq8064ab()) { config.fast_rate = 450000000; @@ -609,14 +657,30 @@ static int a3xx_probe(struct platform_device *pdev) config.bus_scale_table = pdata->bus_scale_table; # endif #endif - pdev->dev.platform_data = &config; - a3xx_pdev = pdev; + dev->platform_data = &config; + set_gpu_pdev(dev_get_drvdata(master), to_platform_device(dev)); return 0; } +static void a3xx_unbind(struct device *dev, struct device *master, + void *data) +{ + set_gpu_pdev(dev_get_drvdata(master), NULL); +} + +static const struct component_ops a3xx_ops = { + .bind = a3xx_bind, + .unbind = a3xx_unbind, +}; + +static int a3xx_probe(struct platform_device *pdev) +{ + return component_add(&pdev->dev, &a3xx_ops); +} + static int a3xx_remove(struct platform_device *pdev) { - a3xx_pdev = NULL; + component_del(&pdev->dev, &a3xx_ops); return 0; } @@ -624,7 +688,6 @@ static const struct of_device_id dt_match[] = { { .compatible = "qcom,kgsl-3d0" }, {} }; -MODULE_DEVICE_TABLE(of, dt_match); static struct platform_driver a3xx_driver = { .probe = a3xx_probe, diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index d321099abdd..28ca8cd8b09 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -73,6 +73,12 @@ int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value) case MSM_PARAM_GMEM_SIZE: *value = adreno_gpu->gmem; return 0; + case MSM_PARAM_CHIP_ID: + *value = adreno_gpu->rev.patchid | + (adreno_gpu->rev.minor << 8) | + (adreno_gpu->rev.major << 16) | + (adreno_gpu->rev.core << 24); + return 0; default: DBG("%s: invalid param: %u", gpu->name, param); return -EINVAL; @@ -225,19 +231,11 @@ void adreno_flush(struct msm_gpu *gpu) void adreno_idle(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - uint32_t rptr, wptr = get_wptr(gpu->rb); - unsigned long t; - - t = jiffies + ADRENO_IDLE_TIMEOUT; - - /* then wait for CP to drain ringbuffer: */ - do { - rptr = adreno_gpu->memptrs->rptr; - if (rptr == wptr) - return; - } while(time_before(jiffies, t)); + uint32_t wptr = get_wptr(gpu->rb); - DRM_ERROR("%s: timeout waiting to drain ringbuffer!\n", gpu->name); + /* wait for CP to drain ringbuffer: */ + if (spin_until(adreno_gpu->memptrs->rptr == wptr)) + DRM_ERROR("%s: timeout waiting to drain ringbuffer!\n", gpu->name); /* TODO maybe we need to reset GPU here to recover from hang? */ } @@ -260,22 +258,37 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m) } #endif -void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords) +/* would be nice to not have to duplicate the _show() stuff with printk(): */ +void adreno_dump(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - uint32_t freedwords; - unsigned long t = jiffies + ADRENO_IDLE_TIMEOUT; - do { - uint32_t size = gpu->rb->size / 4; - uint32_t wptr = get_wptr(gpu->rb); - uint32_t rptr = adreno_gpu->memptrs->rptr; - freedwords = (rptr + (size - 1) - wptr) % size; - - if (time_after(jiffies, t)) { - DRM_ERROR("%s: timeout waiting for ringbuffer space\n", gpu->name); - break; - } - } while(freedwords < ndwords); + + printk("revision: %d (%d.%d.%d.%d)\n", + adreno_gpu->info->revn, adreno_gpu->rev.core, + adreno_gpu->rev.major, adreno_gpu->rev.minor, + adreno_gpu->rev.patchid); + + printk("fence: %d/%d\n", adreno_gpu->memptrs->fence, + gpu->submitted_fence); + printk("rptr: %d\n", adreno_gpu->memptrs->rptr); + printk("wptr: %d\n", adreno_gpu->memptrs->wptr); + printk("rb wptr: %d\n", get_wptr(gpu->rb)); + +} + +static uint32_t ring_freewords(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + uint32_t size = gpu->rb->size / 4; + uint32_t wptr = get_wptr(gpu->rb); + uint32_t rptr = adreno_gpu->memptrs->rptr; + return (rptr + (size - 1) - wptr) % size; +} + +void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords) +{ + if (spin_until(ring_freewords(gpu) >= ndwords)) + DRM_ERROR("%s: timeout waiting for ringbuffer space\n", gpu->name); } static const char *iommu_ports[] = { diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index ca11ea4da16..63c36ce3302 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -76,7 +76,20 @@ struct adreno_platform_config { #endif }; -#define ADRENO_IDLE_TIMEOUT (20 * 1000) +#define ADRENO_IDLE_TIMEOUT msecs_to_jiffies(1000) + +#define spin_until(X) ({ \ + int __ret = -ETIMEDOUT; \ + unsigned long __t = jiffies + ADRENO_IDLE_TIMEOUT; \ + do { \ + if (X) { \ + __ret = 0; \ + break; \ + } \ + } while (time_before(jiffies, __t)); \ + __ret; \ +}) + static inline bool adreno_is_a3xx(struct adreno_gpu *gpu) { @@ -114,6 +127,7 @@ void adreno_idle(struct msm_gpu *gpu); #ifdef CONFIG_DEBUG_FS void adreno_show(struct msm_gpu *gpu, struct seq_file *m); #endif +void adreno_dump(struct msm_gpu *gpu); void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords); int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c index 6f1588aa907..7f7aadef8a8 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi.c @@ -17,8 +17,6 @@ #include "hdmi.h" -static struct platform_device *hdmi_pdev; - void hdmi_set_mode(struct hdmi *hdmi, bool power_on) { uint32_t ctrl = 0; @@ -67,7 +65,7 @@ void hdmi_destroy(struct kref *kref) if (hdmi->i2c) hdmi_i2c_destroy(hdmi->i2c); - put_device(&hdmi->pdev->dev); + platform_set_drvdata(hdmi->pdev, NULL); } /* initialize connector */ @@ -75,7 +73,7 @@ struct hdmi *hdmi_init(struct drm_device *dev, struct drm_encoder *encoder) { struct hdmi *hdmi = NULL; struct msm_drm_private *priv = dev->dev_private; - struct platform_device *pdev = hdmi_pdev; + struct platform_device *pdev = priv->hdmi_pdev; struct hdmi_platform_config *config; int i, ret; @@ -95,13 +93,13 @@ struct hdmi *hdmi_init(struct drm_device *dev, struct drm_encoder *encoder) kref_init(&hdmi->refcount); - get_device(&pdev->dev); - hdmi->dev = dev; hdmi->pdev = pdev; hdmi->config = config; hdmi->encoder = encoder; + hdmi_audio_infoframe_init(&hdmi->audio.infoframe); + /* not sure about which phy maps to which msm.. probably I miss some */ if (config->phy_init) hdmi->phy = config->phy_init(hdmi); @@ -228,6 +226,8 @@ struct hdmi *hdmi_init(struct drm_device *dev, struct drm_encoder *encoder) priv->bridges[priv->num_bridges++] = hdmi->bridge; priv->connectors[priv->num_connectors++] = hdmi->connector; + platform_set_drvdata(pdev, hdmi); + return hdmi; fail: @@ -249,17 +249,24 @@ fail: #include <linux/of_gpio.h> -static int hdmi_dev_probe(struct platform_device *pdev) +static void set_hdmi_pdev(struct drm_device *dev, + struct platform_device *pdev) +{ + struct msm_drm_private *priv = dev->dev_private; + priv->hdmi_pdev = pdev; +} + +static int hdmi_bind(struct device *dev, struct device *master, void *data) { static struct hdmi_platform_config config = {}; #ifdef CONFIG_OF - struct device_node *of_node = pdev->dev.of_node; + struct device_node *of_node = dev->of_node; int get_gpio(const char *name) { int gpio = of_get_named_gpio(of_node, name, 0); if (gpio < 0) { - dev_err(&pdev->dev, "failed to get gpio: %s (%d)\n", + dev_err(dev, "failed to get gpio: %s (%d)\n", name, gpio); gpio = -1; } @@ -270,6 +277,7 @@ static int hdmi_dev_probe(struct platform_device *pdev) static const char *hpd_reg_names[] = {"hpd-gdsc", "hpd-5v"}; static const char *pwr_reg_names[] = {"core-vdda", "core-vcc"}; static const char *hpd_clk_names[] = {"iface_clk", "core_clk", "mdp_core_clk"}; + static unsigned long hpd_clk_freq[] = {0, 19200000, 0}; static const char *pwr_clk_names[] = {"extp_clk", "alt_iface_clk"}; config.phy_init = hdmi_phy_8x74_init; @@ -279,6 +287,7 @@ static int hdmi_dev_probe(struct platform_device *pdev) config.pwr_reg_names = pwr_reg_names; config.pwr_reg_cnt = ARRAY_SIZE(pwr_reg_names); config.hpd_clk_names = hpd_clk_names; + config.hpd_freq = hpd_clk_freq; config.hpd_clk_cnt = ARRAY_SIZE(hpd_clk_names); config.pwr_clk_names = pwr_clk_names; config.pwr_clk_cnt = ARRAY_SIZE(pwr_clk_names); @@ -305,7 +314,7 @@ static int hdmi_dev_probe(struct platform_device *pdev) config.ddc_data_gpio = 71; config.hpd_gpio = 72; config.mux_en_gpio = -1; - config.mux_sel_gpio = 13 + NR_GPIO_IRQS; + config.mux_sel_gpio = -1; } else if (cpu_is_msm8960() || cpu_is_msm8960ab()) { static const char *hpd_reg_names[] = {"8921_hdmi_mvs"}; config.phy_init = hdmi_phy_8960_init; @@ -336,14 +345,30 @@ static int hdmi_dev_probe(struct platform_device *pdev) config.mux_sel_gpio = -1; } #endif - pdev->dev.platform_data = &config; - hdmi_pdev = pdev; + dev->platform_data = &config; + set_hdmi_pdev(dev_get_drvdata(master), to_platform_device(dev)); return 0; } +static void hdmi_unbind(struct device *dev, struct device *master, + void *data) +{ + set_hdmi_pdev(dev_get_drvdata(master), NULL); +} + +static const struct component_ops hdmi_ops = { + .bind = hdmi_bind, + .unbind = hdmi_unbind, +}; + +static int hdmi_dev_probe(struct platform_device *pdev) +{ + return component_add(&pdev->dev, &hdmi_ops); +} + static int hdmi_dev_remove(struct platform_device *pdev) { - hdmi_pdev = NULL; + component_del(&pdev->dev, &hdmi_ops); return 0; } @@ -351,7 +376,6 @@ static const struct of_device_id dt_match[] = { { .compatible = "qcom,hdmi-tx" }, {} }; -MODULE_DEVICE_TABLE(of, dt_match); static struct platform_driver hdmi_driver = { .probe = hdmi_dev_probe, diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.h b/drivers/gpu/drm/msm/hdmi/hdmi.h index 41b29add70b..9d7723c6528 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi.h +++ b/drivers/gpu/drm/msm/hdmi/hdmi.h @@ -22,6 +22,7 @@ #include <linux/clk.h> #include <linux/platform_device.h> #include <linux/regulator/consumer.h> +#include <linux/hdmi.h> #include "msm_drv.h" #include "hdmi.xml.h" @@ -30,6 +31,12 @@ struct hdmi_phy; struct hdmi_platform_config; +struct hdmi_audio { + bool enabled; + struct hdmi_audio_infoframe infoframe; + int rate; +}; + struct hdmi { struct kref refcount; @@ -38,6 +45,13 @@ struct hdmi { const struct hdmi_platform_config *config; + /* audio state: */ + struct hdmi_audio audio; + + /* video state: */ + bool power_on; + unsigned long int pixclock; + void __iomem *mmio; struct regulator *hpd_regs[2]; @@ -73,6 +87,7 @@ struct hdmi_platform_config { /* clks that need to be on for hpd: */ const char **hpd_clk_names; + const long unsigned *hpd_freq; int hpd_clk_cnt; /* clks that need to be on for screen pwr (ie pixel clk): */ @@ -132,6 +147,17 @@ struct hdmi_phy *hdmi_phy_8x60_init(struct hdmi *hdmi); struct hdmi_phy *hdmi_phy_8x74_init(struct hdmi *hdmi); /* + * audio: + */ + +int hdmi_audio_update(struct hdmi *hdmi); +int hdmi_audio_info_setup(struct hdmi *hdmi, bool enabled, + uint32_t num_of_channels, uint32_t channel_allocation, + uint32_t level_shift, bool down_mix); +void hdmi_audio_set_sample_rate(struct hdmi *hdmi, int rate); + + +/* * hdmi bridge: */ diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_audio.c b/drivers/gpu/drm/msm/hdmi/hdmi_audio.c new file mode 100644 index 00000000000..872485f6013 --- /dev/null +++ b/drivers/gpu/drm/msm/hdmi/hdmi_audio.c @@ -0,0 +1,273 @@ +/* + * Copyright (C) 2013 Red Hat + * Author: Rob Clark <robdclark@gmail.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/hdmi.h> +#include "hdmi.h" + + +/* Supported HDMI Audio channels */ +#define MSM_HDMI_AUDIO_CHANNEL_2 0 +#define MSM_HDMI_AUDIO_CHANNEL_4 1 +#define MSM_HDMI_AUDIO_CHANNEL_6 2 +#define MSM_HDMI_AUDIO_CHANNEL_8 3 + +/* maps MSM_HDMI_AUDIO_CHANNEL_n consts used by audio driver to # of channels: */ +static int nchannels[] = { 2, 4, 6, 8 }; + +/* Supported HDMI Audio sample rates */ +#define MSM_HDMI_SAMPLE_RATE_32KHZ 0 +#define MSM_HDMI_SAMPLE_RATE_44_1KHZ 1 +#define MSM_HDMI_SAMPLE_RATE_48KHZ 2 +#define MSM_HDMI_SAMPLE_RATE_88_2KHZ 3 +#define MSM_HDMI_SAMPLE_RATE_96KHZ 4 +#define MSM_HDMI_SAMPLE_RATE_176_4KHZ 5 +#define MSM_HDMI_SAMPLE_RATE_192KHZ 6 +#define MSM_HDMI_SAMPLE_RATE_MAX 7 + + +struct hdmi_msm_audio_acr { + uint32_t n; /* N parameter for clock regeneration */ + uint32_t cts; /* CTS parameter for clock regeneration */ +}; + +struct hdmi_msm_audio_arcs { + unsigned long int pixclock; + struct hdmi_msm_audio_acr lut[MSM_HDMI_SAMPLE_RATE_MAX]; +}; + +#define HDMI_MSM_AUDIO_ARCS(pclk, ...) { (1000 * (pclk)), __VA_ARGS__ } + +/* Audio constants lookup table for hdmi_msm_audio_acr_setup */ +/* Valid Pixel-Clock rates: 25.2MHz, 27MHz, 27.03MHz, 74.25MHz, 148.5MHz */ +static const struct hdmi_msm_audio_arcs acr_lut[] = { + /* 25.200MHz */ + HDMI_MSM_AUDIO_ARCS(25200, { + {4096, 25200}, {6272, 28000}, {6144, 25200}, {12544, 28000}, + {12288, 25200}, {25088, 28000}, {24576, 25200} }), + /* 27.000MHz */ + HDMI_MSM_AUDIO_ARCS(27000, { + {4096, 27000}, {6272, 30000}, {6144, 27000}, {12544, 30000}, + {12288, 27000}, {25088, 30000}, {24576, 27000} }), + /* 27.027MHz */ + HDMI_MSM_AUDIO_ARCS(27030, { + {4096, 27027}, {6272, 30030}, {6144, 27027}, {12544, 30030}, + {12288, 27027}, {25088, 30030}, {24576, 27027} }), + /* 74.250MHz */ + HDMI_MSM_AUDIO_ARCS(74250, { + {4096, 74250}, {6272, 82500}, {6144, 74250}, {12544, 82500}, + {12288, 74250}, {25088, 82500}, {24576, 74250} }), + /* 148.500MHz */ + HDMI_MSM_AUDIO_ARCS(148500, { + {4096, 148500}, {6272, 165000}, {6144, 148500}, {12544, 165000}, + {12288, 148500}, {25088, 165000}, {24576, 148500} }), +}; + +static const struct hdmi_msm_audio_arcs *get_arcs(unsigned long int pixclock) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(acr_lut); i++) { + const struct hdmi_msm_audio_arcs *arcs = &acr_lut[i]; + if (arcs->pixclock == pixclock) + return arcs; + } + + return NULL; +} + +int hdmi_audio_update(struct hdmi *hdmi) +{ + struct hdmi_audio *audio = &hdmi->audio; + struct hdmi_audio_infoframe *info = &audio->infoframe; + const struct hdmi_msm_audio_arcs *arcs = NULL; + bool enabled = audio->enabled; + uint32_t acr_pkt_ctrl, vbi_pkt_ctrl, aud_pkt_ctrl; + uint32_t infofrm_ctrl, audio_config; + + DBG("audio: enabled=%d, channels=%d, channel_allocation=0x%x, " + "level_shift_value=%d, downmix_inhibit=%d, rate=%d", + audio->enabled, info->channels, info->channel_allocation, + info->level_shift_value, info->downmix_inhibit, audio->rate); + DBG("video: power_on=%d, pixclock=%lu", hdmi->power_on, hdmi->pixclock); + + if (enabled && !(hdmi->power_on && hdmi->pixclock)) { + DBG("disabling audio: no video"); + enabled = false; + } + + if (enabled) { + arcs = get_arcs(hdmi->pixclock); + if (!arcs) { + DBG("disabling audio: unsupported pixclock: %lu", + hdmi->pixclock); + enabled = false; + } + } + + /* Read first before writing */ + acr_pkt_ctrl = hdmi_read(hdmi, REG_HDMI_ACR_PKT_CTRL); + vbi_pkt_ctrl = hdmi_read(hdmi, REG_HDMI_VBI_PKT_CTRL); + aud_pkt_ctrl = hdmi_read(hdmi, REG_HDMI_AUDIO_PKT_CTRL1); + infofrm_ctrl = hdmi_read(hdmi, REG_HDMI_INFOFRAME_CTRL0); + audio_config = hdmi_read(hdmi, REG_HDMI_AUDIO_CFG); + + /* Clear N/CTS selection bits */ + acr_pkt_ctrl &= ~HDMI_ACR_PKT_CTRL_SELECT__MASK; + + if (enabled) { + uint32_t n, cts, multiplier; + enum hdmi_acr_cts select; + uint8_t buf[14]; + + n = arcs->lut[audio->rate].n; + cts = arcs->lut[audio->rate].cts; + + if ((MSM_HDMI_SAMPLE_RATE_192KHZ == audio->rate) || + (MSM_HDMI_SAMPLE_RATE_176_4KHZ == audio->rate)) { + multiplier = 4; + n >>= 2; /* divide N by 4 and use multiplier */ + } else if ((MSM_HDMI_SAMPLE_RATE_96KHZ == audio->rate) || + (MSM_HDMI_SAMPLE_RATE_88_2KHZ == audio->rate)) { + multiplier = 2; + n >>= 1; /* divide N by 2 and use multiplier */ + } else { + multiplier = 1; + } + + DBG("n=%u, cts=%u, multiplier=%u", n, cts, multiplier); + + acr_pkt_ctrl |= HDMI_ACR_PKT_CTRL_SOURCE; + acr_pkt_ctrl |= HDMI_ACR_PKT_CTRL_AUDIO_PRIORITY; + acr_pkt_ctrl |= HDMI_ACR_PKT_CTRL_N_MULTIPLIER(multiplier); + + if ((MSM_HDMI_SAMPLE_RATE_48KHZ == audio->rate) || + (MSM_HDMI_SAMPLE_RATE_96KHZ == audio->rate) || + (MSM_HDMI_SAMPLE_RATE_192KHZ == audio->rate)) + select = ACR_48; + else if ((MSM_HDMI_SAMPLE_RATE_44_1KHZ == audio->rate) || + (MSM_HDMI_SAMPLE_RATE_88_2KHZ == audio->rate) || + (MSM_HDMI_SAMPLE_RATE_176_4KHZ == audio->rate)) + select = ACR_44; + else /* default to 32k */ + select = ACR_32; + + acr_pkt_ctrl |= HDMI_ACR_PKT_CTRL_SELECT(select); + + hdmi_write(hdmi, REG_HDMI_ACR_0(select - 1), + HDMI_ACR_0_CTS(cts)); + hdmi_write(hdmi, REG_HDMI_ACR_1(select - 1), + HDMI_ACR_1_N(n)); + + hdmi_write(hdmi, REG_HDMI_AUDIO_PKT_CTRL2, + COND(info->channels != 2, HDMI_AUDIO_PKT_CTRL2_LAYOUT) | + HDMI_AUDIO_PKT_CTRL2_OVERRIDE); + + acr_pkt_ctrl |= HDMI_ACR_PKT_CTRL_CONT; + acr_pkt_ctrl |= HDMI_ACR_PKT_CTRL_SEND; + + /* configure infoframe: */ + hdmi_audio_infoframe_pack(info, buf, sizeof(buf)); + hdmi_write(hdmi, REG_HDMI_AUDIO_INFO0, + (buf[3] << 0) || (buf[4] << 8) || + (buf[5] << 16) || (buf[6] << 24)); + hdmi_write(hdmi, REG_HDMI_AUDIO_INFO1, + (buf[7] << 0) || (buf[8] << 8)); + + hdmi_write(hdmi, REG_HDMI_GC, 0); + + vbi_pkt_ctrl |= HDMI_VBI_PKT_CTRL_GC_ENABLE; + vbi_pkt_ctrl |= HDMI_VBI_PKT_CTRL_GC_EVERY_FRAME; + + aud_pkt_ctrl |= HDMI_AUDIO_PKT_CTRL1_AUDIO_SAMPLE_SEND; + + infofrm_ctrl |= HDMI_INFOFRAME_CTRL0_AUDIO_INFO_SEND; + infofrm_ctrl |= HDMI_INFOFRAME_CTRL0_AUDIO_INFO_CONT; + infofrm_ctrl |= HDMI_INFOFRAME_CTRL0_AUDIO_INFO_SOURCE; + infofrm_ctrl |= HDMI_INFOFRAME_CTRL0_AUDIO_INFO_UPDATE; + + audio_config &= ~HDMI_AUDIO_CFG_FIFO_WATERMARK__MASK; + audio_config |= HDMI_AUDIO_CFG_FIFO_WATERMARK(4); + audio_config |= HDMI_AUDIO_CFG_ENGINE_ENABLE; + } else { + hdmi_write(hdmi, REG_HDMI_GC, HDMI_GC_MUTE); + acr_pkt_ctrl &= ~HDMI_ACR_PKT_CTRL_CONT; + acr_pkt_ctrl &= ~HDMI_ACR_PKT_CTRL_SEND; + vbi_pkt_ctrl &= ~HDMI_VBI_PKT_CTRL_GC_ENABLE; + vbi_pkt_ctrl &= ~HDMI_VBI_PKT_CTRL_GC_EVERY_FRAME; + aud_pkt_ctrl &= ~HDMI_AUDIO_PKT_CTRL1_AUDIO_SAMPLE_SEND; + infofrm_ctrl &= ~HDMI_INFOFRAME_CTRL0_AUDIO_INFO_SEND; + infofrm_ctrl &= ~HDMI_INFOFRAME_CTRL0_AUDIO_INFO_CONT; + infofrm_ctrl &= ~HDMI_INFOFRAME_CTRL0_AUDIO_INFO_SOURCE; + infofrm_ctrl &= ~HDMI_INFOFRAME_CTRL0_AUDIO_INFO_UPDATE; + audio_config &= ~HDMI_AUDIO_CFG_ENGINE_ENABLE; + } + + hdmi_write(hdmi, REG_HDMI_ACR_PKT_CTRL, acr_pkt_ctrl); + hdmi_write(hdmi, REG_HDMI_VBI_PKT_CTRL, vbi_pkt_ctrl); + hdmi_write(hdmi, REG_HDMI_AUDIO_PKT_CTRL1, aud_pkt_ctrl); + hdmi_write(hdmi, REG_HDMI_INFOFRAME_CTRL0, infofrm_ctrl); + + hdmi_write(hdmi, REG_HDMI_AUD_INT, + COND(enabled, HDMI_AUD_INT_AUD_FIFO_URUN_INT) | + COND(enabled, HDMI_AUD_INT_AUD_SAM_DROP_INT)); + + hdmi_write(hdmi, REG_HDMI_AUDIO_CFG, audio_config); + + + DBG("audio %sabled", enabled ? "en" : "dis"); + + return 0; +} + +int hdmi_audio_info_setup(struct hdmi *hdmi, bool enabled, + uint32_t num_of_channels, uint32_t channel_allocation, + uint32_t level_shift, bool down_mix) +{ + struct hdmi_audio *audio; + + if (!hdmi) + return -ENXIO; + + audio = &hdmi->audio; + + if (num_of_channels >= ARRAY_SIZE(nchannels)) + return -EINVAL; + + audio->enabled = enabled; + audio->infoframe.channels = nchannels[num_of_channels]; + audio->infoframe.channel_allocation = channel_allocation; + audio->infoframe.level_shift_value = level_shift; + audio->infoframe.downmix_inhibit = down_mix; + + return hdmi_audio_update(hdmi); +} + +void hdmi_audio_set_sample_rate(struct hdmi *hdmi, int rate) +{ + struct hdmi_audio *audio; + + if (!hdmi) + return; + + audio = &hdmi->audio; + + if ((rate < 0) || (rate >= MSM_HDMI_SAMPLE_RATE_MAX)) + return; + + audio->rate = rate; + hdmi_audio_update(hdmi); +} diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c b/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c index 7d10e55403c..f6cf745c249 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c @@ -19,11 +19,7 @@ struct hdmi_bridge { struct drm_bridge base; - struct hdmi *hdmi; - bool power_on; - - unsigned long int pixclock; }; #define to_hdmi_bridge(x) container_of(x, struct hdmi_bridge, base) @@ -52,8 +48,8 @@ static void power_on(struct drm_bridge *bridge) } if (config->pwr_clk_cnt > 0) { - DBG("pixclock: %lu", hdmi_bridge->pixclock); - ret = clk_set_rate(hdmi->pwr_clks[0], hdmi_bridge->pixclock); + DBG("pixclock: %lu", hdmi->pixclock); + ret = clk_set_rate(hdmi->pwr_clks[0], hdmi->pixclock); if (ret) { dev_err(dev->dev, "failed to set pixel clk: %s (%d)\n", config->pwr_clk_names[0], ret); @@ -102,12 +98,13 @@ static void hdmi_bridge_pre_enable(struct drm_bridge *bridge) DBG("power up"); - if (!hdmi_bridge->power_on) { + if (!hdmi->power_on) { power_on(bridge); - hdmi_bridge->power_on = true; + hdmi->power_on = true; + hdmi_audio_update(hdmi); } - phy->funcs->powerup(phy, hdmi_bridge->pixclock); + phy->funcs->powerup(phy, hdmi->pixclock); hdmi_set_mode(hdmi, true); } @@ -129,9 +126,10 @@ static void hdmi_bridge_post_disable(struct drm_bridge *bridge) hdmi_set_mode(hdmi, false); phy->funcs->powerdown(phy); - if (hdmi_bridge->power_on) { + if (hdmi->power_on) { power_off(bridge); - hdmi_bridge->power_on = false; + hdmi->power_on = false; + hdmi_audio_update(hdmi); } } @@ -146,7 +144,7 @@ static void hdmi_bridge_mode_set(struct drm_bridge *bridge, mode = adjusted_mode; - hdmi_bridge->pixclock = mode->clock * 1000; + hdmi->pixclock = mode->clock * 1000; hdmi->hdmi_mode = drm_match_cea_mode(mode) > 1; @@ -194,9 +192,7 @@ static void hdmi_bridge_mode_set(struct drm_bridge *bridge, DBG("frame_ctrl=%08x", frame_ctrl); hdmi_write(hdmi, REG_HDMI_FRAME_CTRL, frame_ctrl); - // TODO until we have audio, this might be safest: - if (hdmi->hdmi_mode) - hdmi_write(hdmi, REG_HDMI_GC, HDMI_GC_MUTE); + hdmi_audio_update(hdmi); } static const struct drm_bridge_funcs hdmi_bridge_funcs = { diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_connector.c b/drivers/gpu/drm/msm/hdmi/hdmi_connector.c index 7dedfdd1207..28f7e3ec6c2 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_connector.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_connector.c @@ -127,6 +127,14 @@ static int hpd_enable(struct hdmi_connector *hdmi_connector) } for (i = 0; i < config->hpd_clk_cnt; i++) { + if (config->hpd_freq && config->hpd_freq[i]) { + ret = clk_set_rate(hdmi->hpd_clks[i], + config->hpd_freq[i]); + if (ret) + dev_warn(dev->dev, "failed to set clk %s (%d)\n", + config->hpd_clk_names[i], ret); + } + ret = clk_prepare_enable(hdmi->hpd_clks[i]); if (ret) { dev_err(dev->dev, "failed to enable hpd clk: %s (%d)\n", @@ -247,36 +255,49 @@ void hdmi_connector_irq(struct drm_connector *connector) } } +static enum drm_connector_status detect_reg(struct hdmi *hdmi) +{ + uint32_t hpd_int_status = hdmi_read(hdmi, REG_HDMI_HPD_INT_STATUS); + return (hpd_int_status & HDMI_HPD_INT_STATUS_CABLE_DETECTED) ? + connector_status_connected : connector_status_disconnected; +} + +static enum drm_connector_status detect_gpio(struct hdmi *hdmi) +{ + const struct hdmi_platform_config *config = hdmi->config; + return gpio_get_value(config->hpd_gpio) ? + connector_status_connected : + connector_status_disconnected; +} + static enum drm_connector_status hdmi_connector_detect( struct drm_connector *connector, bool force) { struct hdmi_connector *hdmi_connector = to_hdmi_connector(connector); struct hdmi *hdmi = hdmi_connector->hdmi; - const struct hdmi_platform_config *config = hdmi->config; - uint32_t hpd_int_status; + enum drm_connector_status stat_gpio, stat_reg; int retry = 20; - hpd_int_status = hdmi_read(hdmi, REG_HDMI_HPD_INT_STATUS); + do { + stat_gpio = detect_gpio(hdmi); + stat_reg = detect_reg(hdmi); - /* sense seems to in some cases be momentarily de-asserted, don't - * let that trick us into thinking the monitor is gone: - */ - while (retry-- && !(hpd_int_status & HDMI_HPD_INT_STATUS_CABLE_DETECTED)) { - /* hdmi debounce logic seems to get stuck sometimes, - * read directly the gpio to get a second opinion: - */ - if (gpio_get_value(config->hpd_gpio)) { - DBG("gpio tells us we are connected!"); - hpd_int_status |= HDMI_HPD_INT_STATUS_CABLE_DETECTED; + if (stat_gpio == stat_reg) break; - } + mdelay(10); - hpd_int_status = hdmi_read(hdmi, REG_HDMI_HPD_INT_STATUS); - DBG("status=%08x", hpd_int_status); + } while (--retry); + + /* the status we get from reading gpio seems to be more reliable, + * so trust that one the most if we didn't manage to get hdmi and + * gpio status to agree: + */ + if (stat_gpio != stat_reg) { + DBG("HDMI_HPD_INT_STATUS tells us: %d", stat_reg); + DBG("hpd gpio tells us: %d", stat_gpio); } - return (hpd_int_status & HDMI_HPD_INT_STATUS_CABLE_DETECTED) ? - connector_status_connected : connector_status_disconnected; + return stat_gpio; } static void hdmi_connector_destroy(struct drm_connector *connector) @@ -389,7 +410,8 @@ struct drm_connector *hdmi_connector_init(struct hdmi *hdmi) DRM_MODE_CONNECTOR_HDMIA); drm_connector_helper_add(connector, &hdmi_connector_helper_funcs); - connector->polled = DRM_CONNECTOR_POLL_HPD; + connector->polled = DRM_CONNECTOR_POLL_CONNECT | + DRM_CONNECTOR_POLL_DISCONNECT; connector->interlace_allowed = 1; connector->doublescan_allowed = 0; diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c index 1964f4f0d45..74cebb51e8c 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c @@ -39,6 +39,7 @@ struct mdp4_crtc { spinlock_t lock; bool stale; uint32_t width, height; + uint32_t x, y; /* next cursor to scan-out: */ uint32_t next_iova; @@ -57,9 +58,16 @@ struct mdp4_crtc { #define PENDING_FLIP 0x2 atomic_t pending; - /* the fb that we currently hold a scanout ref to: */ + /* the fb that we logically (from PoV of KMS API) hold a ref + * to. Which we may not yet be scanning out (we may still + * be scanning out previous in case of page_flip while waiting + * for gpu rendering to complete: + */ struct drm_framebuffer *fb; + /* the fb that we currently hold a scanout ref to: */ + struct drm_framebuffer *scanout_fb; + /* for unref'ing framebuffers after scanout completes: */ struct drm_flip_work unref_fb_work; @@ -77,24 +85,73 @@ static struct mdp4_kms *get_kms(struct drm_crtc *crtc) return to_mdp4_kms(to_mdp_kms(priv->kms)); } -static void update_fb(struct drm_crtc *crtc, bool async, - struct drm_framebuffer *new_fb) +static void request_pending(struct drm_crtc *crtc, uint32_t pending) { struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc); - struct drm_framebuffer *old_fb = mdp4_crtc->fb; - if (old_fb) - drm_flip_work_queue(&mdp4_crtc->unref_fb_work, old_fb); + atomic_or(pending, &mdp4_crtc->pending); + mdp_irq_register(&get_kms(crtc)->base, &mdp4_crtc->vblank); +} + +static void crtc_flush(struct drm_crtc *crtc) +{ + struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc); + struct mdp4_kms *mdp4_kms = get_kms(crtc); + uint32_t i, flush = 0; + + for (i = 0; i < ARRAY_SIZE(mdp4_crtc->planes); i++) { + struct drm_plane *plane = mdp4_crtc->planes[i]; + if (plane) { + enum mdp4_pipe pipe_id = mdp4_plane_pipe(plane); + flush |= pipe2flush(pipe_id); + } + } + flush |= ovlp2flush(mdp4_crtc->ovlp); + + DBG("%s: flush=%08x", mdp4_crtc->name, flush); + + mdp4_write(mdp4_kms, REG_MDP4_OVERLAY_FLUSH, flush); +} + +static void update_fb(struct drm_crtc *crtc, struct drm_framebuffer *new_fb) +{ + struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc); + struct drm_framebuffer *old_fb = mdp4_crtc->fb; /* grab reference to incoming scanout fb: */ drm_framebuffer_reference(new_fb); - mdp4_crtc->base.fb = new_fb; + mdp4_crtc->base.primary->fb = new_fb; mdp4_crtc->fb = new_fb; - if (!async) { - /* enable vblank to pick up the old_fb */ - mdp_irq_register(&get_kms(crtc)->base, &mdp4_crtc->vblank); - } + if (old_fb) + drm_flip_work_queue(&mdp4_crtc->unref_fb_work, old_fb); +} + +/* unlike update_fb(), take a ref to the new scanout fb *before* updating + * plane, then call this. Needed to ensure we don't unref the buffer that + * is actually still being scanned out. + * + * Note that this whole thing goes away with atomic.. since we can defer + * calling into driver until rendering is done. + */ +static void update_scanout(struct drm_crtc *crtc, struct drm_framebuffer *fb) +{ + struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc); + + /* flush updates, to make sure hw is updated to new scanout fb, + * so that we can safely queue unref to current fb (ie. next + * vblank we know hw is done w/ previous scanout_fb). + */ + crtc_flush(crtc); + + if (mdp4_crtc->scanout_fb) + drm_flip_work_queue(&mdp4_crtc->unref_fb_work, + mdp4_crtc->scanout_fb); + + mdp4_crtc->scanout_fb = fb; + + /* enable vblank to complete flip: */ + request_pending(crtc, PENDING_FLIP); } /* if file!=NULL, this is preclose potential cancel-flip path */ @@ -120,49 +177,19 @@ static void complete_flip(struct drm_crtc *crtc, struct drm_file *file) spin_unlock_irqrestore(&dev->event_lock, flags); } -static void crtc_flush(struct drm_crtc *crtc) -{ - struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc); - struct mdp4_kms *mdp4_kms = get_kms(crtc); - uint32_t i, flush = 0; - - for (i = 0; i < ARRAY_SIZE(mdp4_crtc->planes); i++) { - struct drm_plane *plane = mdp4_crtc->planes[i]; - if (plane) { - enum mdp4_pipe pipe_id = mdp4_plane_pipe(plane); - flush |= pipe2flush(pipe_id); - } - } - flush |= ovlp2flush(mdp4_crtc->ovlp); - - DBG("%s: flush=%08x", mdp4_crtc->name, flush); - - mdp4_write(mdp4_kms, REG_MDP4_OVERLAY_FLUSH, flush); -} - -static void request_pending(struct drm_crtc *crtc, uint32_t pending) -{ - struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc); - - atomic_or(pending, &mdp4_crtc->pending); - mdp_irq_register(&get_kms(crtc)->base, &mdp4_crtc->vblank); -} - static void pageflip_cb(struct msm_fence_cb *cb) { struct mdp4_crtc *mdp4_crtc = container_of(cb, struct mdp4_crtc, pageflip_cb); struct drm_crtc *crtc = &mdp4_crtc->base; - struct drm_framebuffer *fb = crtc->fb; + struct drm_framebuffer *fb = crtc->primary->fb; if (!fb) return; + drm_framebuffer_reference(fb); mdp4_plane_set_scanout(mdp4_crtc->plane, fb); - crtc_flush(crtc); - - /* enable vblank to complete flip: */ - request_pending(crtc, PENDING_FLIP); + update_scanout(crtc, fb); } static void unref_fb_worker(struct drm_flip_work *work, void *val) @@ -190,8 +217,6 @@ static void mdp4_crtc_destroy(struct drm_crtc *crtc) { struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc); - mdp4_crtc->plane->funcs->destroy(mdp4_crtc->plane); - drm_crtc_cleanup(crtc); drm_flip_work_cleanup(&mdp4_crtc->unref_fb_work); drm_flip_work_cleanup(&mdp4_crtc->unref_cursor_work); @@ -320,6 +345,20 @@ static int mdp4_crtc_mode_set(struct drm_crtc *crtc, mode->vsync_end, mode->vtotal, mode->type, mode->flags); + /* grab extra ref for update_scanout() */ + drm_framebuffer_reference(crtc->primary->fb); + + ret = mdp4_plane_mode_set(mdp4_crtc->plane, crtc, crtc->primary->fb, + 0, 0, mode->hdisplay, mode->vdisplay, + x << 16, y << 16, + mode->hdisplay << 16, mode->vdisplay << 16); + if (ret) { + drm_framebuffer_unreference(crtc->primary->fb); + dev_err(crtc->dev->dev, "%s: failed to set mode on plane: %d\n", + mdp4_crtc->name, ret); + return ret; + } + mdp4_write(mdp4_kms, REG_MDP4_DMA_SRC_SIZE(dma), MDP4_DMA_SRC_SIZE_WIDTH(mode->hdisplay) | MDP4_DMA_SRC_SIZE_HEIGHT(mode->vdisplay)); @@ -327,7 +366,7 @@ static int mdp4_crtc_mode_set(struct drm_crtc *crtc, /* take data from pipe: */ mdp4_write(mdp4_kms, REG_MDP4_DMA_SRC_BASE(dma), 0); mdp4_write(mdp4_kms, REG_MDP4_DMA_SRC_STRIDE(dma), - crtc->fb->pitches[0]); + crtc->primary->fb->pitches[0]); mdp4_write(mdp4_kms, REG_MDP4_DMA_DST_SIZE(dma), MDP4_DMA_DST_SIZE_WIDTH(0) | MDP4_DMA_DST_SIZE_HEIGHT(0)); @@ -337,28 +376,19 @@ static int mdp4_crtc_mode_set(struct drm_crtc *crtc, MDP4_OVLP_SIZE_WIDTH(mode->hdisplay) | MDP4_OVLP_SIZE_HEIGHT(mode->vdisplay)); mdp4_write(mdp4_kms, REG_MDP4_OVLP_STRIDE(ovlp), - crtc->fb->pitches[0]); + crtc->primary->fb->pitches[0]); mdp4_write(mdp4_kms, REG_MDP4_OVLP_CFG(ovlp), 1); - update_fb(crtc, false, crtc->fb); - - ret = mdp4_plane_mode_set(mdp4_crtc->plane, crtc, crtc->fb, - 0, 0, mode->hdisplay, mode->vdisplay, - x << 16, y << 16, - mode->hdisplay << 16, mode->vdisplay << 16); - if (ret) { - dev_err(crtc->dev->dev, "%s: failed to set mode on plane: %d\n", - mdp4_crtc->name, ret); - return ret; - } - if (dma == DMA_E) { mdp4_write(mdp4_kms, REG_MDP4_DMA_E_QUANT(0), 0x00ff0000); mdp4_write(mdp4_kms, REG_MDP4_DMA_E_QUANT(1), 0x00ff0000); mdp4_write(mdp4_kms, REG_MDP4_DMA_E_QUANT(2), 0x00ff0000); } + update_fb(crtc, crtc->primary->fb); + update_scanout(crtc, crtc->primary->fb); + return 0; } @@ -385,13 +415,24 @@ static int mdp4_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y, struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc); struct drm_plane *plane = mdp4_crtc->plane; struct drm_display_mode *mode = &crtc->mode; + int ret; - update_fb(crtc, false, crtc->fb); + /* grab extra ref for update_scanout() */ + drm_framebuffer_reference(crtc->primary->fb); - return mdp4_plane_mode_set(plane, crtc, crtc->fb, + ret = mdp4_plane_mode_set(plane, crtc, crtc->primary->fb, 0, 0, mode->hdisplay, mode->vdisplay, x << 16, y << 16, mode->hdisplay << 16, mode->vdisplay << 16); + if (ret) { + drm_framebuffer_unreference(crtc->primary->fb); + return ret; + } + + update_fb(crtc, crtc->primary->fb); + update_scanout(crtc, crtc->primary->fb); + + return 0; } static void mdp4_crtc_load_lut(struct drm_crtc *crtc) @@ -419,7 +460,7 @@ static int mdp4_crtc_page_flip(struct drm_crtc *crtc, mdp4_crtc->event = event; spin_unlock_irqrestore(&dev->event_lock, flags); - update_fb(crtc, true, new_fb); + update_fb(crtc, new_fb); return msm_gem_queue_inactive_cb(obj, &mdp4_crtc->pageflip_cb); } @@ -442,12 +483,12 @@ static int mdp4_crtc_set_property(struct drm_crtc *crtc, static void update_cursor(struct drm_crtc *crtc) { struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc); + struct mdp4_kms *mdp4_kms = get_kms(crtc); enum mdp4_dma dma = mdp4_crtc->dma; unsigned long flags; spin_lock_irqsave(&mdp4_crtc->cursor.lock, flags); if (mdp4_crtc->cursor.stale) { - struct mdp4_kms *mdp4_kms = get_kms(crtc); struct drm_gem_object *next_bo = mdp4_crtc->cursor.next_bo; struct drm_gem_object *prev_bo = mdp4_crtc->cursor.scanout_bo; uint32_t iova = mdp4_crtc->cursor.next_iova; @@ -467,9 +508,8 @@ static void update_cursor(struct drm_crtc *crtc) MDP4_DMA_CURSOR_BLEND_CONFIG_CURSOR_EN); } else { /* disable cursor: */ - mdp4_write(mdp4_kms, REG_MDP4_DMA_CURSOR_BASE(dma), 0); - mdp4_write(mdp4_kms, REG_MDP4_DMA_CURSOR_BLEND_CONFIG(dma), - MDP4_DMA_CURSOR_BLEND_CONFIG_FORMAT(CURSOR_ARGB)); + mdp4_write(mdp4_kms, REG_MDP4_DMA_CURSOR_BASE(dma), + mdp4_kms->blank_cursor_iova); } /* and drop the iova ref + obj rev when done scanning out: */ @@ -479,6 +519,11 @@ static void update_cursor(struct drm_crtc *crtc) mdp4_crtc->cursor.scanout_bo = next_bo; mdp4_crtc->cursor.stale = false; } + + mdp4_write(mdp4_kms, REG_MDP4_DMA_CURSOR_POS(dma), + MDP4_DMA_CURSOR_POS_X(mdp4_crtc->cursor.x) | + MDP4_DMA_CURSOR_POS_Y(mdp4_crtc->cursor.y)); + spin_unlock_irqrestore(&mdp4_crtc->cursor.lock, flags); } @@ -526,8 +571,7 @@ static int mdp4_crtc_cursor_set(struct drm_crtc *crtc, if (old_bo) { /* drop our previous reference: */ - msm_gem_put_iova(old_bo, mdp4_kms->id); - drm_gem_object_unreference_unlocked(old_bo); + drm_flip_work_queue(&mdp4_crtc->unref_cursor_work, old_bo); } request_pending(crtc, PENDING_CURSOR); @@ -542,12 +586,15 @@ fail: static int mdp4_crtc_cursor_move(struct drm_crtc *crtc, int x, int y) { struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc); - struct mdp4_kms *mdp4_kms = get_kms(crtc); - enum mdp4_dma dma = mdp4_crtc->dma; + unsigned long flags; - mdp4_write(mdp4_kms, REG_MDP4_DMA_CURSOR_POS(dma), - MDP4_DMA_CURSOR_POS_X(x) | - MDP4_DMA_CURSOR_POS_Y(y)); + spin_lock_irqsave(&mdp4_crtc->cursor.lock, flags); + mdp4_crtc->cursor.x = x; + mdp4_crtc->cursor.y = y; + spin_unlock_irqrestore(&mdp4_crtc->cursor.lock, flags); + + crtc_flush(crtc); + request_pending(crtc, PENDING_CURSOR); return 0; } @@ -688,6 +735,9 @@ void mdp4_crtc_attach(struct drm_crtc *crtc, struct drm_plane *plane) void mdp4_crtc_detach(struct drm_crtc *crtc, struct drm_plane *plane) { + /* don't actually detatch our primary plane: */ + if (to_mdp4_crtc(crtc)->plane == plane) + return; set_attach(crtc, mdp4_plane_pipe(plane), NULL); } @@ -713,6 +763,7 @@ struct drm_crtc *mdp4_crtc_init(struct drm_device *dev, crtc = &mdp4_crtc->base; mdp4_crtc->plane = plane; + mdp4_crtc->id = id; mdp4_crtc->ovlp = ovlp_id; mdp4_crtc->dma = dma_id; @@ -738,7 +789,7 @@ struct drm_crtc *mdp4_crtc_init(struct drm_device *dev, INIT_FENCE_CB(&mdp4_crtc->pageflip_cb, pageflip_cb); - drm_crtc_init(dev, crtc, &mdp4_crtc_funcs); + drm_crtc_init_with_planes(dev, crtc, plane, NULL, &mdp4_crtc_funcs); drm_crtc_helper_add(crtc, &mdp4_crtc_helper_funcs); mdp4_plane_install_properties(mdp4_crtc->plane, &crtc->base); diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_irq.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_irq.c index c740ccd1cc6..8edd531cb62 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_irq.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_irq.c @@ -70,12 +70,12 @@ irqreturn_t mdp4_irq(struct msm_kms *kms) VERB("status=%08x", status); + mdp_dispatch_irqs(mdp_kms, status); + for (id = 0; id < priv->num_crtcs; id++) if (status & mdp4_crtc_vblank(priv->crtcs[id])) drm_handle_vblank(dev, id); - mdp_dispatch_irqs(mdp_kms, status); - return IRQ_HANDLED; } diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c index 272e707c948..0bb4faa1752 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c @@ -144,6 +144,10 @@ static void mdp4_preclose(struct msm_kms *kms, struct drm_file *file) static void mdp4_destroy(struct msm_kms *kms) { struct mdp4_kms *mdp4_kms = to_mdp4_kms(to_mdp_kms(kms)); + if (mdp4_kms->blank_cursor_iova) + msm_gem_put_iova(mdp4_kms->blank_cursor_bo, mdp4_kms->id); + if (mdp4_kms->blank_cursor_bo) + drm_gem_object_unreference(mdp4_kms->blank_cursor_bo); kfree(mdp4_kms); } @@ -372,6 +376,23 @@ struct msm_kms *mdp4_kms_init(struct drm_device *dev) goto fail; } + mutex_lock(&dev->struct_mutex); + mdp4_kms->blank_cursor_bo = msm_gem_new(dev, SZ_16K, MSM_BO_WC); + mutex_unlock(&dev->struct_mutex); + if (IS_ERR(mdp4_kms->blank_cursor_bo)) { + ret = PTR_ERR(mdp4_kms->blank_cursor_bo); + dev_err(dev->dev, "could not allocate blank-cursor bo: %d\n", ret); + mdp4_kms->blank_cursor_bo = NULL; + goto fail; + } + + ret = msm_gem_get_iova(mdp4_kms->blank_cursor_bo, mdp4_kms->id, + &mdp4_kms->blank_cursor_iova); + if (ret) { + dev_err(dev->dev, "could not pin blank-cursor bo: %d\n", ret); + goto fail; + } + return kms; fail: diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h index 66a4d31aec8..715520c54cd 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h @@ -44,6 +44,10 @@ struct mdp4_kms { struct clk *lut_clk; struct mdp_irq error_handler; + + /* empty/blank cursor bo to use when cursor is "disabled" */ + struct drm_gem_object *blank_cursor_bo; + uint32_t blank_cursor_iova; }; #define to_mdp4_kms(x) container_of(x, struct mdp4_kms, base) diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c index 2406027200e..66f33dba1eb 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c @@ -170,8 +170,8 @@ int mdp4_plane_mode_set(struct drm_plane *plane, MDP4_PIPE_DST_SIZE_HEIGHT(crtc_h)); mdp4_write(mdp4_kms, REG_MDP4_PIPE_DST_XY(pipe), - MDP4_PIPE_SRC_XY_X(crtc_x) | - MDP4_PIPE_SRC_XY_Y(crtc_y)); + MDP4_PIPE_DST_XY_X(crtc_x) | + MDP4_PIPE_DST_XY_Y(crtc_y)); mdp4_plane_set_scanout(plane, fb); @@ -222,6 +222,7 @@ struct drm_plane *mdp4_plane_init(struct drm_device *dev, struct drm_plane *plane = NULL; struct mdp4_plane *mdp4_plane; int ret; + enum drm_plane_type type; mdp4_plane = kzalloc(sizeof(*mdp4_plane), GFP_KERNEL); if (!mdp4_plane) { @@ -237,9 +238,10 @@ struct drm_plane *mdp4_plane_init(struct drm_device *dev, mdp4_plane->nformats = mdp4_get_formats(pipe_id, mdp4_plane->formats, ARRAY_SIZE(mdp4_plane->formats)); - drm_plane_init(dev, plane, 0xff, &mdp4_plane_funcs, - mdp4_plane->formats, mdp4_plane->nformats, - private_plane); + type = private_plane ? DRM_PLANE_TYPE_PRIMARY : DRM_PLANE_TYPE_OVERLAY; + drm_universal_plane_init(dev, plane, 0xff, &mdp4_plane_funcs, + mdp4_plane->formats, mdp4_plane->nformats, + type); mdp4_plane_install_properties(plane, &plane->base); diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c index 71a3b2345eb..ebe2e60f3ab 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c @@ -102,7 +102,7 @@ static void update_fb(struct drm_crtc *crtc, struct drm_framebuffer *new_fb) /* grab reference to incoming scanout fb: */ drm_framebuffer_reference(new_fb); - mdp5_crtc->base.fb = new_fb; + mdp5_crtc->base.primary->fb = new_fb; mdp5_crtc->fb = new_fb; if (old_fb) @@ -195,8 +195,6 @@ static void mdp5_crtc_destroy(struct drm_crtc *crtc) { struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc); - mdp5_crtc->plane->funcs->destroy(mdp5_crtc->plane); - drm_crtc_cleanup(crtc); drm_flip_work_cleanup(&mdp5_crtc->unref_fb_work); @@ -289,13 +287,14 @@ static int mdp5_crtc_mode_set(struct drm_crtc *crtc, mode->type, mode->flags); /* grab extra ref for update_scanout() */ - drm_framebuffer_reference(crtc->fb); + drm_framebuffer_reference(crtc->primary->fb); - ret = mdp5_plane_mode_set(mdp5_crtc->plane, crtc, crtc->fb, + ret = mdp5_plane_mode_set(mdp5_crtc->plane, crtc, crtc->primary->fb, 0, 0, mode->hdisplay, mode->vdisplay, x << 16, y << 16, mode->hdisplay << 16, mode->vdisplay << 16); if (ret) { + drm_framebuffer_unreference(crtc->primary->fb); dev_err(crtc->dev->dev, "%s: failed to set mode on plane: %d\n", mdp5_crtc->name, ret); return ret; @@ -305,8 +304,8 @@ static int mdp5_crtc_mode_set(struct drm_crtc *crtc, MDP5_LM_OUT_SIZE_WIDTH(mode->hdisplay) | MDP5_LM_OUT_SIZE_HEIGHT(mode->vdisplay)); - update_fb(crtc, crtc->fb); - update_scanout(crtc, crtc->fb); + update_fb(crtc, crtc->primary->fb); + update_scanout(crtc, crtc->primary->fb); return 0; } @@ -337,17 +336,21 @@ static int mdp5_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y, int ret; /* grab extra ref for update_scanout() */ - drm_framebuffer_reference(crtc->fb); + drm_framebuffer_reference(crtc->primary->fb); - ret = mdp5_plane_mode_set(plane, crtc, crtc->fb, + ret = mdp5_plane_mode_set(plane, crtc, crtc->primary->fb, 0, 0, mode->hdisplay, mode->vdisplay, x << 16, y << 16, mode->hdisplay << 16, mode->vdisplay << 16); + if (ret) { + drm_framebuffer_unreference(crtc->primary->fb); + return ret; + } - update_fb(crtc, crtc->fb); - update_scanout(crtc, crtc->fb); + update_fb(crtc, crtc->primary->fb); + update_scanout(crtc, crtc->primary->fb); - return ret; + return 0; } static void mdp5_crtc_load_lut(struct drm_crtc *crtc) @@ -519,6 +522,9 @@ void mdp5_crtc_attach(struct drm_crtc *crtc, struct drm_plane *plane) void mdp5_crtc_detach(struct drm_crtc *crtc, struct drm_plane *plane) { + /* don't actually detatch our primary plane: */ + if (to_mdp5_crtc(crtc)->plane == plane) + return; set_attach(crtc, mdp5_plane_pipe(plane), NULL); } @@ -554,7 +560,7 @@ struct drm_crtc *mdp5_crtc_init(struct drm_device *dev, INIT_FENCE_CB(&mdp5_crtc->pageflip_cb, pageflip_cb); - drm_crtc_init(dev, crtc, &mdp5_crtc_funcs); + drm_crtc_init_with_planes(dev, crtc, plane, NULL, &mdp5_crtc_funcs); drm_crtc_helper_add(crtc, &mdp5_crtc_helper_funcs); mdp5_plane_install_properties(mdp5_crtc->plane, &crtc->base); diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c index 353d494a497..f2b985bc2ad 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c @@ -71,11 +71,11 @@ static void mdp5_irq_mdp(struct mdp_kms *mdp_kms) VERB("status=%08x", status); + mdp_dispatch_irqs(mdp_kms, status); + for (id = 0; id < priv->num_crtcs; id++) if (status & mdp5_crtc_vblank(priv->crtcs[id])) drm_handle_vblank(dev, id); - - mdp_dispatch_irqs(mdp_kms, status); } irqreturn_t mdp5_irq(struct msm_kms *kms) diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c index ee8446c1b5f..71510ee26e9 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c @@ -20,6 +20,10 @@ #include "msm_mmu.h" #include "mdp5_kms.h" +static const char *iommu_ports[] = { + "mdp_0", +}; + static struct mdp5_platform_config *mdp5_get_config(struct platform_device *dev); static int mdp5_hw_init(struct msm_kms *kms) @@ -104,6 +108,12 @@ static void mdp5_preclose(struct msm_kms *kms, struct drm_file *file) static void mdp5_destroy(struct msm_kms *kms) { struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms)); + struct msm_mmu *mmu = mdp5_kms->mmu; + + if (mmu) { + mmu->funcs->detach(mmu, iommu_ports, ARRAY_SIZE(iommu_ports)); + mmu->funcs->destroy(mmu); + } kfree(mdp5_kms); } @@ -216,10 +226,6 @@ fail: return ret; } -static const char *iommu_ports[] = { - "mdp_0", -}; - static int get_clk(struct platform_device *pdev, struct clk **clkp, const char *name) { @@ -280,12 +286,22 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev) goto fail; } - ret = get_clk(pdev, &mdp5_kms->axi_clk, "bus_clk") || - get_clk(pdev, &mdp5_kms->ahb_clk, "iface_clk") || - get_clk(pdev, &mdp5_kms->src_clk, "core_clk_src") || - get_clk(pdev, &mdp5_kms->core_clk, "core_clk") || - get_clk(pdev, &mdp5_kms->lut_clk, "lut_clk") || - get_clk(pdev, &mdp5_kms->vsync_clk, "vsync_clk"); + ret = get_clk(pdev, &mdp5_kms->axi_clk, "bus_clk"); + if (ret) + goto fail; + ret = get_clk(pdev, &mdp5_kms->ahb_clk, "iface_clk"); + if (ret) + goto fail; + ret = get_clk(pdev, &mdp5_kms->src_clk, "core_clk_src"); + if (ret) + goto fail; + ret = get_clk(pdev, &mdp5_kms->core_clk, "core_clk"); + if (ret) + goto fail; + ret = get_clk(pdev, &mdp5_kms->lut_clk, "lut_clk"); + if (ret) + goto fail; + ret = get_clk(pdev, &mdp5_kms->vsync_clk, "vsync_clk"); if (ret) goto fail; @@ -307,17 +323,23 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev) mmu = msm_iommu_new(dev, config->iommu); if (IS_ERR(mmu)) { ret = PTR_ERR(mmu); + dev_err(dev->dev, "failed to init iommu: %d\n", ret); goto fail; } + ret = mmu->funcs->attach(mmu, iommu_ports, ARRAY_SIZE(iommu_ports)); - if (ret) + if (ret) { + dev_err(dev->dev, "failed to attach iommu: %d\n", ret); + mmu->funcs->destroy(mmu); goto fail; + } } else { dev_info(dev->dev, "no iommu, fallback to phys " "contig buffers for scanout\n"); mmu = NULL; } + mdp5_kms->mmu = mmu; mdp5_kms->id = msm_register_mmu(dev, mmu); if (mdp5_kms->id < 0) { diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h index c8b1a2522c2..6e981b692d1 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h @@ -33,6 +33,7 @@ struct mdp5_kms { /* mapper-id used to request GEM buffer mapped for scanout: */ int id; + struct msm_mmu *mmu; /* for tracking smp allocation amongst pipes: */ mdp5_smp_state_t smp_state; diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c index 0ac8bb5e7e8..f3daec4412a 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c @@ -85,8 +85,11 @@ static int mdp5_plane_disable(struct drm_plane *plane) static void mdp5_plane_destroy(struct drm_plane *plane) { struct mdp5_plane *mdp5_plane = to_mdp5_plane(plane); + struct msm_drm_private *priv = plane->dev->dev_private; + + if (priv->kms) + mdp5_plane_disable(plane); - mdp5_plane_disable(plane); drm_plane_cleanup(plane); kfree(mdp5_plane); @@ -358,6 +361,7 @@ struct drm_plane *mdp5_plane_init(struct drm_device *dev, struct drm_plane *plane = NULL; struct mdp5_plane *mdp5_plane; int ret; + enum drm_plane_type type; mdp5_plane = kzalloc(sizeof(*mdp5_plane), GFP_KERNEL); if (!mdp5_plane) { @@ -373,9 +377,10 @@ struct drm_plane *mdp5_plane_init(struct drm_device *dev, mdp5_plane->nformats = mdp5_get_formats(pipe, mdp5_plane->formats, ARRAY_SIZE(mdp5_plane->formats)); - drm_plane_init(dev, plane, 0xff, &mdp5_plane_funcs, - mdp5_plane->formats, mdp5_plane->nformats, - private_plane); + type = private_plane ? DRM_PLANE_TYPE_PRIMARY : DRM_PLANE_TYPE_OVERLAY; + drm_universal_plane_init(dev, plane, 0xff, &mdp5_plane_funcs, + mdp5_plane->formats, mdp5_plane->nformats, + type); mdp5_plane_install_properties(plane, &plane->base); diff --git a/drivers/gpu/drm/msm/mdp/mdp_kms.c b/drivers/gpu/drm/msm/mdp/mdp_kms.c index 3be48f7c36b..03455b64a24 100644 --- a/drivers/gpu/drm/msm/mdp/mdp_kms.c +++ b/drivers/gpu/drm/msm/mdp/mdp_kms.c @@ -101,7 +101,8 @@ void mdp_irq_wait(struct mdp_kms *mdp_kms, uint32_t irqmask) .count = 1, }; mdp_irq_register(mdp_kms, &wait.irq); - wait_event(wait_event, (wait.count <= 0)); + wait_event_timeout(wait_event, (wait.count <= 0), + msecs_to_jiffies(100)); mdp_irq_unregister(mdp_kms, &wait.irq); } diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index e6adafc7eff..9a5d87db5c2 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -56,6 +56,10 @@ static char *vram; MODULE_PARM_DESC(vram, "Configure VRAM size (for devices without IOMMU/GPUMMU"); module_param(vram, charp, 0); +/* + * Util/helpers: + */ + void __iomem *msm_ioremap(struct platform_device *pdev, const char *name, const char *dbgname) { @@ -143,6 +147,8 @@ static int msm_unload(struct drm_device *dev) priv->vram.paddr, &attrs); } + component_unbind_all(dev->dev, dev); + dev->dev_private = NULL; kfree(priv); @@ -153,7 +159,7 @@ static int msm_unload(struct drm_device *dev) static int get_mdp_ver(struct platform_device *pdev) { #ifdef CONFIG_OF - const static struct of_device_id match_types[] = { { + static const struct of_device_id match_types[] = { { .compatible = "qcom,mdss_mdp", .data = (void *)5, }, { @@ -175,6 +181,7 @@ static int msm_load(struct drm_device *dev, unsigned long flags) struct msm_kms *kms; int ret; + priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) { dev_err(dev->dev, "failed to allocate private data\n"); @@ -213,7 +220,7 @@ static int msm_load(struct drm_device *dev, unsigned long flags) * is bogus, but non-null if allocation succeeded: */ p = dma_alloc_attrs(dev->dev, size, - &priv->vram.paddr, 0, &attrs); + &priv->vram.paddr, GFP_KERNEL, &attrs); if (!p) { dev_err(dev->dev, "failed to allocate VRAM\n"); priv->vram.paddr = 0; @@ -226,6 +233,13 @@ static int msm_load(struct drm_device *dev, unsigned long flags) (uint32_t)(priv->vram.paddr + size)); } + platform_set_drvdata(pdev, dev); + + /* Bind all our sub-components: */ + ret = component_bind_all(dev->dev, dev); + if (ret) + return ret; + switch (get_mdp_ver(pdev)) { case 4: kms = mdp4_kms_init(dev); @@ -274,19 +288,21 @@ static int msm_load(struct drm_device *dev, unsigned long flags) } pm_runtime_get_sync(dev->dev); - ret = drm_irq_install(dev); + ret = drm_irq_install(dev, platform_get_irq(dev->platformdev, 0)); pm_runtime_put_sync(dev->dev); if (ret < 0) { dev_err(dev->dev, "failed to install IRQ handler\n"); goto fail; } - platform_set_drvdata(pdev, dev); - #ifdef CONFIG_DRM_MSM_FBDEV priv->fbdev = msm_fbdev_init(dev); #endif + ret = msm_debugfs_late_init(dev); + if (ret) + goto fail; + drm_kms_helper_poll_init(dev); return 0; @@ -311,7 +327,6 @@ static void load_gpu(struct drm_device *dev) gpu = NULL; /* not fatal */ } - mutex_unlock(&dev->struct_mutex); if (gpu) { int ret; @@ -321,10 +336,16 @@ static void load_gpu(struct drm_device *dev) dev_err(dev->dev, "gpu hw init failed: %d\n", ret); gpu->funcs->destroy(gpu); gpu = NULL; + } else { + /* give inactive pm a chance to kick in: */ + msm_gpu_retire(gpu); } + } priv->gpu = gpu; + + mutex_unlock(&dev->struct_mutex); } static int msm_open(struct drm_device *dev, struct drm_file *file) @@ -365,11 +386,8 @@ static void msm_preclose(struct drm_device *dev, struct drm_file *file) static void msm_lastclose(struct drm_device *dev) { struct msm_drm_private *priv = dev->dev_private; - if (priv->fbdev) { - drm_modeset_lock_all(dev); - drm_fb_helper_restore_fbdev_mode(priv->fbdev); - drm_modeset_unlock_all(dev); - } + if (priv->fbdev) + drm_fb_helper_restore_fbdev_mode_unlocked(priv->fbdev); } static irqreturn_t msm_irq(int irq, void *arg) @@ -514,6 +532,41 @@ static struct drm_info_list msm_debugfs_list[] = { { "fb", show_locked, 0, msm_fb_show }, }; +static int late_init_minor(struct drm_minor *minor) +{ + int ret; + + if (!minor) + return 0; + + ret = msm_rd_debugfs_init(minor); + if (ret) { + dev_err(minor->dev->dev, "could not install rd debugfs\n"); + return ret; + } + + ret = msm_perf_debugfs_init(minor); + if (ret) { + dev_err(minor->dev->dev, "could not install perf debugfs\n"); + return ret; + } + + return 0; +} + +int msm_debugfs_late_init(struct drm_device *dev) +{ + int ret; + ret = late_init_minor(dev->primary); + if (ret) + return ret; + ret = late_init_minor(dev->render); + if (ret) + return ret; + ret = late_init_minor(dev->control); + return ret; +} + static int msm_debugfs_init(struct drm_minor *minor) { struct drm_device *dev = minor->dev; @@ -528,13 +581,17 @@ static int msm_debugfs_init(struct drm_minor *minor) return ret; } - return ret; + return 0; } static void msm_debugfs_cleanup(struct drm_minor *minor) { drm_debugfs_remove_files(msm_debugfs_list, ARRAY_SIZE(msm_debugfs_list), minor); + if (!minor->dev->dev_private) + return; + msm_rd_debugfs_cleanup(minor); + msm_perf_debugfs_cleanup(minor); } #endif @@ -647,6 +704,12 @@ static int msm_ioctl_gem_new(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_msm_gem_new *args = data; + + if (args->flags & ~MSM_BO_FLAGS) { + DRM_ERROR("invalid flags: %08x\n", args->flags); + return -EINVAL; + } + return msm_gem_new_handle(dev, file, args->size, args->flags, &args->handle); } @@ -660,6 +723,11 @@ static int msm_ioctl_gem_cpu_prep(struct drm_device *dev, void *data, struct drm_gem_object *obj; int ret; + if (args->op & ~MSM_PREP_FLAGS) { + DRM_ERROR("invalid op: %08x\n", args->op); + return -EINVAL; + } + obj = drm_gem_object_lookup(dev, file, args->handle); if (!obj) return -ENOENT; @@ -714,7 +782,14 @@ static int msm_ioctl_wait_fence(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_msm_wait_fence *args = data; - return msm_wait_fence_interruptable(dev, args->fence, &TS(args->timeout)); + + if (args->pad) { + DRM_ERROR("invalid pad: %08x\n", args->pad); + return -EINVAL; + } + + return msm_wait_fence_interruptable(dev, args->fence, + &TS(args->timeout)); } static const struct drm_ioctl_desc msm_ioctls[] = { @@ -819,18 +894,110 @@ static const struct dev_pm_ops msm_pm_ops = { }; /* + * Componentized driver support: + */ + +#ifdef CONFIG_OF +/* NOTE: the CONFIG_OF case duplicates the same code as exynos or imx + * (or probably any other).. so probably some room for some helpers + */ +static int compare_of(struct device *dev, void *data) +{ + return dev->of_node == data; +} + +static int msm_drm_add_components(struct device *master, struct master *m) +{ + struct device_node *np = master->of_node; + unsigned i; + int ret; + + for (i = 0; ; i++) { + struct device_node *node; + + node = of_parse_phandle(np, "connectors", i); + if (!node) + break; + + ret = component_master_add_child(m, compare_of, node); + of_node_put(node); + + if (ret) + return ret; + } + return 0; +} +#else +static int compare_dev(struct device *dev, void *data) +{ + return dev == data; +} + +static int msm_drm_add_components(struct device *master, struct master *m) +{ + /* For non-DT case, it kinda sucks. We don't actually have a way + * to know whether or not we are waiting for certain devices (or if + * they are simply not present). But for non-DT we only need to + * care about apq8064/apq8060/etc (all mdp4/a3xx): + */ + static const char *devnames[] = { + "hdmi_msm.0", "kgsl-3d0.0", + }; + int i; + + DBG("Adding components.."); + + for (i = 0; i < ARRAY_SIZE(devnames); i++) { + struct device *dev; + int ret; + + dev = bus_find_device_by_name(&platform_bus_type, + NULL, devnames[i]); + if (!dev) { + dev_info(master, "still waiting for %s\n", devnames[i]); + return -EPROBE_DEFER; + } + + ret = component_master_add_child(m, compare_dev, dev); + if (ret) { + DBG("could not add child: %d", ret); + return ret; + } + } + + return 0; +} +#endif + +static int msm_drm_bind(struct device *dev) +{ + return drm_platform_init(&msm_driver, to_platform_device(dev)); +} + +static void msm_drm_unbind(struct device *dev) +{ + drm_put_dev(platform_get_drvdata(to_platform_device(dev))); +} + +static const struct component_master_ops msm_drm_ops = { + .add_components = msm_drm_add_components, + .bind = msm_drm_bind, + .unbind = msm_drm_unbind, +}; + +/* * Platform driver: */ static int msm_pdev_probe(struct platform_device *pdev) { pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32); - return drm_platform_init(&msm_driver, pdev); + return component_master_add(&pdev->dev, &msm_drm_ops); } static int msm_pdev_remove(struct platform_device *pdev) { - drm_put_dev(platform_get_drvdata(pdev)); + component_master_del(&pdev->dev, &msm_drm_ops); return 0; } diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 3d63269c5b2..8a2c5fd0893 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -22,6 +22,7 @@ #include <linux/clk.h> #include <linux/cpufreq.h> #include <linux/module.h> +#include <linux/component.h> #include <linux/platform_device.h> #include <linux/pm.h> #include <linux/pm_runtime.h> @@ -32,7 +33,7 @@ #include <asm/sizes.h> -#if defined(CONFIG_COMPILE_TEST) && !defined(CONFIG_ARCH_MSM) +#if defined(CONFIG_COMPILE_TEST) && !defined(CONFIG_ARCH_QCOM) /* stubs we need for compile-test: */ static inline struct device *msm_iommu_get_ctx(const char *ctx_name) { @@ -54,6 +55,9 @@ static inline struct device *msm_iommu_get_ctx(const char *ctx_name) struct msm_kms; struct msm_gpu; struct msm_mmu; +struct msm_rd_state; +struct msm_perf_state; +struct msm_gem_submit; #define NUM_DOMAINS 2 /* one for KMS, then one per gpu core (?) */ @@ -69,6 +73,9 @@ struct msm_drm_private { struct msm_kms *kms; + /* subordinate devices, if present: */ + struct platform_device *hdmi_pdev, *gpu_pdev; + /* when we have more than one 'msm_gpu' these need to be an array: */ struct msm_gpu *gpu; struct msm_file_private *lastctx; @@ -78,6 +85,9 @@ struct msm_drm_private { uint32_t next_fence, completed_fence; wait_queue_head_t fence_event; + struct msm_rd_state *rd; + struct msm_perf_state *perf; + /* list of GEM objects: */ struct list_head inactive_list; @@ -200,6 +210,15 @@ void __exit hdmi_unregister(void); void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m); void msm_gem_describe_objects(struct list_head *list, struct seq_file *m); void msm_framebuffer_describe(struct drm_framebuffer *fb, struct seq_file *m); +int msm_debugfs_late_init(struct drm_device *dev); +int msm_rd_debugfs_init(struct drm_minor *minor); +void msm_rd_debugfs_cleanup(struct drm_minor *minor); +void msm_rd_dump_submit(struct msm_gem_submit *submit); +int msm_perf_debugfs_init(struct drm_minor *minor); +void msm_perf_debugfs_cleanup(struct drm_minor *minor); +#else +static inline int msm_debugfs_late_init(struct drm_device *dev) { return 0; } +static inline void msm_rd_dump_submit(struct msm_gem_submit *submit) {} #endif void __iomem *msm_ioremap(struct platform_device *pdev, const char *name, diff --git a/drivers/gpu/drm/msm/msm_fbdev.c b/drivers/gpu/drm/msm/msm_fbdev.c index 6c6d7d4c9b4..5107fc4826b 100644 --- a/drivers/gpu/drm/msm/msm_fbdev.c +++ b/drivers/gpu/drm/msm/msm_fbdev.c @@ -59,14 +59,11 @@ static int msm_fbdev_create(struct drm_fb_helper *helper, struct drm_framebuffer *fb = NULL; struct fb_info *fbi = NULL; struct drm_mode_fb_cmd2 mode_cmd = {0}; - dma_addr_t paddr; + uint32_t paddr; int ret, size; - /* only doing ARGB32 since this is what is needed to alpha-blend - * with video overlays: - */ sizes->surface_bpp = 32; - sizes->surface_depth = 32; + sizes->surface_depth = 24; DBG("create fbdev: %dx%d@%d (%dx%d)", sizes->surface_width, sizes->surface_height, sizes->surface_bpp, diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index d8d60c969ac..690d7e7b6d1 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -118,8 +118,10 @@ static void put_pages(struct drm_gem_object *obj) if (iommu_present(&platform_bus_type)) drm_gem_put_pages(obj, msm_obj->pages, true, false); - else + else { drm_mm_remove_node(msm_obj->vram_node); + drm_free_large(msm_obj->pages); + } msm_obj->pages = NULL; } @@ -276,6 +278,7 @@ int msm_gem_get_iova_locked(struct drm_gem_object *obj, int id, uint32_t *iova) { struct msm_gem_object *msm_obj = to_msm_bo(obj); + struct drm_device *dev = obj->dev; int ret = 0; if (!msm_obj->domain[id].iova) { @@ -283,6 +286,11 @@ int msm_gem_get_iova_locked(struct drm_gem_object *obj, int id, struct msm_mmu *mmu = priv->mmus[id]; struct page **pages = get_pages(obj); + if (!mmu) { + dev_err(dev->dev, "null MMU pointer\n"); + return -EINVAL; + } + if (IS_ERR(pages)) return PTR_ERR(pages); @@ -644,7 +652,7 @@ struct drm_gem_object *msm_gem_new(struct drm_device *dev, fail: if (obj) - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_unreference(obj); return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index 3246bb46c4f..bfb052688f8 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -90,6 +90,7 @@ struct msm_gem_submit { uint32_t type; uint32_t size; /* in dwords */ uint32_t iova; + uint32_t idx; /* cmdstream buffer idx in bos[] */ } cmd[MAX_CMDS]; struct { uint32_t flags; diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 5281d4bc37f..cd0554f6831 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -23,7 +23,6 @@ * Cmdstream submission: */ -#define BO_INVALID_FLAGS ~(MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE) /* make sure these don't conflict w/ MSM_SUBMIT_BO_x */ #define BO_VALID 0x8000 #define BO_LOCKED 0x4000 @@ -77,7 +76,7 @@ static int submit_lookup_objects(struct msm_gem_submit *submit, goto out_unlock; } - if (submit_bo.flags & BO_INVALID_FLAGS) { + if (submit_bo.flags & ~MSM_SUBMIT_BO_FLAGS) { DRM_ERROR("invalid flags: %x\n", submit_bo.flags); ret = -EINVAL; goto out_unlock; @@ -163,7 +162,7 @@ retry: /* if locking succeeded, pin bo: */ - ret = msm_gem_get_iova(&msm_obj->base, + ret = msm_gem_get_iova_locked(&msm_obj->base, submit->gpu->id, &iova); /* this would break the logic in the fail path.. there is no @@ -247,7 +246,7 @@ static int submit_reloc(struct msm_gem_submit *submit, struct msm_gem_object *ob /* For now, just map the entire thing. Eventually we probably * to do it page-by-page, w/ kmap() if not vmap()d.. */ - ptr = msm_gem_vaddr(&obj->base); + ptr = msm_gem_vaddr_locked(&obj->base); if (IS_ERR(ptr)) { ret = PTR_ERR(ptr); @@ -307,14 +306,12 @@ static void submit_cleanup(struct msm_gem_submit *submit, bool fail) { unsigned i; - mutex_lock(&submit->dev->struct_mutex); for (i = 0; i < submit->nr_bos; i++) { struct msm_gem_object *msm_obj = submit->bos[i].obj; submit_unlock_unpin_bo(submit, i); list_del_init(&msm_obj->submit_entry); drm_gem_object_unreference(&msm_obj->base); } - mutex_unlock(&submit->dev->struct_mutex); ww_acquire_fini(&submit->ticket); kfree(submit); @@ -342,6 +339,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, if (args->nr_cmds > MAX_CMDS) return -EINVAL; + mutex_lock(&dev->struct_mutex); + submit = submit_create(dev, gpu, args->nr_bos); if (!submit) { ret = -ENOMEM; @@ -369,6 +368,18 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, goto out; } + /* validate input from userspace: */ + switch (submit_cmd.type) { + case MSM_SUBMIT_CMD_BUF: + case MSM_SUBMIT_CMD_IB_TARGET_BUF: + case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: + break; + default: + DRM_ERROR("invalid type: %08x\n", submit_cmd.type); + ret = -EINVAL; + goto out; + } + ret = submit_bo(submit, submit_cmd.submit_idx, &msm_obj, &iova, NULL); if (ret) @@ -391,6 +402,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, submit->cmd[i].type = submit_cmd.type; submit->cmd[i].size = submit_cmd.size / 4; submit->cmd[i].iova = iova + submit_cmd.submit_offset; + submit->cmd[i].idx = submit_cmd.submit_idx; if (submit->valid) continue; @@ -410,5 +422,6 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, out: if (submit) submit_cleanup(submit, !!ret); + mutex_unlock(&dev->struct_mutex); return ret; } diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 4ebce8be489..c6322197db8 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -154,9 +154,18 @@ static int disable_axi(struct msm_gpu *gpu) int msm_gpu_pm_resume(struct msm_gpu *gpu) { + struct drm_device *dev = gpu->dev; int ret; - DBG("%s", gpu->name); + DBG("%s: active_cnt=%d", gpu->name, gpu->active_cnt); + + WARN_ON(!mutex_is_locked(&dev->struct_mutex)); + + if (gpu->active_cnt++ > 0) + return 0; + + if (WARN_ON(gpu->active_cnt <= 0)) + return -EINVAL; ret = enable_pwrrail(gpu); if (ret) @@ -175,9 +184,18 @@ int msm_gpu_pm_resume(struct msm_gpu *gpu) int msm_gpu_pm_suspend(struct msm_gpu *gpu) { + struct drm_device *dev = gpu->dev; int ret; - DBG("%s", gpu->name); + DBG("%s: active_cnt=%d", gpu->name, gpu->active_cnt); + + WARN_ON(!mutex_is_locked(&dev->struct_mutex)); + + if (--gpu->active_cnt > 0) + return 0; + + if (WARN_ON(gpu->active_cnt < 0)) + return -EINVAL; ret = disable_axi(gpu); if (ret) @@ -195,6 +213,55 @@ int msm_gpu_pm_suspend(struct msm_gpu *gpu) } /* + * Inactivity detection (for suspend): + */ + +static void inactive_worker(struct work_struct *work) +{ + struct msm_gpu *gpu = container_of(work, struct msm_gpu, inactive_work); + struct drm_device *dev = gpu->dev; + + if (gpu->inactive) + return; + + DBG("%s: inactive!\n", gpu->name); + mutex_lock(&dev->struct_mutex); + if (!(msm_gpu_active(gpu) || gpu->inactive)) { + disable_axi(gpu); + disable_clk(gpu); + gpu->inactive = true; + } + mutex_unlock(&dev->struct_mutex); +} + +static void inactive_handler(unsigned long data) +{ + struct msm_gpu *gpu = (struct msm_gpu *)data; + struct msm_drm_private *priv = gpu->dev->dev_private; + + queue_work(priv->wq, &gpu->inactive_work); +} + +/* cancel inactive timer and make sure we are awake: */ +static void inactive_cancel(struct msm_gpu *gpu) +{ + DBG("%s", gpu->name); + del_timer(&gpu->inactive_timer); + if (gpu->inactive) { + enable_clk(gpu); + enable_axi(gpu); + gpu->inactive = false; + } +} + +static void inactive_start(struct msm_gpu *gpu) +{ + DBG("%s", gpu->name); + mod_timer(&gpu->inactive_timer, + round_jiffies_up(jiffies + DRM_MSM_INACTIVE_JIFFIES)); +} + +/* * Hangcheck detection for locked gpu: */ @@ -206,7 +273,10 @@ static void recover_worker(struct work_struct *work) dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name); mutex_lock(&dev->struct_mutex); - gpu->funcs->recover(gpu); + if (msm_gpu_active(gpu)) { + inactive_cancel(gpu); + gpu->funcs->recover(gpu); + } mutex_unlock(&dev->struct_mutex); msm_gpu_retire(gpu); @@ -250,6 +320,101 @@ static void hangcheck_handler(unsigned long data) } /* + * Performance Counters: + */ + +/* called under perf_lock */ +static int update_hw_cntrs(struct msm_gpu *gpu, uint32_t ncntrs, uint32_t *cntrs) +{ + uint32_t current_cntrs[ARRAY_SIZE(gpu->last_cntrs)]; + int i, n = min(ncntrs, gpu->num_perfcntrs); + + /* read current values: */ + for (i = 0; i < gpu->num_perfcntrs; i++) + current_cntrs[i] = gpu_read(gpu, gpu->perfcntrs[i].sample_reg); + + /* update cntrs: */ + for (i = 0; i < n; i++) + cntrs[i] = current_cntrs[i] - gpu->last_cntrs[i]; + + /* save current values: */ + for (i = 0; i < gpu->num_perfcntrs; i++) + gpu->last_cntrs[i] = current_cntrs[i]; + + return n; +} + +static void update_sw_cntrs(struct msm_gpu *gpu) +{ + ktime_t time; + uint32_t elapsed; + unsigned long flags; + + spin_lock_irqsave(&gpu->perf_lock, flags); + if (!gpu->perfcntr_active) + goto out; + + time = ktime_get(); + elapsed = ktime_to_us(ktime_sub(time, gpu->last_sample.time)); + + gpu->totaltime += elapsed; + if (gpu->last_sample.active) + gpu->activetime += elapsed; + + gpu->last_sample.active = msm_gpu_active(gpu); + gpu->last_sample.time = time; + +out: + spin_unlock_irqrestore(&gpu->perf_lock, flags); +} + +void msm_gpu_perfcntr_start(struct msm_gpu *gpu) +{ + unsigned long flags; + + spin_lock_irqsave(&gpu->perf_lock, flags); + /* we could dynamically enable/disable perfcntr registers too.. */ + gpu->last_sample.active = msm_gpu_active(gpu); + gpu->last_sample.time = ktime_get(); + gpu->activetime = gpu->totaltime = 0; + gpu->perfcntr_active = true; + update_hw_cntrs(gpu, 0, NULL); + spin_unlock_irqrestore(&gpu->perf_lock, flags); +} + +void msm_gpu_perfcntr_stop(struct msm_gpu *gpu) +{ + gpu->perfcntr_active = false; +} + +/* returns -errno or # of cntrs sampled */ +int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime, + uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&gpu->perf_lock, flags); + + if (!gpu->perfcntr_active) { + ret = -EINVAL; + goto out; + } + + *activetime = gpu->activetime; + *totaltime = gpu->totaltime; + + gpu->activetime = gpu->totaltime = 0; + + ret = update_hw_cntrs(gpu, ncntrs, cntrs); + +out: + spin_unlock_irqrestore(&gpu->perf_lock, flags); + + return ret; +} + +/* * Cmdstream submission/retirement: */ @@ -281,6 +446,9 @@ static void retire_worker(struct work_struct *work) } mutex_unlock(&dev->struct_mutex); + + if (!msm_gpu_active(gpu)) + inactive_start(gpu); } /* call from irq handler to schedule work to retire bo's */ @@ -288,6 +456,7 @@ void msm_gpu_retire(struct msm_gpu *gpu) { struct msm_drm_private *priv = gpu->dev->dev_private; queue_work(priv->wq, &gpu->retire_work); + update_sw_cntrs(gpu); } /* add bo's to gpu's ring, and kick gpu: */ @@ -298,12 +467,18 @@ int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_drm_private *priv = dev->dev_private; int i, ret; - mutex_lock(&dev->struct_mutex); - submit->fence = ++priv->next_fence; gpu->submitted_fence = submit->fence; + inactive_cancel(gpu); + + msm_rd_dump_submit(submit); + + gpu->submitted_fence = submit->fence; + + update_sw_cntrs(gpu); + ret = gpu->funcs->submit(gpu, submit, ctx); priv->lastctx = ctx; @@ -331,7 +506,6 @@ int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, msm_gem_move_to_active(&msm_obj->base, gpu, true, submit->fence); } hangcheck_timer_reset(gpu); - mutex_unlock(&dev->struct_mutex); return ret; } @@ -357,17 +531,26 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, struct iommu_domain *iommu; int i, ret; + if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs))) + gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs); + gpu->dev = drm; gpu->funcs = funcs; gpu->name = name; + gpu->inactive = true; INIT_LIST_HEAD(&gpu->active_list); INIT_WORK(&gpu->retire_work, retire_worker); + INIT_WORK(&gpu->inactive_work, inactive_worker); INIT_WORK(&gpu->recover_work, recover_worker); + setup_timer(&gpu->inactive_timer, inactive_handler, + (unsigned long)gpu); setup_timer(&gpu->hangcheck_timer, hangcheck_handler, (unsigned long)gpu); + spin_lock_init(&gpu->perf_lock); + BUG_ON(ARRAY_SIZE(clk_names) != ARRAY_SIZE(gpu->grp_clks)); /* Map registers: */ diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index 458db8c64c2..9b579b79284 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -25,6 +25,7 @@ #include "msm_ringbuffer.h" struct msm_gem_submit; +struct msm_gpu_perfcntr; /* So far, with hardware that I've seen to date, we can have: * + zero, one, or two z180 2d cores @@ -64,6 +65,18 @@ struct msm_gpu { struct drm_device *dev; const struct msm_gpu_funcs *funcs; + /* performance counters (hw & sw): */ + spinlock_t perf_lock; + bool perfcntr_active; + struct { + bool active; + ktime_t time; + } last_sample; + uint32_t totaltime, activetime; /* sw counters */ + uint32_t last_cntrs[5]; /* hw counters */ + const struct msm_gpu_perfcntr *perfcntrs; + uint32_t num_perfcntrs; + struct msm_ringbuffer *rb; uint32_t rb_iova; @@ -72,6 +85,10 @@ struct msm_gpu { uint32_t submitted_fence; + /* is gpu powered/active? */ + int active_cnt; + bool inactive; + /* worker for handling active-list retiring: */ struct work_struct retire_work; @@ -91,7 +108,12 @@ struct msm_gpu { uint32_t bsc; #endif - /* Hang Detction: */ + /* Hang and Inactivity Detection: + */ +#define DRM_MSM_INACTIVE_PERIOD 66 /* in ms (roughly four frames) */ +#define DRM_MSM_INACTIVE_JIFFIES msecs_to_jiffies(DRM_MSM_INACTIVE_PERIOD) + struct timer_list inactive_timer; + struct work_struct inactive_work; #define DRM_MSM_HANGCHECK_PERIOD 500 /* in ms */ #define DRM_MSM_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_MSM_HANGCHECK_PERIOD) struct timer_list hangcheck_timer; @@ -99,6 +121,24 @@ struct msm_gpu { struct work_struct recover_work; }; +static inline bool msm_gpu_active(struct msm_gpu *gpu) +{ + return gpu->submitted_fence > gpu->funcs->last_fence(gpu); +} + +/* Perf-Counters: + * The select_reg and select_val are just there for the benefit of the child + * class that actually enables the perf counter.. but msm_gpu base class + * will handle sampling/displaying the counters. + */ + +struct msm_gpu_perfcntr { + uint32_t select_reg; + uint32_t sample_reg; + uint32_t select_val; + const char *name; +}; + static inline void gpu_write(struct msm_gpu *gpu, u32 reg, u32 data) { msm_writel(data, gpu->mmio + (reg << 2)); @@ -112,6 +152,11 @@ static inline u32 gpu_read(struct msm_gpu *gpu, u32 reg) int msm_gpu_pm_suspend(struct msm_gpu *gpu); int msm_gpu_pm_resume(struct msm_gpu *gpu); +void msm_gpu_perfcntr_start(struct msm_gpu *gpu); +void msm_gpu_perfcntr_stop(struct msm_gpu *gpu); +int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime, + uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs); + void msm_gpu_retire(struct msm_gpu *gpu); int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_file_private *ctx); diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index 92b74598623..4b2ad9181ed 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -28,7 +28,7 @@ static int msm_fault_handler(struct iommu_domain *iommu, struct device *dev, unsigned long iova, int flags, void *arg) { DBG("*** fault: iova=%08lx, flags=%d", iova, flags); - return 0; + return -ENOSYS; } static int msm_iommu_attach(struct msm_mmu *mmu, const char **names, int cnt) @@ -40,8 +40,10 @@ static int msm_iommu_attach(struct msm_mmu *mmu, const char **names, int cnt) for (i = 0; i < cnt; i++) { struct device *msm_iommu_get_ctx(const char *ctx_name); struct device *ctx = msm_iommu_get_ctx(names[i]); - if (IS_ERR_OR_NULL(ctx)) + if (IS_ERR_OR_NULL(ctx)) { + dev_warn(dev->dev, "couldn't get %s context", names[i]); continue; + } ret = iommu_attach_device(iommu->domain, ctx); if (ret) { dev_warn(dev->dev, "could not attach iommu to %s", names[i]); @@ -52,6 +54,20 @@ static int msm_iommu_attach(struct msm_mmu *mmu, const char **names, int cnt) return 0; } +static void msm_iommu_detach(struct msm_mmu *mmu, const char **names, int cnt) +{ + struct msm_iommu *iommu = to_msm_iommu(mmu); + int i; + + for (i = 0; i < cnt; i++) { + struct device *msm_iommu_get_ctx(const char *ctx_name); + struct device *ctx = msm_iommu_get_ctx(names[i]); + if (IS_ERR_OR_NULL(ctx)) + continue; + iommu_detach_device(iommu->domain, ctx); + } +} + static int msm_iommu_map(struct msm_mmu *mmu, uint32_t iova, struct sg_table *sgt, unsigned len, int prot) { @@ -110,7 +126,7 @@ static int msm_iommu_unmap(struct msm_mmu *mmu, uint32_t iova, VERB("unmap[%d]: %08x(%x)", i, iova, bytes); - BUG_ON(!IS_ALIGNED(bytes, PAGE_SIZE)); + BUG_ON(!PAGE_ALIGNED(bytes)); da += bytes; } @@ -127,6 +143,7 @@ static void msm_iommu_destroy(struct msm_mmu *mmu) static const struct msm_mmu_funcs funcs = { .attach = msm_iommu_attach, + .detach = msm_iommu_detach, .map = msm_iommu_map, .unmap = msm_iommu_unmap, .destroy = msm_iommu_destroy, diff --git a/drivers/gpu/drm/msm/msm_mmu.h b/drivers/gpu/drm/msm/msm_mmu.h index 030324482b4..21da6d154f7 100644 --- a/drivers/gpu/drm/msm/msm_mmu.h +++ b/drivers/gpu/drm/msm/msm_mmu.h @@ -22,6 +22,7 @@ struct msm_mmu_funcs { int (*attach)(struct msm_mmu *mmu, const char **names, int cnt); + void (*detach)(struct msm_mmu *mmu, const char **names, int cnt); int (*map)(struct msm_mmu *mmu, uint32_t iova, struct sg_table *sgt, unsigned len, int prot); int (*unmap)(struct msm_mmu *mmu, uint32_t iova, struct sg_table *sgt, diff --git a/drivers/gpu/drm/msm/msm_perf.c b/drivers/gpu/drm/msm/msm_perf.c new file mode 100644 index 00000000000..830857c47c8 --- /dev/null +++ b/drivers/gpu/drm/msm/msm_perf.c @@ -0,0 +1,275 @@ +/* + * Copyright (C) 2013 Red Hat + * Author: Rob Clark <robdclark@gmail.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/* For profiling, userspace can: + * + * tail -f /sys/kernel/debug/dri/<minor>/gpu + * + * This will enable performance counters/profiling to track the busy time + * and any gpu specific performance counters that are supported. + */ + +#ifdef CONFIG_DEBUG_FS + +#include <linux/debugfs.h> + +#include "msm_drv.h" +#include "msm_gpu.h" + +struct msm_perf_state { + struct drm_device *dev; + + bool open; + int cnt; + struct mutex read_lock; + + char buf[256]; + int buftot, bufpos; + + unsigned long next_jiffies; + + struct dentry *ent; + struct drm_info_node *node; +}; + +#define SAMPLE_TIME (HZ/4) + +/* wait for next sample time: */ +static int wait_sample(struct msm_perf_state *perf) +{ + unsigned long start_jiffies = jiffies; + + if (time_after(perf->next_jiffies, start_jiffies)) { + unsigned long remaining_jiffies = + perf->next_jiffies - start_jiffies; + int ret = schedule_timeout_interruptible(remaining_jiffies); + if (ret > 0) { + /* interrupted */ + return -ERESTARTSYS; + } + } + perf->next_jiffies += SAMPLE_TIME; + return 0; +} + +static int refill_buf(struct msm_perf_state *perf) +{ + struct msm_drm_private *priv = perf->dev->dev_private; + struct msm_gpu *gpu = priv->gpu; + char *ptr = perf->buf; + int rem = sizeof(perf->buf); + int i, n; + + if ((perf->cnt++ % 32) == 0) { + /* Header line: */ + n = snprintf(ptr, rem, "%%BUSY"); + ptr += n; + rem -= n; + + for (i = 0; i < gpu->num_perfcntrs; i++) { + const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i]; + n = snprintf(ptr, rem, "\t%s", perfcntr->name); + ptr += n; + rem -= n; + } + } else { + /* Sample line: */ + uint32_t activetime = 0, totaltime = 0; + uint32_t cntrs[5]; + uint32_t val; + int ret; + + /* sleep until next sample time: */ + ret = wait_sample(perf); + if (ret) + return ret; + + ret = msm_gpu_perfcntr_sample(gpu, &activetime, &totaltime, + ARRAY_SIZE(cntrs), cntrs); + if (ret < 0) + return ret; + + val = totaltime ? 1000 * activetime / totaltime : 0; + n = snprintf(ptr, rem, "%3d.%d%%", val / 10, val % 10); + ptr += n; + rem -= n; + + for (i = 0; i < ret; i++) { + /* cycle counters (I think).. convert to MHz.. */ + val = cntrs[i] / 10000; + n = snprintf(ptr, rem, "\t%5d.%02d", + val / 100, val % 100); + ptr += n; + rem -= n; + } + } + + n = snprintf(ptr, rem, "\n"); + ptr += n; + rem -= n; + + perf->bufpos = 0; + perf->buftot = ptr - perf->buf; + + return 0; +} + +static ssize_t perf_read(struct file *file, char __user *buf, + size_t sz, loff_t *ppos) +{ + struct msm_perf_state *perf = file->private_data; + int n = 0, ret; + + mutex_lock(&perf->read_lock); + + if (perf->bufpos >= perf->buftot) { + ret = refill_buf(perf); + if (ret) + goto out; + } + + n = min((int)sz, perf->buftot - perf->bufpos); + ret = copy_to_user(buf, &perf->buf[perf->bufpos], n); + if (ret) + goto out; + + perf->bufpos += n; + *ppos += n; + +out: + mutex_unlock(&perf->read_lock); + if (ret) + return ret; + return n; +} + +static int perf_open(struct inode *inode, struct file *file) +{ + struct msm_perf_state *perf = inode->i_private; + struct drm_device *dev = perf->dev; + struct msm_drm_private *priv = dev->dev_private; + struct msm_gpu *gpu = priv->gpu; + int ret = 0; + + mutex_lock(&dev->struct_mutex); + + if (perf->open || !gpu) { + ret = -EBUSY; + goto out; + } + + file->private_data = perf; + perf->open = true; + perf->cnt = 0; + perf->buftot = 0; + perf->bufpos = 0; + msm_gpu_perfcntr_start(gpu); + perf->next_jiffies = jiffies + SAMPLE_TIME; + +out: + mutex_unlock(&dev->struct_mutex); + return ret; +} + +static int perf_release(struct inode *inode, struct file *file) +{ + struct msm_perf_state *perf = inode->i_private; + struct msm_drm_private *priv = perf->dev->dev_private; + msm_gpu_perfcntr_stop(priv->gpu); + perf->open = false; + return 0; +} + + +static const struct file_operations perf_debugfs_fops = { + .owner = THIS_MODULE, + .open = perf_open, + .read = perf_read, + .llseek = no_llseek, + .release = perf_release, +}; + +int msm_perf_debugfs_init(struct drm_minor *minor) +{ + struct msm_drm_private *priv = minor->dev->dev_private; + struct msm_perf_state *perf; + + /* only create on first minor: */ + if (priv->perf) + return 0; + + perf = kzalloc(sizeof(*perf), GFP_KERNEL); + if (!perf) + return -ENOMEM; + + perf->dev = minor->dev; + + mutex_init(&perf->read_lock); + priv->perf = perf; + + perf->node = kzalloc(sizeof(*perf->node), GFP_KERNEL); + if (!perf->node) + goto fail; + + perf->ent = debugfs_create_file("perf", S_IFREG | S_IRUGO, + minor->debugfs_root, perf, &perf_debugfs_fops); + if (!perf->ent) { + DRM_ERROR("Cannot create /sys/kernel/debug/dri/%s/perf\n", + minor->debugfs_root->d_name.name); + goto fail; + } + + perf->node->minor = minor; + perf->node->dent = perf->ent; + perf->node->info_ent = NULL; + + mutex_lock(&minor->debugfs_lock); + list_add(&perf->node->list, &minor->debugfs_list); + mutex_unlock(&minor->debugfs_lock); + + return 0; + +fail: + msm_perf_debugfs_cleanup(minor); + return -1; +} + +void msm_perf_debugfs_cleanup(struct drm_minor *minor) +{ + struct msm_drm_private *priv = minor->dev->dev_private; + struct msm_perf_state *perf = priv->perf; + + if (!perf) + return; + + priv->perf = NULL; + + debugfs_remove(perf->ent); + + if (perf->node) { + mutex_lock(&minor->debugfs_lock); + list_del(&perf->node->list); + mutex_unlock(&minor->debugfs_lock); + kfree(perf->node); + } + + mutex_destroy(&perf->read_lock); + + kfree(perf); +} + +#endif diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c new file mode 100644 index 00000000000..9a78c48817c --- /dev/null +++ b/drivers/gpu/drm/msm/msm_rd.c @@ -0,0 +1,337 @@ +/* + * Copyright (C) 2013 Red Hat + * Author: Rob Clark <robdclark@gmail.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/* For debugging crashes, userspace can: + * + * tail -f /sys/kernel/debug/dri/<minor>/rd > logfile.rd + * + * To log the cmdstream in a format that is understood by freedreno/cffdump + * utility. By comparing the last successfully completed fence #, to the + * cmdstream for the next fence, you can narrow down which process and submit + * caused the gpu crash/lockup. + * + * This bypasses drm_debugfs_create_files() mainly because we need to use + * our own fops for a bit more control. In particular, we don't want to + * do anything if userspace doesn't have the debugfs file open. + */ + +#ifdef CONFIG_DEBUG_FS + +#include <linux/kfifo.h> +#include <linux/debugfs.h> +#include <linux/circ_buf.h> +#include <linux/wait.h> + +#include "msm_drv.h" +#include "msm_gpu.h" +#include "msm_gem.h" + +enum rd_sect_type { + RD_NONE, + RD_TEST, /* ascii text */ + RD_CMD, /* ascii text */ + RD_GPUADDR, /* u32 gpuaddr, u32 size */ + RD_CONTEXT, /* raw dump */ + RD_CMDSTREAM, /* raw dump */ + RD_CMDSTREAM_ADDR, /* gpu addr of cmdstream */ + RD_PARAM, /* u32 param_type, u32 param_val, u32 bitlen */ + RD_FLUSH, /* empty, clear previous params */ + RD_PROGRAM, /* shader program, raw dump */ + RD_VERT_SHADER, + RD_FRAG_SHADER, + RD_BUFFER_CONTENTS, + RD_GPU_ID, +}; + +#define BUF_SZ 512 /* should be power of 2 */ + +/* space used: */ +#define circ_count(circ) \ + (CIRC_CNT((circ)->head, (circ)->tail, BUF_SZ)) +#define circ_count_to_end(circ) \ + (CIRC_CNT_TO_END((circ)->head, (circ)->tail, BUF_SZ)) +/* space available: */ +#define circ_space(circ) \ + (CIRC_SPACE((circ)->head, (circ)->tail, BUF_SZ)) +#define circ_space_to_end(circ) \ + (CIRC_SPACE_TO_END((circ)->head, (circ)->tail, BUF_SZ)) + +struct msm_rd_state { + struct drm_device *dev; + + bool open; + + struct dentry *ent; + struct drm_info_node *node; + + /* current submit to read out: */ + struct msm_gem_submit *submit; + + /* fifo access is synchronized on the producer side by + * struct_mutex held by submit code (otherwise we could + * end up w/ cmds logged in different order than they + * were executed). And read_lock synchronizes the reads + */ + struct mutex read_lock; + + wait_queue_head_t fifo_event; + struct circ_buf fifo; + + char buf[BUF_SZ]; +}; + +static void rd_write(struct msm_rd_state *rd, const void *buf, int sz) +{ + struct circ_buf *fifo = &rd->fifo; + const char *ptr = buf; + + while (sz > 0) { + char *fptr = &fifo->buf[fifo->head]; + int n; + + wait_event(rd->fifo_event, circ_space(&rd->fifo) > 0); + + n = min(sz, circ_space_to_end(&rd->fifo)); + memcpy(fptr, ptr, n); + + fifo->head = (fifo->head + n) & (BUF_SZ - 1); + sz -= n; + ptr += n; + + wake_up_all(&rd->fifo_event); + } +} + +static void rd_write_section(struct msm_rd_state *rd, + enum rd_sect_type type, const void *buf, int sz) +{ + rd_write(rd, &type, 4); + rd_write(rd, &sz, 4); + rd_write(rd, buf, sz); +} + +static ssize_t rd_read(struct file *file, char __user *buf, + size_t sz, loff_t *ppos) +{ + struct msm_rd_state *rd = file->private_data; + struct circ_buf *fifo = &rd->fifo; + const char *fptr = &fifo->buf[fifo->tail]; + int n = 0, ret = 0; + + mutex_lock(&rd->read_lock); + + ret = wait_event_interruptible(rd->fifo_event, + circ_count(&rd->fifo) > 0); + if (ret) + goto out; + + n = min_t(int, sz, circ_count_to_end(&rd->fifo)); + ret = copy_to_user(buf, fptr, n); + if (ret) + goto out; + + fifo->tail = (fifo->tail + n) & (BUF_SZ - 1); + *ppos += n; + + wake_up_all(&rd->fifo_event); + +out: + mutex_unlock(&rd->read_lock); + if (ret) + return ret; + return n; +} + +static int rd_open(struct inode *inode, struct file *file) +{ + struct msm_rd_state *rd = inode->i_private; + struct drm_device *dev = rd->dev; + struct msm_drm_private *priv = dev->dev_private; + struct msm_gpu *gpu = priv->gpu; + uint64_t val; + uint32_t gpu_id; + int ret = 0; + + mutex_lock(&dev->struct_mutex); + + if (rd->open || !gpu) { + ret = -EBUSY; + goto out; + } + + file->private_data = rd; + rd->open = true; + + /* the parsing tools need to know gpu-id to know which + * register database to load. + */ + gpu->funcs->get_param(gpu, MSM_PARAM_GPU_ID, &val); + gpu_id = val; + + rd_write_section(rd, RD_GPU_ID, &gpu_id, sizeof(gpu_id)); + +out: + mutex_unlock(&dev->struct_mutex); + return ret; +} + +static int rd_release(struct inode *inode, struct file *file) +{ + struct msm_rd_state *rd = inode->i_private; + rd->open = false; + return 0; +} + + +static const struct file_operations rd_debugfs_fops = { + .owner = THIS_MODULE, + .open = rd_open, + .read = rd_read, + .llseek = no_llseek, + .release = rd_release, +}; + +int msm_rd_debugfs_init(struct drm_minor *minor) +{ + struct msm_drm_private *priv = minor->dev->dev_private; + struct msm_rd_state *rd; + + /* only create on first minor: */ + if (priv->rd) + return 0; + + rd = kzalloc(sizeof(*rd), GFP_KERNEL); + if (!rd) + return -ENOMEM; + + rd->dev = minor->dev; + rd->fifo.buf = rd->buf; + + mutex_init(&rd->read_lock); + priv->rd = rd; + + init_waitqueue_head(&rd->fifo_event); + + rd->node = kzalloc(sizeof(*rd->node), GFP_KERNEL); + if (!rd->node) + goto fail; + + rd->ent = debugfs_create_file("rd", S_IFREG | S_IRUGO, + minor->debugfs_root, rd, &rd_debugfs_fops); + if (!rd->ent) { + DRM_ERROR("Cannot create /sys/kernel/debug/dri/%s/rd\n", + minor->debugfs_root->d_name.name); + goto fail; + } + + rd->node->minor = minor; + rd->node->dent = rd->ent; + rd->node->info_ent = NULL; + + mutex_lock(&minor->debugfs_lock); + list_add(&rd->node->list, &minor->debugfs_list); + mutex_unlock(&minor->debugfs_lock); + + return 0; + +fail: + msm_rd_debugfs_cleanup(minor); + return -1; +} + +void msm_rd_debugfs_cleanup(struct drm_minor *minor) +{ + struct msm_drm_private *priv = minor->dev->dev_private; + struct msm_rd_state *rd = priv->rd; + + if (!rd) + return; + + priv->rd = NULL; + + debugfs_remove(rd->ent); + + if (rd->node) { + mutex_lock(&minor->debugfs_lock); + list_del(&rd->node->list); + mutex_unlock(&minor->debugfs_lock); + kfree(rd->node); + } + + mutex_destroy(&rd->read_lock); + + kfree(rd); +} + +/* called under struct_mutex */ +void msm_rd_dump_submit(struct msm_gem_submit *submit) +{ + struct drm_device *dev = submit->dev; + struct msm_drm_private *priv = dev->dev_private; + struct msm_rd_state *rd = priv->rd; + char msg[128]; + int i, n; + + if (!rd->open) + return; + + /* writing into fifo is serialized by caller, and + * rd->read_lock is used to serialize the reads + */ + WARN_ON(!mutex_is_locked(&dev->struct_mutex)); + + n = snprintf(msg, sizeof(msg), "%.*s/%d: fence=%u", + TASK_COMM_LEN, current->comm, task_pid_nr(current), + submit->fence); + + rd_write_section(rd, RD_CMD, msg, ALIGN(n, 4)); + + /* could be nice to have an option (module-param?) to snapshot + * all the bo's associated with the submit. Handy to see vtx + * buffers, etc. For now just the cmdstream bo's is enough. + */ + + for (i = 0; i < submit->nr_cmds; i++) { + uint32_t idx = submit->cmd[i].idx; + uint32_t iova = submit->cmd[i].iova; + uint32_t szd = submit->cmd[i].size; /* in dwords */ + struct msm_gem_object *obj = submit->bos[idx].obj; + const char *buf = msm_gem_vaddr_locked(&obj->base); + + buf += iova - submit->bos[idx].iova; + + rd_write_section(rd, RD_GPUADDR, + (uint32_t[2]){ iova, szd * 4 }, 8); + rd_write_section(rd, RD_BUFFER_CONTENTS, + buf, szd * 4); + + switch (submit->cmd[i].type) { + case MSM_SUBMIT_CMD_IB_TARGET_BUF: + /* ignore IB-targets, we've logged the buffer, the + * parser tool will follow the IB based on the logged + * buffer/gpuaddr, so nothing more to do. + */ + break; + case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: + case MSM_SUBMIT_CMD_BUF: + rd_write_section(rd, RD_CMDSTREAM_ADDR, + (uint32_t[2]){ iova, szd }, 8); + break; + } + } +} +#endif |
