diff options
Diffstat (limited to 'arch/arm/mm/cache-l2x0.c')
-rw-r--r-- | arch/arm/mm/cache-l2x0.c | 1498 |
1 files changed, 1001 insertions, 497 deletions
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 7abde2ce897..efc5cabf70e 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -16,18 +16,33 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include <linux/cpu.h> #include <linux/err.h> #include <linux/init.h> +#include <linux/smp.h> #include <linux/spinlock.h> #include <linux/io.h> #include <linux/of.h> #include <linux/of_address.h> #include <asm/cacheflush.h> +#include <asm/cp15.h> +#include <asm/cputype.h> #include <asm/hardware/cache-l2x0.h> #include "cache-tauros3.h" #include "cache-aurora-l2.h" +struct l2c_init_data { + const char *type; + unsigned way_size_0; + unsigned num_lock; + void (*of_parse)(const struct device_node *, u32 *, u32 *); + void (*enable)(void __iomem *, u32, unsigned); + void (*fixup)(void __iomem *, u32, struct outer_cache_fns *); + void (*save)(void __iomem *); + struct outer_cache_fns outer_cache; +}; + #define CACHE_LINE_SIZE 32 static void __iomem *l2x0_base; @@ -36,96 +51,116 @@ static u32 l2x0_way_mask; /* Bitmask of active ways */ static u32 l2x0_size; static unsigned long sync_reg_offset = L2X0_CACHE_SYNC; -/* Aurora don't have the cache ID register available, so we have to - * pass it though the device tree */ -static u32 cache_id_part_number_from_dt; - struct l2x0_regs l2x0_saved_regs; -struct l2x0_of_data { - void (*setup)(const struct device_node *, u32 *, u32 *); - void (*save)(void); - struct outer_cache_fns outer_cache; -}; - -static bool of_init = false; - -static inline void cache_wait_way(void __iomem *reg, unsigned long mask) +/* + * Common code for all cache controllers. + */ +static inline void l2c_wait_mask(void __iomem *reg, unsigned long mask) { /* wait for cache operation by line or way to complete */ while (readl_relaxed(reg) & mask) cpu_relax(); } -#ifdef CONFIG_CACHE_PL310 -static inline void cache_wait(void __iomem *reg, unsigned long mask) +/* + * By default, we write directly to secure registers. Platforms must + * override this if they are running non-secure. + */ +static void l2c_write_sec(unsigned long val, void __iomem *base, unsigned reg) { - /* cache operations by line are atomic on PL310 */ + if (val == readl_relaxed(base + reg)) + return; + if (outer_cache.write_sec) + outer_cache.write_sec(val, reg); + else + writel_relaxed(val, base + reg); } -#else -#define cache_wait cache_wait_way -#endif -static inline void cache_sync(void) +/* + * This should only be called when we have a requirement that the + * register be written due to a work-around, as platforms running + * in non-secure mode may not be able to access this register. + */ +static inline void l2c_set_debug(void __iomem *base, unsigned long val) { - void __iomem *base = l2x0_base; - - writel_relaxed(0, base + sync_reg_offset); - cache_wait(base + L2X0_CACHE_SYNC, 1); + l2c_write_sec(val, base, L2X0_DEBUG_CTRL); } -static inline void l2x0_clean_line(unsigned long addr) +static void __l2c_op_way(void __iomem *reg) { - void __iomem *base = l2x0_base; - cache_wait(base + L2X0_CLEAN_LINE_PA, 1); - writel_relaxed(addr, base + L2X0_CLEAN_LINE_PA); + writel_relaxed(l2x0_way_mask, reg); + l2c_wait_mask(reg, l2x0_way_mask); } -static inline void l2x0_inv_line(unsigned long addr) +static inline void l2c_unlock(void __iomem *base, unsigned num) { - void __iomem *base = l2x0_base; - cache_wait(base + L2X0_INV_LINE_PA, 1); - writel_relaxed(addr, base + L2X0_INV_LINE_PA); + unsigned i; + + for (i = 0; i < num; i++) { + writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_D_BASE + + i * L2X0_LOCKDOWN_STRIDE); + writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_I_BASE + + i * L2X0_LOCKDOWN_STRIDE); + } } -#if defined(CONFIG_PL310_ERRATA_588369) || defined(CONFIG_PL310_ERRATA_727915) -static inline void debug_writel(unsigned long val) +/* + * Enable the L2 cache controller. This function must only be + * called when the cache controller is known to be disabled. + */ +static void l2c_enable(void __iomem *base, u32 aux, unsigned num_lock) { - if (outer_cache.set_debug) - outer_cache.set_debug(val); + unsigned long flags; + + l2c_write_sec(aux, base, L2X0_AUX_CTRL); + + l2c_unlock(base, num_lock); + + local_irq_save(flags); + __l2c_op_way(base + L2X0_INV_WAY); + writel_relaxed(0, base + sync_reg_offset); + l2c_wait_mask(base + sync_reg_offset, 1); + local_irq_restore(flags); + + l2c_write_sec(L2X0_CTRL_EN, base, L2X0_CTRL); } -static void pl310_set_debug(unsigned long val) +static void l2c_disable(void) { - writel_relaxed(val, l2x0_base + L2X0_DEBUG_CTRL); + void __iomem *base = l2x0_base; + + outer_cache.flush_all(); + l2c_write_sec(0, base, L2X0_CTRL); + dsb(st); } -#else -/* Optimised out for non-errata case */ -static inline void debug_writel(unsigned long val) + +#ifdef CONFIG_CACHE_PL310 +static inline void cache_wait(void __iomem *reg, unsigned long mask) { + /* cache operations by line are atomic on PL310 */ } - -#define pl310_set_debug NULL +#else +#define cache_wait l2c_wait_mask #endif -#ifdef CONFIG_PL310_ERRATA_588369 -static inline void l2x0_flush_line(unsigned long addr) +static inline void cache_sync(void) { void __iomem *base = l2x0_base; - /* Clean by PA followed by Invalidate by PA */ - cache_wait(base + L2X0_CLEAN_LINE_PA, 1); - writel_relaxed(addr, base + L2X0_CLEAN_LINE_PA); - cache_wait(base + L2X0_INV_LINE_PA, 1); - writel_relaxed(addr, base + L2X0_INV_LINE_PA); + writel_relaxed(0, base + sync_reg_offset); + cache_wait(base + L2X0_CACHE_SYNC, 1); } -#else -static inline void l2x0_flush_line(unsigned long addr) +#if defined(CONFIG_PL310_ERRATA_588369) || defined(CONFIG_PL310_ERRATA_727915) +static inline void debug_writel(unsigned long val) +{ + l2c_set_debug(l2x0_base, val); +} +#else +/* Optimised out for non-errata case */ +static inline void debug_writel(unsigned long val) { - void __iomem *base = l2x0_base; - cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1); - writel_relaxed(addr, base + L2X0_CLEAN_INV_LINE_PA); } #endif @@ -141,8 +176,7 @@ static void l2x0_cache_sync(void) static void __l2x0_flush_all(void) { debug_writel(0x03); - writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_CLEAN_INV_WAY); - cache_wait_way(l2x0_base + L2X0_CLEAN_INV_WAY, l2x0_way_mask); + __l2c_op_way(l2x0_base + L2X0_CLEAN_INV_WAY); cache_sync(); debug_writel(0x00); } @@ -157,275 +191,883 @@ static void l2x0_flush_all(void) raw_spin_unlock_irqrestore(&l2x0_lock, flags); } -static void l2x0_clean_all(void) +static void l2x0_disable(void) { unsigned long flags; - /* clean all ways */ raw_spin_lock_irqsave(&l2x0_lock, flags); - writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_CLEAN_WAY); - cache_wait_way(l2x0_base + L2X0_CLEAN_WAY, l2x0_way_mask); - cache_sync(); + __l2x0_flush_all(); + l2c_write_sec(0, l2x0_base, L2X0_CTRL); + dsb(st); raw_spin_unlock_irqrestore(&l2x0_lock, flags); } -static void l2x0_inv_all(void) +static void l2c_save(void __iomem *base) { - unsigned long flags; + l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); +} - /* invalidate all ways */ - raw_spin_lock_irqsave(&l2x0_lock, flags); - /* Invalidating when L2 is enabled is a nono */ - BUG_ON(readl(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN); - writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_INV_WAY); - cache_wait_way(l2x0_base + L2X0_INV_WAY, l2x0_way_mask); - cache_sync(); - raw_spin_unlock_irqrestore(&l2x0_lock, flags); +/* + * L2C-210 specific code. + * + * The L2C-2x0 PA, set/way and sync operations are atomic, but we must + * ensure that no background operation is running. The way operations + * are all background tasks. + * + * While a background operation is in progress, any new operation is + * ignored (unspecified whether this causes an error.) Thankfully, not + * used on SMP. + * + * Never has a different sync register other than L2X0_CACHE_SYNC, but + * we use sync_reg_offset here so we can share some of this with L2C-310. + */ +static void __l2c210_cache_sync(void __iomem *base) +{ + writel_relaxed(0, base + sync_reg_offset); } -static void l2x0_inv_range(unsigned long start, unsigned long end) +static void __l2c210_op_pa_range(void __iomem *reg, unsigned long start, + unsigned long end) +{ + while (start < end) { + writel_relaxed(start, reg); + start += CACHE_LINE_SIZE; + } +} + +static void l2c210_inv_range(unsigned long start, unsigned long end) { void __iomem *base = l2x0_base; - unsigned long flags; - raw_spin_lock_irqsave(&l2x0_lock, flags); if (start & (CACHE_LINE_SIZE - 1)) { start &= ~(CACHE_LINE_SIZE - 1); - debug_writel(0x03); - l2x0_flush_line(start); - debug_writel(0x00); + writel_relaxed(start, base + L2X0_CLEAN_INV_LINE_PA); start += CACHE_LINE_SIZE; } if (end & (CACHE_LINE_SIZE - 1)) { end &= ~(CACHE_LINE_SIZE - 1); - debug_writel(0x03); - l2x0_flush_line(end); - debug_writel(0x00); + writel_relaxed(end, base + L2X0_CLEAN_INV_LINE_PA); } + __l2c210_op_pa_range(base + L2X0_INV_LINE_PA, start, end); + __l2c210_cache_sync(base); +} + +static void l2c210_clean_range(unsigned long start, unsigned long end) +{ + void __iomem *base = l2x0_base; + + start &= ~(CACHE_LINE_SIZE - 1); + __l2c210_op_pa_range(base + L2X0_CLEAN_LINE_PA, start, end); + __l2c210_cache_sync(base); +} + +static void l2c210_flush_range(unsigned long start, unsigned long end) +{ + void __iomem *base = l2x0_base; + + start &= ~(CACHE_LINE_SIZE - 1); + __l2c210_op_pa_range(base + L2X0_CLEAN_INV_LINE_PA, start, end); + __l2c210_cache_sync(base); +} + +static void l2c210_flush_all(void) +{ + void __iomem *base = l2x0_base; + + BUG_ON(!irqs_disabled()); + + __l2c_op_way(base + L2X0_CLEAN_INV_WAY); + __l2c210_cache_sync(base); +} + +static void l2c210_sync(void) +{ + __l2c210_cache_sync(l2x0_base); +} + +static void l2c210_resume(void) +{ + void __iomem *base = l2x0_base; + + if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) + l2c_enable(base, l2x0_saved_regs.aux_ctrl, 1); +} + +static const struct l2c_init_data l2c210_data __initconst = { + .type = "L2C-210", + .way_size_0 = SZ_8K, + .num_lock = 1, + .enable = l2c_enable, + .save = l2c_save, + .outer_cache = { + .inv_range = l2c210_inv_range, + .clean_range = l2c210_clean_range, + .flush_range = l2c210_flush_range, + .flush_all = l2c210_flush_all, + .disable = l2c_disable, + .sync = l2c210_sync, + .resume = l2c210_resume, + }, +}; + +/* + * L2C-220 specific code. + * + * All operations are background operations: they have to be waited for. + * Conflicting requests generate a slave error (which will cause an + * imprecise abort.) Never uses sync_reg_offset, so we hard-code the + * sync register here. + * + * However, we can re-use the l2c210_resume call. + */ +static inline void __l2c220_cache_sync(void __iomem *base) +{ + writel_relaxed(0, base + L2X0_CACHE_SYNC); + l2c_wait_mask(base + L2X0_CACHE_SYNC, 1); +} + +static void l2c220_op_way(void __iomem *base, unsigned reg) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&l2x0_lock, flags); + __l2c_op_way(base + reg); + __l2c220_cache_sync(base); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static unsigned long l2c220_op_pa_range(void __iomem *reg, unsigned long start, + unsigned long end, unsigned long flags) +{ + raw_spinlock_t *lock = &l2x0_lock; + while (start < end) { unsigned long blk_end = start + min(end - start, 4096UL); while (start < blk_end) { - l2x0_inv_line(start); + l2c_wait_mask(reg, 1); + writel_relaxed(start, reg); start += CACHE_LINE_SIZE; } if (blk_end < end) { - raw_spin_unlock_irqrestore(&l2x0_lock, flags); - raw_spin_lock_irqsave(&l2x0_lock, flags); + raw_spin_unlock_irqrestore(lock, flags); + raw_spin_lock_irqsave(lock, flags); } } - cache_wait(base + L2X0_INV_LINE_PA, 1); - cache_sync(); - raw_spin_unlock_irqrestore(&l2x0_lock, flags); + + return flags; } -static void l2x0_clean_range(unsigned long start, unsigned long end) +static void l2c220_inv_range(unsigned long start, unsigned long end) { void __iomem *base = l2x0_base; unsigned long flags; - if ((end - start) >= l2x0_size) { - l2x0_clean_all(); - return; - } - raw_spin_lock_irqsave(&l2x0_lock, flags); - start &= ~(CACHE_LINE_SIZE - 1); - while (start < end) { - unsigned long blk_end = start + min(end - start, 4096UL); - - while (start < blk_end) { - l2x0_clean_line(start); + if ((start | end) & (CACHE_LINE_SIZE - 1)) { + if (start & (CACHE_LINE_SIZE - 1)) { + start &= ~(CACHE_LINE_SIZE - 1); + writel_relaxed(start, base + L2X0_CLEAN_INV_LINE_PA); start += CACHE_LINE_SIZE; } - if (blk_end < end) { - raw_spin_unlock_irqrestore(&l2x0_lock, flags); - raw_spin_lock_irqsave(&l2x0_lock, flags); + if (end & (CACHE_LINE_SIZE - 1)) { + end &= ~(CACHE_LINE_SIZE - 1); + l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1); + writel_relaxed(end, base + L2X0_CLEAN_INV_LINE_PA); } } - cache_wait(base + L2X0_CLEAN_LINE_PA, 1); - cache_sync(); + + flags = l2c220_op_pa_range(base + L2X0_INV_LINE_PA, + start, end, flags); + l2c_wait_mask(base + L2X0_INV_LINE_PA, 1); + __l2c220_cache_sync(base); raw_spin_unlock_irqrestore(&l2x0_lock, flags); } -static void l2x0_flush_range(unsigned long start, unsigned long end) +static void l2c220_clean_range(unsigned long start, unsigned long end) { void __iomem *base = l2x0_base; unsigned long flags; + start &= ~(CACHE_LINE_SIZE - 1); if ((end - start) >= l2x0_size) { - l2x0_flush_all(); + l2c220_op_way(base, L2X0_CLEAN_WAY); return; } raw_spin_lock_irqsave(&l2x0_lock, flags); + flags = l2c220_op_pa_range(base + L2X0_CLEAN_LINE_PA, + start, end, flags); + l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1); + __l2c220_cache_sync(base); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void l2c220_flush_range(unsigned long start, unsigned long end) +{ + void __iomem *base = l2x0_base; + unsigned long flags; + start &= ~(CACHE_LINE_SIZE - 1); + if ((end - start) >= l2x0_size) { + l2c220_op_way(base, L2X0_CLEAN_INV_WAY); + return; + } + + raw_spin_lock_irqsave(&l2x0_lock, flags); + flags = l2c220_op_pa_range(base + L2X0_CLEAN_INV_LINE_PA, + start, end, flags); + l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1); + __l2c220_cache_sync(base); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void l2c220_flush_all(void) +{ + l2c220_op_way(l2x0_base, L2X0_CLEAN_INV_WAY); +} + +static void l2c220_sync(void) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&l2x0_lock, flags); + __l2c220_cache_sync(l2x0_base); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void l2c220_enable(void __iomem *base, u32 aux, unsigned num_lock) +{ + /* + * Always enable non-secure access to the lockdown registers - + * we write to them as part of the L2C enable sequence so they + * need to be accessible. + */ + aux |= L220_AUX_CTRL_NS_LOCKDOWN; + + l2c_enable(base, aux, num_lock); +} + +static const struct l2c_init_data l2c220_data = { + .type = "L2C-220", + .way_size_0 = SZ_8K, + .num_lock = 1, + .enable = l2c220_enable, + .save = l2c_save, + .outer_cache = { + .inv_range = l2c220_inv_range, + .clean_range = l2c220_clean_range, + .flush_range = l2c220_flush_range, + .flush_all = l2c220_flush_all, + .disable = l2c_disable, + .sync = l2c220_sync, + .resume = l2c210_resume, + }, +}; + +/* + * L2C-310 specific code. + * + * Very similar to L2C-210, the PA, set/way and sync operations are atomic, + * and the way operations are all background tasks. However, issuing an + * operation while a background operation is in progress results in a + * SLVERR response. We can reuse: + * + * __l2c210_cache_sync (using sync_reg_offset) + * l2c210_sync + * l2c210_inv_range (if 588369 is not applicable) + * l2c210_clean_range + * l2c210_flush_range (if 588369 is not applicable) + * l2c210_flush_all (if 727915 is not applicable) + * + * Errata: + * 588369: PL310 R0P0->R1P0, fixed R2P0. + * Affects: all clean+invalidate operations + * clean and invalidate skips the invalidate step, so we need to issue + * separate operations. We also require the above debug workaround + * enclosing this code fragment on affected parts. On unaffected parts, + * we must not use this workaround without the debug register writes + * to avoid exposing a problem similar to 727915. + * + * 727915: PL310 R2P0->R3P0, fixed R3P1. + * Affects: clean+invalidate by way + * clean and invalidate by way runs in the background, and a store can + * hit the line between the clean operation and invalidate operation, + * resulting in the store being lost. + * + * 752271: PL310 R3P0->R3P1-50REL0, fixed R3P2. + * Affects: 8x64-bit (double fill) line fetches + * double fill line fetches can fail to cause dirty data to be evicted + * from the cache before the new data overwrites the second line. + * + * 753970: PL310 R3P0, fixed R3P1. + * Affects: sync + * prevents merging writes after the sync operation, until another L2C + * operation is performed (or a number of other conditions.) + * + * 769419: PL310 R0P0->R3P1, fixed R3P2. + * Affects: store buffer + * store buffer is not automatically drained. + */ +static void l2c310_inv_range_erratum(unsigned long start, unsigned long end) +{ + void __iomem *base = l2x0_base; + + if ((start | end) & (CACHE_LINE_SIZE - 1)) { + unsigned long flags; + + /* Erratum 588369 for both clean+invalidate operations */ + raw_spin_lock_irqsave(&l2x0_lock, flags); + l2c_set_debug(base, 0x03); + + if (start & (CACHE_LINE_SIZE - 1)) { + start &= ~(CACHE_LINE_SIZE - 1); + writel_relaxed(start, base + L2X0_CLEAN_LINE_PA); + writel_relaxed(start, base + L2X0_INV_LINE_PA); + start += CACHE_LINE_SIZE; + } + + if (end & (CACHE_LINE_SIZE - 1)) { + end &= ~(CACHE_LINE_SIZE - 1); + writel_relaxed(end, base + L2X0_CLEAN_LINE_PA); + writel_relaxed(end, base + L2X0_INV_LINE_PA); + } + + l2c_set_debug(base, 0x00); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); + } + + __l2c210_op_pa_range(base + L2X0_INV_LINE_PA, start, end); + __l2c210_cache_sync(base); +} + +static void l2c310_flush_range_erratum(unsigned long start, unsigned long end) +{ + raw_spinlock_t *lock = &l2x0_lock; + unsigned long flags; + void __iomem *base = l2x0_base; + + raw_spin_lock_irqsave(lock, flags); while (start < end) { unsigned long blk_end = start + min(end - start, 4096UL); - debug_writel(0x03); + l2c_set_debug(base, 0x03); while (start < blk_end) { - l2x0_flush_line(start); + writel_relaxed(start, base + L2X0_CLEAN_LINE_PA); + writel_relaxed(start, base + L2X0_INV_LINE_PA); start += CACHE_LINE_SIZE; } - debug_writel(0x00); + l2c_set_debug(base, 0x00); if (blk_end < end) { - raw_spin_unlock_irqrestore(&l2x0_lock, flags); - raw_spin_lock_irqsave(&l2x0_lock, flags); + raw_spin_unlock_irqrestore(lock, flags); + raw_spin_lock_irqsave(lock, flags); } } - cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1); - cache_sync(); - raw_spin_unlock_irqrestore(&l2x0_lock, flags); + raw_spin_unlock_irqrestore(lock, flags); + __l2c210_cache_sync(base); } -static void l2x0_disable(void) +static void l2c310_flush_all_erratum(void) { + void __iomem *base = l2x0_base; unsigned long flags; raw_spin_lock_irqsave(&l2x0_lock, flags); - __l2x0_flush_all(); - writel_relaxed(0, l2x0_base + L2X0_CTRL); - dsb(st); + l2c_set_debug(base, 0x03); + __l2c_op_way(base + L2X0_CLEAN_INV_WAY); + l2c_set_debug(base, 0x00); + __l2c210_cache_sync(base); raw_spin_unlock_irqrestore(&l2x0_lock, flags); } -static void l2x0_unlock(u32 cache_id) +static void __init l2c310_save(void __iomem *base) { - int lockregs; - int i; + unsigned revision; - switch (cache_id & L2X0_CACHE_ID_PART_MASK) { - case L2X0_CACHE_ID_PART_L310: - lockregs = 8; - break; - case AURORA_CACHE_ID: - lockregs = 4; + l2c_save(base); + + l2x0_saved_regs.tag_latency = readl_relaxed(base + + L310_TAG_LATENCY_CTRL); + l2x0_saved_regs.data_latency = readl_relaxed(base + + L310_DATA_LATENCY_CTRL); + l2x0_saved_regs.filter_end = readl_relaxed(base + + L310_ADDR_FILTER_END); + l2x0_saved_regs.filter_start = readl_relaxed(base + + L310_ADDR_FILTER_START); + + revision = readl_relaxed(base + L2X0_CACHE_ID) & + L2X0_CACHE_ID_RTL_MASK; + + /* From r2p0, there is Prefetch offset/control register */ + if (revision >= L310_CACHE_ID_RTL_R2P0) + l2x0_saved_regs.prefetch_ctrl = readl_relaxed(base + + L310_PREFETCH_CTRL); + + /* From r3p0, there is Power control register */ + if (revision >= L310_CACHE_ID_RTL_R3P0) + l2x0_saved_regs.pwr_ctrl = readl_relaxed(base + + L310_POWER_CTRL); +} + +static void l2c310_resume(void) +{ + void __iomem *base = l2x0_base; + + if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) { + unsigned revision; + + /* restore pl310 setup */ + writel_relaxed(l2x0_saved_regs.tag_latency, + base + L310_TAG_LATENCY_CTRL); + writel_relaxed(l2x0_saved_regs.data_latency, + base + L310_DATA_LATENCY_CTRL); + writel_relaxed(l2x0_saved_regs.filter_end, + base + L310_ADDR_FILTER_END); + writel_relaxed(l2x0_saved_regs.filter_start, + base + L310_ADDR_FILTER_START); + + revision = readl_relaxed(base + L2X0_CACHE_ID) & + L2X0_CACHE_ID_RTL_MASK; + + if (revision >= L310_CACHE_ID_RTL_R2P0) + l2c_write_sec(l2x0_saved_regs.prefetch_ctrl, base, + L310_PREFETCH_CTRL); + if (revision >= L310_CACHE_ID_RTL_R3P0) + l2c_write_sec(l2x0_saved_regs.pwr_ctrl, base, + L310_POWER_CTRL); + + l2c_enable(base, l2x0_saved_regs.aux_ctrl, 8); + + /* Re-enable full-line-of-zeros for Cortex-A9 */ + if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO) + set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1)); + } +} + +static int l2c310_cpu_enable_flz(struct notifier_block *nb, unsigned long act, void *data) +{ + switch (act & ~CPU_TASKS_FROZEN) { + case CPU_STARTING: + set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1)); break; - default: - /* L210 and unknown types */ - lockregs = 1; + case CPU_DYING: + set_auxcr(get_auxcr() & ~(BIT(3) | BIT(2) | BIT(1))); break; } + return NOTIFY_OK; +} - for (i = 0; i < lockregs; i++) { - writel_relaxed(0x0, l2x0_base + L2X0_LOCKDOWN_WAY_D_BASE + - i * L2X0_LOCKDOWN_STRIDE); - writel_relaxed(0x0, l2x0_base + L2X0_LOCKDOWN_WAY_I_BASE + - i * L2X0_LOCKDOWN_STRIDE); +static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock) +{ + unsigned rev = readl_relaxed(base + L2X0_CACHE_ID) & L2X0_CACHE_ID_PART_MASK; + bool cortex_a9 = read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9; + + if (rev >= L310_CACHE_ID_RTL_R2P0) { + if (cortex_a9) { + aux |= L310_AUX_CTRL_EARLY_BRESP; + pr_info("L2C-310 enabling early BRESP for Cortex-A9\n"); + } else if (aux & L310_AUX_CTRL_EARLY_BRESP) { + pr_warn("L2C-310 early BRESP only supported with Cortex-A9\n"); + aux &= ~L310_AUX_CTRL_EARLY_BRESP; + } + } + + if (cortex_a9) { + u32 aux_cur = readl_relaxed(base + L2X0_AUX_CTRL); + u32 acr = get_auxcr(); + + pr_debug("Cortex-A9 ACR=0x%08x\n", acr); + + if (acr & BIT(3) && !(aux_cur & L310_AUX_CTRL_FULL_LINE_ZERO)) + pr_err("L2C-310: full line of zeros enabled in Cortex-A9 but not L2C-310 - invalid\n"); + + if (aux & L310_AUX_CTRL_FULL_LINE_ZERO && !(acr & BIT(3))) + pr_err("L2C-310: enabling full line of zeros but not enabled in Cortex-A9\n"); + + if (!(aux & L310_AUX_CTRL_FULL_LINE_ZERO) && !outer_cache.write_sec) { + aux |= L310_AUX_CTRL_FULL_LINE_ZERO; + pr_info("L2C-310 full line of zeros enabled for Cortex-A9\n"); + } + } else if (aux & (L310_AUX_CTRL_FULL_LINE_ZERO | L310_AUX_CTRL_EARLY_BRESP)) { + pr_err("L2C-310: disabling Cortex-A9 specific feature bits\n"); + aux &= ~(L310_AUX_CTRL_FULL_LINE_ZERO | L310_AUX_CTRL_EARLY_BRESP); + } + + if (aux & (L310_AUX_CTRL_DATA_PREFETCH | L310_AUX_CTRL_INSTR_PREFETCH)) { + u32 prefetch = readl_relaxed(base + L310_PREFETCH_CTRL); + + pr_info("L2C-310 %s%s prefetch enabled, offset %u lines\n", + aux & L310_AUX_CTRL_INSTR_PREFETCH ? "I" : "", + aux & L310_AUX_CTRL_DATA_PREFETCH ? "D" : "", + 1 + (prefetch & L310_PREFETCH_CTRL_OFFSET_MASK)); + } + + /* r3p0 or later has power control register */ + if (rev >= L310_CACHE_ID_RTL_R3P0) { + u32 power_ctrl; + + l2c_write_sec(L310_DYNAMIC_CLK_GATING_EN | L310_STNDBY_MODE_EN, + base, L310_POWER_CTRL); + power_ctrl = readl_relaxed(base + L310_POWER_CTRL); + pr_info("L2C-310 dynamic clock gating %sabled, standby mode %sabled\n", + power_ctrl & L310_DYNAMIC_CLK_GATING_EN ? "en" : "dis", + power_ctrl & L310_STNDBY_MODE_EN ? "en" : "dis"); + } + + /* + * Always enable non-secure access to the lockdown registers - + * we write to them as part of the L2C enable sequence so they + * need to be accessible. + */ + aux |= L310_AUX_CTRL_NS_LOCKDOWN; + + l2c_enable(base, aux, num_lock); + + if (aux & L310_AUX_CTRL_FULL_LINE_ZERO) { + set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1)); + cpu_notifier(l2c310_cpu_enable_flz, 0); } } -void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask) +static void __init l2c310_fixup(void __iomem *base, u32 cache_id, + struct outer_cache_fns *fns) { - u32 aux; - u32 cache_id; - u32 way_size = 0; - int ways; - int way_size_shift = L2X0_WAY_SIZE_SHIFT; - const char *type; + unsigned revision = cache_id & L2X0_CACHE_ID_RTL_MASK; + const char *errata[8]; + unsigned n = 0; + + if (IS_ENABLED(CONFIG_PL310_ERRATA_588369) && + revision < L310_CACHE_ID_RTL_R2P0 && + /* For bcm compatibility */ + fns->inv_range == l2c210_inv_range) { + fns->inv_range = l2c310_inv_range_erratum; + fns->flush_range = l2c310_flush_range_erratum; + errata[n++] = "588369"; + } - l2x0_base = base; - if (cache_id_part_number_from_dt) - cache_id = cache_id_part_number_from_dt; - else - cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID); - aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); + if (IS_ENABLED(CONFIG_PL310_ERRATA_727915) && + revision >= L310_CACHE_ID_RTL_R2P0 && + revision < L310_CACHE_ID_RTL_R3P1) { + fns->flush_all = l2c310_flush_all_erratum; + errata[n++] = "727915"; + } + + if (revision >= L310_CACHE_ID_RTL_R3P0 && + revision < L310_CACHE_ID_RTL_R3P2) { + u32 val = readl_relaxed(base + L310_PREFETCH_CTRL); + /* I don't think bit23 is required here... but iMX6 does so */ + if (val & (BIT(30) | BIT(23))) { + val &= ~(BIT(30) | BIT(23)); + l2c_write_sec(val, base, L310_PREFETCH_CTRL); + errata[n++] = "752271"; + } + } + + if (IS_ENABLED(CONFIG_PL310_ERRATA_753970) && + revision == L310_CACHE_ID_RTL_R3P0) { + sync_reg_offset = L2X0_DUMMY_REG; + errata[n++] = "753970"; + } + + if (IS_ENABLED(CONFIG_PL310_ERRATA_769419)) + errata[n++] = "769419"; + + if (n) { + unsigned i; + pr_info("L2C-310 errat%s", n > 1 ? "a" : "um"); + for (i = 0; i < n; i++) + pr_cont(" %s", errata[i]); + pr_cont(" enabled\n"); + } +} + +static void l2c310_disable(void) +{ + /* + * If full-line-of-zeros is enabled, we must first disable it in the + * Cortex-A9 auxiliary control register before disabling the L2 cache. + */ + if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO) + set_auxcr(get_auxcr() & ~(BIT(3) | BIT(2) | BIT(1))); + + l2c_disable(); +} + +static const struct l2c_init_data l2c310_init_fns __initconst = { + .type = "L2C-310", + .way_size_0 = SZ_8K, + .num_lock = 8, + .enable = l2c310_enable, + .fixup = l2c310_fixup, + .save = l2c310_save, + .outer_cache = { + .inv_range = l2c210_inv_range, + .clean_range = l2c210_clean_range, + .flush_range = l2c210_flush_range, + .flush_all = l2c210_flush_all, + .disable = l2c310_disable, + .sync = l2c210_sync, + .resume = l2c310_resume, + }, +}; + +static void __init __l2c_init(const struct l2c_init_data *data, + u32 aux_val, u32 aux_mask, u32 cache_id) +{ + struct outer_cache_fns fns; + unsigned way_size_bits, ways; + u32 aux, old_aux; + + /* + * Sanity check the aux values. aux_mask is the bits we preserve + * from reading the hardware register, and aux_val is the bits we + * set. + */ + if (aux_val & aux_mask) + pr_alert("L2C: platform provided aux values permit register corruption.\n"); + + old_aux = aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); aux &= aux_mask; aux |= aux_val; + if (old_aux != aux) + pr_warn("L2C: DT/platform modifies aux control register: 0x%08x -> 0x%08x\n", + old_aux, aux); + /* Determine the number of ways */ switch (cache_id & L2X0_CACHE_ID_PART_MASK) { case L2X0_CACHE_ID_PART_L310: + if ((aux_val | ~aux_mask) & (L2C_AUX_CTRL_WAY_SIZE_MASK | L310_AUX_CTRL_ASSOCIATIVITY_16)) + pr_warn("L2C: DT/platform tries to modify or specify cache size\n"); if (aux & (1 << 16)) ways = 16; else ways = 8; - type = "L310"; -#ifdef CONFIG_PL310_ERRATA_753970 - /* Unmapped register. */ - sync_reg_offset = L2X0_DUMMY_REG; -#endif - if ((cache_id & L2X0_CACHE_ID_RTL_MASK) <= L2X0_CACHE_ID_RTL_R3P0) - outer_cache.set_debug = pl310_set_debug; break; + case L2X0_CACHE_ID_PART_L210: + case L2X0_CACHE_ID_PART_L220: ways = (aux >> 13) & 0xf; - type = "L210"; break; case AURORA_CACHE_ID: - sync_reg_offset = AURORA_SYNC_REG; ways = (aux >> 13) & 0xf; ways = 2 << ((ways + 1) >> 2); - way_size_shift = AURORA_WAY_SIZE_SHIFT; - type = "Aurora"; break; + default: /* Assume unknown chips have 8 ways */ ways = 8; - type = "L2x0 series"; break; } l2x0_way_mask = (1 << ways) - 1; /* - * L2 cache Size = Way size * Number of ways + * way_size_0 is the size that a way_size value of zero would be + * given the calculation: way_size = way_size_0 << way_size_bits. + * So, if way_size_bits=0 is reserved, but way_size_bits=1 is 16k, + * then way_size_0 would be 8k. + * + * L2 cache size = number of ways * way size. */ - way_size = (aux & L2X0_AUX_CTRL_WAY_SIZE_MASK) >> 17; - way_size = 1 << (way_size + way_size_shift); + way_size_bits = (aux & L2C_AUX_CTRL_WAY_SIZE_MASK) >> + L2C_AUX_CTRL_WAY_SIZE_SHIFT; + l2x0_size = ways * (data->way_size_0 << way_size_bits); - l2x0_size = ways * way_size * SZ_1K; + fns = data->outer_cache; + fns.write_sec = outer_cache.write_sec; + if (data->fixup) + data->fixup(l2x0_base, cache_id, &fns); /* - * Check if l2x0 controller is already enabled. - * If you are booting from non-secure mode - * accessing the below registers will fault. + * Check if l2x0 controller is already enabled. If we are booting + * in non-secure mode accessing the below registers will fault. */ - if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { - /* Make sure that I&D is not locked down when starting */ - l2x0_unlock(cache_id); + if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) + data->enable(l2x0_base, aux, data->num_lock); - /* l2x0 controller is disabled */ - writel_relaxed(aux, l2x0_base + L2X0_AUX_CTRL); + outer_cache = fns; - l2x0_inv_all(); - - /* enable L2X0 */ - writel_relaxed(L2X0_CTRL_EN, l2x0_base + L2X0_CTRL); - } + /* + * It is strange to save the register state before initialisation, + * but hey, this is what the DT implementations decided to do. + */ + if (data->save) + data->save(l2x0_base); /* Re-read it in case some bits are reserved. */ aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); - /* Save the value for resuming. */ - l2x0_saved_regs.aux_ctrl = aux; + pr_info("%s cache controller enabled, %d ways, %d kB\n", + data->type, ways, l2x0_size >> 10); + pr_info("%s: CACHE_ID 0x%08x, AUX_CTRL 0x%08x\n", + data->type, cache_id, aux); +} + +void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask) +{ + const struct l2c_init_data *data; + u32 cache_id; + + l2x0_base = base; + + cache_id = readl_relaxed(base + L2X0_CACHE_ID); + + switch (cache_id & L2X0_CACHE_ID_PART_MASK) { + default: + case L2X0_CACHE_ID_PART_L210: + data = &l2c210_data; + break; - if (!of_init) { - outer_cache.inv_range = l2x0_inv_range; - outer_cache.clean_range = l2x0_clean_range; - outer_cache.flush_range = l2x0_flush_range; - outer_cache.sync = l2x0_cache_sync; - outer_cache.flush_all = l2x0_flush_all; - outer_cache.inv_all = l2x0_inv_all; - outer_cache.disable = l2x0_disable; + case L2X0_CACHE_ID_PART_L220: + data = &l2c220_data; + break; + + case L2X0_CACHE_ID_PART_L310: + data = &l2c310_init_fns; + break; } - pr_info("%s cache controller enabled\n", type); - pr_info("l2x0: %d ways, CACHE_ID 0x%08x, AUX_CTRL 0x%08x, Cache size: %d kB\n", - ways, cache_id, aux, l2x0_size >> 10); + __l2c_init(data, aux_val, aux_mask, cache_id); } #ifdef CONFIG_OF static int l2_wt_override; +/* Aurora don't have the cache ID register available, so we have to + * pass it though the device tree */ +static u32 cache_id_part_number_from_dt; + +static void __init l2x0_of_parse(const struct device_node *np, + u32 *aux_val, u32 *aux_mask) +{ + u32 data[2] = { 0, 0 }; + u32 tag = 0; + u32 dirty = 0; + u32 val = 0, mask = 0; + + of_property_read_u32(np, "arm,tag-latency", &tag); + if (tag) { + mask |= L2X0_AUX_CTRL_TAG_LATENCY_MASK; + val |= (tag - 1) << L2X0_AUX_CTRL_TAG_LATENCY_SHIFT; + } + + of_property_read_u32_array(np, "arm,data-latency", + data, ARRAY_SIZE(data)); + if (data[0] && data[1]) { + mask |= L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK | + L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK; + val |= ((data[0] - 1) << L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT) | + ((data[1] - 1) << L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT); + } + + of_property_read_u32(np, "arm,dirty-latency", &dirty); + if (dirty) { + mask |= L2X0_AUX_CTRL_DIRTY_LATENCY_MASK; + val |= (dirty - 1) << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT; + } + + *aux_val &= ~mask; + *aux_val |= val; + *aux_mask &= ~mask; +} + +static const struct l2c_init_data of_l2c210_data __initconst = { + .type = "L2C-210", + .way_size_0 = SZ_8K, + .num_lock = 1, + .of_parse = l2x0_of_parse, + .enable = l2c_enable, + .save = l2c_save, + .outer_cache = { + .inv_range = l2c210_inv_range, + .clean_range = l2c210_clean_range, + .flush_range = l2c210_flush_range, |